[pyoperators] 01/01: Imported Upstream version 0.12.13

Tue Sep 23 18:13:43 UTC 2014

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyoperators.

commit 0c06a0a823264c94e2e420b6b19e97ce056dd30e
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date:   Tue Sep 23 19:09:11 2014 +0100

    Imported Upstream version 0.12.13
---
 .coveragerc                             |   25 +
 PKG-INFO                                |  121 +
 README.rst                              |   95 +
 hooks.py                                |  231 +
 pyoperators/__init__.py                 |   43 +
 pyoperators/config.py                   |   32 +
 pyoperators/core.py                     | 4273 ++++++++++++++++++
 pyoperators/fft.py                      |  447 ++
 pyoperators/flags.py                    |  253 ++
 pyoperators/iterative/__init__.py       |   21 +
 pyoperators/iterative/algorithms.py     |  434 ++
 pyoperators/iterative/cg.py             |  215 +
 pyoperators/iterative/core.py           |  414 ++
 pyoperators/iterative/criterions.py     |  349 ++
 pyoperators/iterative/dli.py            |  285 ++
 pyoperators/iterative/lanczos.py        |   99 +
 pyoperators/iterative/linesearch.py     |  159 +
 pyoperators/iterative/optimize.py       |  214 +
 pyoperators/iterative/stopconditions.py |   81 +
 pyoperators/linear.py                   | 1582 +++++++
 pyoperators/memory.py                   |  326 ++
 pyoperators/nonlinear.py                |  923 ++++
 pyoperators/norms.py                    |   41 +
 pyoperators/operators_mpi.py            |  154 +
 pyoperators/operators_pywt.py           |  165 +
 pyoperators/proxy.py                    |  242 +
 pyoperators/rules.py                    |  426 ++
 pyoperators/utils/__init__.py           |    8 +
 pyoperators/utils/cythonutils.c         | 7374 +++++++++++++++++++++++++++++++
 pyoperators/utils/fake_MPI.py           |  154 +
 pyoperators/utils/misc.py               | 1082 +++++
 pyoperators/utils/mpi.py                |  214 +
 pyoperators/utils/testing.py            |  263 ++
 pyoperators/utils/ufuncs.c.src          |  387 ++
 pyoperators/warnings.py                 |   15 +
 setup.py                                |   47 +
 test/test_algorithms.py                 |  127 +
 test/test_broadcastingoperators.py      |  495 +++
 test/test_core.py                       | 2292 ++++++++++
 test/test_criterions.py                 |   74 +
 test/test_delete.py                     |   40 +
 test/test_dense.py                      |  151 +
 test/test_fft.py                        |  152 +
 test/test_flags.py                      |   23 +
 test/test_identity.py                   |   44 +
 test/test_iterative.py                  |   61 +
 test/test_linear.py                     |  295 ++
 test/test_memory.py                     |  105 +
 test/test_mpi.py                        |  143 +
 test/test_nbytes.py                     |   64 +
 test/test_nonlinear.py                  |  275 ++
 test/test_partition.py                  |  234 +
 test/test_proxy.py                      |  114 +
 test/test_pywt.py                       |   44 +
 test/test_reset.py                      |   27 +
 test/test_rules.py                      |  430 ++
 test/test_sparse.py                     |   94 +
 test/test_stopconditions.py             |   46 +
 test/test_str.py                        |  134 +
 test/test_ufuncs.py                     |   52 +
 test/test_utils.py                      |  529 +++
 test/test_zero.py                       |  160 +
 62 files changed, 27399 insertions(+)

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..b959328
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,25 @@
+[run]
+branch = True
+source = pyoperators
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no coverage
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
+    return NotImplemented
+
+[html]
+title = PyOperators Coverage Report
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..1ef7c7a
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,121 @@
+Metadata-Version: 1.1
+Name: pyoperators
+Version: 0.12.13
+Summary: Operators and solvers for high-performance computing.
+Home-page: http://pchanial.github.com/pyoperators
+Author: Pierre Chanial
+Author-email: pierre.chanial at gmail.com
+License: CeCILL-B
+Description: ===========
+        PyOperators
+        ===========
+        
+        The PyOperators package defines operators and solvers for high-performance computing. These operators are multi-dimensional functions with optimised and controlled memory management. If linear, they behave like matrices with a sparse storage footprint.
+        
+        Getting started
+        ===============
+        
+        To define an operator, one needs to define a direct function
+        which will replace the usual matrix-vector operation:
+        
+        >>> def f(x, out):
+        ...     out[...] = 2 * x
+        
+        Then, you can instantiate an ``Operator``:
+        
+        >>> A = pyoperators.Operator(direct=f, flags='symmetric')
+        
+        An alternative way to define an operator is to define a subclass:
+        
+        >>> from pyoperators import flags, Operator
+        ... @flags.symmetric
+        ... class MyOperator(Operator):
+        ...     def direct(self, x, out):
+        ...         out[...] = 2 * x
+        ...
+        ... A = MyOperator()
+        
+        This operator does not have an explicit shape, it can handle inputs of any shape:
+        
+        >>> A(np.ones(5))
+        array([ 2.,  2.,  2.,  2.,  2.])
+        >>> A(np.ones((2,3)))
+        array([[ 2.,  2.,  2.],
+               [ 2.,  2.,  2.]])
+        
+        By setting the ``symmetric`` flag, we ensure that A's transpose is A:
+        
+        >>> A.T is A
+        True
+        
+        For non-explicit shape operators, we get the corresponding dense matrix by specifying the input shape:
+        
+        >>> A.todense(shapein=2)
+        array([[2, 0],
+               [0, 2]])
+        
+        Operators do not have to be linear. Many operators are already `predefined <http://pchanial.github.io/pyoperators/2000/doc-operators/#list>`_, such as the ``IdentityOperator``, the ``DiagonalOperator`` or the nonlinear ``ClipOperator``.
+        
+        The previous ``A`` matrix could be defined more easily like this:
+        
+        >>> from pyoperators import I
+        >>> A = 2 * I
+        
+        where ``I`` is the identity operator with no explicit shape.
+        
+        Operators can be combined together by addition, element-wise multiplication or composition. Note that the operator ``*`` stands for matrix multiplication if the two operators are linear, or for element-wise multiplication otherwise:
+        
+        >>> from pyoperators import I, DiagonalOperator
+        >>> B = 2 * I + DiagonalOperator(range(3))
+        >>> B.todense()
+        array([[2, 0, 0],
+               [0, 3, 0],
+               [0, 0, 4]])
+        
+        Algebraic rules can easily be attached to operators. They are used to simplify expressions to speed up their execution. The ``B`` Operator has been reduced to:
+        
+        >>> B
+        DiagonalOperator(array([2, ..., 4], dtype=int64), broadcast='disabled', dtype=int64, shapein=3, shapeout=3)
+        
+        Many simplifications are available. For instance:
+        
+        >>> from pyoperators import Operator
+        >>> C = Operator(flags='idempotent,linear')
+        >>> C * C is C
+        True
+        >>> D = Operator(flags='involutary')
+        >>> D(D)
+        IdentityOperator()
+        
+        
+        Requirements
+        ============
+        
+        List of requirements:
+        
+        - python 2.6
+        - numpy >= 1.6
+        - scipy >= 0.9
+        
+        Optional requirements:
+        
+        - numexpr (>= 2.0 is better)
+        - PyWavelets : wavelet transforms
+        
+Keywords: scientific computing
+Platform: MacOS X
+Platform: Linux
+Platform: Solaris
+Platform: Unix
+Platform: Windows
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2 :: Only
+Classifier: Programming Language :: C
+Classifier: Programming Language :: Cython
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering
+Requires: numpy(>=1.6)
+Requires: scipy(>=0.9)
+Requires: pyfftw
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..4b12ff1
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,95 @@
+===========
+PyOperators
+===========
+
+The PyOperators package defines operators and solvers for high-performance computing. These operators are multi-dimensional functions with optimised and controlled memory management. If linear, they behave like matrices with a sparse storage footprint.
+
+Getting started
+===============
+
+To define an operator, one needs to define a direct function
+which will replace the usual matrix-vector operation:
+
+>>> def f(x, out):
+...     out[...] = 2 * x
+
+Then, you can instantiate an ``Operator``:
+
+>>> A = pyoperators.Operator(direct=f, flags='symmetric')
+
+An alternative way to define an operator is to define a subclass:
+
+>>> from pyoperators import flags, Operator
+... @flags.symmetric
+... class MyOperator(Operator):
+...     def direct(self, x, out):
+...         out[...] = 2 * x
+...
+... A = MyOperator()
+
+This operator does not have an explicit shape, it can handle inputs of any shape:
+
+>>> A(np.ones(5))
+array([ 2.,  2.,  2.,  2.,  2.])
+>>> A(np.ones((2,3)))
+array([[ 2.,  2.,  2.],
+       [ 2.,  2.,  2.]])
+
+By setting the ``symmetric`` flag, we ensure that A's transpose is A:
+
+>>> A.T is A
+True
+
+For non-explicit shape operators, we get the corresponding dense matrix by specifying the input shape:
+
+>>> A.todense(shapein=2)
+array([[2, 0],
+       [0, 2]])
+
+Operators do not have to be linear. Many operators are already `predefined <http://pchanial.github.io/pyoperators/2000/doc-operators/#list>`_, such as the ``IdentityOperator``, the ``DiagonalOperator`` or the nonlinear ``ClipOperator``.
+
+The previous ``A`` matrix could be defined more easily like this:
+
+>>> from pyoperators import I
+>>> A = 2 * I
+
+where ``I`` is the identity operator with no explicit shape.
+
+Operators can be combined together by addition, element-wise multiplication or composition. Note that the operator ``*`` stands for matrix multiplication if the two operators are linear, or for element-wise multiplication otherwise:
+
+>>> from pyoperators import I, DiagonalOperator
+>>> B = 2 * I + DiagonalOperator(range(3))
+>>> B.todense()
+array([[2, 0, 0],
+       [0, 3, 0],
+       [0, 0, 4]])
+
+Algebraic rules can easily be attached to operators. They are used to simplify expressions to speed up their execution. The ``B`` Operator has been reduced to:
+
+>>> B
+DiagonalOperator(array([2, ..., 4], dtype=int64), broadcast='disabled', dtype=int64, shapein=3, shapeout=3)
+
+Many simplifications are available. For instance:
+
+>>> from pyoperators import Operator
+>>> C = Operator(flags='idempotent,linear')
+>>> C * C is C
+True
+>>> D = Operator(flags='involutary')
+>>> D(D)
+IdentityOperator()
+
+
+Requirements
+============
+
+List of requirements:
+
+- python 2.6
+- numpy >= 1.6
+- scipy >= 0.9
+
+Optional requirements:
+
+- numexpr (>= 2.0 is better)
+- PyWavelets : wavelet transforms
diff --git a/hooks.py b/hooks.py
new file mode 100644
index 0000000..cc9ebde
--- /dev/null
+++ b/hooks.py
@@ -0,0 +1,231 @@
+"""
+The version number is obtained from git tags, branch and commit identifier.
+It has been designed for the following workflow:
+
+- git checkout master
+- modify, commit, commit
+- set version 0.1 in setup.py -> 0.1.dev03
+- modify, commit              -> 0.1.dev04
+- git checkout -b v0.1        -> 0.1.dev04
+- modify, commit              -> 0.1.pre01
+- modify, commit              -> 0.1.pre02
+- git tag 0.1                 -> 0.1
+- modify... and commit        -> 0.1.post01
+- modify... and commit        -> 0.1.post02
+- git tag 0.1.1               -> 0.1.1
+- modify... and commit        -> 0.1.1.post01
+- git checkout master         -> 0.1.dev04
+- set version=0.2 in setup.py -> 0.2.dev01
+- modify, commit              -> 0.2.dev02
+
+When working on the master branch, the dev number is the number of commits
+since the last branch of name "v[0-9.]+"
+
+"""
+import os
+import re
+import sys
+from numpy.distutils.command.build import build
+from numpy.distutils.command.build_ext import build_ext
+from numpy.distutils.command.sdist import sdist
+from numpy.distutils.core import Command
+from subprocess import call, Popen, PIPE
+from warnings import filterwarnings
+
+# Directory containing this file. If __file__ is not defined (NameError),
+# the directory of the running script is used instead.
+try:
+    root = os.path.dirname(os.path.abspath(__file__))
+except NameError:
+    root = os.path.dirname(os.path.abspath(sys.argv[0]))
+# Number of digits requested from git for abbreviated commit identifiers.
+ABBREV = 5
+# Pattern of the (local or origin) refs treated as release branches.
+BRANCH_REGEX = '^refs/(heads|remotes/origin)/v[0-9.]+$'
+
+
+def get_version(name, default):
+    version = get_version_git(default)
+    if version != '':
+        return version
+    return get_version_init_file(name) or default
+
+
+def get_version_git(default):
+    def run(cmd, cwd=root):
+        git = "git"
+        if sys.platform == "win32":
+            git = "git.cmd"
+        process = Popen([git] + cmd, cwd=cwd, stdout=PIPE, stderr=PIPE)
+        stdout, stderr = process.communicate()
+        if stderr != '':
+            raise RuntimeError(stderr)
+        if process.returncode != 0:
+            raise RuntimeError('Error code: {0}.'.format(process.returncode))
+        return stdout.strip()
+
+    def get_branches():
+        return run(['for-each-ref', '--sort=-committerdate', '--format=%(ref'
+                    'name)', 'refs/heads', 'refs/remotes/origin']).split('\n')
+
+    def get_branch_name():
+        return run(['rev-parse', '--abbrev-ref', 'HEAD'])
+
+    def get_description():
+        try:
+            description = run([
+                'describe', '--tags', '--abbrev={0}'.format(ABBREV)])
+        except RuntimeError:
+            description = run([
+                'describe', '--tags', '--abbrev={0}'.format(ABBREV),
+                '--always']).split('-')
+            return '', '', description[0], '-' + description[1]
+        regex = r"""^
+        (?P<tag>.*?)
+        (?:-
+            (?P<rev>\d+)-g
+            (?P<commit>[0-9a-f]{5,40})
+        )?
+        $"""
+        m = re.match(regex, description, re.VERBOSE)
+        tag, rev, commit = (m.group(_) for _ in 'tag,rev,commit'.split(','))
+        rev = int(rev)
+        return tag, rev, commit
+
+    def get_rev_since_branch(branch):
+        common = run(['merge-base', 'HEAD', branch])
+        return int(run(['rev-list', '--count', 'HEAD', '^' + common]))
+
+    def get_dirty():
+        return '-dirty' if run(['diff-index', 'HEAD']) else ''
+
+    def get_master_rev(default):
+        branches = get_branches()
+        for branch in branches:
+            # filter branches according to BRANCH_REGEX
+            if not re.match(BRANCH_REGEX, branch):
+                continue
+            rev = get_rev_since_branch(branch)
+            if rev > 0:
+                return rev
+        return int(run(['rev-list', '--count', 'HEAD']))
+
+    try:
+        run(['rev-parse', '--is-inside-work-tree'])
+    except (OSError, RuntimeError):
+        return ''
+
+    dirty = get_dirty()
+
+    # check if HEAD is tagged
+    try:
+        return run(['describe', '--tags', '--candidates=0']) + dirty
+    except RuntimeError:
+        pass
+
+    # if the current branch is master, look up the last release branch
+    # to get the dev number
+    branch = get_branch_name()
+    if get_branch_name() == 'master':
+        rev = get_master_rev(default)
+        commit = run(['rev-parse', '--short={}'.format(ABBREV), 'HEAD'])
+        if default != '':
+            return '{}.dev{:02}-g{}{}'.format(default, rev, commit, dirty)
+        return str(rev) + dirty
+
+    isrelease = re.match('^v[0-9.]+$', branch) is not None
+    rev_master = get_rev_since_branch('master')
+    tag, rev_tag, commit = get_description()
+    if isrelease:
+        version = tag
+    else:
+        version = branch
+    if rev_tag > 0:
+        if rev_master < rev_tag:
+            version += '.pre{:02}'.format(rev_master)
+        else:
+            version += '.post{:02}'.format(rev_tag)
+        version += '-g' + commit
+    return version + dirty
+
+
+def get_version_init_file(name):
+    try:
+        f = open(os.path.join(name, '__init__.py')).read()
+    except IOError:
+        return ''
+    m = re.search(r"__version__ = '(.*)'", f)
+    if m is None:
+        return ''
+    return m.groups()[0]
+
+
+def write_version(name, version):
+    try:
+        init = open(os.path.join(root, name, '__init__.py.in')).readlines()
+    except IOError:
+        return
+    init += ['\n', '__version__ = ' + repr(version) + '\n']
+    open(os.path.join(root, name, '__init__.py'), 'w').writelines(init)
+
+
+class BuildCommand(build):
+    def run(self):
+        write_version(self.distribution.get_name(),
+                      self.distribution.get_version())
+        build.run(self)
+
+
+class SDistCommand(sdist):
+    def make_release_tree(self, base_dir, files):
+        write_version(self.distribution.get_name(),
+                      self.distribution.get_version())
+        initfile = os.path.join(self.distribution.get_name(), '__init__.py')
+        if initfile not in files:
+            files.append(initfile)
+        sdist.make_release_tree(self, base_dir, files)
+
+
+class CoverageCommand(Command):
+    description = "run the package coverage"
+    user_options = [('file=', 'f', 'restrict coverage to a specific file'),
+                    ('erase', None,
+                     'erase previously collected coverage before run'),
+                    ('html-dir=', None,
+                     'Produce HTML coverage information in dir')]
+
+    def run(self):
+        cmd = ['nosetests', '--with-coverage', '--cover-html',
+               '--cover-package=' + self.distribution.get_name(),
+               '--cover-html-dir=' + self.html_dir]
+        if self.erase:
+            cmd.append('--cover-erase')
+        call(cmd + [self.file])
+
+    def initialize_options(self):
+        self.file = 'test'
+        self.erase = 0
+        self.html_dir = 'htmlcov'
+
+    def finalize_options(self):
+        pass
+
+
+class TestCommand(Command):
+    description = "run the test suite"
+    user_options = [('file=', 'f', 'restrict test to a specific file')]
+
+    def run(self):
+        call(['nosetests', self.file])
+
+    def initialize_options(self):
+        self.file = 'test'
+
+    def finalize_options(self):
+        pass
+
+
+def get_cmdclass():
+    return {'build': BuildCommand,
+            'build_ext': build_ext,
+            'coverage': CoverageCommand,
+            'sdist': SDistCommand,
+            'test': TestCommand}
+
+filterwarnings('ignore', "Unknown distribution option: 'install_requires'")
diff --git a/pyoperators/__init__.py b/pyoperators/__init__.py
new file mode 100644
index 0000000..4f8c422
--- /dev/null
+++ b/pyoperators/__init__.py
@@ -0,0 +1,43 @@
+"""
+The PyOperators package contains the following modules or packages:
+
+- core : defines the Operator class
+- linear : defines standard linear operators
+- nonlinear : defines non-linear operators (such as thresholding or rounding)
+- iterative : defines iterative algorithms working with operators
+- utils : miscellaneous routines
+- operators_mpi : MPI operators (even if mpi4py is not present)
+- operators_pywt : (optional) loaded if PyWavelets is present.
+
+"""
+
+from .utils import *
+from .utils.mpi import MPI
+from .core import *
+from .fft import *
+from .linear import *
+from .nonlinear import *
+from .operators_mpi import *
+from .proxy import *
+from . import iterative
+from .iterative import pcg
+from .rules import rule_manager
+from .warnings import PyOperatorsWarning
+
+try:
+    from .operators_pywt import *
+except(ImportError):
+    pass
+
+import types
+# Export every name imported so far that does not start with an underscore
+# and that is not a module.
+__all__ = [f for f in dir() if f[0] != '_' and not isinstance(eval(f),
+           types.ModuleType)]
+
+# Remove the helper names used to compute __all__ from the namespace.
+del f  #XXX not necessary with Python3
+del types
+
+# Convenience instances. They are created after __all__ is computed, so
+# they are not listed in it.
+I = IdentityOperator()
+O = ZeroOperator()
+X = Variable('X')
+
+__version__ = '0.12.13'
diff --git a/pyoperators/config.py b/pyoperators/config.py
new file mode 100644
index 0000000..b7efa01
--- /dev/null
+++ b/pyoperators/config.py
@@ -0,0 +1,32 @@
+import os
+import site
+from .warnings import warn, PyOperatorsWarning
+
+def getenv(key):
+    val = os.getenv(key, '').strip()
+    if len(val) == 0:
+        return False
+    try:
+        val = int(val)
+    except ValueError:
+        warn("Invalid environment variable {0}='{1}'".format(key, val))
+        return False
+    return bool(val)
+
+LOCAL_PATH = os.getenv('PYOPERATORSPATH')
+if LOCAL_PATH is None:
+    LOCAL_PATH = os.path.join(site.USER_BASE, 'share', 'pyoperators')
+if not os.path.exists(LOCAL_PATH):
+    try:
+        os.makedirs(LOCAL_PATH)
+    except IOError:
+        warn("User path '{0}' cannot be created.".format(LOCAL_PATH),
+             PyOperatorsWarning)
+elif not os.access(LOCAL_PATH, os.W_OK):
+    warn("User path '{0}' is not writable.".format(LOCAL_PATH),
+         PyOperatorsWarning)
+
+PYOPERATORS_NO_MPI = getenv('PYOPERATORS_NO_MPI')
+PYOPERATORS_VERBOSE = getenv('PYOPERATORS_VERBOSE')
+
+del os, site, PyOperatorsWarning, warn, getenv
diff --git a/pyoperators/core.py b/pyoperators/core.py
new file mode 100644
index 0000000..36f20d0
--- /dev/null
+++ b/pyoperators/core.py
@@ -0,0 +1,4273 @@
+#coding: utf-8
+"""
+The core module defines the Operator class. Operators are functions
+which can be added, composed or multiplied by a scalar. See the
+Operator docstring for more information.
+"""
+
+from __future__ import absolute_import, division, print_function
+import copy
+import inspect
+import numpy as np
+import operator
+import pyoperators as po
+import scipy.sparse as sp
+import types
+from collections import MutableMapping, MutableSequence, MutableSet
+from itertools import groupby, izip
+from .flags import (
+    Flags, idempotent, inplace, involutary, linear, real,
+    square, symmetric, update_output)
+from .memory import (
+    empty, garbage_collect, iscompatible, zeros, MemoryPool, MEMORY_ALIGNMENT)
+from .utils import (
+    all_eq, first_is_not, inspect_special_values, isalias, isclassattr,
+    isscalarlike, merge_none, ndarraywrap, operation_assignment, product,
+    renumerate, strenum, strplural, strshape, Timer, tointtuple)
+from .utils.mpi import MPI
+
+__all__ = [
+    'Operator',
+    'AdditionOperator',
+    'BlockColumnOperator',
+    'BlockDiagonalOperator',
+    'BlockRowOperator',
+    'BlockSliceOperator',
+    'CompositionOperator',
+    'ConstantOperator',
+    'DiagonalOperator',
+    'GroupOperator',
+    'HomothetyOperator',
+    'IdentityOperator',
+    'MultiplicationOperator',
+    'ReshapeOperator',
+    'ReductionOperator',
+    'Variable',
+    'ZeroOperator',
+    'asoperator',
+    'timer_operator',
+]
+
+DEBUG = 0
+
+OPERATOR_ATTRIBUTES = ['attrin', 'attrout', 'classin', 'classout', 'commin',
+                       'commout', 'reshapein', 'reshapeout', 'shapein',
+                       'shapeout', 'toshapein', 'toshapeout', 'validatein',
+                       'validateout', 'dtype', 'flags']
+
+
+class Operator(object):
+    """
+    Operator top-level class.
+
+    The operator class is a function factory.
+
+    Attributes
+    ----------
+    attrin/attrout : dict or function
+        If attrout is a dict, its items are added to the output. If it is
+        a function, it takes the input attributes and returns the output attri-
+        butes. The attrin attribute is only used in the reversed direction.
+    classin/classout : ndarray subclass
+        The classout attribute sets the output class. The classin attribute is
+        only used in the reversed direction.
+    commin/commout : mpi4py.Comm
+        The commin and commout attributes store the MPI communicator for the
+        input and output.
+    reshapein/reshapeout : function
+        The reshapein function takes the input shape and returns the output
+        shape. The method is used for implicit output shape operators.
+        The reshapeout function does the opposite.
+    shapein : tuple
+        Operator's input shape.
+    shapeout : tuple
+        Operator's output shape.
+    toshapein/toshapeout : function
+        The toshapein function reshapes a vector into a multi-dimensional array
+        compatible with the operator's input shape. The toshapeout method is
+        only used in the reversed direction.
+    validatein/validateout : function
+        The validatein function raises a ValueError exception if the input
+        shape is not valid. The validateout function is used in the reversed
+        direction
+    flags : Flags
+        The flags describe properties of the operator.
+    dtype : dtype
+        The operator's dtype is used to determine the dtype of its output.
+        Unless it is None, the output dtype is the common type of the operator
+        and input dtypes. If dtype is None, the output dtype is the input
+        dtype.
+    C : Operator
+        Conjugate operator.
+    T : Operator
+        Transpose operator.
+    H : Operator
+        Adjoint operator.
+    I : Operator
+        Inverse operator.
+
+    """
+    def __init__(self, direct=None, transpose=None, adjoint=None,
+                 conjugate=None, inverse=None, inverse_transpose=None,
+                 inverse_adjoint=None, inverse_conjugate=None,
+                 attrin={}, attrout={}, classin=None, classout=None,
+                 commin=None, commout=None, reshapein=None, reshapeout=None,
+                 shapein=None, shapeout=None, toshapein=None, toshapeout=None,
+                 validatein=None, validateout=None, dtype=None, flags={},
+                 name=None):
+        for method, name_ in zip(
+            (direct, transpose, adjoint, conjugate, inverse,
+             inverse_transpose, inverse_adjoint, inverse_conjugate),
+            ('direct', 'transpose', 'adjoint', 'conjugate', 'inverse',
+             'inverse_transpose', 'inverse_adjoint', 'inverse_conjugate')):
+            if method is not None:
+                if not hasattr(method, '__call__'):
+                    raise TypeError("The method '%s' is not callable." % name_)
+                # should also check that the method has at least two arguments
+                setattr(self, name_, method)
+
+        self._init_dtype(dtype)
+        self._init_flags(flags)
+        self._init_rules()
+        self._init_name(name)
+        self._init_inout(attrin, attrout, classin, classout, commin, commout,
+                         reshapein, reshapeout, shapein, shapeout, toshapein,
+                         toshapeout, validatein, validateout)
+
+    __name__ = None
+    dtype = None
+    flags = Flags()
+    rules = None
+
+    _C = None
+    _T = None
+    _H = None
+    _I = None
+
+    attrin = {}
+    attrout = {}
+    classin = None
+    classout = None
+    commin = None
+    commout = None
+    shapein = None
+    shapeout = None
+
+    def delete(self):
+        """
+        Delete an operator and its associated operators.
+
+        The operators are morphed into empty shell DeletedOperators and
+        a garbage collection may be triggered according to the operator
+        memory footprints.
+
+        """
+        if self._C is None:
+            operators = (self,)
+        else:
+            operators = (self, self._C, self._T, self._H, self._I, self._I._C,
+                         self._I._T, self._I._H)
+        for operator in operators:
+            nbytes = operator.nbytes
+            operator.__class__ = DeletedOperator
+            del operator.__dict__
+            garbage_collect(nbytes)
+
+    @property
+    def nbytes(self):
+        """
+        Approximate memory footprint.
+
+        """
+        return 0
+
+    def reshapein(self, shape):
+        """
+        Return the output shape given an input shape.
+
+        Parameter
+        ---------
+        shape : tuple
+           The input shape. It is guaranteed 1) not to be None although this
+           method returns None if and only if the operator's output shape
+           is unconstrained and 2) to be a tuple.
+
+        Note
+        ----
+        Implicit output shape operators do override this method.
+
+        """
+        return self.shapeout
+
+    def reshapeout(self, shape):
+        """
+        Return the input shape given an output shape.
+
+        Parameter
+        ---------
+        shape : tuple
+           The output shape. It is guaranteed 1) not to be None although this
+           method returns None if and only if the operator's input shape
+           is unconstrained and 2) to be a tuple.
+
+        Note
+        ----
+        Implicit input shape operators do override this method.
+
+        """
+        return self.shapein
+
+    def toshapein(self, v):
+        """
+        Reshape a vector into a multi-dimensional array compatible with
+        the operator's input shape.
+
+        """
+        if self.shapein is None:
+            raise ValueError("The operator '" + self.__name__ + "' does not ha"
+                             "ve an explicit shape.")
+        return v.reshape(self.shapein)
+
+    def toshapeout(self, v):
+        """
+        Reshape a vector into a multi-dimensional array compatible with
+        the operator's output shape.
+
+        """
+        if self.shapeout is None:
+            raise ValueError("The operator '" + self.__name__ + "' does not ha"
+                             "ve an explicit shape.")
+        return v.reshape(self.shapeout)
+
+    def propagate_attributes(self, cls, attr):
+        """
+        Propagate attributes according to operator's attrout. If the class
+        changes, class attributes are removed if they are not class attributes
+        of the new class.
+        """
+        if None not in (self.classout, cls) and self.classout is not cls:
+            for a in attr.keys():
+                if isclassattr(a, cls) and not isclassattr(a, self.classout):
+                    del attr[a]
+        if 'shape_global' in attr:
+            del attr['shape_global']
+        if isinstance(self.attrout, dict):
+            for k, v in self.attrout.items():
+                if isinstance(v, (MutableMapping, MutableSequence,
+                                  MutableSet)):
+                    if hasattr(v, 'copy'):
+                        v = v.copy()
+                    elif type(v) is list:
+                        v = list(v)
+                attr[k] = v
+        else:
+            self.attrout(attr)
+        return self.classout or cls
+
+    def propagate_commin(self, commin):
+        """
+        Propagate MPI communicator of the input to the operands.
+        Operands have the possibility to change during this step.
+
+        """
+        return self
+
+    def propagate_commout(self, commin):
+        """
+        Propagate MPI communicator of the output to the operands.
+        Operands have the possibility to change during this step.
+
+        """
+        return self
+
+    def validatein(self, shapein):
+        """
+        Check an input shape against the operator's input shape.
+
+        Nothing happens if the operator's input shape is not set or if it is
+        equal to `shapein`. Otherwise, a ValueError exception is raised.
+
+        """
+        expected = self.shapein
+        if expected is None or expected == shapein:
+            return
+        raise ValueError(
+            "The input shape '{0}' is incompatible with that of {1}: '{2}'"
+            ".".format(shapein, self.__name__, expected))
+
+    def validateout(self, shapeout):
+        """
+        Check an output shape against the operator's output shape.
+
+        Nothing happens if the operator's output shape is not set or if it is
+        equal to `shapeout`. Otherwise, a ValueError exception is raised.
+
+        """
+        expected = self.shapeout
+        if expected is None or expected == shapeout:
+            return
+        raise ValueError(
+            "The output shape '{0}' is incompatible with that of {1}: '{2"
+            "}'.".format(shapeout, self.__name__, expected))
+
+    # For the next methods, the following invariants always hold:
+    #    - input and output are not in the memory pool
+    #    - input and output are compatible with the operator's requirements
+    #      in terms of shape, contiguity and alignment.
+    # The direct method is None when the operator is not implemented:
+    # __call__ then raises a NotImplementedError.
+    direct = None
+
+    def conjugate(self, input, output):
+        """
+        Apply the complex conjugate of the operator.
+
+        The direct method is applied on the conjugate of the input, and the
+        output is then conjugated in-place. Inputs whose dtype is not
+        complex are passed unchanged to the direct method.
+
+        Parameters
+        ----------
+        input : ndarray
+            The input array. It is not modified.
+        output : ndarray
+            The output array, overwritten by the result.
+
+        """
+        if input.dtype.kind == 'c':
+            # The conjugated input is stored in a buffer borrowed from the
+            # memory pool. The direct method has to be called while the
+            # buffer is still borrowed: the inputs of the direct method must
+            # not be in the memory pool, otherwise the buffer could be handed
+            # out again as a temporary and be overwritten during the call.
+            # NOTE(review): this assumes that _pool.get gives the buffer back
+            # to the pool when the 'with' block exits -- confirm.
+            with _pool.get(input.shape, input.dtype) as buf:
+                np.conjugate(input, buf)
+                self.direct(buf, output)
+        else:
+            self.direct(input, output)
+        np.conjugate(output, output)
+
+    # Optional implementations of the associated operators, with the same
+    # calling convention as the direct method. They are read by
+    # _generate_associated_operators to build the .T, .H, .I, .I.C, .I.T and
+    # .I.H operators.
+    transpose = None
+    adjoint = None
+    inverse = None
+    inverse_conjugate = None
+    inverse_transpose = None
+    inverse_adjoint = None
+
+    def __call__(self, x, out=None, operation=operation_assignment,
+                 preserve_input=True):
+        """
+        Apply the operator to an input, or compose it with an operator.
+
+        Parameters
+        ----------
+        x : Operator or array-like
+            If it is an Operator, no computation is done: the composition
+            of this operator and `x` is returned (or the operator itself if
+            it is idempotent and `x` is this very operator). Otherwise, it
+            is the input on which the direct method is applied.
+        out : ndarray, optional
+            The output placeholder. It is required if `operation` is not
+            operation_assignment.
+        operation : optional
+            The operation used to update the output. A value other than
+            operation_assignment requires the 'update_output' flag.
+        preserve_input : boolean, optional
+            If False, and if `x` is an ndarray that is not the buffer
+            actually used as input, `x` is passed to _pool.set_if during the
+            computation (presumably to be reused as a temporary -- confirm).
+
+        Returns
+        -------
+        The output, as an instance of the class returned by
+        propagate_attributes, with the propagated attributes set.
+
+        Raises
+        ------
+        NotImplementedError
+            If the operator has no direct method.
+        ValueError
+            If `operation` is not the assignment and the operator does not
+            have the 'update_output' flag or `out` is not specified.
+
+        """
+
+        if isinstance(x, Operator):
+            if self.flags.idempotent and self is x:
+                return self
+            return CompositionOperator([self, x])
+
+        if self.direct is None:
+            raise NotImplementedError('Call to ' + self.__name__ + ' is not im'
+                                      'plemented.')
+
+        if operation is not operation_assignment:
+            if not self.flags.update_output:
+                raise ValueError(
+                    'This operator does not handle inplace reductions.')
+            if out is None:
+                raise ValueError(
+                    'The output placeholder is not specified.')
+
+        with timer_operator:
+            # get valid input and output
+            # (i, o: arrays passed to the direct method; i_, o_: temporaries
+            # that are added back to the memory pool below, or None)
+            i, i_, o, o_ = self._validate_arguments(x, out)
+
+            # perform computation
+            reuse_x = isinstance(x, np.ndarray) and not isalias(x, i) and \
+                not preserve_input
+            reuse_out = isinstance(out, np.ndarray) and not isalias(out, i) \
+                and not isalias(out, o)
+
+            with _pool.set_if(reuse_x, x):
+                with _pool.set_if(reuse_out, out):
+                    if self.flags.update_output:
+                        self.direct(i, o, operation=operation)
+                    else:
+                        self.direct(i, o)
+
+            # add back temporaries for input & output in the memory pool
+            if i_ is not None:
+                _pool.add(i_)
+            if out is None:
+                out = o
+            elif not isalias(out, o):
+                # the result has been computed in a temporary: copy it over
+                out[...] = o
+                _pool.add(o_)
+
+            # copy over class and attributes
+            cls = x.__class__ if isinstance(x, np.ndarray) else np.ndarray
+            attr = x.__dict__.copy() if hasattr(x, '__dict__') else {}
+            cls = self.propagate_attributes(cls, attr)
+            if cls is np.ndarray and len(attr) > 0:
+                # a plain ndarray cannot hold attributes
+                cls = ndarraywrap
+            # NOTE(review): 'out' has already been set to 'o' above if it was
+            # None, so the next test looks redundant.
+            if out is None:
+                out = o
+            if type(out) is np.ndarray:
+                if cls is np.ndarray:
+                    return out
+                out = out.view(cls)
+            elif type(out) is not cls:
+                out.__class__ = cls
+                # NOTE(review): __array_finalize__ is called without the
+                # 'obj' argument that ndarray subclasses usually expect --
+                # confirm that this is intended.
+                if out.__array_finalize__ is not None:
+                    out.__array_finalize__()
+
+            # we cannot simply update __dict__, because of properties.
+            # the iteration is sorted by key, so that attributes beginning with
+            # an underscore are set first.
+            for k in sorted(attr.keys()):
+                setattr(out, k, attr[k])
+            return out
+
+    @property
+    def shape(self):
+        """
+        The shape of the operator seen as a matrix: the number of elements
+        of the output, followed by the number of elements of the input.
+
+        """
+        nrows = product(self.shapeout)
+        ncols = product(self.shapein)
+        return (nrows, ncols)
+
+    def todense(self, shapein=None, shapeout=None, inplace=False):
+        """
+        Output the dense representation of the Operator as a ndarray.
+
+        The direct method is applied to each vector of the canonical basis
+        of the input space; each result makes up one column of the returned
+        matrix.
+
+        Parameters
+        ----------
+        shapein : tuple of ints, (default: None)
+            The operator's input shape if it is not explicit.
+        shapeout : tuple of ints (default: None)
+            The operator's output shape if it is not explicit.
+        inplace : boolean
+            For testing purposes only. By default, this method uses
+            out-of-place operations that directly fill the output array.
+            By setting inplace to True, one can test in-place operations, at
+            the cost of additional copies.
+
+        Returns
+        -------
+        A 2-dimensional array of shape (m, n), where m and n are the number
+        of elements of the output and of the input. The dtype is int if the
+        operator's dtype is None.
+
+        Raises
+        ------
+        ValueError
+            If the input or output shape cannot be determined.
+
+        """
+        shapein, shapeout = self._validate_shapes(shapein, shapeout)
+        if shapein is None:
+            raise ValueError("The operator's input shape is not explicit. Spec"
+                             "ify it with the 'shapein' keyword.")
+        if shapeout is None:
+            raise ValueError("The operator's output shape is not explicit. Spe"
+                             "cify it with the 'shapeout' keyword.")
+        m, n = product(shapeout), product(shapein)
+        dtype = int if self.dtype is None else self.dtype
+        # the matrix is filled row by row as its transpose, so that each
+        # output is written in a contiguous chunk of memory
+        d = np.empty((n, m), dtype)
+
+        if not inplace or not self.flags.inplace:
+            v = zeros(n, dtype)
+            if not self.flags.aligned_output:
+                # no alignment requirement: the direct method writes straight
+                # into a row of d
+                for i in xrange(n):
+                    v[i] = 1
+                    o = d[i, :].reshape(shapeout)
+                    self.direct(v.reshape(shapein), o)
+                    v[i] = 0
+            else:
+                # the output is computed in a buffer obtained from 'empty'
+                # (presumably aligned -- confirm) and then copied into d
+                o = empty(shapeout, dtype)
+                for i in xrange(n):
+                    v[i] = 1
+                    self.direct(v.reshape(shapein), o)
+                    d[i, :] = o.ravel()
+                    v[i] = 0
+            return d.T
+
+        # test in-place mechanism
+        # (the input v and the output w are views of the same buffer u)
+        u = empty(max(m, n), dtype)
+        v = u[:n]
+        w = u[:m]
+        for i in xrange(n):
+            v[:] = 0
+            v[i] = 1
+            self.direct(v.reshape(shapein), w.reshape(shapeout))
+            d[i, :] = w
+        return d.T
+
+    def matvec(self, x, out=None):
+        """
+        Apply the operator to a vector and return the result as a vector.
+
+        The input (and the output placeholder, if any) is reshaped with
+        toshapein (toshapeout) before the operator is called, and the result
+        is flattened. ndarray arguments are asserted to be contiguous.
+
+        """
+        if isinstance(x, np.ndarray):
+            assert x.flags.contiguous
+        if out is not None:
+            assert isinstance(out, np.ndarray) and out.flags.contiguous
+        vector = self.toshapein(x)
+        placeholder = None if out is None else self.toshapeout(out)
+        result = self.__call__(vector, out=placeholder)
+        return result.ravel()
+
+    def rmatvec(self, x, out=None):
+        """ Apply the transpose of the operator to a vector (see matvec). """
+        transposed = self.T
+        return transposed.matvec(x, out=out)
+
+    def set_rule(self, subjects, predicate, operation=None):
+        """
+        Add a rule to the rule list, taking care of duplicates and priorities.
+        Class-matching rules have a lower priority than the others.
+
+        A rule whose subjects are already in the list replaces the existing
+        rule, at the same position.
+
+        Parameters
+        ----------
+        subjects : str
+            See UnaryRule and BinaryRule documentation.
+        predicate : str
+            See UnaryRule and BinaryRule documentation.
+        operation : CompositeOperator sub class
+            Operation to which applies the rule. It can be:
+                - None, for unary rules
+                - CompositionOperator
+                - AdditionOperator
+                - MultiplicationOperator.
+
+        Raises
+        ------
+        ValueError
+            If the rule has more than two subjects, or if it is binary and
+            the operation is not specified.
+
+        """
+        # Handle first the case of multiple subclass matching rules
+        # (one rule is set for each element of the inner sequence, which is
+        # traversed in reverse order)
+        if isinstance(subjects, (list, tuple)) and len(subjects) == 2:
+            if isinstance(subjects[0], (list, tuple)):
+                for s in subjects[0][::-1]:
+                    self.set_rule((s, subjects[1]), predicate,
+                                  operation=operation)
+                return
+            if isinstance(subjects[1], (list, tuple)):
+                for s in subjects[1][::-1]:
+                    self.set_rule((subjects[0], s), predicate,
+                                  operation=operation)
+                return
+
+        rule = po.rules.Rule(subjects, predicate)
+
+        if len(rule.subjects) > 2:
+            raise ValueError('Only unary and binary rules are allowed.')
+
+        if operation is None and len(rule.subjects) == 2:
+            raise ValueError('The operation is not specified.')
+
+        # get the rule list for the specified operation
+        if operation is None:
+            if None not in self.rules:
+                self.rules[None] = []
+            rules = self.rules[None]
+        elif issubclass(operation, CommutativeCompositeOperator):
+            # for commutative operations, the rules are stored in a single
+            # list, with the subject '.' put first
+            if rule.subjects[-1] == '.':
+                rule.subjects = rule.subjects[::-1]
+                rule.reference = 0
+            if operation not in self.rules:
+                self.rules[operation] = []
+            rules = self.rules[operation]
+        else:
+            # otherwise, there is one list for each value of rule.reference:
+            # 'left' if it is 0, 'right' otherwise
+            if operation not in self.rules:
+                self.rules[operation] = {'left': [], 'right': []}
+            rules = self.rules[operation]['left' if rule.reference == 0 else
+                                          'right']
+        ids = [r.subjects for r in rules]
+
+        # first, try to override existing rule
+        try:
+            index = ids.index(rule.subjects)
+            rules[index] = rule
+            return
+        except ValueError:
+            pass
+
+        # class matching rules have lower priority
+        # (unary rules and rules whose other subject is a string that does
+        # not start with '{' are inserted at the beginning of the list)
+        if len(rule.subjects) == 1 or \
+           isinstance(rule.other, str) and not rule.other.startswith('{'):
+            rules.insert(0, rule)
+            return
+
+        # search for subclass rules
+        # (index of the first rule whose other subject is a class; the rule
+        # is appended if there is none)
+        for index, r in enumerate(rules):
+            if isinstance(r.other, type):
+                break
+        else:
+            rules.append(rule)
+            return
+
+        # insert the rule after more specific ones
+        # (before the first class of which it is a subclass, or else after
+        # the last of its subclasses, or else at position 'index')
+        cls = rule.other
+        classes = [r.other for r in rules[index:]]
+        is_subclass = [issubclass(cls, c) for c in classes]
+        is_supclass = [issubclass(c, cls) for c in classes]
+        try:
+            index2 = is_subclass.index(True)
+        except ValueError:
+            try:
+                index2 = len(is_supclass) - is_supclass[::-1].index(True)
+            except ValueError:
+                index2 = 0
+        rules.insert(index + index2, rule)
+
+    def del_rule(self, subjects, operation=None):
+        """
+        Delete an operator rule.
+
+        If the rule does not exist, a ValueError exception is raised.
+
+        Parameters
+        ----------
+        subjects : str
+            The subjects of the rule to be deleted.
+        operation : CompositeOperator sub class
+            Operation to which applies the rule to be deleted. It can be:
+            CompositionOperator, AdditionOperator and MultiplicationOperator.
+            For unary rules, the value must be None.
+
+        Raises
+        ------
+        ValueError
+            If more than two subjects are given, if the rule is binary and
+            the operation is not specified, if the operation has no rules or
+            if the rule does not exist.
+
+        """
+        subjects = po.rules.Rule._split_subject(subjects)
+        if len(subjects) > 2:
+            raise ValueError('Only unary and binary rules are allowed.')
+        if operation is None and len(subjects) == 2:
+            raise ValueError('The operation is not specified.')
+        if operation not in self.rules:
+            # the message about unary rules only makes sense if a unary rule
+            # has been requested
+            if operation is None:
+                raise ValueError('There is no unary rule.')
+            # the operation is a class: report its own name, not the name of
+            # its metaclass
+            name = getattr(operation, '__name__', type(operation).__name__)
+            raise ValueError("The operation '{0}' has no rules.".format(name))
+        rules = self.rules[operation]
+        if operation is not None:
+            right = subjects[-1] == '.'
+            if issubclass(operation, CommutativeCompositeOperator):
+                # for commutative operations, the subject '.' is stored first
+                if right:
+                    subjects = subjects[::-1]
+            else:
+                rules = rules['right' if right else 'left']
+        # list.index raises the ValueError if the rule does not exist
+        index = [r.subjects for r in rules].index(subjects)
+        del rules[index]
+
+    @property
+    def C(self):
+        """
+        The complex-conjugate of the operator. The associated operators are
+        generated on first access.
+        """
+        if self._C is not None:
+            return self._C
+        self._generate_associated_operators()
+        return self._C
+
+    @property
+    def T(self):
+        """
+        The transpose of the operator. The associated operators are
+        generated on first access.
+        """
+        if self._T is not None:
+            return self._T
+        self._generate_associated_operators()
+        return self._T
+
+    @property
+    def H(self):
+        """
+        The adjoint of the operator. The associated operators are generated
+        on first access.
+        """
+        if self._H is not None:
+            return self._H
+        self._generate_associated_operators()
+        return self._H
+
+    @property
+    def I(self):
+        """
+        The inverse of the operator. The associated operators are generated
+        on first access.
+        """
+        if self._I is not None:
+            return self._I
+        self._generate_associated_operators()
+        return self._I
+
+    def copy(self):
+        """ Return a copy of the operator, as made by copy.copy. """
+        duplicate = copy.copy(self)
+        return duplicate
+
+    @staticmethod
+    def _find_common_type(dtypes):
+        """
+        Return the common dtype of a sequence of dtypes, ignoring the None
+        entries. None is returned if no dtype is left.
+
+        """
+        # NOTE(review): np.find_common_type is deprecated in recent numpy
+        # releases.
+        known = [d for d in dtypes if d is not None]
+        if not known:
+            return None
+        return np.find_common_type(known, [])
+
+    def _generate_associated_operators(self):
+        """
+        Compute at once the conjugate, transpose, adjoint and inverse operators
+        of the instance and of themselves.
+
+        Each of the operators C, T, H, I, I.C, I.T and I.H is obtained from
+        the first applicable source: an identity implied by the flags (in
+        which case an already available operator is reused), a unary rule,
+        the dedicated method of the instance (transpose, adjoint, ...), or
+        the conjugate of another associated operator. The operators are then
+        cross-referenced through their _C, _T, _H and _I attributes.
+
+        """
+        # unary rules, indexed by their subject ('C', 'T', 'H', 'I', ...)
+        rules = dict((r.subjects[0], r) for r in self.rules.get(None, {}))
+        flags = self.flags
+
+        # NOTE(review): _copy_direct and _copy_reverse are defined elsewhere.
+        # They presumably carry over the input/output handling of the
+        # operator, as is or swapped -- confirm.
+
+        # conjugate
+        if flags.real:
+            C = self
+        elif 'C' in rules:
+            C = _copy_direct(self, rules['C'](self))
+        else:
+            C = _copy_direct(
+                self, Operator(direct=self.conjugate,
+                               name=self.__name__ + '.C',
+                               flags={'linear': flags.linear,
+                                      'symmetric': flags.symmetric,
+                                      'hermitian': flags.hermitian,
+                                      'idempotent': flags.idempotent,
+                                      'involutary': flags.involutary,
+                                      'orthogonal': flags.orthogonal,
+                                      'unitary': flags.unitary}))
+
+        # flags carried over to the transpose and the adjoint
+        new_flags = {
+            'linear': flags.linear,
+            'idempotent': flags.idempotent,
+            'involutary': flags.involutary,
+            'orthogonal': flags.orthogonal,
+            'unitary': flags.unitary}
+
+        # transpose (it may be left to None until the adjoint is known)
+        if flags.symmetric:
+            T = self
+        elif 'T' in rules:
+            T = _copy_reverse(self, rules['T'](self))
+        elif flags.real and 'H' in rules:
+            T = _copy_reverse(self, rules['H'](self))
+        elif flags.orthogonal and 'I' in rules:
+            T = _copy_reverse(self, rules['I'](self))
+        elif self.transpose is not None:
+            T = _copy_reverse(
+                self, Operator(direct=self.transpose,
+                               name=self.__name__ + '.T', flags=new_flags))
+        else:
+            T = None
+
+        # adjoint (it may be left to None until the transpose is known)
+        if flags.hermitian:
+            H = self
+        elif flags.symmetric:
+            H = C
+        elif flags.real:
+            H = T
+        elif 'H' in rules:
+            H = _copy_reverse(self, rules['H'](self))
+        elif flags.unitary and 'I' in rules:
+            H = _copy_reverse(self, rules['I'](self))
+        elif self.adjoint is not None:
+            H = _copy_reverse(
+                self, Operator(direct=self.adjoint,
+                               name=self.__name__ + '.H', flags=new_flags))
+        else:
+            H = None
+
+        # the transpose is not available: derive it from the adjoint, or
+        # create an operator without direct method
+        if T is None:
+            if H is not None:
+                if flags.real:
+                    T = H
+                else:
+                    T = _copy_reverse(
+                        self, Operator(direct=H.conjugate, name=
+                                       self.__name__ + '.T', flags=new_flags))
+            else:
+                T = _copy_reverse(
+                    self, Operator(name=self.__name__ + '.T', flags=new_flags))
+                if flags.real:
+                    H = T
+
+        # the adjoint is not available: derive it from the transpose
+        if H is None:
+            H = _copy_reverse(
+                self, Operator(direct=T.conjugate if T is not None else None,
+                               name=self.__name__ + '.H', flags=new_flags))
+
+        # inverse
+        if flags.involutary:
+            I = self
+        elif flags.orthogonal:
+            I = T
+        elif flags.unitary:
+            I = H
+        elif 'I' in rules:
+            I = _copy_reverse(self, rules['I'](self))
+        else:
+            I = _copy_reverse(
+                self, Operator(direct=self.inverse,
+                               name=self.__name__ + '.I',
+                               flags={'linear': flags.linear,
+                                      'idempotent': flags.idempotent,
+                                      'involutary': flags.involutary,
+                                      'orthogonal': flags.orthogonal,
+                                      'unitary': flags.unitary}))
+
+        # flags carried over to the inverse-conjugate, inverse-transpose and
+        # inverse-adjoint (the 'linear' flag is not part of them)
+        new_flags = {
+            'idempotent': flags.idempotent,
+            'involutary': flags.involutary,
+            'orthogonal': flags.orthogonal,
+            'unitary': flags.unitary}
+
+        # inverse-conjugate
+        if flags.real:
+            IC = I
+        elif flags.orthogonal:
+            IC = H
+        elif flags.unitary:
+            IC = T
+        elif flags.involutary:
+            IC = C
+        elif 'IC' in rules:
+            IC = _copy_reverse(self, rules['IC'](self))
+        else:
+            if self.inverse_conjugate is not None:
+                func = self.inverse_conjugate
+            elif I is not None:
+                func = I.conjugate
+            else:
+                func = None
+            IC = _copy_reverse(
+                self, Operator(direct=func, name=self.__name__ + '.I.C',
+                               flags=new_flags))
+
+        # inverse-transpose (it may be left to None until the
+        # inverse-adjoint is known)
+        if flags.orthogonal:
+            IT = self
+        elif flags.symmetric:
+            IT = I
+        elif flags.unitary:
+            IT = C
+        elif flags.involutary:
+            IT = T
+        elif 'IT' in rules:
+            IT = _copy_direct(self, rules['IT'](self))
+        elif self.inverse_transpose is not None:
+            IT = _copy_direct(
+                self, Operator(direct=self.inverse_transpose,
+                               name=self.__name__ + '.I.T', flags=new_flags))
+        else:
+            IT = None
+
+        # inverse-adjoint (it may be left to None until the
+        # inverse-transpose is known)
+        if flags.unitary:
+            IH = self
+        elif flags.hermitian:
+            IH = I
+        elif flags.orthogonal:
+            IH = C
+        elif flags.involutary:
+            IH = H
+        elif flags.symmetric:
+            IH = IC
+        elif flags.real:
+            IH = IT
+        elif 'IH' in rules:
+            IH = _copy_direct(self, rules['IH'](self))
+        elif self.inverse_adjoint is not None:
+            IH = _copy_direct(
+                self, Operator(direct=self.inverse_adjoint,
+                               name=self.__name__ + '.I.H', flags=new_flags))
+        else:
+            IH = None
+
+        # the inverse-transpose is not available: derive it from the
+        # inverse-adjoint, or create an operator without direct method
+        if IT is None:
+            if IH is not None:
+                if flags.real:
+                    IT = IH
+                else:
+                    IT = _copy_direct(
+                        self, Operator(direct=IH.conjugate,
+                                       name=self.__name__ + '.I.T',
+                                       flags=new_flags))
+            else:
+                IT = _copy_direct(
+                    self, Operator(name=self.__name__ + '.I.T',
+                                   flags=new_flags))
+                if flags.real:
+                    IH = IT
+
+        # the inverse-adjoint is not available: derive it from the
+        # inverse-transpose
+        if IH is None:
+            IH = _copy_direct(
+                self, Operator(direct=IT.conjugate if IT is not None else None,
+                               name=self.__name__ + '.I.H', flags=new_flags))
+
+        # once all the associated operators are instantiated, we set all their
+        # associated operators. To do so, we use the fact that the transpose,
+        # adjoint, conjugate and inverse operators are commutative and
+        # involutary.
+        self._C, self._T, self._H, self._I = C, T, H, I
+        C._C, C._T, C._H, C._I = self, H, T, IC
+        T._C, T._T, T._H, T._I = H, self, C, IT
+        H._C, H._T, H._H, H._I = T, C, self, IH
+        I._C, I._T, I._H, I._I = IC, IT, IH, self
+        IC._C, IC._T, IC._H, IC._I = I, IH, IT, C
+        IT._C, IT._T, IT._H, IT._I = IH, I, IC, T
+        IH._C, IH._T, IH._H, IH._I = IT, IC, I, H
+
+    def _init_dtype(self, dtype):
+        """ Set the operator's dtype, converted to a numpy dtype or None. """
+        self.dtype = None if dtype is None else np.dtype(dtype)
+
+    def _init_flags(self, flags):
+        """
+        Set the operator's flags and the flags that can be deduced from them.
+
+        The flags implied by combinations of other flags are added, the
+        flags 'shape_input' and 'shape_output' are rejected with a ValueError
+        if they are specified by the user, and additional flags are set if
+        the direct method is a numpy ufunc. A TypeError is raised if the
+        ufunc does not have exactly one input and one output.
+
+        """
+
+        self._set_flags(flags)
+
+        # A non-complex dtype sets the real flag to true.
+        if self.dtype is None or self.dtype.kind != 'c':
+            self._set_flags('real')
+
+        # for real operators: symmetric <=> hermitian, orthogonal <=> unitary
+        if self.flags.real:
+            if self.flags.symmetric:
+                self._set_flags('hermitian')
+            if self.flags.hermitian:
+                self._set_flags('symmetric')
+            if self.flags.orthogonal:
+                self._set_flags('unitary')
+            if self.flags.unitary:
+                self._set_flags('orthogonal')
+
+        # for orthogonal operators: idempotent <=> symmetric
+        if self.flags.orthogonal:
+            if self.flags.idempotent:
+                self._set_flags('symmetric')
+            if self.flags.symmetric:
+                self._set_flags('idempotent')
+
+        # for unitary operators: idempotent <=> hermitian
+        if self.flags.unitary:
+            if self.flags.idempotent:
+                self._set_flags('hermitian')
+            if self.flags.hermitian:
+                self._set_flags('idempotent')
+
+        # for involutary operators: symmetric <=> orthogonal and
+        # hermitian <=> unitary
+        if self.flags.involutary:
+            if self.flags.symmetric:
+                self._set_flags('orthogonal')
+            if self.flags.orthogonal:
+                self._set_flags('symmetric')
+            if self.flags.hermitian:
+                self._set_flags('unitary')
+            if self.flags.unitary:
+                self._set_flags('hermitian')
+
+        # for idempotent operators: any of the flags orthogonal, unitary and
+        # involutary sets the three of them
+        if self.flags.idempotent:
+            if any([self.flags.orthogonal, self.flags.unitary,
+                    self.flags.involutary]):
+                self._set_flags('orthogonal, unitary, involutary')
+
+        # the shape flags are set by the operator itself (see _init_inout):
+        # they cannot be specified by the user
+        if isinstance(flags, (dict, str)):
+            auto_flags = ('shape_input', 'shape_output')
+            mask = [f in flags for f in auto_flags]
+            if any(mask):
+                raise ValueError(
+                    'The {0} {1} cannot be set.'.format(
+                        strplural(np.sum(mask), 'flag', nonumber=True),
+                        strenum([a for a, m in zip(auto_flags, mask) if m])))
+
+        if isinstance(self.direct, np.ufunc):
+            if self.direct.nin != 1 or self.direct.nout != 1:
+                raise TypeError('A ufunc with several inputs or outputs cannot'
+                                ' be converted to an Operator.')
+            # the fourth character of a signature such as 'f->f' is the type
+            # code of the output. NOTE(review): 'F', 'D' and 'G' are the
+            # complex type codes; the ufunc is presumably considered not real
+            # if none of its outputs is a real number -- confirm.
+            real = True
+            if all(_[3] in 'EFDGOSUV' for _ in self.direct.types):
+                real = False
+                if self.dtype is None:
+                    self.dtype = np.dtype(np.complex128)
+            elif all(_[3] in 'efdgEFDGOSUV' for _ in self.direct.types):
+                if self.dtype is None:
+                    self.dtype = np.dtype(np.float64)
+            if real:
+                self._set_flags('real')
+            self._set_flags('inplace')
+            self._set_flags('square')
+            self._set_flags('separable')
+            if self.direct is np.negative:
+                self._set_flags('linear')
+
+        # for in-place operators, the input and the output have the same
+        # alignment and contiguity requirements: the larger values are used
+        if self.flags.inplace:
+            aligned = max(self.flags.aligned_input,
+                          self.flags.aligned_output)
+            contiguous = max(self.flags.contiguous_input,
+                             self.flags.contiguous_output)
+            self._set_flags({'aligned_input': aligned,
+                             'aligned_output': aligned,
+                             'contiguous_input': contiguous,
+                             'contiguous_output': contiguous})
+
+    def _init_rules(self):
+        """
+        Translate flags into rules.
+
+        The unary rules are set first, followed by the rules that apply to
+        the composition of operators.
+
+        """
+        if self.rules is None:
+            self.rules = {}
+
+        # unary rules: the associated operator is the operator itself
+        unary = (('real', 'C'),
+                 ('symmetric', 'T'),
+                 ('hermitian', 'H'),
+                 ('involutary', 'I'))
+        for flag, subject in unary:
+            if getattr(self.flags, flag):
+                self.set_rule(subject, '.')
+
+        # composition rules: the first one does not depend on the flags
+        self.set_rule('I,.', '1', CompositionOperator)
+        binary = (('orthogonal', 'T,.', '1'),
+                  ('unitary', 'H,.', '1'),
+                  ('idempotent', '.,.', '.'),
+                  ('involutary', '.,.', '1'))
+        for flag, subjects, predicate in binary:
+            if getattr(self.flags, flag):
+                self.set_rule(subjects, predicate, CompositionOperator)
+
+    def _init_inout(self, attrin, attrout, classin, classout, commin, commout,
+                    reshapein, reshapeout, shapein, shapeout, toshapein,
+                    toshapeout, validatein, validateout):
+        """
+        Set methods and attributes dealing with the input and output handling.
+
+        The arguments that are not None override the attributes and methods
+        of the same name. The input and output shapes are then deduced from
+        each other when possible and validated, and the flags 'square',
+        'shape_input' and 'shape_output' are set accordingly.
+
+        A TypeError is raised if attrin or attrout is neither a dictionary
+        nor a function, or if classin or classout is neither None nor an
+        ndarray subclass.
+
+        """
+
+        # an empty dictionary does not override the attribute
+        if isinstance(attrin, (dict, types.FunctionType, types.MethodType)):
+            if not isinstance(attrin, dict) or len(attrin) > 0:
+                self.attrin = attrin
+        else:
+            raise TypeError(
+                "The 'attrin' keyword should be a dictionary or a function.")
+        if isinstance(attrout, (dict, types.FunctionType, types.MethodType)):
+            if not isinstance(attrout, dict) or len(attrout) > 0:
+                self.attrout = attrout
+        else:
+            raise TypeError(
+                "The 'attrout' keyword should be a dictionary or a function.")
+        if type(classin) is type and issubclass(classin, np.ndarray):
+            self.classin = classin
+        elif classin is not None:
+            raise TypeError(
+                "The 'classin' keyword is not an ndarray subclass.")
+        if type(classout) is type and issubclass(classout, np.ndarray):
+            self.classout = classout
+        elif classout is not None:
+            raise TypeError(
+                "The 'classout' keyword is not an ndarray subclass.")
+        if commin is not None:
+            self.commin = commin
+        if commout is not None:
+            self.commout = commout
+        if reshapein is not None:
+            self.reshapein = reshapein
+        if reshapeout is not None:
+            self.reshapeout = reshapeout
+        if toshapein is not None:
+            self.toshapein = toshapein
+        if toshapeout is not None:
+            self.toshapeout = toshapeout
+        if validatein is not None:
+            self.validatein = validatein
+        if validateout is not None:
+            self.validateout = validateout
+
+        # deduce the output shape from the input shape and vice versa; a
+        # shape that is explicitly given is not overridden
+        shapein = tointtuple(shapein)
+        shapeout = tointtuple(shapeout)
+        self.shapein = shapein
+        self.shapeout = shapeout
+        if shapein is not None:
+            shapeout = tointtuple(self.reshapein(shapein))
+            if self.shapeout is None:
+                self.shapeout = shapeout
+        if shapeout is not None:
+            shapein = tointtuple(self.reshapeout(shapeout))
+            if self.shapein is None:
+                self.shapein = shapein
+
+        if shapein is not None:
+            self.validatein(shapein)
+        if shapeout is not None:
+            self.validateout(shapeout)
+
+        if self.shapein is not None and self.shapeout is not None:
+            self._set_flags(square=self.shapein == self.shapeout)
+
+        # for square operators, the input and the output share their shape
+        # and their reshape, validation and toshape methods
+        if self.flags.square:
+            if self.shapein is None:
+                self.shapein = self.shapeout
+            else:
+                self.shapeout = self.shapein
+            self.reshapein = lambda x: x
+            self.reshapeout = self.reshapein
+            self.validatein = self.validatein or self.validateout
+            self.validateout = self.validatein
+            # use toshapeout for both if it is the only overridden method.
+            # NOTE(review): the im_func attribute only exists for Python 2
+            # bound and unbound methods.
+            if self.toshapein.im_func is Operator.toshapein.im_func and \
+               self.toshapeout.im_func is not Operator.toshapeout.im_func:
+                self.toshapein = self.toshapeout
+            else:
+                self.toshapeout = self.toshapein
+
+        # if the shape is explicit, the instance-level toshape method, if
+        # any, is removed, so that the method of the class is used
+        if self.shapein is not None:
+            try:
+                del self.toshapein
+            except AttributeError:
+                pass
+        if self.shapeout is not None:
+            try:
+                del self.toshapeout
+            except AttributeError:
+                pass
+
+        # a shape is 'explicit' if it is known, 'implicit' if the method
+        # deducing it from the other shape is not the Operator's one, and
+        # 'unconstrained' otherwise
+        flag_is = 'explicit' if self.shapein is not None else 'implicit' if \
+            self.reshapeout != Operator.reshapeout.__get__(self, type(self)) \
+            else 'unconstrained'
+        flag_os = 'explicit' if self.shapeout is not None else 'implicit' \
+            if self.reshapein != Operator.reshapein.__get__(self, type(self)) \
+            else 'unconstrained'
+        self._set_flags(shape_input=flag_is, shape_output=flag_os)
+
+        # for explicit shapes, the Operator's methods are restored
+        if flag_is == 'explicit':
+            self.reshapeout = Operator.reshapeout.__get__(self, type(self))
+            self.validatein = Operator.validatein.__get__(self, type(self))
+        if flag_os == 'explicit':
+            if self.flags.square:
+                self.reshapein = self.reshapeout
+                self.validateout = self.validatein
+            else:
+                self.reshapein = Operator.reshapein.__get__(self, type(self))
+                self.validateout = Operator.validateout.__get__(
+                    self, type(self))
+
+    def _init_name(self, name):
+        """
+        Set operator's __name__ attribute.
+
+        If no name is given and if the operator is not already named, the
+        name is the name of the class for Operator subclasses. For Operator
+        instances, it is the name of the direct method, unless it is missing
+        or equal to '<lambda>' or 'direct', in which case it is 'Operator'.
+
+        """
+        if name is not None:
+            self.__name__ = name
+            return
+        if self.__name__ is not None:
+            # the operator is already named
+            return
+        cls = type(self)
+        if cls is not Operator:
+            self.__name__ = cls.__name__
+            return
+        if self.direct is not None and self.direct.__name__ not in \
+                ('<lambda>', 'direct'):
+            self.__name__ = self.direct.__name__
+            return
+        self.__name__ = 'Operator'
+
+    def _reset(self, **keywords_):
+        """
+        Re-initialize the operator in-place.
+
+        The instance attributes listed in OPERATOR_ATTRIBUTES, updated by
+        the specified keywords, are passed to Operator.__init__.
+
+        Use this method with caution: the current values (including the
+        operator's flags, if they are stored among these attributes) are
+        carried over unless they are overridden by a keyword. It may lead to
+        inconsistencies.
+
+        """
+        state = self.__dict__
+        keywords = dict((k, v)
+                        for k, v in state.items()
+                        if k in OPERATOR_ATTRIBUTES)
+        keywords.update(keywords_)
+
+        # remove the attributes and the cached associated operators
+        for attr in OPERATOR_ATTRIBUTES + ['_C', '_T', '_H', '_I']:
+            state.pop(attr, None)
+
+        # initialize the operator again, with the new attributes
+        Operator.__init__(self, **keywords)
+
+    def _set_flags(self, flags=None, **keywords):
+        """
+        Set flags to an Operator.
+
+        Parameters
+        ----------
+        flags : Flags or any input accepted by validate_flags
+            The flags to be set. A Flags instance without keywords replaces
+            the operator's flags altogether. Otherwise, the operator's flags
+            are updated.
+        **keywords
+            Flags specified as keywords, passed to validate_flags.
+
+        Any true flag among hermitian, involutary, orthogonal, symmetric and
+        unitary also sets the 'square' flag (unless the operator has a custom
+        reshapein) and the 'linear' flag (unless 'involutary' is the only
+        flag that is set to True).
+
+        """
+        if isinstance(flags, Flags) and len(keywords) == 0:
+            self.flags = flags
+            return
+        flags = self.validate_flags(flags, **keywords)
+        true_flags = [k for k, v in flags.items() if v is True]
+        if any(_ in true_flags
+               for _ in ['hermitian', 'involutary', 'orthogonal', 'symmetric',
+                         'unitary']):
+            # an operator that is only involutary is not necessarily linear.
+            # true_flags is a list: it has to be compared to a list, since a
+            # list is never equal to a tuple.
+            if true_flags != ['involutary']:
+                flags['linear'] = True
+            # custom reshapein override the square flag
+            if self.reshapein == Operator.reshapein.__get__(self, type(self)):
+                flags['square'] = True
+        self.flags = self.flags._replace(**flags)
+
+    def _validate_arguments(self, input, output):
+        """
+        Return the input and output as ndarray instances.
+        If required, allocate the output.
+        """
+        input = np.array(input, copy=False)
+        dtype = self._find_common_type([input.dtype, self.dtype])
+
+        input_ = None
+        output_ = None
+
+        # if the input is not compatible, copy it into a buffer from the pool
+        if input.dtype != dtype or not iscompatible(input, input.shape, dtype,
+           self.flags.aligned_input, self.flags.contiguous_input):
+            if output is not None and self.flags.inplace and iscompatible(
+               output, input.shape, dtype, self.flags.aligned_input,
+               self.flags.contiguous_input):
+                buf = output
+            else:
+                input_ = _pool.extract(input.shape, dtype,
+                                       self.flags.aligned_input,
+                                       self.flags.contiguous_input)
+                buf = input_
+            input, input[...] = _pool.view(buf, input.shape, dtype), input
+
+        # check compatibility of provided output
+        if output is not None:
+            if not isinstance(output, np.ndarray):
+                raise TypeError('The output argument is not an ndarray.')
+            output = output.view(np.ndarray)
+            if output.dtype != dtype:
+                raise ValueError(
+                    "The output has an invalid dtype '{0}'. Expected dtype is "
+                    "'{1}'.".format(output.dtype, dtype))
+
+            # if the output does not fulfill the operator's alignment &
+            # contiguity requirements, or if the operator is out-of-place and
+            # an in-place operation is required, let's use a temporary buffer
+            if not iscompatible(output, output.shape, dtype,
+               self.flags.aligned_output, self.flags.contiguous_output) or \
+               isalias(input, output) and not self.flags.inplace:
+                output_ = _pool.extract(
+                    output.shape, dtype, self.flags.aligned_output,
+                    self.flags.contiguous_output)
+                output = _pool.view(output_, output.shape, dtype)
+            shapeout = output.shape
+        else:
+            shapeout = None
+
+        shapein, shapeout = self._validate_shapes(input.shape, shapeout)
+
+        # if the output is not provided, allocate it
+        if output is None:
+            if self.flags.shape_input == 'implicit' and \
+               self.flags.shape_output == 'unconstrained':
+                raise ValueError(
+                    'The output shape of an implicit input shape and unconstra'
+                    'ined output shape operator cannot be inferred.')
+            if shapeout is None:
+                shapeout = input.shape
+            output = empty(shapeout, dtype, description=
+                           "for {0}'s output.".format(self.__name__))
+        return input, input_, output, output_
+
+    @staticmethod
+    def validate_flags(flags, **keywords):
+        """ Return flags as a dictionary. """
+        if flags is None:
+            return keywords
+        if isinstance(flags, dict):
+            flags = flags.copy()
+        elif isinstance(flags, Flags):
+            flags = dict((k, v) for k, v in zip(Flags._fields, flags))
+        elif isinstance(flags, (list, tuple, str)):
+            if isinstance(flags, str):
+                flags = [f.strip() for f in flags.split(',')]
+            flags = dict((f, True) for f in flags)
+        else:
+            raise TypeError("The operator flags have an invalid type '{0}'.".
+                            format(flags))
+        flags.update(keywords)
+        if any(not isinstance(f, str) for f in flags):
+            raise TypeError("Invalid type for the operator flags: {0}."
+                            .format(flags))
+        if any(f not in Flags._fields for f in flags):
+            raise ValueError(
+                "Invalid operator flags '{0}'. The properties must be one of t"
+                "he following: ".format(flags.keys()) + strenum(
+                Flags._fields) + '.')
+        return flags
+
+    def _validate_shapes(self, shapein, shapeout):
+        """
+        Validate that the arguments shapein and shapeout are compatible with
+        the input and output shapes of the operator. The arguments can be None
+        to signify that they are unknown. The input and output shapes of the
+        operator (inferred from the known arguments if necessary) are then
+        returned.
+        This method should be used with initialised operators.
+
+        """
+        shapein = tointtuple(shapein)
+        if shapein is not None:
+            self.validatein(shapein)
+        if self.flags.shape_output == 'explicit':
+            shapeout_ = self.shapeout
+        elif self.flags.shape_output == 'unconstrained' or shapein is None:
+            shapeout_ = None
+        else:
+            shapeout_ = tointtuple(self.reshapein(shapein))
+            self.validateout(shapeout_)
+
+        shapeout = tointtuple(shapeout)
+        if shapeout is not None:
+            self.validateout(shapeout)
+        if self.flags.shape_input == 'explicit':
+            shapein_ = self.shapein
+        elif self.flags.shape_input == 'unconstrained' or shapeout is None:
+            shapein_ = None
+        else:
+            shapein_ = tointtuple(self.reshapeout(shapeout))
+            self.validatein(shapein_)
+
+        if None not in (shapein, shapein_) and shapein != shapein_:
+            raise ValueError(
+                "The specified input shape '{0}' is incompatible with the expe"
+                "cted one '{1}'.".format(shapein, shapein_))
+        if None not in (shapeout, shapeout_) and shapeout != shapeout_:
+            raise ValueError(
+                "The specified output shape '{0}' is incompatible with the exp"
+                "ected one '{1}'.".format(shapeout, shapeout_))
+
+        return (first_is_not([shapein, shapein_], None),
+                first_is_not([shapeout, shapeout_], None))
+
+    def __truediv__(self, other):
+        return MultiplicationOperator([self,
+                                       po.nonlinear.PowerOperator(-1)(other)])
+    __div__ = __truediv__
+
+    def __rtruediv__(self, other):
+        return MultiplicationOperator([other,
+                                       po.nonlinear.PowerOperator(-1)(self)])
+    __rdiv__ = __rtruediv__
+
+    def __mul__(self, other):
+        if isinstance(other, (Variable, VariableTranspose)):
+            return other.__rmul__(self)
+        if (self.flags.linear and
+            not isscalarlike(other) and
+            isinstance(other, (np.ndarray, list, tuple)) and
+            not isinstance(other, np.matrix)):
+                return self(other)
+        try:
+            other = asoperator(other)
+        except TypeError:
+            return NotImplemented
+        if not self.flags.linear or not other.flags.linear:
+            return MultiplicationOperator([self, other])
+        # ensure that A * A is A if A is idempotent
+        if self.flags.idempotent and self is other:
+            return self
+        return CompositionOperator([self, other])
+
+    def __rmul__(self, other):
+        if (self.flags.linear and
+            not isscalarlike(other) and
+            isinstance(other, (np.ndarray, list, tuple)) and
+            not isinstance(other, np.matrix)):
+                return self.T(other)
+        try:
+            other = asoperator(other)
+        except TypeError:
+            return NotImplemented
+        if not self.flags.linear or not other.flags.linear:
+            return MultiplicationOperator([other, self])
+        return CompositionOperator([other, self])
+
+    def __pow__(self, n):
+        if not self.flags.linear:
+            return po.nonlinear.PowerOperator(n)(self)
+        if not np.allclose(n, np.round(n)):
+            raise ValueError("The exponent '{0}' is not an integer.".format(n))
+        if n == -1:
+            return self.I
+        if n == 0:
+            return IdentityOperator(shapein=self.shapein)
+        if n == 1:
+            return self
+        if n > 0:
+            return CompositionOperator(n * [self])
+        return CompositionOperator((-n) * [self.I])
+
+    def __add__(self, other):
+        return AdditionOperator([self, other])
+
+    def __radd__(self, other):
+        return AdditionOperator([other, self])
+
+    def __sub__(self, other):
+        return AdditionOperator([self, -other])
+
+    def __rsub__(self, other):
+        return AdditionOperator([other, -self])
+
+    def __neg__(self):
+        return HomothetyOperator(-1) * self
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if type(self) is not type(other):
+            return False
+        d1 = self.__dict__.copy()
+        d2 = other.__dict__.copy()
+        for k in 'rules', '_C', '_T', '_H', '_I', '_D':
+            if k in d1:
+                del d1[k]
+            if k in d2:
+                del d2[k]
+        return all_eq(d1, d2)
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __str__(self):
+        if self.__name__ is None:
+            return type(self).__name__ + ' [not initialized]'
+        if self.flags.linear and (self.shapein is not None or
+                                  self.shapeout is not None):
+            shapein = '?' if self.shapein is None else strshape(self.shapein)
+            shapeout = '?' if self.shapeout is None else \
+                strshape(self.shapeout)
+            if self.flags.square and self.shapein is not None and \
+               len(self.shapein) > 1:
+                s = shapein + '²'
+            else:
+                s = shapeout + 'x' + shapein
+            s += ' '
+        else:
+            s = ''
+        name = self.__name__
+        if name != 'Operator':
+            name = name.replace('Operator', '')
+        s += name.lower()
+        return s
+
+    def __repr__(self):
+        if self.__name__ is None:
+            return type(self).__name__ + ' [not initialized]'
+
+        a = []
+        init = getattr(self, '__init_original__', self.__init__)
+        vars, args, keywords, defaults = inspect.getargspec(init)
+        if defaults is None:
+            defaults = []
+        else:
+            defaults = list(defaults)
+
+        #XXX it would be better to walk the Operator's hirarchy
+        # to grab all keywords.
+        if 'shapein' not in vars:
+            vars.append('shapein')
+            defaults.append(None)
+        if 'shapeout' not in vars:
+            vars.append('shapeout')
+            defaults.append(None)
+
+        for ivar, var in enumerate(vars):
+            if var in ('flags', 'self'):
+                continue
+            if var == 'shapeout' and self.flags.shape_output == 'implicit':
+                continue
+            if var == 'shapein' and self.flags.shape_input == 'implicit':
+                continue
+            if var == 'reshapeout' and self.flags.square and \
+               self.flags.shape_input == 'implicit':
+                continue
+
+            val = getattr(self, var, None)
+            if isinstance(val, types.MethodType):
+                continue
+            nargs = len(vars) - len(defaults)
+            if ivar >= nargs:
+                try:
+                    if val == defaults[ivar - nargs]:
+                        continue
+                except:
+                    if val is defaults[ivar - nargs]:
+                        continue
+            if var == 'reshapein' and self.flags.square and \
+               self.flags.shape_output == 'implicit':
+                s = 'lambda x:x'
+            elif var in ('commin', 'commout'):
+                if val is MPI.COMM_WORLD:
+                    s = 'MPI.COMM_WORLD'
+                elif val is MPI.COMM_SELF:
+                    s = 'MPI.COMM_SELF'
+                else:
+                    s = str(val)
+            elif isinstance(val, Operator):
+                s = 'Operator()'
+            elif type(val) is type:
+                s = val.__module__ + '.' + val.__name__
+            elif var in ['shapein', 'shapeout']:
+                s = strshape(val)
+            elif isinstance(val, np.ndarray) and val.ndim == 0:
+                s = repr(val[()])
+            elif isinstance(val, np.ndarray):
+                s = 'array' if type(val) is np.ndarray else type(val).__name__
+                s += '(' + val.ndim * '['
+                s += str(val.flat[0])
+                if val.size > 1:
+                    s += ', ' if val.size == 2 else ', ..., '
+                    s += str(val.flat[-1])
+                s += val.ndim * ']' + ', dtype={0})'.format(val.dtype)
+            elif var == 'dtype':
+                s = str(val)
+            else:
+                s = repr(val)
+
+            if ivar < nargs:
+                a += [s]
+            else:
+                a += [var + '=' + s]
+        return self.__name__ + '(' + ', '.join(a) + ')'
+
+
+class DeletedOperator(Operator):
+    def __init__(self):
+        raise NotImplementedError('A DeletedOperator cannot be instantiated.')
+
+    __name__ = 'DeletedOperator'
+
+
+ at real
+ at symmetric
+ at idempotent
+ at involutary
+ at update_output
+class CopyOperator(Operator):
+    """
+    Copy operator.
+
+    Unlike IdentityOperator, this is an out-of-place operator.
+
+    """
+    def direct(self, input, output, operation=operation_assignment):
+        operation(output, input)
+
+
+class CompositeOperator(Operator):
+    """
+    Abstract class for handling a list of operands.
+
+    Attributes
+    ----------
+    operands : list of Operators
+        List of operands.
+
+    Methods
+    -------
+    can_morph : boolean method
+        If the composite operator has only one operand (being the argument
+        itself or being the result of simplifications by binary rules),
+        this method specifues if the composite should morph into its operand.
+        Default is False.
+
+    Notes
+    -----
+    Composites can morph into their single operand if the attribute
+    'morph_single_operand' is set to True. As a consequence, one should make
+    sure to return right after the call in the parent __init__ method.
+
+    """
+    def __init__(self, operands, dtype=None, **keywords):
+        self._validate_comm(operands)
+        if dtype is None:
+            dtype = self._find_common_type(o.dtype for o in operands)
+        self.operands = operands
+        Operator.__init__(self, dtype=dtype, **keywords)
+        self.propagate_commin(self.commin)
+        self.propagate_commout(self.commout)
+
+    morph_single_operand = True
+
+    @property
+    def nbytes(self):
+        d = dict((id(_), _) for _ in self.operands)
+        unique = set(d.keys())
+        return sum(d[_].nbytes for _ in unique)
+
+    def propagate_attributes(self, cls, attr):
+        return self.operands[0].propagate_attributes(cls, attr)
+
+    def propagate_commin(self, commin):
+        if commin is None:
+            return self
+        self.commin = commin
+        for i, op in enumerate(self.operands):
+            self.operands[i] = op.propagate_commin(commin)
+        return self
+
+    def propagate_commout(self, commout):
+        if commout is None:
+            return self
+        self.commout = commout
+        for i, op in enumerate(self.operands):
+            self.operands[i] = op.propagate_commout(commout)
+        return self
+
+    def _apply_rules(self, ops):
+        return ops
+
+    def _validate_operands(self, operands, constant=False):
+        if not isinstance(operands, (list, tuple, types.GeneratorType)):
+            operands = [operands]
+        return [asoperator(op, constant=constant) for op in operands]
+
+    def _validate_comm(self, operands):
+        comms = [op.commin for op in operands if op.commin is not None]
+        if len(set(id(c) for c in comms)) > 1:
+            raise ValueError('The input MPI communicators are incompatible.')
+        comms = [op.commout for op in operands if op.commout is not None]
+        if len(set(id(c) for c in comms)) > 1:
+            raise ValueError('The output MPI communicators are incompatible.')
+        return operands
+
+    def __str__(self):
+        if isinstance(self, AdditionOperator):
+            op = ' + '
+        elif isinstance(self, MultiplicationOperator):
+            op = u' {0} '.format(u'\u00d7').encode('utf-8')
+        elif isinstance(self, (BlockDiagonalOperator, BlockSliceOperator)):
+            op = u' {0} '.format(u'\u2295').encode('utf-8')
+        else:
+            op = ' * '
+
+        # parentheses for AdditionOperator and BlockDiagonalOperator
+        operands = ['({0})'.format(o) if isinstance(o, (AdditionOperator,
+                    BlockDiagonalOperator)) else str(o) for o in self.operands]
+
+        # some special cases
+        if isinstance(self, BlockDiagonalOperator) and len(operands) > 2:
+            operands = [operands[0], '...', operands[-1]]
+        elif isinstance(self, CompositionOperator) and \
+                isinstance(self.operands[0], HomothetyOperator):
+            # remove trailing 'I'
+            operands[0] = operands[0][:-1]
+            if self.operands[0].data == -1:
+                operands[0] += '1'
+
+        return op.join(operands)
+
+    def __repr__(self):
+        r = self.__name__ + '(['
+        rops = [repr(op) for op in self.operands]
+        components = []
+        for i, rop in enumerate(rops):
+            if i != len(rops) - 1:
+                rop += ','
+            components.extend(rop.split('\n'))
+        r += '\n    '+'\n    '.join(components) + '])'
+        return r
+
+
+class CommutativeCompositeOperator(CompositeOperator):
+    """
+    Abstract class for commutative composite operators, such as the addition or
+    the element-wise multiplication.
+
+    """
+    def __init__(self, operands, operation=None, **keywords):
+        keywords = self._get_attributes(operands, **keywords)
+        operands = self._apply_rules(operands)
+        if len(operands) == 1 and self.morph_single_operand:
+            self.__class__ = operands[0].__class__
+            self.__dict__ = operands[0].__dict__.copy()
+            self._reset(**keywords)
+            return
+        CompositeOperator.__init__(self, operands, **keywords)
+        self.set_rule(('.', Operator), lambda s, o: type(s)(s.operands + [o]),
+                      type(self))
+        self.set_rule(('.', type(self)), lambda s, o:
+                      type(s)(s.operands + o.operands), type(self))
+        self.operation = operation
+
+    def direct(self, input, output):
+        operands = list(self.operands)
+        assert len(operands) > 1
+
+        # we need a temporary buffer if all operands can do inplace reductions
+        # except no more than one, which is move as first operand
+        try:
+            ir = [o.flags.update_output for o in operands]
+            index = ir.index(False)
+            operands[0], operands[index] = operands[index], operands[0]
+            need_temporary = ir.count(False) > 1
+        except ValueError:
+            need_temporary = False
+
+        operands[0].direct(input, output)
+        ii = 0
+        with _pool.get_if(need_temporary, output.shape, output.dtype) as buf:
+            for op in operands[1:]:
+                if op.flags.update_output:
+                    op.direct(input, output, operation=self.operation)
+                else:
+                    op.direct(input, buf)
+                    self.operation(output, buf)
+                ii += 1
+
+    def propagate_attributes(self, cls, attr):
+        return Operator.propagate_attributes(self, cls, attr)
+
+    def _apply_rules(self, ops):
+        if po.rules.rule_manager['none']:
+            return ops
+
+        if DEBUG:
+            strcls = type(self).__name__.upper()[:-8]
+
+            def print_operands():
+                print()
+                print(len(strcls) * '=' + '=========')
+                print(strcls + ' OPERANDS')
+                print(len(strcls) * '=' + '=========')
+                for i, op in enumerate(ops):
+                    print('{0}: {1!r}'.format(i, op))
+            print_operands()
+
+        if len(ops) <= 1:
+            if DEBUG:
+                print('OUT (only one operand)')
+                print()
+            return ops
+        i = 0
+        while i < len(ops):
+            if type(self) not in ops[i].rules:
+                i += 1
+                continue
+            j = 0
+            consumed = False
+            while j < len(ops):
+                if j != i:
+                    for rule in ops[i].rules[type(self)]:
+                        if DEBUG:
+                            print("({0}, {1}): testing rule '{2}'".
+                                  format(i, j, rule))
+                        new_ops = rule(ops[i], ops[j])
+                        if new_ops is None:
+                            continue
+                        if DEBUG:
+                            print('Because of rule {0}:'.format(rule))
+                            print('     MERGING ({0}, {1}) into {2!s} ~ {2!r}'.
+                                  format(i, j, new_ops))
+                        del ops[j]
+                        if j < i:
+                            i -= 1
+                        ops[i] = new_ops
+                        if DEBUG:
+                            print_operands()
+                        consumed = True
+                        break
+                    if consumed:
+                        break
+                if consumed:
+                    break
+                j += 1
+            if consumed:
+                continue
+            i += 1
+
+        # move this up to avoid creations of temporaries
+        i = [i for i, o in enumerate(ops) if isinstance(o, HomothetyOperator)]
+        if len(i) > 0:
+            ops.insert(0, ops[i[0]])
+            del ops[i[0]+1]
+            if ops[0].data == 0 and len(ops) > 1:
+                del ops[0]
+        return ops
+
+    @classmethod
+    def _get_attributes(cls, operands, **keywords):
+        attr = {
+            'attrin': first_is_not((o.attrin for o in operands), None),
+            'attrout': first_is_not((o.attrout for o in operands), None),
+            'classin': first_is_not((o.classin for o in operands), None),
+            'classout': first_is_not((o.classout for o in operands), None),
+            'commin': first_is_not((o.commin for o in operands), None),
+            'commout': first_is_not((o.commout for o in operands), None),
+            'dtype': cls._find_common_type(o.dtype for o in operands),
+            'flags': cls._merge_flags(operands),
+            'reshapein': cls._merge_reshapein(operands),
+            'reshapeout': cls._merge_reshapeout(operands),
+            'shapein': cls._merge_shape((o.shapein for o in operands), 'in'),
+            'shapeout': cls._merge_shape((o.shapeout for o in operands),
+                                         'out'),
+            'toshapein': first_is_not((o.toshapein for o in operands), None),
+            'toshapeout': first_is_not((o.toshapeout for o in operands), None),
+            'validatein': first_is_not((o.validatein for o in operands), None),
+            'validateout': first_is_not((o.validateout for o in operands),
+                                        None)}
+        for k, v in keywords.items():
+            if k is not 'flags':
+                attr[k] = v
+        attr['flags'].update(
+            Operator.validate_flags(keywords.get('flags', {})))
+        return attr
+
+    @staticmethod
+    def _merge_flags(operands):
+        return {
+            'real': all(o.flags.real for o in operands),
+            'aligned_input': max(o.flags.aligned_input for o in operands),
+            'aligned_output': max(o.flags.aligned_output for o in operands),
+            'contiguous_input': any(o.flags.contiguous_input
+                                    for o in operands),
+            'contiguous_output': any(o.flags.contiguous_output
+                                     for o in operands)}
+
+    @staticmethod
+    def _merge_reshapein(operands):
+        if any(o.flags.shape_output == 'explicit' for o in operands):
+            return None
+        if all(o.flags.shape_output == 'unconstrained' for o in operands):
+            return None
+        return first_is_not((o.reshapein for o in operands
+                             if o.flags.shape_output == 'implicit'), None)
+
+    @staticmethod
+    def _merge_reshapeout(operands):
+        if any(o.flags.shape_input == 'explicit' for o in operands):
+            return None
+        if all(o.flags.shape_input == 'unconstrained' for o in operands):
+            return None
+        return first_is_not((o.reshapeout for o in operands
+                             if o.flags.shape_input == 'implicit'), None)
+
+    @staticmethod
+    def _merge_shape(shapes, inout):
+        shapes = [s for s in shapes if s is not None]
+        if len(shapes) == 0:
+            return None
+        if any(s != shapes[0] for s in shapes):
+            raise ValueError('The {0}put shapes are incompatible: {1}.'.format(
+                             inout, strenum(shapes, 'and')))
+        return shapes[0]
+
+
+class AdditionOperator(CommutativeCompositeOperator):
+    """
+    Class for operator addition
+
+    If at least one of the input already is the result of an addition,
+    a flattened list of operators is created by associativity, to simplify
+    reduction.
+
+    """
+    def __init__(self, operands, **keywords):
+        operands = self._validate_operands(operands)
+        CommutativeCompositeOperator.__init__(self, operands, operator.iadd,
+                                              **keywords)
+        if not isinstance(self, CommutativeCompositeOperator):
+            return
+        self.set_rule('C', lambda s: type(s)([m.C for m in s.operands]))
+        self.set_rule('T', lambda s: type(s)([m.T for m in s.operands]))
+        self.set_rule('H', lambda s: type(s)([m.H for m in s.operands]))
+
+    @staticmethod
+    def _merge_flags(operands):
+        flags = CommutativeCompositeOperator._merge_flags(operands)
+        flags.update({
+            'linear': all(op.flags.linear for op in operands),
+            'separable': all(o.flags.separable for o in operands),
+            'square': any(o.flags.square for o in operands),
+            'symmetric': all(op.flags.symmetric for op in operands),
+            'hermitian': all(op.flags.symmetric for op in operands)})
+        return flags
+
+
+class MultiplicationOperator(CommutativeCompositeOperator):
+    """
+    Class for Hadamard (element-wise) multiplication of operators.
+
+    If at least one of the input already is the result of an multiplication,
+    a flattened list of operators is created by associativity, to simplify
+    reduction.
+
+    """
+    def __init__(self, operands, **keywords):
+        operands = self._validate_operands(operands, constant=True)
+        CommutativeCompositeOperator.__init__(self, operands, operator.imul,
+                                              **keywords)
+        if not isinstance(self, CommutativeCompositeOperator):
+            return
+        self.set_rule('C', lambda s: type(s)([m.C for m in s.operands]))
+
+    @staticmethod
+    def _merge_flags(operands):
+        flags = CommutativeCompositeOperator._merge_flags(operands)
+        flags.update({
+            'separable': all(o.flags.separable for o in operands),
+            'square': any(o.flags.square for o in operands)})
+        return flags
+
+
+ at square
+class BlockSliceOperator(CommutativeCompositeOperator):
+    """
+    Class for multiple disjoint slices.
+
+    The elements of the input not included in the slices are copied over to
+    the output. This is due to fact that it is not easy to derive
+    the complement of a set of slices. To set those values to zeros, you might
+    use MaskOperator or write a custom operator.
+    Currently, there is no check to verify that the slices are disjoint.
+    Non-disjoint slices can lead to unexpected results.
+
+    Examples
+    --------
+    >>> op = BlockSliceOperator(HomothetyOperator(3), slice(None,None,2))
+    >>> op(np.ones(6))
+    array([ 3.,  1.,  3.,  1.,  3.,  1.])
+
+    >>> op = BlockSliceOperator([ConstantOperator(1), ConstantOperator(2)],
+    ...                         ([slice(0, 2), slice(0, 2)],
+    ...                          [slice(2, 4), slice(2, 4)]))
+    >>> op(np.zeros((4,4)))
+    array([[ 1.,  1.,  0.,  0.],
+           [ 1.,  1.,  0.,  0.],
+           [ 0.,  0.,  2.,  2.],
+           [ 0.,  0.,  2.,  2.]])
+
+    """
+    def __init__(self, operands, slices, **keywords):
+        operands = self._validate_operands(operands)
+        if any(not op.flags.square and op.flags.shape_output != 'unconstrained'
+               for op in operands):
+            raise ValueError('Input operands must be square.')
+        if not isinstance(slices, (list, tuple, types.GeneratorType, slice)):
+            raise TypeError('Invalid input slices.')
+        if isinstance(slices, slice):
+            slices = (slices,)
+        else:
+            slices = tuple(slices)
+        if len(operands) != len(slices):
+            raise ValueError(
+                "The number of slices '{0}' is not equal to the number of oper"
+                "ands '{1}'.".format(len(slices), len(operands)))
+
+        CommutativeCompositeOperator.__init__(self, operands, **keywords)
+        self.slices = slices
+        self.set_rule('C', lambda s: BlockSliceOperator(
+                      [op.C for op in s.operands], s.slices))
+        self.set_rule('T', lambda s: BlockSliceOperator(
+                      [op.T for op in s.operands], s.slices))
+        self.set_rule('H', lambda s: BlockSliceOperator(
+                      [op.H for op in s.operands], s.slices))
+        self.set_rule(('.', HomothetyOperator),
+                      lambda s, o: BlockSliceOperator(
+                          [o.data * op for op in s.operands], s.slices),
+                      CompositionOperator)
+
+    morph_single_operand = False
+
+    def direct(self, input, output):
+        if not isalias(input, output):
+            output[...] = input
+        for s, op in zip(self.slices, self.operands):
+            i = input[s]
+            o = output[s]
+            with _pool.copy_if(i, op.flags.aligned_input,
+                               op.flags.contiguous_input) as i:
+                with _pool.copy_if(o, op.flags.aligned_output,
+                                   op.flags.contiguous_output) as o:
+                    op.direct(i, o)
+
+    @classmethod
+    def _get_attributes(cls, operands, **keywords):
+        attr = {
+            'dtype': cls._find_common_type(o.dtype for o in operands),
+            'flags': cls._merge_flags(operands),
+        }
+        for k, v in keywords.items():
+            if k is not 'flags':
+                attr[k] = v
+        attr['flags'].update(
+            Operator.validate_flags(keywords.get('flags', {})))
+        return attr
+
+    @staticmethod
+    def _merge_flags(operands):
+        flags = CommutativeCompositeOperator._merge_flags(operands)
+        flags.update({
+            'linear': all(op.flags.linear for op in operands),
+            'symmetric': all(op.flags.symmetric for op in operands),
+            'hermitian': all(op.flags.hermitian for op in operands),
+            'inplace': all(op.flags.inplace for op in operands)})
+        return flags
+
+
+class NonCommutativeCompositeOperator(CompositeOperator):
+    """
+    Abstract class for non-commutative composite operators, such as
+    the composition.
+
+    """
+    def _apply_rules(self, ops):
+        """
+        Simplify the list of operands by applying the binary rules attached
+        to each pair of adjacent operands.
+
+        The pairs are scanned from the last one to the first one. A rule
+        may either merge a pair into a single operand or replace both
+        operands of the pair. The list `ops` is modified in place and
+        returned. Nothing is done if the 'none' rule manager switch is set
+        or if there is at most one operand.
+
+        """
+        if po.rules.rule_manager['none']:
+            return ops
+
+        if DEBUG:
+            # debugging helpers: dump the candidate rules and the operands
+            def print_rules(i, rules):
+                print('Rules for ({0}, {1}):'.format(i, i+1))
+                for i, r in enumerate(rules):
+                    print('    {0}: {1}'.format(i, r))
+                print()
+
+            def print_operands():
+                print()
+                print('====================')
+                print('COMPOSITION OPERANDS')
+                print('====================')
+                for i, op in enumerate(ops):
+                    print('{0}: {1!r}'.format(i, op))
+            import pdb
+            print()
+            print()
+            print()
+            pdb.traceback.print_stack()
+            print_operands()
+
+        if len(ops) <= 1:
+            if DEBUG:
+                print('OUT (only one operand)')
+                print()
+            return ops
+
+        # Get the NonCommutativeCompositeOperator direct subclass
+        # NOTE(review): this relies on a fixed position in the MRO, i.e. on
+        # the depth of the class hierarchy above this class -- confirm if
+        # the hierarchy changes.
+        cls = type(self).__mro__[-5]
+
+        i = len(ops) - 2
+        # loop over the len(ops)-1 pairs of operands
+        while i >= 0:
+            o1 = ops[i]
+            o2 = ops[i+1]
+            # rules of the left operand and of the right operand that are
+            # registered for this kind of composite operator
+            rules1 = o1.rules[cls]['left'] if cls in o1.rules else []
+            rules2 = o2.rules[cls]['right'] if cls in o2.rules else []
+
+            # sorting key: rules whose other subject is given as a string
+            # come first; for the others, the longer the MROs of the
+            # classes involved, the smaller the key and the earlier the
+            # rule is tried
+            def key_rule(x):
+                if isinstance(x.other, str):
+                    return 0
+                if x.reference == 0:
+                    return 1000 - len(type(o1).__mro__) - len(x.other.__mro__)
+                return 1000 - len(x.other.__mro__) - len(type(o2).__mro__)
+
+            rules = rules1 + rules2
+            rules.sort(key=key_rule)
+
+            if DEBUG > 1:
+                print_rules(i, rules)
+            consumed = False
+            for rule in rules:
+                new_ops = rule(o1, o2)
+                # a rule returning None does not apply to this pair
+                if new_ops is None:
+                    continue
+                consumed = True
+                if DEBUG:
+                    print('Because of rule {0}:'.format(rule))
+                if isinstance(new_ops, tuple):
+                    # the rule replaces both operands of the pair
+                    if len(new_ops) != 2:
+                        raise NotImplementedError()
+                    ops[i], ops[i+1] = new_ops
+                    if DEBUG:
+                        print('    DOUBLE CHANGE: {0} into {1}'.format(
+                              i, new_ops[0]))
+                        print('    DOUBLE CHANGE: {0} into {1}'.format(
+                              i+1, new_ops[1]))
+                        print_operands()
+                    # step forward so that the new right operand is paired
+                    # with its right neighbour, if there is one
+                    i += 1
+                    break
+                if DEBUG:
+                    print('     MERGING ({0}, {1}) into {2!s} ~ {2!r}'.format(
+                          i, i+1, new_ops))
+                # the rule merges the pair into a single operand
+                cls._merge(new_ops, o1, o2)
+                del ops[i+1]
+                ops[i] = new_ops
+                if DEBUG:
+                    print_operands()
+                break
+
+            # after a change, examine the pair at the current index again
+            # as long as it has a right neighbour
+            if consumed and i < len(ops) - 1:
+                continue
+
+            i -= 1
+
+        if DEBUG:
+            print('OUT', end=' ')
+            if len(ops) == 1:
+                print('(only one operand)')
+            else:
+                print('(because of rule exhaustion)')
+            print()
+            print()
+
+        return ops
+
+
+@inplace
+class CompositionOperator(NonCommutativeCompositeOperator):
+    """
+    Class handling operator composition.
+
+    If at least one of the input already is the result of a composition,
+    a flattened list of operators is created by associativity, to simplify
+    reduction.
+
+    """
+    def __init__(self, operands, **keywords):
+        """
+        Validate the operands, simplify them with the composition rules and
+        initialise the composition.
+
+        If the simplification leaves a single operand and the class
+        attribute `morph_single_operand` is true, the instance takes the
+        class and a copy of the attributes of that operand instead.
+
+        """
+        operands = self._validate_operands(operands)
+        operands = self._apply_rules(operands)
+        if len(operands) == 1 and self.morph_single_operand:
+            # morph into the only remaining operand
+            self.__class__ = operands[0].__class__
+            self.__dict__ = operands[0].__dict__.copy()
+            return
+        keywords = self._get_attributes(operands, **keywords)
+        # cache used by _get_info, keyed by the context of the call
+        self._info = {}
+        NonCommutativeCompositeOperator.__init__(self, operands, **keywords)
+        # unary rules: the operand order is reversed for T, H, I and IC,
+        # and kept for C, IT and IH
+        self.set_rule('C', lambda s: CompositionOperator(
+                      [m.C for m in s.operands]))
+        self.set_rule('T', lambda s: CompositionOperator(
+                      [m.T for m in s.operands[::-1]]))
+        self.set_rule('H', lambda s: CompositionOperator(
+                      [m.H for m in s.operands[::-1]]))
+        self.set_rule('I', lambda s: CompositionOperator(
+                      [m.I for m in s.operands[::-1]]))
+        self.set_rule('IC', lambda s: CompositionOperator(
+                      [m.I.C for m in s.operands[::-1]]))
+        self.set_rule('IT', lambda s: CompositionOperator(
+                      [m.I.T for m in s.operands]))
+        self.set_rule('IH', lambda s: CompositionOperator(
+                      [m.I.H for m in s.operands]))
+        # binary rules: flatten compositions of compositions by
+        # associativity
+        self.set_rule(('.', CompositionOperator), lambda s, o:
+                      CompositionOperator(s.operands + o.operands),
+                      CompositionOperator)
+        self.set_rule(('.', Operator), lambda s, o: CompositionOperator(
+                      s.operands + [o]), CompositionOperator)
+        self.set_rule((Operator, '.'), lambda o, s: CompositionOperator(
+                      [o] + s.operands), CompositionOperator)
+
+    def direct(self, input, output, operation=operation_assignment,
+               preserve_input=True):
+        """
+        Apply the composition to `input` and store the result in `output`.
+
+        The operands are applied from the last one to the first one. They
+        are processed by groups (see _get_info), each group writing into a
+        single buffer, which is either the output itself or a temporary
+        buffer extracted from the memory pool.
+
+        `operation` is only forwarded to the first operand, and only if the
+        composition has the 'update_output' flag. If `preserve_input` is
+        false, the input buffer may be used as a work buffer.
+
+        """
+
+        # the input cannot be preserved if it shares memory with the output
+        preserve_input &= not isalias(input, output)
+        # with an operation other than the assignment, the content of the
+        # output matters: it must not be used for intermediate results
+        preserve_output = operation is not operation_assignment
+
+        shapeouts, dtypes, ninplaces, bufsizes, aligneds, contiguouss = \
+            self._get_info(input, output, preserve_input)
+
+        # i, o: current input and output views
+        # i_, o_: the buffers underlying these views
+        i = i_ = input
+        if isalias(input, output):
+            o_ = output if output.nbytes > input.nbytes else input
+        else:
+            o_ = output
+        iop = len(self.operands) - 1
+        ngroups = len(ninplaces)
+        reuse_output = True
+
+        # outer loop over groups of operators
+        # NOTE(review): renumerate is presumably a reversed enumerate, so
+        # that the last group is handled first -- confirm.
+        for igroup, (ninplace, bufsize, aligned, contiguous) in renumerate(
+                zip(ninplaces, bufsizes, aligneds, contiguouss)):
+
+            # the last group has no out-of-place operator
+            if igroup != ngroups - 1:
+
+                # get output for the current outplace operator if possible
+                reuse_output = not preserve_output and (igroup % 2 == 0) and \
+                    iscompatible(output, bufsize, np.int8, aligned,
+                                 contiguous) and not isalias(output, i) or \
+                    igroup == 0
+                if reuse_output:
+                    o_ = output
+                else:
+                    o_ = _pool.extract(bufsize, np.int8, aligned, contiguous)
+                    # the output is added to the pool while a temporary
+                    # buffer is in use; it is removed at the end of the
+                    # group iteration
+                    _pool.add(output)
+                o = _pool.view(o_, shapeouts[iop], dtypes[iop])
+                op = self.operands[iop]
+
+                # perform out-of place operation
+                if iop == 0 and self.flags.update_output:
+                    op.direct(i, o, operation=operation)
+                else:
+                    op.direct(i, o)
+                iop -= 1
+
+                # set the input buffer back in the pool
+                if (igroup < ngroups - 2 or not preserve_input) and \
+                   not isalias(i_, output):
+                    _pool.add(i_)
+                i = o
+                i_ = o_
+
+            # loop over inplace operations
+            for n in range(ninplace):
+                o = _pool.view(o_, shapeouts[iop], dtypes[iop])
+                op = self.operands[iop]
+                op.direct(i, o)
+                i = o
+                iop -= 1
+
+            # get the output out of the pool
+            if not reuse_output:
+                _pool.remove(output)
+
+        # the input was added to the pool after the first out-of-place
+        # operation: take it back
+        if ngroups >= 2 and not preserve_input and \
+           not isalias(input, output):
+            _pool.remove(input)
+
+    def propagate_attributes(self, cls, attr):
+        """
+        Propagate the class and attributes through the operands, from the
+        last operand to the first one, and return the resulting class.
+
+        """
+        current = cls
+        for operand in reversed(self.operands):
+            current = operand.propagate_attributes(current, attr)
+        return current
+
+    def propagate_commin(self, commin):
+        """
+        Set the input communicator of the composition and propagate it
+        through the operands, from the last one to the first one.
+
+        Operands that already have an input communicator are left
+        untouched. The composition itself is returned.
+
+        """
+        if commin is None:
+            return self
+        self.commin = commin
+        for index, operand in reversed(list(enumerate(self.operands))):
+            if operand.commin is not None:
+                # already set: carry on with its output communicator
+                commin = operand.commout
+                continue
+            operand = operand.propagate_commin(commin)
+            self.operands[index] = operand
+            commin = operand.commout or commin
+        return self
+
+    def propagate_commout(self, commout):
+        """
+        Set the output communicator of the composition and propagate it
+        through the operands, from the first one to the last one.
+
+        Operands that already have an output communicator are left
+        untouched. The composition itself is returned.
+
+        """
+        if commout is None:
+            return self
+        self.commout = commout
+        for index, operand in enumerate(self.operands):
+            if operand.commout is not None:
+                # already set: carry on with its input communicator
+                commout = operand.commin
+                continue
+            operand = operand.propagate_commout(commout)
+            self.operands[index] = operand
+            commout = operand.commin or commout
+        return self
+
+    def _apply_rules(self, ops):
+        """
+        Group the homothety scalars, then apply the generic binary rules.
+        The operands are returned untouched if the rules are disabled.
+
+        """
+        if not po.rules.rule_manager['none']:
+            grouped = self._apply_rule_homothety(ops)
+            return NonCommutativeCompositeOperator._apply_rules(
+                self, grouped)
+        return ops
+
+    def _apply_rule_homothety(self, operands):
+        """
+        Handle the homothety operators inside each run of consecutive
+        linear operands: their scalars are grouped and, if possible,
+        absorbed by an operand of the run. Runs of non-linear operands are
+        kept as they are. A new list of operands is returned.
+
+        """
+        result = []
+        for linear, run in groupby(operands, lambda o: o.flags.linear):
+            chunk = list(run)
+            if linear:
+                chunk = self._apply_rule_homothety_linear(chunk)
+            result.extend(chunk)
+        return result
+
+    def _apply_rule_homothety_linear(self, operands):
+        """
+        Group the scalars of the homothety operators found in a list of
+        linear operands.
+
+        Every homothety is replaced by an identity and the product of the
+        scalars is absorbed by the first operand that has a composition
+        rule accepting a homothety. If no operand can absorb it, a single
+        homothety is inserted in front of the list. The list is modified
+        in place and returned.
+
+        """
+        if len(operands) <= 1:
+            return operands
+        scalar = np.array(1, bool)
+        for i, op in enumerate(operands):
+            if isinstance(op, IdentityOperator) or \
+               not isinstance(op, HomothetyOperator):
+                continue
+            scalar = scalar * op.data
+            operands[i] = _copy_direct(op, IdentityOperator())
+
+        if scalar == 1:
+            return operands
+
+        h = HomothetyOperator(scalar)
+
+        def absorb(op):
+            # Return op combined with the homothety h by one of its own
+            # composition rules, or None if no such rule applies.
+            rules = op.rules[CompositionOperator]
+            for rule in rules['left']:
+                if rule.subjects != ('.', HomothetyOperator):
+                    continue
+                try:
+                    new_op = rule(op, h)
+                except Exception:
+                    # a failing rule is skipped; KeyboardInterrupt and
+                    # SystemExit are not swallowed
+                    continue
+                if new_op is not None:
+                    return new_op
+            for rule in rules['right']:
+                if rule.subjects != (HomothetyOperator, '.'):
+                    continue
+                try:
+                    new_op = rule(h, op)
+                except Exception:
+                    continue
+                if new_op is not None:
+                    return new_op
+            return None
+
+        # can the factor be absorbed by one of the operators?
+        for iop, op in enumerate(operands):
+            if isinstance(op, IdentityOperator):
+                continue
+            if CompositionOperator not in op.rules:
+                continue
+            new_op = absorb(op)
+            if new_op is not None:
+                operands[iop] = _copy_direct(op, new_op)
+                break
+        else:
+            operands.insert(0, h)
+        return operands
+
+    def _get_info(self, input, output, preserve_input):
+        """
+        Given the context in which the composition is taking place:
+            1) input and output shape, dtype, alignment and contiguity
+            2) in-place or out-of-place composition
+            3) whether the input should be preserved,
+
+        the routine returns the requirements for the intermediate buffers of
+        the composition and the information to perform the composition:
+            1) output shape and dtype of each operator
+            2) groups of operators that will operate on the same output buffer
+        Except for the innermost group, which only contains in-place operators
+        a group is an out-of-place operator followed by a certain number of
+        in-place operators
+            3) minimum buffer size, alignment and contiguity requirements
+        for each group.
+
+        For example, in the composition of I*I*O*I*O*I*I*I*O*I (I:in-place,
+        O:out-of-place operator), the groups are 'IIO', 'IO', 'IIIO' and 'I'.
+        For 'I*O', the groups are 'IO' and an empty group ''.
+
+        The result is cached in self._info, keyed by the context.
+
+        """
+        shapein = input.shape
+        shapeout = output.shape
+        dtypein = input.dtype
+        dtypeout = output.dtype
+        alignedin = input.__array_interface__['data'][0] \
+            % MEMORY_ALIGNMENT == 0
+        alignedout = output.__array_interface__['data'][0] \
+            % MEMORY_ALIGNMENT == 0
+        contiguousin = input.flags.contiguous
+        contiguousout = output.flags.contiguous
+
+        # the context of the call is the key of the cache
+        id_ = (shapein, shapeout, dtypein, dtypeout, alignedin, alignedout,
+               contiguousin, contiguousout, preserve_input)
+
+        try:
+            return self._info[id_]
+        except KeyError:
+            pass
+
+        # output shape of each operand (the trailing input shape of the
+        # composition is dropped)
+        shapes = self._get_shapes(shapein, shapeout, self.operands)[:-1]
+        if None in shapes:
+            raise ValueError(
+                "The composition of an unconstrained input shape operator by a"
+                "n unconstrained output shape operator is ambiguous.")
+        dtypes = self._get_dtypes(input.dtype)
+        # size in bytes of the output of each operand
+        sizes = [product(s) * d.itemsize for s, d in izip(shapes, dtypes)]
+
+        ninplaces, aligneds, contiguouss = self._get_requirements()
+
+        # make last operand out-of-place
+        if preserve_input and self.operands[-1].flags.inplace or \
+           not alignedin and aligneds[-1] or \
+           not contiguousin and contiguouss[-1]:
+            assert ninplaces[-1] > 0
+            ninplaces[-1] -= 1
+            ninplaces += [0]
+            aligneds += [alignedin]
+            contiguouss += [contiguousin]
+
+        # make first operand out-of-place
+        if sizes[0] < max([s for s in sizes[:ninplaces[0]+1]]) or \
+           not alignedout and aligneds[0] or \
+           not contiguousout and contiguouss[0]:
+            assert ninplaces[0] > 0
+            ninplaces[0] -= 1
+            ninplaces.insert(0, 0)
+            aligneds.insert(0, alignedout)
+            contiguouss.insert(0, contiguousout)
+
+        bufsizes = self._get_bufsizes(sizes, ninplaces)
+
+        v = shapes, dtypes, ninplaces, bufsizes, aligneds, contiguouss
+        self._info[id_] = v
+
+        return v
+
+    def _get_bufsizes(self, sizes, ninplaces):
+        """
+        Return the buffer size required by each group of operators.
+
+        For every group but the last one, it is the largest of the output
+        sizes of the group's operators (one out-of-place operator and its
+        in-place operators). For the last group, it is the last size.
+
+        """
+        result = []
+        start = 0
+        for ninplace in ninplaces[:-1]:
+            stop = start + ninplace + 1
+            result.append(max(sizes[start:stop]))
+            start = stop
+        result.append(sizes[-1])
+        return result
+
+    def _get_dtypes(self, dtype):
+        """
+        Return the output dtype of each operand, in the order of the
+        operands, for an input of the composition of the given dtype.
+
+        """
+        collected = []
+        current = dtype
+        # the last operand is the first one to be applied
+        for operand in reversed(self.operands):
+            current = self._find_common_type([current, operand.dtype])
+            collected.append(current)
+        collected.reverse()
+        return collected
+
+    def _get_requirements(self):
+        """
+        Partition the operands into groups and return, for each group, the
+        number of in-place operators and the alignment and contiguity
+        requirements of the buffer the group operates on.
+
+        The operands are scanned from the last one to the first one. A
+        group is closed by the first operator that is not in-place, or by
+        the end of the list. The three returned lists are ordered like
+        the operands, the group containing the first operand coming first.
+
+        """
+        aligneds = []
+        contiguouss = []
+        ninplaces = []
+        ninplace = 0
+        aligned = False
+        contiguity = False
+        iop = len(self.operands) - 1
+
+        # loop over operators
+        while iop >= 0:
+
+            # loop over in-place operators
+            while iop >= 0:
+                op = self.operands[iop]
+                iop -= 1
+                if not op.flags.inplace:
+                    # the out-of-place operator reads from the buffer of
+                    # the current group: account for its input
+                    # requirements and close the group
+                    aligned = max(aligned, op.flags.aligned_input)
+                    contiguity = max(contiguity, op.flags.contiguous_input)
+                    break
+                ninplace += 1
+                aligned = max(aligned, op.flags.aligned_input)
+                contiguity = max(contiguity, op.flags.contiguous_input)
+
+            ninplaces.insert(0, ninplace)
+            aligneds.insert(0, aligned)
+            contiguouss.insert(0, contiguity)
+
+            # the next group starts with the output requirements of the
+            # operator that closed the current one
+            ninplace = 0
+            aligned = op.flags.aligned_output
+            contiguity = op.flags.contiguous_output
+
+        # if the first operand is out-of-place, it writes into a group of
+        # its own, which has no in-place operator
+        if not op.flags.inplace:
+            ninplaces.insert(0, ninplace)
+            aligneds.insert(0, aligned)
+            contiguouss.insert(0, contiguity)
+
+        return ninplaces, aligneds, contiguouss
+
+    @staticmethod
+    def _get_shapes(shapein, shapeout, operands):
+        """
+        Return the list of the shapes involved in the composition: the
+        output shape, the intermediate shapes and the input shape.
+
+        Unknown shapes are None. A ValueError is raised if the shapes
+        inferred from the operands are inconsistent.
+
+        """
+        nops = len(operands)
+        shapes = [shapeout] + (nops - 1) * [None] + [shapein]
+
+        # first pass, from the last operand to the first one: infer the
+        # output shape of each operand from its input shape
+        for k in reversed(range(nops)):
+            operand = operands[k]
+            if shapes[k+1] is not None:
+                shape = tointtuple(operand.reshapein(shapes[k+1]))
+            else:
+                shape = operand.shapeout
+            if k == 0 and None not in (shapes[0], shape) and \
+               shape != shapes[0]:
+                raise ValueError("Incompatible shape in composition.")
+            if shape is not None:
+                shapes[k] = shape
+
+        # second pass, from the first operand to the last one: infer the
+        # input shape of each operand from its output shape
+        for k, operand in enumerate(operands):
+            if shapes[k] is not None:
+                shape = tointtuple(operand.reshapeout(shapes[k]))
+            else:
+                shape = operand.shapein
+            if None not in (shapes[k+1], shape) and shape != shapes[k+1]:
+                raise ValueError("Incompatible shape in composition.")
+            if shape is not None:
+                shapes[k+1] = shape
+
+        return shapes
+
+    @classmethod
+    def _get_attributes(cls, operands, **keywords):
+        """
+        Return the keywords used to initialise the composition of the
+        operands. The input-side attributes are taken from the last
+        operand (or merged starting from it), the output-side attributes
+        from the first one. The extra keywords override the derived ones.
+
+        """
+        outer, inner = operands[0], operands[-1]
+        backward = operands[::-1]
+        shapes = cls._get_shapes(inner.shapein, outer.shapeout, operands)
+        attr = dict(
+            attrin=cls._merge_attr([o.attrin for o in operands]),
+            attrout=cls._merge_attr([o.attrout for o in backward]),
+            classin=first_is_not((o.classin for o in backward), None),
+            classout=first_is_not((o.classout for o in operands), None),
+            commin=first_is_not((o.commin for o in backward), None),
+            commout=first_is_not((o.commout for o in operands), None),
+            dtype=cls._find_common_type(o.dtype for o in operands),
+            flags=cls._merge_flags(operands),
+            reshapein=cls._merge_reshapein(operands),
+            reshapeout=cls._merge_reshapeout(operands),
+            shapein=shapes[-1],
+            shapeout=shapes[0],
+            toshapein=inner.toshapein,
+            toshapeout=outer.toshapeout,
+            validatein=inner.validatein,
+            validateout=outer.validateout,
+        )
+        attr.update(keywords)
+        return attr
+
+    @classmethod
+    def _merge(cls, op, op1, op2):
+        """
+        Reset the attributes of op so that it has the shapein, shapeout,
+        etc. of the composition op1*op2. Nothing is done if one of the two
+        operands is itself a composition.
+
+        """
+        # the merging has already been done in that case
+        if isinstance(op1, CompositionOperator) or \
+           isinstance(op2, CompositionOperator):
+            return
+        merged = cls._get_attributes([op1, op2], flags=op.flags)
+        op._reset(**merged)
+
+    @staticmethod
+    def _merge_attr(attrs):
+        """
+        Merge a list of attribute specifications, each of which is None,
+        a dict or a callable updating a dict in place.
+
+        Return None if all the specifications are None, a merged dict if
+        none of them is a callable (later dicts take precedence), and
+        otherwise a function that applies the specifications, in order,
+        to the dict it is given. None entries are ignored.
+
+        """
+        if all(a is None for a in attrs):
+            return None
+        if all(a is None or isinstance(a, dict) for a in attrs):
+            attr = {}
+            for a in attrs:
+                if a is not None:
+                    attr.update(a)
+            return attr
+
+        def func(attr):
+            for a in attrs:
+                if a is None:
+                    # nothing to contribute: calling None would fail
+                    continue
+                if isinstance(a, dict):
+                    attr.update(a)
+                else:
+                    a(attr)
+        return func
+
+    @staticmethod
+    def _merge_flags(operands):
+        """
+        Return the flags of the composition of the operands: the
+        'linear', 'real', 'square' and 'separable' flags must hold for
+        every operand, the output-side flags are those of the first
+        operand and the input-side flags those of the last one.
+
+        """
+        flags = {}
+        for name in ('linear', 'real', 'square', 'separable'):
+            flags[name] = all(getattr(op.flags, name) for op in operands)
+        outer = operands[0].flags
+        inner = operands[-1].flags
+        flags['update_output'] = outer.update_output
+        flags['aligned_input'] = inner.aligned_input
+        flags['aligned_output'] = outer.aligned_output
+        flags['contiguous_input'] = inner.contiguous_input
+        flags['contiguous_output'] = outer.contiguous_output
+        return flags
+
+    @staticmethod
+    def _merge_reshapein(operands):
+        """
+        Return the function giving the output shape of the composition
+        from its input shape, or None if the output shape of one of the
+        operands is not implicit.
+
+        """
+        implicit = all(o.flags.shape_output == 'implicit' for o in operands)
+        if not implicit:
+            return None
+        if all(o.flags.square for o in operands):
+            return operands[-1].reshapein
+
+        def reshapein(shape):
+            # chain the operands, starting from the last one
+            for operand in operands[::-1]:
+                shape = tointtuple(operand.reshapein(shape))
+            return shape
+        return reshapein
+
+    @staticmethod
+    def _merge_reshapeout(operands):
+        """
+        Return the function giving the input shape of the composition
+        from its output shape, or None if the input shape of one of the
+        operands is not implicit.
+
+        """
+        implicit = all(o.flags.shape_input == 'implicit' for o in operands)
+        if not implicit:
+            return None
+        if all(o.flags.square for o in operands):
+            return operands[0].reshapeout
+
+        def reshapeout(shape):
+            # chain the operands, starting from the first one
+            for operand in operands:
+                shape = tointtuple(operand.reshapeout(shape))
+            return shape
+        return reshapeout
+
+    def _validate_comm(self, operands):
+        """
+        Check that, for each pair of adjacent operands, the input
+        communicator of the left one and the output communicator of the
+        right one are the same object when both are set. The operands
+        are returned unchanged.
+
+        """
+        for k in range(len(operands) - 1):
+            commin = operands[k].commin
+            commout = operands[k+1].commout
+            if None in (commin, commout):
+                continue
+            if commin is not commout:
+                raise ValueError('The MPI communicators are incompatible.')
+        return operands
+
+    def __str__(self):
+        """
+        Return a string representation of the composition.
+
+        Consecutive linear operands are written as a product 'A * B',
+        which is applied to what is on its right. For the other operands,
+        the string of the operand is applied as a function, or the
+        ellipsis it contains is replaced by its argument.
+
+        """
+        # a single operand is represented by itself (the former test on a
+        # length of zero indexed an empty list)
+        if len(self.operands) == 1:
+            return str(self.operands[0])
+
+        s = ''
+        for linear, group in groupby(reversed(self.operands),
+                                     lambda _: _.flags.linear):
+            group = tuple(group)
+            if linear:
+                s_group = ' * '.join(str(_) for _ in reversed(group))
+                if len(s) == 0:
+                    s = s_group
+                    continue
+                need_protection = len(group) > 1 or ' ' in s_group
+                if need_protection:
+                    s = '({0})({1})'.format(s_group, s)
+                else:
+                    s = s_group + '({0})'.format(s)
+                continue
+            for op in group:
+                s_op = str(op)
+                if len(s) == 0:
+                    s = s_op
+                    continue
+                if '...' not in s_op:
+                    s = '{0}({1})'.format(s_op, s)
+                    continue
+                protected = '...,' in s_op or ', ...' in s_op
+                need_protection = ' ' in s #XXX fail for f(..., z=1)
+                if not protected and need_protection:
+                    s = s_op.replace('...', '({0})'.format(s))
+                else:
+                    s = s_op.replace('...', s)
+        return s
+
+
+class GroupOperator(CompositionOperator):
+    """
+    CompositionOperator subclass, without the associativity rules.
+
+    Use this operator to make sure that properties such as dtype are not
+    lost by composing with other operators.
+
+    """
+    def __init__(self, operands, **keywords):
+        CompositionOperator.__init__(self, operands, **keywords)
+        # NOTE(review): presumably guards against the instance having been
+        # given another class during the parent initialisation -- confirm.
+        if not isinstance(self, GroupOperator):
+            return
+
+        # for a real operator with a complex dtype, the rules registered
+        # for the transpose and for the adjoint are swapped
+        dtype = self._find_common_type(o.dtype for o in self.operands)
+        switch_T_H = self.flags.real and dtype is not None and \
+            dtype.kind == 'c'
+        if switch_T_H:
+            T, H, IT, IH = 'H', 'T', 'IH', 'IT'
+        else:
+            T, H, IT, IH = 'T', 'H', 'IT', 'IH'
+
+        # same unary rules as CompositionOperator, except that the result
+        # is a GroupOperator named after this group
+        self.set_rule('C', lambda s: GroupOperator(
+            [m.C for m in s.operands], name=self.__name__ + '.C'))
+        self.set_rule(T, lambda s: GroupOperator(
+            [m.T for m in s.operands[::-1]], name=self.__name__ + '.T'))
+        self.set_rule(H, lambda s: GroupOperator(
+            [m.H for m in s.operands[::-1]], name=self.__name__ + '.H'))
+        self.set_rule('I', lambda s: GroupOperator(
+            [m.I for m in s.operands[::-1]], name=self.__name__ + '.I'))
+        self.set_rule('IC', lambda s: GroupOperator(
+            [m.I.C for m in s.operands[::-1]], name=self.__name__ + '.I.C'))
+        self.set_rule(IT, lambda s: GroupOperator(
+            [m.I.T for m in s.operands], name=self.__name__ + '.I.T'))
+        self.set_rule(IH, lambda s: GroupOperator(
+            [m.I.H for m in s.operands], name=self.__name__ + '.I.H'))
+        # remove the associativity rules set by CompositionOperator, so
+        # that the group is not flattened into other compositions
+        self.del_rule(('.', CompositionOperator), CompositionOperator)
+        self.del_rule(('.', Operator), CompositionOperator)
+        self.del_rule((Operator, '.'), CompositionOperator)
+
+    # a group is kept as a group, even if it has a single operand
+    morph_single_operand = False
+
+
+class BlockOperator(NonCommutativeCompositeOperator):
+    """
+    Abstract base class for BlockColumnOperator, BlockDiagonalOperator and
+    BlockRowOperator.
+
+    """
+    def __init__(self, operands, partitionin=None, partitionout=None,
+                 axisin=None, axisout=None, new_axisin=None, new_axisout=None,
+                 **keywords):
+        """
+        Initialise a block operator.
+
+        With a single operand, the instance takes the class and a copy of
+        the attributes of that operand. Otherwise, depending on which
+        axis keywords are given, the instance is turned into a
+        BlockRowOperator, a BlockColumnOperator or a
+        BlockDiagonalOperator and re-initialised as such.
+
+        Raise ValueError if the partitions or the axis keywords are
+        inconsistent.
+
+        """
+
+        operands = self._validate_operands(operands)
+        if len(operands) == 1:
+            self.__class__ = operands[0].__class__
+            self.__dict__ = operands[0].__dict__.copy()
+            return
+
+        # dispatch to the relevant subclass according to the axis keywords
+        # NOTE(review): the extra keywords are not forwarded by the three
+        # re-initialisation calls below -- confirm that this is intended.
+        if not isinstance(self, BlockRowOperator) and axisout is None and \
+           new_axisout is None:
+            self.__class__ = BlockRowOperator
+            self.__init__(operands, partitionin, axisin, new_axisin)
+            return
+        if not isinstance(self, BlockColumnOperator) and axisin is None and \
+           new_axisin is None:
+            self.__class__ = BlockColumnOperator
+            self.__init__(operands, partitionout, axisout, new_axisout)
+            return
+        if type(self) is BlockOperator:
+            self.__class__ = BlockDiagonalOperator
+            self.__init__(operands, partitionin, axisin, axisout, new_axisin,
+                          new_axisout)
+            return
+
+        # from now on, self is of type Block(Column|Diagonal|Row)Operator
+        # along a new axis, each block has a partition of one
+        if new_axisin is not None:
+            if partitionin is None:
+                partitionin = len(operands) * (1,)
+            elif any(p not in (None, 1) for p in partitionin):
+                raise ValueError(
+                    'If the block operator input shape has one more dimension '
+                    'than its blocks, the input partition must be a tuple of o'
+                    'nes.')
+        if new_axisout is not None:
+            if partitionout is None:
+                partitionout = len(operands) * (1,)
+            elif any(p not in (None, 1) for p in partitionout):
+                raise ValueError(
+                    'If the block operator output shape has one more dimension'
+                    ' than its blocks, the output partition must be a tuple of'
+                    ' ones.')
+
+        if axisin is not None and new_axisin is not None:
+            raise ValueError("The keywords 'axisin' and 'new_axisin' are exclu"
+                             "sive.")
+        if axisout is not None and new_axisout is not None:
+            raise ValueError("The keywords 'axisout' and 'new_axisout' are exc"
+                             "lusive.")
+
+        if partitionin is partitionout is None:
+            raise ValueError('No partition is provided.')
+        # the unknown (None) elements of a partition are filled in with
+        # the values inferred from the operands and the other partition
+        if partitionin is not None:
+            if len(partitionin) != len(operands):
+                raise ValueError('The number of operators must be the same as '
+                                 'the length of the input partition.')
+            partitionin = merge_none(partitionin, self._get_partitionin(
+                operands, partitionout, axisin, axisout, new_axisin,
+                new_axisout))
+        if partitionout is not None:
+            if len(partitionout) != len(operands):
+                raise ValueError('The number of operators must be the same as '
+                                 'the length of the output partition.')
+            partitionout = merge_none(partitionout, self._get_partitionout(
+                                      operands, partitionin, axisin, axisout,
+                                      new_axisin, new_axisout))
+
+        self.partitionin = tointtuple(partitionin)
+        self.partitionout = tointtuple(partitionout)
+        self.axisin = axisin
+        self.new_axisin = new_axisin
+        self.axisout = axisout
+        self.new_axisout = new_axisout
+
+        keywords = self._get_attributes(operands, **keywords)
+        CompositeOperator.__init__(self, operands, **keywords)
+
+        # when the shapes are known, negative axes are converted into
+        # non-negative ones
+        if self.shapein is not None:
+            n = len(self.shapein)
+            if self.axisin is not None and self.axisin < 0:
+                self.axisin += n
+            elif self.new_axisin is not None and self.new_axisin < 0:
+                self.new_axisin += n
+        if self.shapeout is not None:
+            n = len(self.shapeout)
+            if self.axisout is not None and self.axisout < 0:
+                self.axisout += n
+            elif self.new_axisout is not None and self.new_axisout < 0:
+                self.new_axisout += n
+
+        # unary rules: the input and output arguments are swapped for the
+        # transpose and the adjoint
+        self.set_rule('C', lambda s: BlockOperator(
+            [op.C for op in s.operands], s.partitionin, s.partitionout,
+            s.axisin, s.axisout, s.new_axisin, s.new_axisout))
+        self.set_rule('T', lambda s: BlockOperator(
+            [op.T for op in s.operands], s.partitionout, s.partitionin,
+            s.axisout, s.axisin, s.new_axisout, s.new_axisin))
+        self.set_rule('H', lambda s: BlockOperator(
+            [op.H for op in s.operands], s.partitionout, s.partitionin,
+            s.axisout, s.axisin, s.new_axisout, s.new_axisin))
+
+        # the inverse rules are only set for block diagonal operators
+        if isinstance(self, BlockDiagonalOperator):
+            self.set_rule('I', lambda s: type(s)(
+                [op.I for op in s.operands], s.partitionout, s.axisout,
+                s.axisin, s.new_axisout, s.new_axisin))
+            self.set_rule('IC', lambda s: type(s)(
+                [op.I.C for op in s.operands], s.partitionout, s.axisout,
+                s.axisin, s.new_axisout, s.new_axisin))
+            self.set_rule('IT', lambda s: type(s)(
+                [op.I.T for op in s.operands], s.partitionin, s.axisin,
+                s.axisout, s.new_axisin, s.new_axisout))
+            self.set_rule('IH', lambda s: type(s)(
+                [o.I.H for o in s.operands], s.partitionin, s.axisin,
+                s.axisout, s.new_axisin, s.new_axisout))
+
+        # binary rules for the addition, multiplication and composition
+        # with other operators
+        self.set_rule(('.', Operator), self._rule_operator_add,
+                      AdditionOperator)
+        self.set_rule(('.', Operator), self._rule_operator_mul,
+                      MultiplicationOperator)
+        self.set_rule(('.', Operator), self._rule_operator_rcomp,
+                      CompositionOperator)
+        self.set_rule((Operator, '.'), self._rule_operator_lcomp,
+                      CompositionOperator)
+        self.set_rule(('.', type(self)), self._rule_blocksameoperator_add,
+                      AdditionOperator)
+        self.set_rule(('.', type(self)), self._rule_blocksameoperator_mul,
+                      MultiplicationOperator)
+        self.set_rule(('.', BlockOperator), self._rule_blockoperator_comp,
+                      CompositionOperator)
+
+    def __mul__(self, other):
+        """
+        Handle `self * other`.
+
+        If `other` is a non-linear block operator and the pair is
+        row/diagonal, diagonal/column or row/column, the non-commutative
+        block rule is tried with MultiplicationOperator. In all other
+        cases, or if that rule does not apply, the generic implementation
+        is used.
+
+        """
+        if isinstance(other, BlockOperator) and not other.flags.linear:
+            self_row = isinstance(self, BlockRowOperator)
+            self_diag = isinstance(self, BlockDiagonalOperator)
+            other_diag = isinstance(other, BlockDiagonalOperator)
+            other_col = isinstance(other, BlockColumnOperator)
+            if self_row and (other_diag or other_col) or \
+               self_diag and other_col:
+                product_ = self._rule_blockoperator_noncommutative(
+                    self, other, MultiplicationOperator)
+                if product_ is not None:
+                    return product_
+        return NonCommutativeCompositeOperator.__mul__(self, other)
+
+    def toshapein(self, v):
+        """
+        Reshape the array 'v' so that it is a valid input of the operator.
+
+        The explicit input shape is used when known. Without an input
+        partition, the first operand decides. Otherwise the array is made
+        two-dimensional, the partitioned axis (which must be the first or
+        the last one) having the total size of the partition.
+
+        Raises ValueError if the partition is not fully known or if the
+        partition axis is neither 0 nor -1.
+
+        """
+        if self.shapein is not None:
+            return v.reshape(self.shapein)
+        partition = self.partitionin
+        if partition is None:
+            return self.operands[0].toshapein(v)
+        axis = self.new_axisin if self.axisin is None else self.axisin
+        if axis not in (0, -1) or None in partition:
+            raise ValueError('Ambiguous reshaping.')
+        total = sum(partition)
+        if v.size == total:
+            return v
+        newshape = (total, -1) if axis == 0 else (-1, total)
+        return v.reshape(newshape)
+
+    def toshapeout(self, v):
+        """
+        Reshape the array 'v' so that it is a valid output of the operator.
+
+        The explicit output shape is used when known. Without an output
+        partition, the first operand decides. Otherwise the array is made
+        two-dimensional, the partitioned axis (which must be the first or
+        the last one) having the total size of the partition.
+
+        Raises ValueError if the partition is not fully known or if the
+        partition axis is neither 0 nor -1.
+
+        """
+        if self.shapeout is not None:
+            return v.reshape(self.shapeout)
+        partition = self.partitionout
+        if partition is None:
+            return self.operands[0].toshapeout(v)
+        axis = self.new_axisout if self.axisout is None else self.axisout
+        if axis not in (0, -1) or None in partition:
+            raise ValueError('Ambiguous reshaping.')
+        total = sum(partition)
+        if v.size == total:
+            return v
+        newshape = (total, -1) if axis == 0 else (-1, total)
+        return v.reshape(newshape)
+
+    def _get_attributes(self, operands, **keywords):
+        """
+        Return the attributes of the block operator derived from its operands.
+
+        Parameters
+        ----------
+        operands : Operator list
+            The blocks. As a side effect they are stored in self.operands.
+        **keywords
+            Attributes overriding the derived ones. The 'flags' keyword is
+            not substituted but validated and merged into the derived flags.
+
+        Returns
+        -------
+        attr : dict
+            The attrin/attrout, classin/classout, commin/commout, dtype,
+            flags, shapein and shapeout attributes.
+
+        """
+        # UGLY HACK: required by self.reshapein/out. It may be better to make
+        # the _get_attributes a class method, pass all partitionin/out etc
+        # stuff and inline the reshapein/out methods to get shapein/shapeout.
+        self.operands = operands
+
+        attr = {
+            'attrin': first_is_not((o.attrin for o in operands), None),
+            'attrout': first_is_not((o.attrout for o in operands), None),
+            'classin': first_is_not((o.classin for o in operands), None),
+            'classout': first_is_not((o.classout for o in operands), None),
+            'commin': first_is_not((o.commin for o in operands), None),
+            'commout': first_is_not((o.commout for o in operands), None),
+            'dtype': self._find_common_type(o.dtype for o in operands),
+            'flags': self._merge_flags(operands),
+            # reshapeout maps an output shape to an input shape (and
+            # reshapein the converse): called with None, they return the
+            # composite shape that can be inferred from the operands alone.
+            'shapein': self.reshapeout(None),
+            'shapeout': self.reshapein(None),
+        }
+        for k, v in keywords.items():
+            # compare by value: identity of string objects is an
+            # implementation detail and must not be relied upon
+            if k != 'flags':
+                attr[k] = v
+        attr['flags'].update(
+            Operator.validate_flags(keywords.get('flags', {})))
+        return attr
+
+    @staticmethod
+    def _get_partition(shapes, axis, new_axis):
+        """
+        Return the partition implied by the operand shapes.
+
+        With a new axis, every block has size 1. Otherwise the size of a
+        block is its dimension along 'axis', or None if its shape is None.
+
+        """
+        if new_axis is None:
+            return tuple(s if s is None else s[axis] for s in shapes)
+        return (1,) * len(shapes)
+
+    @staticmethod
+    def _get_partitionin(ops, partitionout, axisin, axisout, new_axisin,
+                         new_axisout):
+        """
+        Infer the input partition from the output partition.
+
+        Parameters
+        ----------
+        ops : Operator list
+            The blocks.
+        partitionout : sequence of int or None, or None
+            The output partition, one entry per block.
+        axisin, axisout : int or None
+            The input and output partition axes.
+        new_axisin, new_axisout : int or None
+            The input and output new axes.
+
+        Returns
+        -------
+        partitionin : tuple or list
+            One entry per block: its size along the input partition axis, or
+            None if it cannot be determined. Note that a list (not a tuple) is
+            returned when 'partitionout' is None.
+
+        """
+        # along a new axis, each block takes exactly one element
+        if new_axisin is not None:
+            return len(ops) * (1,)
+
+        # no output partition to infer from: only the explicit input shapes
+        # can be used
+        if partitionout is None:
+            return [o.shapein[axisin] if o.shapein else None for o in ops]
+
+        # smallest number of dimensions for which 'axisout' is a valid index
+        if new_axisout is None:
+            ndim_min = axisout+1 if axisout >= 0 else -axisout
+        else:
+            ndim_min = 0
+        partitionin = len(ops) * [None]
+        for i, op in enumerate(ops):
+            # explicit input shape: read the size directly
+            if op.shapein is not None:
+                partitionin[i] = op.shapein[axisin]
+                continue
+            if partitionout[i] is None:
+                continue
+            pin = []
+            # for implicit input shape operators, we should make sure that
+            # partitionin does not depend on the rank of the output
+            # NOTE(review): the upper bound 33 presumably stems from numpy's
+            # limit of 32 dimensions -- confirm
+            for ndim in range(ndim_min, 33):
+                # probe with an output shape made of zeros, except for the
+                # block size along the partition axis
+                shapeout = ndim * [0]
+                if new_axisout is None:
+                    shapeout[axisout] = partitionout[i]
+                try:
+                    shapein = tointtuple(op.reshapeout(tuple(shapeout)))
+                    pin.append(shapein[axisin])
+                except IndexError:
+                    # this rank is skipped
+                    continue
+            # the size is kept only if all the probed ranks agree
+            if len(pin) == 0 or any(p != pin[0] for p in pin):
+                continue
+            partitionin[i] = pin[0]
+        return tuple(partitionin)
+
+    @staticmethod
+    def _get_partitionout(ops, partitionin, axisin, axisout, new_axisin,
+                          new_axisout):
+        """
+        Infer the output partition from the input partition.
+
+        Parameters
+        ----------
+        ops : Operator list
+            The blocks.
+        partitionin : sequence of int or None, or None
+            The input partition, one entry per block.
+        axisin, axisout : int or None
+            The input and output partition axes.
+        new_axisin, new_axisout : int or None
+            The input and output new axes.
+
+        Returns
+        -------
+        partitionout : tuple or list
+            One entry per block: its size along the output partition axis, or
+            None if it cannot be determined. Note that a list (not a tuple) is
+            returned when 'partitionin' is None.
+
+        """
+        # along a new axis, each block produces exactly one element
+        if new_axisout is not None:
+            return len(ops) * (1,)
+
+        # no input partition to infer from: only the explicit output shapes
+        # can be used
+        if partitionin is None:
+            return [o.shapeout[axisout] if o.shapeout else None for o in ops]
+
+        # smallest number of dimensions for which 'axisin' is a valid index
+        if new_axisin is None:
+            ndim_min = axisin+1 if axisin >= 0 else -axisin
+        else:
+            ndim_min = 0
+        partitionout = len(ops) * [None]
+        for i, op in enumerate(ops):
+            # explicit output shape: read the size directly
+            if op.shapeout is not None:
+                partitionout[i] = op.shapeout[axisout]
+                continue
+            if partitionin[i] is None:
+                continue
+            pout = []
+            # for implicit output shape operators, we should make sure that
+            # partitionout does not depend on the rank of the input
+            # NOTE(review): the upper bound 33 presumably stems from numpy's
+            # limit of 32 dimensions -- confirm
+            for ndim in range(ndim_min, 33):
+                # probe with an input shape made of zeros, except for the
+                # block size along the partition axis
+                shapein = ndim * [0]
+                if new_axisin is None:
+                    shapein[axisin] = partitionin[i]
+                try:
+                    shapeout = tointtuple(op.reshapein(tuple(shapein)))
+                    pout.append(shapeout[axisout])
+                except IndexError:
+                    # this rank is skipped
+                    continue
+            # the size is kept only if all the probed ranks agree
+            if len(pout) == 0 or any(p != pout[0] for p in pout):
+                continue
+            partitionout[i] = pout[0]
+        return tuple(partitionout)
+
+    @staticmethod
+    def _get_shape_composite(shapes, p, axis, new_axis):
+        """
+        Return composite shape from operand shapes.
+
+        Parameters
+        ----------
+        shapes : sequence of tuple or None
+            The shapes of the blocks, None standing for an unknown shape.
+        p : sequence of int or None, or None
+            The partition, or None if the operands are not partitioned.
+        axis : int or None
+            The partition axis.
+        new_axis : int or None
+            The axis along which the blocks are stacked.
+
+        Returns
+        -------
+        shape : tuple of int or None
+            The shape of the composite, or None if no block has a known shape
+            or if the partition is still incomplete once it has been merged
+            with the block shapes.
+
+        Raises
+        ------
+        ValueError
+            If the block shapes are incompatible with each other or with the
+            partition.
+
+        """
+        explicit = [s for s in shapes if s is not None]
+        if len(explicit) == 0:
+            return None
+        shape = explicit[0]
+
+        # not partitioned or stacked along a new axis: all the known shapes
+        # must be identical
+        if p is None or new_axis is not None:
+            if any(s != shape for s in explicit):
+                raise ValueError("The operands have incompatible shapes: '{0}'"
+                                 ".".format(shapes))
+            if p is None:
+                return shape
+            # a negative new axis is counted from the end of the composite
+            # shape, which has one more dimension than the blocks
+            a = new_axis
+            if new_axis < 0:
+                a += len(shape) + 1
+            return shape[:a] + (len(p),) + shape[a:]
+
+        # partitioned along an existing axis: same rank for all the blocks
+        rank = len(shape)
+        if any(len(s) != rank for s in explicit):
+            raise ValueError(
+                "The blocks do not have the same number of dimensions: '{0}'.".
+                format(shapes))
+        # the known block sizes must match the known partition entries
+        if any(shapes[i] is not None and shapes[i][axis] != p[i]
+                for i in range(len(p)) if p[i] is not None):
+            raise ValueError(
+                "The blocks have shapes '{0}' incompatible with the partition "
+                "{1}.".format(shapes, p))
+        # the dimensions other than the partitioned one must be the same
+        if len(explicit) != 1:
+            ok = [all(s is None or s[i] == shape[i] for s in shapes)
+                  for i in range(rank)]
+            ok[axis] = True
+            if not all(ok):
+                raise ValueError(
+                    "The dimensions of the blocks '{0}' are not the same along"
+                    " axes other than that of the partition '{1}'.".format(
+                    shapes, p))
+
+        # combine the partition with the sizes given by the block shapes
+        p = merge_none(p, [s[axis] if s is not None else None for s in shapes])
+        if None in p:
+            return None
+
+        # the composite size along the partition axis is the sum of the sizes
+        # of the blocks
+        shape = list(shape)
+        shape[axis] = sum(p)
+        return tointtuple(shape)
+
+    @staticmethod
+    def _get_shape_operands(shape, partition, partition_other, axis, new_axis):
+        """
+        Return operand shapes from composite shape.
+
+        Without a partition, every operand gets the composite shape (their
+        number is given by the other partition). If the partition or the
+        composite shape is not fully known, the operand shapes are None.
+        Otherwise, the new axis is removed from the composite shape or the
+        partitioned dimension is replaced by the size of each block.
+
+        """
+        if partition is None:
+            return (shape,) * len(partition_other)
+        nblocks = len(partition)
+        if shape is None or None in partition:
+            return (None,) * nblocks
+        if new_axis is not None:
+            reduced = list(shape)
+            del reduced[new_axis]
+            return (tuple(reduced),) * nblocks
+
+        def block_shape(size):
+            dims = list(shape)
+            dims[axis] = size
+            return tuple(dims)
+
+        return tuple(block_shape(size) for size in partition)
+
+    @staticmethod
+    def _get_slices(partition, axis, new_axis):
+        """
+        Return an iterator of the block slices.
+
+        Each item is a list suitable for indexing an array: an integer
+        index along the new axis if there is one, otherwise a slice of the
+        block extent along the partition axis. An Ellipsis stands for the
+        dimensions that are not addressed.
+
+        """
+        stacked = new_axis is not None
+        if stacked:
+            axis = new_axis
+        whole = slice(None)
+        if axis >= 0:
+            template = [whole] * (axis + 1) + [Ellipsis]
+        else:
+            template = [Ellipsis] + [whole] * (-axis)
+        start = 0
+        for size in partition:
+            index = list(template)
+            index[axis] = start if stacked else slice(start, start + size)
+            start += size
+            yield index
+
+    def get_slicesin(self, partitionin=None):
+        """
+        Return an iterator of the block input slices.
+
+        The input partition of the operator is used unless another one is
+        given.
+
+        """
+        partition = self.partitionin if partitionin is None else partitionin
+        return self._get_slices(partition, self.axisin, self.new_axisin)
+
+    def get_slicesout(self, partitionout=None):
+        """
+        Return an iterator of the block output slices.
+
+        The output partition of the operator is used unless another one is
+        given.
+
+        """
+        partition = self.partitionout if partitionout is None else \
+            partitionout
+        return self._get_slices(partition, self.axisout, self.new_axisout)
+
+    @staticmethod
+    def _merge_flags(operands):
+        """
+        Return the flags shared by all the operands.
+
+        The composite is linear (resp. real) if and only if every operand
+        is linear (resp. real).
+
+        """
+        merged = {}
+        for name in ('linear', 'real'):
+            merged[name] = all(getattr(op.flags, name) for op in operands)
+        return merged
+
+    def reshapein(self, shapein):
+        """
+        Return the output shape of the block operator given its input shape.
+
+        The input shape is split into the input shapes of the blocks. Each
+        block then provides its output shape (its explicit one if its input
+        shape is unknown) and these are assembled into the composite shape.
+
+        """
+        block_shapeins = self._get_shape_operands(
+            shapein, self.partitionin, self.partitionout, self.axisin,
+            self.new_axisin)
+        block_shapeouts = []
+        for operand, shape in zip(self.operands, block_shapeins):
+            if shape is None:
+                block_shapeouts.append(operand.shapeout)
+            else:
+                block_shapeouts.append(tointtuple(operand.reshapein(shape)))
+        return self._get_shape_composite(
+            block_shapeouts, self.partitionout, self.axisout,
+            self.new_axisout)
+
+    def reshapeout(self, shapeout):
+        """
+        Return the input shape of the block operator given its output shape.
+
+        The output shape is split into the output shapes of the blocks. Each
+        block then provides its input shape (its explicit one if its output
+        shape is unknown) and these are assembled into the composite shape.
+
+        """
+        block_shapeouts = self._get_shape_operands(
+            shapeout, self.partitionout, self.partitionin, self.axisout,
+            self.new_axisout)
+        block_shapeins = []
+        for operand, shape in zip(self.operands, block_shapeouts):
+            if shape is None:
+                block_shapeins.append(operand.shapein)
+            else:
+                block_shapeins.append(tointtuple(operand.reshapeout(shape)))
+        return self._get_shape_composite(
+            block_shapeins, self.partitionin, self.axisin, self.new_axisin)
+
+    @staticmethod
+    def _validate_partition_commutative(op1, op2):
+        """
+        Check that two block operators can be combined block by block by a
+        commutative operation.
+
+        Returns the tuple (partitionout, partitionin) of the merged
+        partitions, or None if the axes or the partitions of the two block
+        operators are not compatible.
+
+        """
+        # effective axes: the partition axis if set, otherwise the new axis
+        axisin1 = op1.axisin if op1.axisin is not None else op1.new_axisin
+        axisin2 = op2.axisin if op2.axisin is not None else op2.new_axisin
+        axisout1 = op1.axisout if op1.axisout is not None else op1.new_axisout
+        axisout2 = op2.axisout if op2.axisout is not None else op2.new_axisout
+        if axisin1 != axisin2 or axisout1 != axisout2:
+            return None
+        # one operand partitions along an existing axis whereas the other one
+        # uses a new axis
+        if op1.axisin is not None and op2.new_axisin is not None or \
+           op1.new_axisin is not None and op2.axisin is not None or \
+           op1.axisout is not None and op2.new_axisout is not None or \
+           op1.new_axisout is not None and op2.axisout is not None:
+            #XXX we could handle these cases with a reshape
+            return None
+        # a ValueError from merge_none is taken as a sign that the partitions
+        # cannot be merged
+        try:
+            return merge_none(op1.partitionout, op2.partitionout), \
+                   merge_none(op1.partitionin, op2.partitionin)
+        except ValueError:
+            return None
+
+    @staticmethod
+    def _validate_partition_composition(op1, op2):
+        """
+        Check that the block operator op1 can be applied block by block to
+        the output of the block operator op2.
+
+        Returns None if the input axis or partition of op1 is not compatible
+        with the output axis or partition of op2. Otherwise, returns the tuple
+        (partitionout, partitionin), whose first element is None if op1 has no
+        output partition and whose second element is None if op2 has no input
+        partition.
+
+        """
+        # effective axes: the partition axis if set, otherwise the new axis
+        axisin1 = first_is_not([op1.axisin, op1.new_axisin], None)
+        axisout2 = first_is_not([op2.axisout, op2.new_axisout], None)
+        # negative axes are made non-negative when the rank is known, so that
+        # they can be compared
+        if axisin1 < 0 and op2.shapeout is not None:
+            axisin1 += len(op2.shapeout)
+        if axisout2 < 0 and op1.shapein is not None:
+            axisout2 += len(op1.shapein)
+        if axisin1 != axisout2:
+            return None
+        # one operand partitions along an existing axis whereas the other one
+        # uses a new axis
+        if op1.axisin is not None and op2.new_axisout is not None or \
+           op1.new_axisin is not None and op2.axisout is not None:
+            #XXX we could handle these cases with a reshape
+            return None
+        p1 = op1.partitionin
+        p2 = op2.partitionout
+        if p1 is None or p2 is None:
+            return None
+        # a ValueError from merge_none is taken as a sign that the partitions
+        # cannot be merged
+        try:
+            p = merge_none(p1, p2)
+        except ValueError:
+            return None
+        # propagate the merged inner partition to the output of op1 and to
+        # the input of op2
+        pout = None if op1.partitionout is None else op1._get_partitionout(
+            op1.operands, p, op1.axisin, op1.axisout, op1.new_axisin,
+            op1.new_axisout)
+        pin = None if op2.partitionin is None else op2._get_partitionin(
+            op2.operands, p, op2.axisin, op2.axisout, op2.new_axisin,
+            op2.new_axisout)
+
+        # a 2-tuple is returned: each element is a conditional expression
+        return None if pout is None else merge_none(op1.partitionout, pout), \
+               None if pin is None else merge_none(op2.partitionin, pin)
+
+    @staticmethod
+    def _rule_operator_commutative(self, op, cls):
+        """
+        Distribute a separable operator over the blocks.
+
+        Each block is combined with 'op' through the composite class 'cls'.
+        Return None if 'op' is not separable.
+
+        """
+        if op.flags.separable:
+            blocks = [cls([block, op]) for block in self.operands]
+            return BlockOperator(
+                blocks, self.partitionin, self.partitionout, self.axisin,
+                self.axisout, self.new_axisin, self.new_axisout)
+        return None
+
+    @staticmethod
+    def _rule_operator_add(self, op):
+        """ Rule for BlockOperator + Operator. """
+        distribute = self._rule_operator_commutative
+        return distribute(self, op, AdditionOperator)
+
+    @staticmethod
+    def _rule_operator_mul(self, op):
+        """ Rule for BlockOperator x Operator. """
+        distribute = self._rule_operator_commutative
+        return distribute(self, op, MultiplicationOperator)
+
+    @staticmethod
+    def _rule_operator_lcomp(op, self):
+        """
+        Rule for Operator(BlockOperator).
+
+        A separable operator, which is not itself a block operator, is
+        applied to each block. Return None if the rule does not apply.
+
+        """
+        if self.partitionout is None or isinstance(op, BlockOperator) or \
+           not op.flags.separable:
+            return None
+        # the output partition after 'op' has been applied to every block
+        replicated = len(self.partitionout) * [op]
+        partitionout = self._get_partitionout(
+            replicated, self.partitionout, self.axisout, self.axisout,
+            self.new_axisout, self.new_axisout)
+        blocks = [op(block) for block in self.operands]
+        return BlockOperator(
+            blocks, self.partitionin, partitionout, self.axisin,
+            self.axisout, self.new_axisin, self.new_axisout)
+
+    @staticmethod
+    def _rule_operator_rcomp(self, op):
+        """
+        Rule for BlockOperator(Operator).
+
+        Each block is applied to the separable operator. Return None if the
+        rule does not apply.
+
+        """
+        if self.partitionin is None or not op.flags.separable:
+            return None
+        # the input partition when 'op' is applied before every block
+        replicated = len(self.partitionin) * [op]
+        partitionin = self._get_partitionin(
+            replicated, self.partitionin, self.axisin, self.axisin,
+            self.new_axisin, self.new_axisin)
+        blocks = [block(op) for block in self.operands]
+        return BlockOperator(
+            blocks, partitionin, self.partitionout, self.axisin,
+            self.axisout, self.new_axisin, self.new_axisout)
+
+    @staticmethod
+    def _rule_blocksameoperator_commutative(p1, p2, operation):
+        """
+        Combine two block operators block by block.
+
+        The blocks are paired and combined through the composite class
+        'operation'. Return None if the partitions are not compatible.
+
+        """
+        merged = p1._validate_partition_commutative(p1, p2)
+        if merged is None:
+            return None
+        partitionout, partitionin = merged
+        blocks = []
+        for block1, block2 in zip(p1.operands, p2.operands):
+            blocks.append(operation([block1, block2]))
+        return BlockOperator(
+            blocks, partitionin, partitionout, p1.axisin, p1.axisout,
+            p1.new_axisin, p1.new_axisout)
+
+    @staticmethod
+    def _rule_blocksameoperator_add(p1, p2):
+        """ Rule for same type BlockOperator + BlockOperator. """
+        combine = p1._rule_blocksameoperator_commutative
+        return combine(p1, p2, AdditionOperator)
+
+    @staticmethod
+    def _rule_blocksameoperator_mul(p1, p2):
+        """ Rule for same type BlockOperator x BlockOperator. """
+        combine = p1._rule_blocksameoperator_commutative
+        return combine(p1, p2, MultiplicationOperator)
+
+    @staticmethod
+    def _rule_blockoperator_noncommutative(p1, p2, cls):
+        """
+        Combine two block operators block by block through the composite
+        class 'cls', the output of p2 being fed to p1.
+
+        Return None if the partitions are not compatible. If the result has
+        neither an input nor an output partition, the combined blocks are
+        summed; otherwise a block operator is returned, with the input side
+        of p2 and the output side of p1.
+
+        """
+        validated = p1._validate_partition_composition(p1, p2)
+        if validated is None:
+            return None
+        partitionout, partitionin = validated
+        blocks = []
+        for block1, block2 in zip(p1.operands, p2.operands):
+            blocks.append(cls([block1, block2]))
+        if partitionin is None and partitionout is None:
+            return AdditionOperator(blocks)
+        return BlockOperator(
+            blocks, partitionin, partitionout, p2.axisin, p1.axisout,
+            p2.new_axisin, p1.new_axisout)
+
+    @staticmethod
+    def _rule_blockoperator_comp(p, q):
+        """ Rule for BlockOperator(BlockOperator). """
+        combine = p._rule_blockoperator_noncommutative
+        return combine(p, q, CompositionOperator)
+
+
+class BlockDiagonalOperator(BlockOperator):
+    """
+    Block diagonal operator.
+
+    If a new axis 'new_axisin' is specified, the input shapes of the blocks
+    must be the same, and the input is iterated along this axis. Otherwise,
+    the input shapes of the blocks must be the same except for one same
+    dimension 'axisin': the axis along which the input is partitioned.
+
+    If a new axis 'new_axisout' is specified, the output shapes of the blocks
+    must be the same, and the output is stacked along this axis. Otherwise,
+    the output shapes of the blocks must be the same except for one same
+    dimension 'axisout': the axis along which the output is partitioned.
+
+    This operator can be used to process data chunk by chunk.
+
+    The direct methods of the partition operators may be called with non-C or
+    non-Fortran contiguous input and output arrays, so care must be taken when
+    interfacing C or Fortran code.
+
+    Parameters
+    ----------
+    operands : Operator list
+        Operators that will populate the diagonal blocks.
+    partitionin : tuple of int
+        Partition of the number of elements along the input partition axis, to
+        be provided if at least one of the input operators is implicit-shape
+    axisin : int
+        Input partition axis (default is the output partition axis)
+    axisout : int
+        Output partition axis (default is the input partition axis)
+    new_axisin : int
+        Input new axis (default is the output new axis)
+    new_axisout : int
+        Output new axis (default is the input new axis)
+
+    At least one of the four axis arguments must be specified, otherwise
+    a NotImplementedError is raised.
+
+    Example
+    -------
+    o1, o2 = Operator(shapein=(16,4)), Operator(shapein=(16,3))
+    p = BlockDiagonalOperator([o1, o2], axisin=-1)
+    print(p.shapein)
+    (16,7)
+
+    """
+    def __init__(self, operands, partitionin=None, axisin=None, axisout=None,
+                 new_axisin=None, new_axisout=None, **keywords):
+
+        operands = self._validate_operands(operands)
+
+        # the input and output axes default to each other
+        if axisout is None:
+            axisout = axisin
+        if new_axisout is None:
+            new_axisout = new_axisin
+        if axisin is None:
+            axisin = axisout
+        if new_axisin is None:
+            new_axisin = new_axisout
+
+        if axisin is None and new_axisin is None:
+            raise NotImplementedError('Free partitioning not implemented yet.')
+
+        # by default, the input partition is read from the operand shapes
+        if partitionin is None:
+            partitionin = self._get_partition(
+                [op.shapein for op in operands], axisin, new_axisin)
+        partitionin = tointtuple(partitionin)
+        # the output partition is passed as unknown
+        partitionout = len(partitionin) * (None,)
+
+        BlockOperator.__init__(self, operands, partitionin, partitionout,
+                               axisin, axisout, new_axisin, new_axisout,
+                               **keywords)
+
+    def direct(self, input, output):
+        # complete the output partition for the blocks whose output size is
+        # unknown, using the actual shape of the input
+        if None in self.partitionout:
+            partitionout = list(self.partitionout)
+            for i, o in enumerate(self.operands):
+                if partitionout[i] is not None:
+                    continue
+                if self.partitionin[i] is None:
+                    raise ValueError('The shape of an operator with implicit p'
+                                     'artition cannot be inferred.')
+                # input shape of the block: the shape of the whole input,
+                # except along the partition axis
+                shapein = list(input.shape)
+                shapein[self.axisin] = self.partitionin[i]
+                partitionout[i] = tointtuple(
+                    o.reshapein(shapein))[self.axisout]
+        else:
+            partitionout = self.partitionout
+
+        # apply each block to its input slice and write into its output slice
+        for op, sin, sout in zip(self.operands, self.get_slicesin(),
+                                 self.get_slicesout(partitionout)):
+            i = input[sin]
+            o = output[sout]
+            # NOTE(review): copy_if presumably substitutes a conforming
+            # buffer when the block requires aligned or contiguous arrays
+            # and the slice is not -- confirm against the memory pool
+            with _pool.copy_if(i, op.flags.aligned_input,
+                               op.flags.contiguous_input) as i:
+                with _pool.copy_if(o, op.flags.aligned_output,
+                                   op.flags.contiguous_output) as o:
+                    op.direct(i, o)
+
+    @staticmethod
+    def _merge_flags(operands):
+        # in addition to the flags merged by BlockOperator, a block diagonal
+        # operator has the following properties if all its blocks have them
+        flags = BlockOperator._merge_flags(operands)
+        flags.update({'square': all(op.flags.square for op in operands),
+                      'symmetric': all(op.flags.symmetric for op in operands),
+                      'hermitian': all(op.flags.hermitian for op in operands),
+                      'inplace': all(op.flags.inplace for op in operands)})
+        return flags
+
+
+class BlockColumnOperator(BlockOperator):
+    """
+    Block column operator.
+
+    The input shapes of the blocks must be the same.
+    If a new axis 'new_axisout' is specified, the output shapes of the blocks
+    must be the same, and the output is stacked along this axis. Otherwise,
+    the output shapes of the blocks must be the same except for one same
+    dimension 'axisout': the axis along which the output is partitioned.
+    This operator can be used to process data chunk by chunk.
+
+    Parameters
+    ----------
+    operands : Operator list
+        Operators that will populate the column blocks.
+    partitionout : tuple of int
+        Partition of the number of elements along the output partition axis.
+        By default, it is read from the output shapes of the operands.
+    axisout : int
+        Output partition axis
+    new_axisout : int
+        Output new axis
+
+    Either 'axisout' or 'new_axisout' must be specified, otherwise
+    a NotImplementedError is raised.
+
+    Example
+    -------
+    >>> I = IdentityOperator(shapein=3)
+    >>> op = BlockColumnOperator([I,2*I], axisout=0)
+    >>> op.todense()
+    array([[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1],
+           [2, 0, 0],
+           [0, 2, 0],
+           [0, 0, 2]])
+
+    """
+    def __init__(self, operands, partitionout=None, axisout=None,
+                 new_axisout=None, **keywords):
+
+        operands = self._validate_operands(operands)
+
+        if axisout is None and new_axisout is None:
+            raise NotImplementedError('Free partitioning not implemented yet.')
+
+        # by default, the output partition is read from the operand shapes
+        if partitionout is None:
+            partitionout = self._get_partition(
+                [op.shapeout for op in operands], axisout, new_axisout)
+        partitionout = tointtuple(partitionout)
+
+        BlockOperator.__init__(self, operands, partitionout=partitionout,
+                               axisout=axisout, new_axisout=new_axisout,
+                               **keywords)
+
+    def direct(self, input, output):
+        # complete the output partition for the blocks whose output size is
+        # unknown, using the actual shape of the input
+        if None in self.partitionout:
+            partitionout = list(self.partitionout)
+            for i, op in enumerate(self.operands):
+                if partitionout[i] is None:
+                    # convert the shape first, then pick the size along the
+                    # partition axis, as BlockDiagonalOperator.direct does:
+                    # the partition entry must be a size, not a shape
+                    partitionout[i] = tointtuple(
+                        op.reshapein(input.shape))[self.axisout]
+        else:
+            partitionout = self.partitionout
+
+        # every block reads the whole input and writes its own output slice
+        for op, sout in zip(self.operands, self.get_slicesout(partitionout)):
+            o = output[sout]
+            with _pool.copy_if(o, op.flags.aligned_output,
+                               op.flags.contiguous_output) as o:
+                op.direct(input, o)
+
+    def __str__(self):
+        # only the first and last blocks are shown if there are more than two
+        operands = ['[{0}]'.format(o) for o in self.operands]
+        if len(operands) > 2:
+            operands = [operands[0], '...', operands[-1]]
+        return '[ ' + ' '.join(operands) + ' ]'
+
+
+class BlockRowOperator(BlockOperator):
+    """
+    Block row operator.
+
+    The output shapes of the blocks must be the same.
+    If a new axis 'new_axisin' is specified, the input shapes of the blocks
+    must be the same, and the input is iterated along this axis. Otherwise,
+    the input shapes of the blocks must be the same except for one same
+    dimension 'axisin': the axis along which the input is partitioned.
+    This operator can be used to process data chunk by chunk.
+
+    Parameters
+    ----------
+    operands : Operator list
+        Operators that will populate the row blocks.
+    partitionin : tuple of int
+        Partition of the number of elements along the input partition axis.
+        By default, it is read from the input shapes of the operands.
+    axisin : int
+        Input partition axis
+    new_axisin : int
+        Input new axis
+    operation : function
+        In-place operation used to accumulate the outputs of the blocks
+        (default is operator.iadd). The operator is flagged as linear only
+        for the default operation.
+
+    Either 'axisin' or 'new_axisin' must be specified, otherwise
+    a NotImplementedError is raised.
+
+    Example
+    -------
+    >>> I = IdentityOperator(shapein=3)
+    >>> op = BlockRowOperator([I,2*I], axisin=0)
+    >>> op.todense()
+    array([[1, 0, 0, 2, 0, 0],
+           [0, 1, 0, 0, 2, 0],
+           [0, 0, 1, 0, 0, 2]])
+
+    """
+    def __init__(self, operands, partitionin=None, axisin=None,
+                 new_axisin=None, operation=operator.iadd, **keywords):
+
+        operands = self._validate_operands(operands)
+
+        if axisin is None and new_axisin is None:
+            raise NotImplementedError('Free partitioning not implemented yet.')
+
+        # by default, the input partition is read from the operand shapes
+        if partitionin is None:
+            partitionin = self._get_partition(
+                [op.shapein for op in operands], axisin, new_axisin)
+        partitionin = tointtuple(partitionin)
+
+        keywords['flags'] = Operator.validate_flags(
+            keywords.get('flags', {}), linear=operation is operator.iadd)
+        BlockOperator.__init__(self, operands, partitionin=partitionin,
+                               axisin=axisin, new_axisin=new_axisin,
+                               **keywords)
+
+        self.operation = operation
+        # a temporary buffer is required if a block other than the first one
+        # cannot accumulate directly into the output
+        self._need_temporary = any(not o.flags.update_output for o in
+                                   self.operands[1:])
+
+    def direct(self, input, output):
+        # complete the input partition for the blocks whose input size is
+        # unknown, using the actual shape of the output
+        if None in self.partitionin:
+            partitionin = list(self.partitionin)
+            for i, op in enumerate(self.operands):
+                if partitionin[i] is None:
+                    # convert the shape first, then pick the size along the
+                    # partition axis, as BlockDiagonalOperator.direct does:
+                    # the partition entry must be a size, not a shape
+                    partitionin[i] = tointtuple(
+                        op.reshapeout(output.shape))[self.axisin]
+        else:
+            partitionin = self.partitionin
+
+        # the first block writes the output...
+        sins = tuple(self.get_slicesin(partitionin))
+        i = input[sins[0]]
+        op = self.operands[0]
+        with _pool.copy_if(i, op.flags.aligned_input,
+                           op.flags.contiguous_input) as i:
+            op.direct(i, output)
+
+        # ... and the contributions of the other ones are accumulated into it
+        with _pool.get_if(self._need_temporary, output.shape, output.dtype,
+                          self.__name__) as buf:
+
+            # slice the sequences before zipping: the result of zip cannot
+            # be sliced if it is an iterator
+            for op, sin in zip(self.operands[1:], sins[1:]):
+                i = input[sin]
+                with _pool.copy_if(i, op.flags.aligned_input,
+                                   op.flags.contiguous_input) as i:
+                    if op.flags.update_output:
+                        op.direct(i, output, operation=self.operation)
+                    else:
+                        op.direct(i, buf)
+                        self.operation(output, buf)
+
+    def __str__(self):
+        # only the first and last blocks are shown if there are more than two
+        operands = [str(o) for o in self.operands]
+        if len(operands) > 2:
+            operands = [operands[0], '...', operands[-1]]
+        return '[[ ' + ' '.join(operands) + ' ]]'
+
+
+@real
+@linear
+@inplace
+class ReshapeOperator(Operator):
+    """
+    Operator that reshapes arrays.
+
+    Parameters
+    ----------
+    shapein : int or tuple of int
+        The input shape.
+    shapeout : int or tuple of int
+        The output shape. It must have the same number of elements as the
+        input shape.
+
+    If both shapes are equal, the instance is turned into an
+    IdentityOperator. A ValueError is raised if a shape is None or if the
+    shapes do not have the same number of elements.
+
+    Example
+    -------
+    >>> op = ReshapeOperator(6, (3,2))
+    >>> op(np.ones(6)).shape
+    (3, 2)
+    """
+    def __init__(self, shapein, shapeout, **keywords):
+        if shapein is None:
+            raise ValueError('The input shape is None.')
+        if shapeout is None:
+            raise ValueError('The output shape is None.')
+        shapein = tointtuple(shapein)
+        shapeout = tointtuple(shapeout)
+        if product(shapein) != product(shapeout):
+            raise ValueError('The total size of the output must be unchanged.')
+        if shapein == shapeout:
+            # nothing to reshape: morph into the identity
+            self.__class__ = IdentityOperator
+            self.__init__(shapein, **keywords)
+            return
+        Operator.__init__(self, shapein=shapein, shapeout=shapeout, **keywords)
+        # the transpose is the converse reshape
+        self.set_rule('T', lambda s: ReshapeOperator(s.shapeout, s.shapein))
+        # two successive reshapes are merged into a single one
+        self.set_rule((type(self), '.'), self._rule_reshape,
+                      CompositionOperator)
+
+    def direct(self, input, output):
+        # NOTE(review): this test has no effect, the copy below is always
+        # performed. It looks like an early return was intended when the
+        # input and the output are aliased -- confirm before changing.
+        if isalias(input, output):
+            pass
+        output.ravel()[:] = input.ravel()
+
+    @staticmethod
+    def _rule_reshape(other, self):
+        """ Rule for ReshapeOperator(ReshapeOperator). """
+        return ReshapeOperator(self.shapein, other.shapeout)
+
+    def __str__(self):
+        return strshape(self.shapeout) + '←' + strshape(self.shapein)
+
+
+class BroadcastingBase(Operator):
+    """
+    Abstract class for operators that operate on a data array and
+    the input array, and for which broadcasting of the data array across
+    the input array is required.
+
+    Leftward broadcasting is the normal numpy's broadcasting along the slow
+    dimensions, if the array is stored in C order. Rightward broadcasting is
+    a broadcasting along the fast dimensions.
+
+    The following classes subclass BroadcastingBase :
+
+    BroadcastingBase
+        > ConstantOperator
+        > DiagonalBase
+              > DiagonalOperator
+              > DiagonalNumexprOperator
+              > MaskOperator
+        > PackOperator
+        > UnpackOperator
+
+    """
+    def __init__(self, data, broadcast, **keywords):
+        """
+        Store the data array and the (lowercased) broadcast mode, then
+        register the rules that combine the operator with block operators.
+
+        The broadcast mode is one of 'leftward', 'rightward', 'disabled' or
+        'scalar'. A directional mode is turned into 'scalar' when the data
+        array is zero-dimensional. A ValueError is raised if the mode is
+        None or unknown.
+
+        """
+        if broadcast is None:
+            raise ValueError('The broadcast mode is not specified.')
+        data = np.asarray(data)
+        broadcast = broadcast.lower()
+        modes = ('leftward', 'rightward', 'disabled', 'scalar')
+        if broadcast not in modes:
+            message = ("Invalid value '{0}' for the broadcast keyword. "
+                       "Expected values are {1}.")
+            raise ValueError(message.format(broadcast, strenum(modes)))
+        if broadcast in ('leftward', 'rightward') and data.ndim == 0:
+            broadcast = 'scalar'
+        self.broadcast = broadcast
+        self.data = data
+        Operator.__init__(self, **keywords)
+
+        # rules with a block operator, for the composition...
+        self.set_rule(
+            ('.', BlockOperator),
+            lambda s, o: s._rule_right_block(s, o, CompositionOperator),
+            CompositionOperator)
+        self.set_rule(
+            (BlockOperator, '.'),
+            lambda o, s: s._rule_left_block(o, s),
+            CompositionOperator)
+        # ... the addition...
+        self.set_rule(
+            ('.', BlockOperator),
+            lambda s, o: s._rule_right_block(s, o, AdditionOperator),
+            AdditionOperator)
+        # ... and the multiplication
+        self.set_rule(
+            ('.', BlockOperator),
+            lambda s, o: s._rule_right_block(s, o, MultiplicationOperator),
+            MultiplicationOperator)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes of the data array. """
+        data = self.data
+        return data.nbytes
+
+    def get_data(self):
+        """ Return the data array of the operator. """
+        data = self.data
+        return data
+
+    @staticmethod
+    def _rule_broadcast(b1, b2, cls, operation):
+        """
+        Combine the data of two broadcasting operators.
+
+        Return None if one operator broadcasts leftward and the other one
+        rightward. Otherwise, return cls(data, broadcast), where 'data' is
+        the result of 'operation' on both data arrays and 'broadcast' is,
+        by decreasing precedence, 'disabled', 'leftward', 'rightward' or
+        'scalar'. If one operator broadcasts rightward, the operation is
+        performed on the transposed arrays and the result is transposed
+        back.
+
+        """
+        modes = set((b1.broadcast, b2.broadcast))
+        if 'leftward' in modes and 'rightward' in modes:
+            return None
+        for mode in ('disabled', 'leftward', 'rightward'):
+            if mode in modes:
+                broadcast = mode
+                break
+        else:
+            broadcast = 'scalar'
+        data1 = b1.get_data()
+        data2 = b2.get_data()
+        if 'rightward' in modes:
+            data = operation(data1.T, data2.T).T
+        else:
+            data = operation(data1, data2)
+        return cls(data, broadcast)
+
+    @staticmethod
+    def _rule_block(self, op, shape, partition, axis, new_axis, func_operation,
+                    *args, **keywords):
+        """
+        Distribute a broadcasting operator over the operands of a block
+        operator.
+
+        Parameters
+        ----------
+        self : broadcasting operator
+            Passed explicitly, since this is a static method.
+        op : block operator
+            Its `operands` are combined one by one with the broadcasting
+            operator.
+        shape : tuple or None
+            Shape on the side of the block operator that faces `self`
+            (the callers pass op.shapein or op.shapeout).
+        partition, axis, new_axis
+            Partition information of the same side of the block operator.
+        func_operation : callable
+            Called as ``func_operation(broadcasting_operator, operand)`` to
+            combine one operand with the (possibly sliced) operator.
+        *args, **keywords
+            Forwarded to the constructor of ``type(self)`` when the data are
+            sliced.
+
+        Returns
+        -------
+        A BlockOperator with the same partitions and axes as `op`, or None
+        when the rule does not apply.
+
+        """
+        # the rule needs a partition; an incomplete one (containing None)
+        # is only handled for scalar data
+        if partition is None:
+            return
+        if None in partition and self.broadcast != 'scalar':
+            return
+
+        b = self.broadcast
+        ndim = self.data.ndim
+        # whichever of axis and new_axis is set
+        axis_ = first_is_not([axis, new_axis], None)
+
+        # determine if the broadcasting data should be replicated or sliced
+        # NOTE(review): the 'rightward' test is 'axis_ > ndim' when the shape
+        # is unknown but 'axis_ >= ndim' when it is known -- confirm that the
+        # asymmetry is intended.
+        do_replicate = False
+        if b == 'scalar':
+            if shape is None:
+                do_replicate = True
+        elif b == 'disabled':
+            pass
+        elif shape is None:
+            if new_axis is not None and ndim == 1 and (
+                    new_axis == -1 and b == 'rightward' or
+                    new_axis == 0 and b == 'leftward'):
+                do_replicate = True
+            elif b == 'rightward':
+                if axis_ > ndim:
+                    do_replicate = True
+                elif axis_ < 0:
+                    return
+            else:
+                if axis_ < -ndim:
+                    do_replicate = True
+                elif axis_ >= 0:
+                    return
+        else:
+            if b == 'rightward':
+                if axis_ >= ndim:
+                    do_replicate = True
+            else:
+                # shift the partition axis by the rank of the known shape
+                # (presumably to count it from the end -- confirm)
+                if axis is not None:
+                    axis = axis - len(shape)
+                else:
+                    new_axis = new_axis - len(shape)
+                if axis_ - len(shape) < -ndim:
+                    do_replicate = True
+
+        if do_replicate:
+            # the same operator is combined with every operand
+            ops = [func_operation(self, o) for o in op.operands]
+        else:
+            # view the data with the full shape, then slice it per operand
+            data = self._as_strided(shape)
+            # number of required positional arguments of the constructor,
+            # excluding self
+            # NOTE(review): inspect.getargspec has been removed in
+            # Python 3.11; inspect.getfullargspec exposes the same 'args'
+            # and 'defaults' fields.
+            argspec = inspect.getargspec(type(self).__init__)
+            nargs = len(argspec.args) - 1 - (
+                len(argspec.defaults) if argspec.defaults is not None else 0)
+            slices = op._get_slices(partition, axis, new_axis)
+            ops = []
+            for s, o in zip(slices, op.operands):
+                if nargs == 0:
+                    # the constructor takes no data argument
+                    sliced = type(self)(*args, **keywords)
+                else:
+                    sliced = type(self)(data[s], broadcast=b,
+                                        *args, **keywords)
+                ops.append(func_operation(sliced, o))
+
+        return BlockOperator(ops, op.partitionin, op.partitionout, op.axisin,
+                             op.axisout, op.new_axisin, op.new_axisout)
+
+    @staticmethod
+    def _rule_left_block(op, self):
+        """
+        Rule for the composition of a block operator `op` (on the left) with
+        the broadcasting operator `self` (on the right).
+
+        The broadcasting operator is distributed over the operands of the
+        block operator according to its input partition.
+
+        """
+        def compose(broadcaster, operand):
+            # the block operand stays on the left of the composition
+            return CompositionOperator([operand, broadcaster])
+
+        return self._rule_block(
+            self, op, op.shapein, op.partitionin, op.axisin, op.new_axisin,
+            compose)
+
+    @staticmethod
+    def _rule_right_block(self, op, cls):
+        """
+        Rule for the combination by `cls` of the broadcasting operator `self`
+        (on the left) with a block operator `op` (on the right).
+
+        The broadcasting operator is distributed over the operands of the
+        block operator according to its output partition; each pair is
+        combined as ``cls([broadcasting_operator, operand])``.
+
+        """
+        def combine(broadcaster, operand):
+            return cls([broadcaster, operand])
+
+        return self._rule_block(
+            self, op, op.shapeout, op.partitionout, op.axisout,
+            op.new_axisout, combine)
+
+    def _as_strided(self, shape):
+        """
+        Return a view of the data broadcast to the specified shape.
+
+        If `shape` is None, the data array itself is returned. Otherwise,
+        the data dimensions are mapped onto the first dimensions of `shape`
+        for a rightward broadcast and onto the last ones otherwise; all the
+        other dimensions, and the data dimensions of length 1, get a zero
+        stride. The strides are derived from the item size and the shape of
+        the data, not read from the array.
+
+        """
+        if shape is None:
+            return self.data
+
+        data = self.data
+        ndim = data.ndim
+        offset = 0 if self.broadcast == 'rightward' else len(shape) - ndim
+        strides = [0] * len(shape)
+        step = data.itemsize
+        for axis in reversed(range(ndim)):
+            extent = data.shape[axis]
+            if extent != 1:
+                strides[axis + offset] = step
+                step *= extent
+        return np.lib.stride_tricks.as_strided(data, shape, strides)
+
+
+@symmetric
+class DiagonalBase(BroadcastingBase):
+    """
+    Base class for DiagonalOperator, DiagonalNumexprOperator, MaskOperator.
+
+    On top of the BroadcastingBase rules, the constructor registers the
+    simplification rules for the addition, the multiplication and the
+    composition with another DiagonalBase, and for the multiplication
+    by a ConstantOperator.
+
+    """
+    def __init__(self, data, broadcast, **keywords):
+        BroadcastingBase.__init__(self, data, broadcast, **keywords)
+        # diagonal + diagonal -> diagonal (sum of the data)
+        self.set_rule(('.', DiagonalBase),
+                      lambda s, o: s._rule_broadcast(
+                          s, o, DiagonalOperator, np.add),
+                      AdditionOperator)
+        # diagonal x constant -> diagonal (product of the data)
+        self.set_rule(('.', ConstantOperator),
+                      lambda s, o: s._rule_broadcast(
+                          s, o, DiagonalOperator, np.multiply),
+                      MultiplicationOperator)
+        # diagonal x diagonal -> constant x square, see _rule_multiply
+        self.set_rule(('.', DiagonalBase),
+                      lambda s, o: s._rule_multiply(s, o),
+                      MultiplicationOperator)
+        # diagonal . diagonal -> diagonal (product of the data)
+        self.set_rule(('.', DiagonalBase),
+                      lambda s, o: s._rule_broadcast(
+                          s, o, DiagonalOperator, np.multiply),
+                      CompositionOperator)
+
+    @staticmethod
+    def _rule_multiply(b1, b2):
+        """
+        Rule for the element-wise multiplication of two diagonal operators.
+
+        Returns the multiplication of a ConstantOperator, holding the product
+        of the two data arrays, by PowerOperator(2). None is returned when
+        one operand broadcasts leftward and the other one rightward.
+
+        """
+        modes = set([b1.broadcast, b2.broadcast])
+        if 'leftward' in modes and 'rightward' in modes:
+            return
+        if 'disabled' in modes:
+            broadcast = 'disabled'
+        elif 'leftward' in modes:
+            broadcast = 'leftward'
+        elif 'rightward' in modes:
+            broadcast = 'rightward'
+        else:
+            broadcast = 'scalar'
+        # Test the set of modes, not the merged one: as in _rule_broadcast,
+        # a rightward operand requires the transposed product even if the
+        # result is 'disabled'.
+        if 'rightward' in modes:
+            data = (b1.get_data().T * b2.get_data().T).T
+        else:
+            data = b1.get_data() * b2.get_data()
+        return MultiplicationOperator(
+            [ConstantOperator(data, broadcast=broadcast),
+             po.nonlinear.PowerOperator(2)])
+
+
+@inplace
+class DiagonalOperator(DiagonalBase):
+    """
+    Diagonal operator.
+
+    Arguments
+    ---------
+    data : ndarray
+      The diagonal coefficients
+
+    broadcast : 'leftward', 'rightward', 'disabled' or 'scalar', optional
+      If broadcast == 'rightward', the diagonal is broadcasted along the fast
+      axis. Otherwise, the data are multiplied with the input following
+      numpy's broadcasting. If broadcast == 'disabled', the input and output
+      shapes are set to the shape of the data. By default, it is 'scalar'
+      for zero-dimensional data and 'disabled' otherwise.
+
+    dtype : dtype, optional
+      The operator data type. By default, it is the data type of the data,
+      unless the data are the scalar 0 or 1.
+
+    Depending on the data, the returned instance is morphed into a more
+    specific operator: ZeroOperator (all zeros), IdentityOperator (all ones),
+    HomothetyOperator (single repeated value) or MaskOperator (zeros and
+    ones only).
+
+    Example
+    -------
+    >>> A = DiagonalOperator(np.arange(1, 6, 2))
+    >>> A.todense()
+    array([[1, 0, 0],
+           [0, 3, 0],
+           [0, 0, 5]])
+
+    >>> A = DiagonalOperator([1, 2], broadcast='rightward', shapein=(2, 2))
+    >>> A.todense()
+    array([[1, 0, 0, 0],
+           [0, 1, 0, 0],
+           [0, 0, 2, 0],
+           [0, 0, 0, 2]])
+
+    """
+    def __init__(self, data, broadcast=None, dtype=None, **keywords):
+        data = np.asarray(data)
+        if broadcast is None:
+            broadcast = 'scalar' if data.ndim == 0 else 'disabled'
+        if broadcast == 'disabled':
+            keywords['shapein'] = data.shape
+            keywords['shapeout'] = data.shape
+        n = data.size
+        nmones, nzeros, nones, other, same = inspect_special_values(data)
+        # The instance is morphed into the most specific operator. The
+        # isinstance checks prevent an endless recursion when this
+        # constructor is called back by the subclass one.
+        if nzeros == n and not isinstance(self, ZeroOperator):
+            keywords['flags'] = Operator.validate_flags(
+                keywords.get('flags', {}), square=True)
+            self.__class__ = ZeroOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+        if nones == n and not isinstance(self, IdentityOperator):
+            self.__class__ = IdentityOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+        if same and not isinstance(self, (HomothetyOperator, ZeroOperator)):
+            self.__class__ = HomothetyOperator
+            self.__init__(data.flat[0], dtype=dtype, **keywords)
+            return
+        if nones + nzeros == n and not isinstance(self,
+                                                  (HomothetyOperator,
+                                                   po.linear.MaskOperator)):
+            self.__class__ = po.linear.MaskOperator
+            # The mask is the negation of the data cast to booleans. The
+            # builtin bool is used instead of the alias np.bool8, which has
+            # been removed from NumPy 2.
+            self.__init__(~data.astype(bool), broadcast=broadcast,
+                          **keywords)
+            return
+        if nmones + nones == n:
+            # only -1 and 1 on the diagonal
+            keywords['flags'] = self.validate_flags(keywords.get('flags', {}),
+                                                    involutary=True)
+        if dtype is None and (data.ndim > 0 or data not in (0, 1)):
+            dtype = data.dtype
+        DiagonalBase.__init__(self, data, broadcast, dtype=dtype, **keywords)
+
+    def direct(self, input, output):
+        """ Multiply the input by the diagonal. """
+        if self.broadcast == 'rightward':
+            # transposed views align the data on the first dimensions
+            np.multiply(input.T, self.get_data().T, output.T)
+        else:
+            np.multiply(input, self.get_data(), output)
+
+    def conjugate(self, input, output):
+        """ Multiply the input by the conjugate of the diagonal. """
+        if self.broadcast == 'rightward':
+            np.multiply(input.T, np.conjugate(self.get_data()).T, output.T)
+        else:
+            np.multiply(input, np.conjugate(self.get_data()), output)
+
+    def inverse(self, input, output):
+        """ Divide the input by the diagonal. """
+        if self.broadcast == 'rightward':
+            np.divide(input.T, self.get_data().T, output.T)
+        else:
+            np.divide(input, self.get_data(), output)
+
+    def inverse_conjugate(self, input, output):
+        """ Divide the input by the conjugate of the diagonal. """
+        if self.broadcast == 'rightward':
+            np.divide(input.T, np.conjugate(self.get_data()).T, output.T)
+        else:
+            np.divide(input, np.conjugate(self.get_data()), output)
+
+    def __pow__(self, n):
+        """
+        Return the diagonal operator whose data are raised to the power n.
+        The exponents -1, 0 and 1 are delegated to the base class.
+
+        """
+        if n in (-1, 0, 1):
+            return BroadcastingBase.__pow__(self, n)
+        return DiagonalOperator(self.get_data()**n, broadcast=self.broadcast)
+
+    def validatein(self, shape):
+        """
+        Raise a ValueError if the data cannot be broadcast across an input
+        of the specified shape.
+
+        """
+        n = self.data.ndim
+        if len(shape) < n:
+            raise ValueError("Invalid number of dimensions.")
+
+        # compare the data dimensions with the first (rightward) or the last
+        # (otherwise) dimensions of the input
+        if self.broadcast == 'rightward':
+            it = zip(shape[:n], self.data.shape[:n])
+        else:
+            it = zip(shape[-n:], self.data.shape[-n:])
+        for si, sd in it:
+            if sd != 1 and sd != si:
+                raise ValueError("The data array cannot be broadcast across th"
+                                 "e input.")
+
+    def toshapein(self, v):
+        """
+        Reshape the input to a shape compatible with the operator.
+
+        If the input shape of the operator is not known, the shape is
+        derived from that of the data, with one free dimension: the data
+        dimension of length 1 if there is one, or else an extra dimension
+        appended (rightward broadcast) or prepended (otherwise).
+
+        """
+        if self.shapein is not None:
+            return v.reshape(self.shapein)
+        if self.data.ndim < 1:
+            return v
+
+        sd = list(self.data.shape)
+        n = sd.count(1)
+        if n > 1:
+            raise ValueError('Ambiguous broadcasting.')
+        if n == 0:
+            if self.broadcast == 'rightward':
+                sd.append(-1)
+            else:
+                sd.insert(0, -1)
+        else:
+            sd[sd.index(1)] = -1
+
+        try:
+            v = v.reshape(sd)
+        except ValueError:
+            raise ValueError("Invalid broadcasting.")
+
+        return v
+
+
+class HomothetyOperator(DiagonalOperator):
+    """
+    Multiplication by a scalar.
+
+    Arguments
+    ---------
+    data : scalar or array_like
+      The multiplicative factor. An array is accepted if all its elements
+      are equal; otherwise a ValueError is raised.
+
+    """
+    def __init__(self, data, **keywords):
+        data = np.asarray(data)
+        if data.ndim > 0:
+            # the comparison of the elements is skipped if all the strides
+            # are zero
+            if any(s != 0 for s in data.strides) and \
+               np.any(data.flat[0] != data):
+                raise ValueError("The input is not a scalar.")
+            data = np.asarray(data.flat[0])
+
+        DiagonalOperator.__init__(self, data, **keywords)
+        # the DiagonalOperator constructor may have morphed the instance,
+        # in which case the rules below are not set
+        if type(self) is not HomothetyOperator:
+            return
+        self.set_rule('C', lambda s: HomothetyOperator(np.conjugate(s.data)))
+        self.set_rule('I', lambda s: HomothetyOperator(
+            1/s.data if s.data != 0 else np.nan))
+        self.set_rule('IC', lambda s: HomothetyOperator(
+            np.conjugate(1/s.data) if s.data != 0 else np.nan))
+
+    def __str__(self):
+        data = self.data.flat[0]
+        # Integral values are displayed without decimal part. The conversion
+        # by int() is not attempted for complex, NaN or infinite values, for
+        # which it is not defined.
+        if not np.iscomplexobj(data) and np.isfinite(data) and \
+           data == int(data):
+            data = int(data)
+        if data == 1:
+            return 'I'
+        if data == -1:
+            return '-I'
+        return str(data) + 'I'
+
+
+@real
+@idempotent
+@involutary
+class IdentityOperator(HomothetyOperator):
+    """
+    A subclass of HomothetyOperator with data = 1.
+
+    The composition of the identity with another operator, on either side,
+    is simplified to a copy of that operator.
+
+    Examples
+    --------
+    >>> I = IdentityOperator()
+    >>> I.todense(shapein=3)
+    array([[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]])
+
+    >>> I = IdentityOperator(shapein=2)
+    >>> I * np.arange(2)
+    array([0, 1])
+
+    """
+    def __init__(self, shapein=None, **keywords):
+        HomothetyOperator.__init__(self, 1, shapein=shapein, **keywords)
+        self.set_rule(('.', Operator), self._rule_left, CompositionOperator)
+        self.set_rule((Operator, '.'), self._rule_right, CompositionOperator)
+
+    def direct(self, input, output):
+        """ Copy the input into the output, unless they are aliased. """
+        # The original guard ended with 'pass', so that the copy was always
+        # performed; when isalias reports that the two arrays are aliased,
+        # there is nothing to copy.
+        if isalias(input, output):
+            return
+        output[...] = input
+
+    @staticmethod
+    def _rule_left(self, operator):
+        # identity . operator -> operator
+        return operator.copy()
+
+    @staticmethod
+    def _rule_right(operator, self):
+        # operator . identity -> operator
+        return operator.copy()
+
+
+@idempotent
+@inplace
+@update_output
+class ConstantOperator(BroadcastingBase):
+    """
+    Non-linear constant operator.
+
+    The output of the operator is its data, whatever the input.
+
+    Arguments
+    ---------
+    data : scalar or array_like
+      The constant value. An array whose elements are all equal is replaced
+      by its first element. A scalar zero morphs the instance into a
+      ZeroOperator.
+
+    broadcast : string, optional
+      The broadcasting mode. By default, it is 'scalar' for zero-dimensional
+      data and 'disabled' otherwise. If it is 'disabled', the output shape
+      is set to the shape of the data.
+
+    dtype : dtype, optional
+      The operator data type. By default, it is the data type of the data.
+
+    """
+    def __init__(self, data, broadcast=None, dtype=None, **keywords):
+        data = np.asarray(data)
+        if broadcast is None:
+            broadcast = 'scalar' if data.ndim == 0 else 'disabled'
+        if broadcast == 'disabled':
+            keywords['shapeout'] = data.shape
+        if data.ndim > 0 and np.all(data == data.flat[0]):
+            # uniform data are handled as a scalar; the broadcast argument
+            # is not forwarded
+            self.__init__(data.flat[0], dtype=dtype, **keywords)
+            return
+        if not isinstance(self, ZeroOperator) and data.ndim == 0 and data == 0:
+            self.__class__ = ZeroOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+
+        if dtype is None and not isinstance(self, ZeroOperator):
+            dtype = data.dtype
+        BroadcastingBase.__init__(self, data, broadcast, dtype=dtype,
+                                  **keywords)
+        self.set_rule('C', lambda s: ConstantOperator(
+            s.data.conjugate(), broadcast=s.broadcast))
+#        if self.flags.shape_input == 'unconstrained' and \
+#           self.flags.shape_output != 'implicit':
+#            self.set_rule('T', '.')
+        self.set_rule(('.', Operator), self._rule_left, CompositionOperator)
+        self.set_rule((Operator, '.'), self._rule_right, CompositionOperator)
+        self.set_rule(('.', Operator), self._rule_mul, MultiplicationOperator)
+        # constant + constant -> constant (sum of the data)
+        self.set_rule(('.', ConstantOperator),
+                      lambda s, o: s._rule_broadcast(
+                          s, o, ConstantOperator, np.add),
+                      AdditionOperator)
+        # constant x constant -> constant (product of the data)
+        self.set_rule(('.', ConstantOperator),
+                      lambda s, o: s._rule_broadcast(
+                          s, o, ConstantOperator, np.multiply),
+                      MultiplicationOperator)
+
+    def direct(self, input, output, operation=operation_assignment):
+        """ Apply `operation` to the output and the data. """
+        if self.broadcast == 'rightward':
+            operation(output.T, self.data.T)
+        else:
+            operation(output, self.data)
+
+    @staticmethod
+    def _rule_left(self, op):
+        """
+        Rule for constant . op: the result is a copy of the constant, unless
+        the operator has an MPI communicator.
+
+        """
+        if op.commin is not None or op.commout is not None:
+            return None
+        return self.copy()
+
+    @staticmethod
+    def _rule_right(op, self):
+        """
+        Rule for op . constant: the result is the constant operator holding
+        the operator applied to the data, when the shape of the data to be
+        transformed can be determined.
+
+        """
+        if op.commin is not None or op.commout is not None:
+            return None
+        if op.flags.shape_output == 'unconstrained':
+            return None
+        if self.flags.shape_output == 'explicit':
+            data = self._as_strided(self.shapeout)
+        elif op.flags.shape_input == 'explicit':
+            data = self._as_strided(op.shapein)
+        else:
+            return None
+        return ConstantOperator(op(data))
+
+    @staticmethod
+    def _rule_mul(self, op):
+        """
+        Rule for constant x op: for a linear operator or a composition, the
+        multiplication is the composition of the diagonal operator holding
+        the data with the operator.
+
+        """
+        if not isinstance(op, CompositionOperator) and not op.flags.linear:
+            return
+        s = DiagonalOperator(self.data, broadcast=self.broadcast)
+        return CompositionOperator([s, op])
+
+    @staticmethod
+    def _rule_left_block(op, self):
+        # no simplification for block . constant
+        return
+
+    @staticmethod
+    def _rule_right_block(self, op, cls):
+        # the inherited rule is applied except for compositions
+        if cls is CompositionOperator:
+            return
+        return BroadcastingBase._rule_right_block(self, op, cls)
+
+    def __neg__(self):
+        return ConstantOperator(
+            -self.data, broadcast=self.broadcast, shapein=self.shapein,
+            shapeout=self.shapeout, reshapein=self.reshapein,
+            reshapeout=self.reshapeout, dtype=self.dtype)
+
+    def __str__(self):
+        return str(self.data)
+
+
+@real
+@linear
+class ZeroOperator(ConstantOperator):
+    """
+    A subclass of ConstantOperator with data = 0.
+
+    The positional arguments of the constructor are ignored; the keywords
+    are passed to the ConstantOperator constructor.
+
+    """
+    def __init__(self, *args, **keywords):
+        ConstantOperator.__init__(self, 0, **keywords)
+        # replace the inherited multiplication rules: the product of zero by
+        # any operator is zero
+        self.del_rule(('.', BlockOperator), MultiplicationOperator)
+        self.del_rule(('.', ConstantOperator), MultiplicationOperator)
+        self.del_rule(('.', Operator), MultiplicationOperator)
+        self.set_rule('T', lambda s: ZeroOperator())
+        # zero + operator -> operator
+        self.set_rule(('.', Operator), lambda s, o: o.copy(), AdditionOperator)
+        # zero x operator -> zero
+        self.set_rule(('.', Operator), lambda s, o: s.copy(),
+                      MultiplicationOperator)
+
+    def direct(self, input, output, operation=operation_assignment):
+        """ Apply `operation` to the output and 0. """
+        operation(output, 0)
+
+    @staticmethod
+    def _rule_left(self, op):
+        # zero . op -> zero, unless the operator has an MPI communicator
+        if op.commin is not None or op.commout is not None:
+            return None
+        return ZeroOperator()
+
+    @staticmethod
+    def _rule_right(op, self):
+        # op . zero -> zero for a linear operator; otherwise the constant
+        # operator rule is applied
+        if op.commin is not None or op.commout is not None:
+            return None
+        if op.flags.linear:
+            return ZeroOperator()
+        return ConstantOperator._rule_right(op, self)
+
+    def __neg__(self):
+        return self
+
+
+class ReductionOperator(Operator):
+    """
+    Reduction-along-axis operator.
+
+    Parameters
+    ----------
+    func : ufunc or function
+        Function used for the reduction. If the input is a ufunc, its 'reduce'
+        method is used. A Python function must have an 'axis' argument.
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    dtype : dtype, optional
+        Reduction data type.
+    skipna : boolean, optional
+        Value passed to the reduction function, if it is a Python function
+        that has a 'skipna' argument. It is ignored otherwise.
+
+    Raises
+    ------
+    TypeError
+        If the ufunc does not have 2 inputs and 1 output, if the function
+        has no 'axis' argument, or if `func` is neither a ufunc nor a Python
+        function.
+
+    Example
+    -------
+    >>> op = ReductionOperator(np.nansum)
+    >>> op([np.nan, 1, 2])
+    array(3.0)
+
+    """
+    def __init__(self, func, axis=None, dtype=None, skipna=False, **keywords):
+        if axis is None:
+            keywords['shapeout'] = ()
+        if isinstance(func, np.ufunc):
+            if func.nin != 2:
+                raise TypeError(
+                    "The input ufunc '{0}' has {1} input argument. Expected nu"
+                    "mber is 2.".format(func.__name__, func.nin))
+            if func.nout != 1:
+                raise TypeError(
+                    "The input ufunc '{0}' has {1} output arguments. Expected "
+                    "number is 1.".format(func.__name__, func.nout))
+            # ufunc.reduce has no 'skipna' keyword. The former test on the
+            # version string (np.__version__ < '2') passed this keyword with
+            # NumPy 2 and made the reduction fail.
+            if axis is None:
+                direct = lambda x, out: func.reduce(x.flat, 0, dtype, out)
+            else:
+                direct = lambda x, out: func.reduce(x, axis, dtype, out)
+        elif isinstance(func, types.FunctionType):
+            # inspect.getargspec has been removed from Python 3.11
+            try:
+                getargspec = inspect.getfullargspec
+            except AttributeError:
+                getargspec = inspect.getargspec
+            spec = getargspec(func)
+            argnames = list(spec.args) + list(getattr(spec, 'kwonlyargs', []))
+            if 'axis' not in argnames:
+                raise TypeError("The input function '{0}' does not have an 'ax"
+                                "is' argument.".format(func.__name__))
+            # only pass the keywords that the function can handle
+            kw = {}
+            if 'dtype' in argnames:
+                kw['dtype'] = dtype
+            if 'skipna' in argnames:
+                kw['skipna'] = skipna
+            if 'out' not in argnames:
+                def direct(x, out):
+                    out[...] = func(x, axis=axis, **kw)
+            else:
+                direct = lambda x, out: func(x, axis=axis, out=out, **kw)
+        else:
+            # without this branch, 'direct' would be left undefined
+            raise TypeError(
+                "The input '{0!r}' is neither a ufunc nor a Python function."
+                .format(func))
+        self.axis = axis
+        Operator.__init__(self, direct=direct, dtype=dtype, **keywords)
+
+    def reshapein(self, shape):
+        """ Return the input shape without the reduced dimension. """
+        # self.axis + 1 would be 0 for the last axis, hence the special case
+        if self.axis == -1:
+            return shape[:-1]
+        return shape[:self.axis] + shape[self.axis+1:]
+
+    def validatein(self, shape):
+        """
+        Raise an exception if the input is a scalar or if it has too few
+        dimensions for the reduction axis.
+
+        """
+        if len(shape) == 0:
+            raise TypeError('Cannot reduce on scalars.')
+        if self.axis is None:
+            return
+        if len(shape) < (self.axis+1 if self.axis >= 0 else abs(self.axis)):
+            raise ValueError('The input shape has an insufficient number of di'
+                             'mensions.')
+
+
+@linear
+@square
+class Variable(Operator):
+    """
+    Fake operator to represent a variable.
+
+    Arguments
+    ---------
+    name : string
+      The name of the variable, used as its string representation.
+    shape : tuple, optional
+      The shape of the variable, set as the operator input shape.
+
+    """
+    def __init__(self, name, shape=None):
+        self.name = name
+        Operator.__init__(self, shapein=shape)
+        self.set_rule('T',
+                      lambda s: VariableTranspose(self.name, self.shapein))
+        self.set_rule(('.', Operator), self._rule_rcomp, CompositionOperator)
+
+    @staticmethod
+    def _rule_rcomp(self, other):
+        # variable . operator is not allowed
+        raise TypeError('A variable cannot be composed with an operator.')
+
+    def __mul__(self, other):
+        """
+        Return the product of the variable by `other`: an element-wise
+        multiplication with another variable, and a composition with a
+        transposed variable, a scalar, a HomothetyOperator or a non-matrix
+        sequence or array.
+
+        """
+        if isinstance(other, Variable):
+            return MultiplicationOperator([self, other])
+        if isinstance(other, VariableTranspose):
+            return CompositionOperator([self, other])
+        # 'and' binds tighter than 'or': the matrix exclusion only applies
+        # to the list, tuple and ndarray case
+        if np.isscalar(other) or isinstance(other, HomothetyOperator) or \
+           isinstance(other, (list, tuple, np.ndarray)) and \
+           not isinstance(other, np.matrix):
+            return CompositionOperator([other, self])
+        try:
+            other = asoperator(other)
+        except TypeError:
+            return NotImplemented
+        return MultiplicationOperator([other, self])
+
+    def __rmul__(self, other):
+        """
+        Return the product of `other` by the variable: a composition if
+        `other` is linear, an element-wise multiplication otherwise.
+
+        """
+        try:
+            other = asoperator(other)
+        except TypeError:
+            return NotImplemented
+        if other.flags.linear:
+            return CompositionOperator([other, self])
+        return MultiplicationOperator([other, self])
+
+    def __pow__(self, n):
+        return po.nonlinear.PowerOperator(n)(self)
+
+    def __str__(self):
+        return self.name
+
+    __repr__ = __str__
+
+
+@linear
+@square
+class VariableTranspose(Operator):
+    """
+    Fake operator to represent a transposed variable.
+
+    Arguments
+    ---------
+    name : string
+      The name of the variable. The string representation of the operator
+      is this name followed by '.T'.
+    shape : tuple, optional
+      The shape of the variable, set as the operator input shape.
+
+    """
+    def __init__(self, name, shape=None):
+        self.name = name
+        Operator.__init__(self, shapein=shape)
+        self.set_rule('T', lambda s: Variable(self.name, self.shapein))
+        self.set_rule((Operator, '.'), self._rule_lcomp, CompositionOperator)
+
+    @staticmethod
+    def _rule_lcomp(self, other):
+        # operator . transposed variable is not allowed
+        raise ValueError('An operator cannot be composed with a transposed var'
+                         'iable.')
+
+    def __mul__(self, other):
+        """
+        Return the composition of the transposed variable with `other`,
+        which must be a variable, a scalar, a HomothetyOperator, a non-matrix
+        array (handled as a diagonal operator) or a linear operator.
+
+        """
+        if isinstance(other, VariableTranspose):
+            raise TypeError('Transposed variables cannot be multiplied.')
+        if isinstance(other, Variable):
+            return CompositionOperator([self, other])
+        if isscalarlike(other) or isinstance(other, HomothetyOperator):
+            return CompositionOperator([other, self])
+        if isinstance(other, np.ndarray) and not isinstance(other, np.matrix):
+            return CompositionOperator([self, DiagonalOperator(other)])
+        try:
+            other = asoperator(other)
+        except TypeError:
+            return NotImplemented
+        if not other.flags.linear:
+            raise TypeError('Multiplying a transposed variable by a non-linear'
+                            ' operator does not make sense.')
+        return CompositionOperator([self, other])
+
+    def __rmul__(self, other):
+        """
+        Return the product of `other` by the transposed variable. Only
+        scalars and HomothetyOperator instances are accepted.
+
+        """
+        if np.isscalar(other) or isinstance(other, HomothetyOperator):
+            return CompositionOperator([self, other])
+        raise TypeError('An operator cannot be composed with a transposed vari'
+                        'able.')
+
+    def __str__(self):
+        return self.name + '.T'
+
+    __repr__ = __str__
+
+
+def _copy_direct(source, target):
+    """
+    Initialise `target` with the operator attributes of `source`.
+
+    All the attributes listed in OPERATOR_ATTRIBUTES are copied, except the
+    flags and the shape methods that are equal to the default Operator
+    ones. The target is returned.
+
+    """
+    shape_methods = ('reshapein', 'reshapeout', 'toshapein', 'toshapeout',
+                     'validatein', 'validateout')
+    keywords = {}
+    for attr in OPERATOR_ATTRIBUTES:
+        if attr == 'flags':
+            continue
+        value = getattr(source, attr)
+        if attr in shape_methods:
+            default = getattr(Operator, attr).__get__(source, type(source))
+            if value == default:
+                continue
+        keywords[attr] = value
+    Operator.__init__(target, **keywords)
+    return target
+
+
+def _copy_reverse(source, target):
+    """
+    Initialise `target` with the operator attributes of `source`, after
+    swapping the input and output attributes.
+
+    All the attributes listed in OPERATOR_ATTRIBUTES are copied, except the
+    flags and the shape methods that are equal to the default Operator
+    ones. The name of each copied attribute is transformed by _swap_inout.
+    The target is returned.
+
+    """
+    shape_methods = ('reshapein', 'reshapeout', 'toshapein', 'toshapeout',
+                     'validatein', 'validateout')
+    keywords = {}
+    for attr in OPERATOR_ATTRIBUTES:
+        if attr == 'flags':
+            continue
+        value = getattr(source, attr)
+        if attr in shape_methods:
+            default = getattr(Operator, attr).__get__(source, type(source))
+            if value == default:
+                continue
+        keywords[_swap_inout(attr)] = value
+    Operator.__init__(target, **keywords)
+    return target
+
+
+def _swap_inout(s):
+    """
+    Swap the suffixes 'in' and 'out' of a name: 'shapein' becomes 'shapeout'
+    and conversely. A name without such a suffix is returned unchanged.
+
+    """
+    for suffix, swapped in (('in', 'out'), ('out', 'in')):
+        if s.endswith(suffix):
+            return s[:-len(suffix)] + swapped
+    return s
+
+
+def asoperator(x, constant=False, **keywords):
+    """
+    Convert the input into an Operator.
+
+    Parameters
+    ----------
+    x : object
+        One of the following:
+            - an Operator, which is returned as is
+            - a callable (including ufuncs)
+            - array_like (including matrices)
+            - a numpy or python scalar
+            - scipy.sparse.linalg.LinearOperator
+    constant : boolean, optional
+        If True, scalars and non-matrix arrays are converted into a
+        ConstantOperator, instead of a HomothetyOperator, a DiagonalOperator
+        or a DenseBlockDiagonalOperator. Default is False.
+    flags : dictionary
+        The operator flags.
+
+    Raises
+    ------
+    TypeError
+        If the input cannot be converted into an operator.
+
+    """
+    if isinstance(x, Operator):
+        return x
+
+    if isinstance(x, np.ufunc):
+        return Operator(x, **keywords)
+
+    # scalars and python sequences are processed as arrays
+    if isinstance(x, (list, tuple)) or np.isscalar(x):
+        x = np.array(x)
+
+    if isinstance(x, np.ndarray):
+        if constant and not isinstance(x, np.matrix):
+            return ConstantOperator(x, **keywords)
+        ndim = x.ndim
+        if ndim == 0:
+            return HomothetyOperator(x, **keywords)
+        if ndim == 1:
+            return DiagonalOperator(x, shapein=x.shape[-1], **keywords)
+        # the last two dimensions are those of the dense blocks
+        blockshapein = x.shape[:-2] + (x.shape[-1],)
+        return po.linear.DenseBlockDiagonalOperator(
+            x, shapein=blockshapein, **keywords)
+
+    if sp.issparse(x):
+        return po.linear.SparseOperator(x, **keywords)
+
+    # duck typing of the scipy.sparse.linalg.LinearOperator interface
+    if all(hasattr(x, name) for name in ('matvec', 'rmatvec', 'shape')):
+        def direct(input, output):
+            output[...] = x.matvec(input)
+
+        def transpose(input, output):
+            output[...] = x.rmatvec(input)
+
+        flags = Operator.validate_flags(keywords.get('flags', {}),
+                                        linear=True)
+        keywords['flags'] = flags
+        return Operator(direct=direct, transpose=transpose,
+                        shapein=x.shape[1], shapeout=x.shape[0],
+                        dtype=x.dtype, **keywords)
+
+    if callable(x):
+        def direct(input, output):
+            output[...] = x(input)
+
+        flags = Operator.validate_flags(keywords.get('flags', {}),
+                                        inplace=True)
+        keywords['flags'] = flags
+        return Operator(direct, **keywords)
+
+    # last resort: let scipy convert the input, and report any failure as a
+    # TypeError
+    try:
+        linop = sp.linalg.aslinearoperator(x)
+    except Exception as exc:
+        raise TypeError(exc)
+    return asoperator(linop, **keywords)
+
+
+def asoperator1d(x):
+    """
+    Convert the input into an operator, wrapped between two reshape
+    operators: the first one goes from x.shape[1] to the operator input
+    shape and the second one from the operator output shape to x.shape[0].
+
+    """
+    op = asoperator(x)
+    reshape_in = ReshapeOperator(op.shape[1], op.shapein)
+    reshape_out = ReshapeOperator(op.shapeout, op.shape[0])
+    return reshape_out * op * reshape_in
+
+# module-level memory pool; it is also imported by pyoperators.fft
+_pool = MemoryPool()
+# module-level timer, created in cumulative mode
+timer_operator = Timer(cumulative=True)
diff --git a/pyoperators/fft.py b/pyoperators/fft.py
new file mode 100644
index 0000000..a19522d
--- /dev/null
+++ b/pyoperators/fft.py
@@ -0,0 +1,447 @@
+from __future__ import absolute_import, division, print_function
+
+import numpy as np
+import os
+import time
+
+from .config import LOCAL_PATH
+from .core import (
+    AdditionOperator, CompositionOperator, DiagonalOperator, HomothetyOperator,
+    Operator, _pool)
+from .flags import aligned, contiguous, inplace, linear, real, square, unitary
+from .memory import empty
+from .utils import (complex_dtype, isalias, omp_num_threads, product,
+                    tointtuple)
+from .utils.ufuncs import multiply_conjugate
+from .warnings import warn, PyOperatorsWarning
+
+__all__ = ['ConvolutionOperator', 'FFTOperator']
+
+# pyFFTW is optional at import time: when the block below fails, only a
+# warning is issued and the FFTW_* constants are left undefined.
+# NOTE(review): the bare 'except' also hides errors unrelated to a missing
+# pyfftw (e.g. one raised by omp_num_threads()) -- confirm this is intended.
+try:
+    import pyfftw
+    # default number of threads handed to the FFTW planners
+    FFTW_DEFAULT_NUM_THREADS = omp_num_threads()
+    # one wisdom file per suffix '', 'f' and 'l', stored under LOCAL_PATH
+    FFTW_WISDOM_FILES = tuple(os.path.join(LOCAL_PATH, 'fftw{0}.wisdom'.format(
+                              t)) for t in ['', 'f', 'l'])
+    # planning time (in seconds) above which the wisdom is saved to disk
+    FFTW_WISDOM_MIN_DELAY = 0.1
+    # set to True by _load_wisdom once the wisdom files have been imported
+    _is_fftw_wisdom_loaded = False
+except:
+    warn('The pyFFTW library is not installed.', PyOperatorsWarning)
+
+# FFTW out-of-place transforms:
+# PRESERVE_INPUT: default except c2r and hc2r
+# DESTROY_INPUT: default for c2r and hc2r, only possibility for multi c2r
+
+# Names of Operator attributes.
+# NOTE(review): this list is not referenced anywhere else in this module --
+# confirm where it is used.
+OPERATOR_ATTRIBUTES = ['attrin', 'attrout', 'classin', 'classout', 'commin',
+                       'commout', 'reshapein', 'reshapeout', 'shapein',
+                       'shapeout', 'toshapein', 'toshapeout', 'validatein',
+                       'validateout', 'dtype', 'flags']
+
+
+ at linear
+ at square
+ at inplace
+ at aligned
+ at contiguous
+class _FFTWConvolutionOperator(Operator):
+    """
+    Multi-dimensional convolution by a real or complex kernel,
+    using the discrete Fourier transform.
+
+    """
+    def __init__(self, kernel, shapein, axes=None, fftw_flag='FFTW_MEASURE',
+                 nthreads=None, dtype=None, **keywords):
+        """
+        Parameters
+        ----------
+        kernel : array-like
+            The multi-dimensional convolution kernel.
+        shapein : tuple
+            The shape of the input to be convolved by the kernel.
+        axes : tuple
+            Axes along which the convolution is performed. Convolution over
+            less axes than the operator's input is not yet supported.
+        fftw_flag : string
+            list of strings and is a subset of the flags that FFTW allows for
+            the planners. Specifically, FFTW_ESTIMATE, FFTW_MEASURE,
+            FFTW_PATIENT and FFTW_EXHAUSTIVE are supported. These describe the
+            increasing amount of effort spent during the planning stage to
+            create the fastest possible transform. Usually, FFTW_MEASURE is
+            a good compromise and is the default.
+        nthreads : int
+            Tells how many threads to use when invoking FFTW or MKL. Default is
+            the number of cores.
+        dtype : dtype
+            Operator's dtype.
+
+        """
+        kernel = np.array(kernel, dtype=dtype, copy=False)
+        dtype = kernel.dtype
+        if dtype.kind not in ('f', 'c'):
+            kernel = kernel.astype(float)
+            dtype = kernel.dtype
+
+        if shapein is None:
+            raise ValueError('The input shape is not specified.')
+
+        shapein = tointtuple(shapein)
+        if len(shapein) != kernel.ndim:
+            raise ValueError(
+                "The kernel dimension '{0}' is incompatible with that of the s"
+                "pecified shape '{1}'.".format(kernel.ndim, len(shapein)))
+
+        # if the kernel is larger than the image, we don't crop it since it
+        # might affect normalisation of the kernel
+        if any([ks > s for ks, s in zip(kernel.shape, shapein)]):
+            raise ValueError('The kernel must not be larger than the input.')
+
+        if axes is None:
+            axes = range(len(shapein))
+        axes = tointtuple(axes)
+        nthreads = nthreads or FFTW_DEFAULT_NUM_THREADS
+        fftw_flag = fftw_flag.upper()
+
+        if dtype.kind == 'c':
+            n = product(shapein)
+            fft = _FFTWComplexForwardOperator(shapein, axes, fftw_flag,
+                                              nthreads, dtype, **keywords)
+            kernel_fft = _get_kernel_fft(kernel, shapein, dtype, shapein,
+                                         dtype, fft.oplan)
+            kernel_fft /= n
+            self.__class__ = CompositionOperator
+            self.__init__([n, fft.H, DiagonalOperator(kernel_fft), fft])
+            return
+
+        dtype_ = complex_dtype(dtype)
+        shape_ = self._reshape_to_halfstorage(shapein, axes)
+        _load_wisdom()
+        aligned = self.flags.aligned_input
+        contiguous = True
+        with _pool.get(shapein, dtype, aligned, contiguous) as in_:
+            with _pool.get(shape_, dtype_, aligned, contiguous) as out:
+                t0 = time.time()
+                fplan = pyfftw.FFTW(in_, out, axes=axes,
+                                    flags=[fftw_flag],
+                                    direction='FFTW_FORWARD',
+                                    threads=nthreads)
+                bplan = pyfftw.FFTW(out, in_, axes=axes,
+                                    flags=[fftw_flag],
+                                    direction='FFTW_BACKWARD',
+                                    threads=nthreads)
+
+        if time.time() - t0 > FFTW_WISDOM_MIN_DELAY:
+            _save_wisdom()
+
+        kernel_fft = _get_kernel_fft(kernel, shapein, dtype, shape_,
+                                     dtype_, fplan)
+        kernel_fft /= product(shapein)
+        self.__class__ = _FFTWRealConvolutionOperator
+        self.__init__(kernel_fft, fplan, bplan, axes, fftw_flag, nthreads,
+                      shapein, dtype, **keywords)
+
+    def _reshape_to_halfstorage(self, shape, axes):
+        shape = list(shape)
+        shape[axes[-1]] = shape[axes[-1]] // 2 + 1
+        return shape
+
+
+ at real
+ at linear
+ at square
+ at inplace
+ at aligned
+ at contiguous
+class _FFTWRealConvolutionOperator(Operator):
+    """
+    Convolution by a real kernel.
+    The first argument is the FFT of the real kernel. It is not necessarily
+    aligned.
+
+    """
+    def __init__(self, kernel_fft, fplan, bplan, axes, fftw_flag, nthreads,
+                 shapein=None, dtype=None, **keywords):
+        """
+        Parameters
+        ----------
+        kernel_fft : ndarray
+            Fourier transform of the kernel, stored as the 'kernel'
+            attribute.
+        fplan, bplan : pyfftw.FFTW
+            Forward and backward plans. They are re-targeted onto the actual
+            buffers (update_arrays) before each execution.
+        axes : tuple
+            Axes of the transform.
+        fftw_flag : string
+            FFTW planner flag.
+        nthreads : int
+            Number of threads.
+        shapein : tuple
+            Operator's input shape.
+        dtype : dtype
+            Operator's dtype.
+
+        """
+        self.kernel = kernel_fft
+        self._fplan = fplan
+        self._bplan = bplan
+        self.axes = axes
+        self.nthreads = nthreads
+        self.fftw_flag = fftw_flag
+
+        Operator.__init__(self, shapein=shapein, dtype=dtype, **keywords)
+        # the transpose shares the kernel and the plans of this operator
+        self.set_rule('T', lambda s: _FFTWRealConvolutionTransposeOperator(
+            s.kernel, s._fplan, s._bplan, s.axes, s.fftw_flag, s.nthreads))
+        # binary rules, implemented by the static methods below (the exact
+        # semantics of set_rule are defined by Operator, not visible here)
+        self.set_rule(('.', HomothetyOperator), self._rule_homothety,
+                      CompositionOperator)
+        self.set_rule(('.', _FFTWRealConvolutionOperator), self.
+                      _rule_add_real, AdditionOperator)
+        self.set_rule(('.', _FFTWRealConvolutionOperator), self.
+                      _rule_cmp_real, CompositionOperator)
+        self.set_rule(('.', _FFTWComplexBackwardOperator), self.
+                      _rule_complex_backward, CompositionOperator)
+        self.set_rule((_FFTWComplexForwardOperator, '.'), self.
+                      _rule_complex_forward, CompositionOperator)
+
+    def direct(self, input, output):
+        """
+        Forward-transform the input into a temporary buffer, multiply it by
+        the kernel and backward-transform the result into the output.
+
+        """
+        shape = self.kernel.shape
+        dtype = self.kernel.dtype
+        aligned = self.flags.aligned_input
+        contiguous = True
+        with _pool.get(shape, dtype, aligned, contiguous) as buf:
+            self._fplan.update_arrays(input, buf)
+            self._fplan.execute()
+            buf *= self.kernel
+            self._bplan.update_arrays(buf, output)
+            self._bplan.execute()
+
+    def get_kernel(self, out=None):
+        """
+        Copy the kernel into `out` when it is given. The stored kernel (not
+        `out`) is returned in both cases.
+
+        """
+        if out is not None:
+            out[...] = self.kernel
+        return self.kernel
+
+    @property
+    def nbytes(self):
+        """ Number of bytes of the stored kernel. """
+        return self.kernel.nbytes
+
+    @staticmethod
+    def _rule_homothety(self, scalar):
+        """
+        Return a convolution operator whose kernel is that of `self`
+        multiplied by scalar.data.
+
+        """
+        kernel = empty(self.kernel.shape, self.kernel.dtype)
+        self.get_kernel(kernel)
+        kernel *= scalar.data
+        result = _FFTWRealConvolutionOperator(
+            kernel, self._fplan, self._bplan, self.axes, self.fftw_flag,
+            self.nthreads, self.shapein, self.dtype)
+        return result
+
+    @staticmethod
+    def _rule_add_real(self, other):
+        """
+        Return a convolution operator whose kernel is the sum of the kernels
+        of the two operands.
+
+        """
+        if isinstance(other, _FFTWRealConvolutionTransposeOperator):
+            # spare allocation in other.get_kernel (if self is not a transpose)
+            self, other = other, self
+        kernel = empty(self.kernel.shape, self.kernel.dtype)
+        self.get_kernel(kernel)
+        np.add(kernel, other.get_kernel(), kernel)
+        result = _FFTWRealConvolutionOperator(
+            kernel, self._fplan, self._bplan, self.axes, self.fftw_flag,
+            self.nthreads, self.shapein, self.dtype)
+        return result
+
+    @staticmethod
+    def _rule_cmp_real(self, other):
+        """
+        Return a convolution operator whose kernel is the product of the
+        kernels of the two operands, times product(shapein).
+
+        """
+        if isinstance(other, _FFTWRealConvolutionTransposeOperator):
+            # spare allocation in other.get_kernel (if self is not a transpose)
+            self, other = other, self
+        kernel = empty(self.kernel.shape, self.kernel.dtype)
+        self.get_kernel(kernel)
+        kernel *= other.get_kernel()
+        # presumably compensates the 1 / product(shapein) normalisation that
+        # _FFTWConvolutionOperator applies to each kernel -- TODO confirm
+        kernel *= product(self.shapein)
+        result = _FFTWRealConvolutionOperator(
+            kernel, self._fplan, self._bplan, self.axes, self.fftw_flag,
+            self.nthreads, self.shapein, self.dtype)
+        return result
+
+    @staticmethod
+    def _rule_complex_backward(self, other):
+        """
+        Return the pair (other, DiagonalOperator) in which the diagonal is
+        other.H applied to the restored kernel, divided by product(shapein).
+
+        """
+        kernel = self._restore_kernel().astype(self.kernel.dtype)
+        other.H.direct(kernel, kernel)
+        kernel /= product(self.shapein)
+        return other, DiagonalOperator(kernel)
+
+    @staticmethod
+    def _rule_complex_forward(other, self):
+        """
+        Return the pair (DiagonalOperator, other) in which the diagonal is
+        `other` applied to the restored kernel.
+
+        """
+        kernel = self._restore_kernel().astype(self.kernel.dtype)
+        other.direct(kernel, kernel)
+        return DiagonalOperator(kernel), other
+
+    def _restore_kernel(self):
+        """
+        Return a new array of shape shapein holding the backward transform
+        of the kernel returned by get_kernel.
+
+        """
+        shape = self.kernel.shape
+        dtype = self.kernel.dtype
+        aligned = self.flags.aligned_input
+        contiguous = True
+        with _pool.get(shape, dtype, aligned, contiguous) as x:
+            self.get_kernel(x)
+            y = empty(self.shapein, self.dtype)
+            self._bplan.update_arrays(x, y)
+        # NOTE(review): the plan is executed after the 'with' block is left,
+        # i.e. presumably once the buffer x has been handed back to the
+        # pool -- confirm whether execute() should be inside the block.
+        self._bplan.execute()
+        return y
+
+
+class _FFTWRealConvolutionTransposeOperator(_FFTWRealConvolutionOperator):
+    """
+    Transpose of the convolution by a real kernel.
+
+    """
+    __name__ = '_FFTW_RealConvolutionOperator.T'
+
+    def get_kernel(self, out=None):
+        return np.conjugate(self.kernel, out)
+
+    def direct(self, input, output):
+        with _pool.get(self.kernel.shape, self.kernel.dtype) as buf:
+            self._fplan.update_arrays(input, buf)
+            self._fplan.execute()
+            multiply_conjugate(buf, self.kernel, buf)
+            self._bplan.update_arrays(buf, output)
+            self._bplan.execute()
+
+
+ at linear
+ at square
+ at inplace
+ at aligned
+ at contiguous
+class _FFTWComplexOperator(Operator):
+    """
+    Base class of the complex forward and backward FFT operators.
+
+    The 'direct' method relies on two plans that are not created here:
+    'iplan' (in-place) and 'oplan' (out-of-place). The subclasses defined
+    in this module set them in their constructor.
+
+    """
+    def __init__(self, shapein, axes=None, fftw_flag='FFTW_MEASURE',
+                 nthreads=None, dtype=complex, **keywords):
+        shapein = tointtuple(shapein)
+        if axes is None:
+            # by default, all the axes are transformed
+            axes = range(len(shapein))
+        self.axes = tointtuple(axes)
+        self.fftw_flag = fftw_flag.upper()
+        self.nthreads = nthreads or FFTW_DEFAULT_NUM_THREADS
+        dtype = np.dtype(dtype)
+        _load_wisdom()
+        Operator.__init__(self, shapein=shapein, dtype=dtype, **keywords)
+
+    def direct(self, input, output):
+        # the in-place plan is used when input and output are aliased,
+        # the out-of-place plan otherwise
+        if isalias(input, output):
+            self.iplan.update_arrays(input, output)
+            self.iplan.execute()
+        else:
+            self.oplan.update_arrays(input, output)
+            self.oplan.execute()
+
+
+ at unitary
+class _FFTWComplexForwardOperator(_FFTWComplexOperator):
+    """
+    Complex multi-dimensional forward Discrete Fourier Transform.
+
+    """
+    def __init__(self, shapein, axes=None, fftw_flag='FFTW_MEASURE',
+                 nthreads=None, dtype=complex, **keywords):
+        """
+        Parameters
+        ----------
+        shapein : tuple
+            The shape of the input to be Fourier-transformed
+        axes : tuple
+            Axes along which the transform is performed.
+        fftw_flag : string
+            FFTW flag for the planner: FFTW_ESTIMATE, FFTW_MEASURE,
+            FFTW_PATIENT or FFTW_EXHAUSTIVE. These describe the
+            increasing amount of effort spent during the planning stage to
+            create the fastest possible transform. Usually, FFTW_MEASURE is
+            a good compromise and is the default.
+        nthreads : int
+            Tells how many threads to use when invoking FFTW or MKL. Default is
+            the number of cores.
+        dtype : dtype
+            Operator's complex dtype.
+
+        """
+        _FFTWComplexOperator.__init__(self, shapein, axes, fftw_flag,
+                                      nthreads, dtype, **keywords)
+
+        # adjoint: the backward operator scaled by 1 / product(shapein)
+        self.set_rule('H', lambda s:
+                      HomothetyOperator(1 / product(s.shapein)) *
+                      _FFTWComplexBackwardOperator(s.shapein, forward=s))
+        # rule pairing the backward and the forward operators: it yields
+        # an homothety of value product(shapein)
+        self.set_rule((_FFTWComplexBackwardOperator, '.'), lambda o, s:
+                      HomothetyOperator(product(s.shapein)),
+                      CompositionOperator)
+
+        # the in-place and out-of-place plans are created on temporary pool
+        # buffers; 'direct' re-targets them onto the actual arrays
+        with _pool.get(shapein, dtype) as in_:
+            t0 = time.time()
+            self.iplan = pyfftw.FFTW(in_, in_, axes=self.axes,
+                                     flags=[self.fftw_flag],
+                                     direction='FFTW_FORWARD',
+                                     threads=self.nthreads)
+            with _pool.get(shapein, dtype) as out:
+                self.oplan = pyfftw.FFTW(in_, out, axes=self.axes,
+                                         flags=[self.fftw_flag],
+                                         direction='FFTW_FORWARD',
+                                         threads=self.nthreads)
+        # only save the wisdom when the planning was slow enough
+        if time.time() - t0 > FFTW_WISDOM_MIN_DELAY:
+            _save_wisdom()
+
+
+class _FFTWComplexBackwardOperator(_FFTWComplexOperator):
+    """
+    Complex multi-dimensional backward Discrete Fourier Transform.
+
+    """
+    def __init__(self, shapein, dtype=None, forward=None, **keywords):
+        """
+        Parameters
+        ----------
+        shapein : tuple
+            The shape of the input to be transformed.
+        dtype : dtype
+            Operator's dtype. Default is that of the forward operator.
+        forward : _FFTWComplexForwardOperator
+            The forward operator, from which the axes, the FFTW flag and
+            the number of threads are taken. Although it defaults to None,
+            its attributes are read unconditionally, so it must be given.
+
+        """
+
+        dtype = dtype or forward.dtype
+        _FFTWComplexOperator.__init__(self, shapein, forward.axes,
+                                      forward.fftw_flag,
+                                      forward.nthreads, dtype, **keywords)
+        # adjoint: the forward operator scaled by product(shapein)
+        self.set_rule('H', lambda s:
+                      HomothetyOperator(product(s.shapein)) * forward)
+        # rule pairing the forward and the backward operators: it yields
+        # an homothety of value product(shapein)
+        self.set_rule((_FFTWComplexForwardOperator, '.'), lambda o, s:
+                      HomothetyOperator(product(s.shapein)),
+                      CompositionOperator)
+
+        # the in-place and out-of-place plans are created on temporary pool
+        # buffers; 'direct' re-targets them onto the actual arrays
+        with _pool.get(shapein, dtype) as in_:
+            t0 = time.time()
+            self.iplan = pyfftw.FFTW(in_, in_, axes=self.axes,
+                                     flags=[self.fftw_flag],
+                                     direction='FFTW_BACKWARD',
+                                     threads=self.nthreads)
+            with _pool.get(shapein, dtype) as out:
+                self.oplan = pyfftw.FFTW(in_, out, axes=self.axes,
+                                         flags=[self.fftw_flag],
+                                         direction='FFTW_BACKWARD',
+                                         threads=self.nthreads)
+        # only save the wisdom when the planning was slow enough
+        if time.time() - t0 > FFTW_WISDOM_MIN_DELAY:
+            _save_wisdom()
+
+
+def _get_kernel_fft(kernel, shapein, dtypein, shapeout, dtypeout, fft):
+    with _pool.get(shapein, dtypein) as kernel_padded:
+        ker_slice = [slice(0, s) for s in kernel.shape]
+        kernel_padded[...] = 0
+        kernel_padded[ker_slice] = kernel
+        ker_origin = (np.array(kernel.shape)-1) // 2
+        for axis, o in enumerate(ker_origin):
+            kernel_padded = np.roll(kernel_padded, int(-o), axis=axis)
+        kernel_fft = empty(shapeout, dtypeout)
+        fft.update_arrays(kernel_padded, kernel_fft)
+        fft.execute()
+        return kernel_fft
+
+
+def _load_wisdom():
+    """ Loads the 3 wisdom files. """
+    global _is_fftw_wisdom_loaded
+    if _is_fftw_wisdom_loaded:
+        return
+
+    def load(filename):
+        try:
+            with open(filename) as f:
+                wisdom = f.read()
+        except IOError:
+            wisdom = ''
+        return wisdom
+
+    wisdom = [load(f) for f in FFTW_WISDOM_FILES]
+    pyfftw.import_wisdom(wisdom)
+    _is_fftw_wisdom_loaded = True
+
+
+def _save_wisdom():
+    """ Save wisdom as 3 files. """
+    wisdom = pyfftw.export_wisdom()
+    for filename, w in zip(FFTW_WISDOM_FILES, wisdom):
+        try:
+            os.remove(filename)
+        except OSError:
+            pass
+        if len(w) == 0:
+            continue
+        with open(filename, 'w') as f:
+            f.write(w)
+
+
+# make FFTW the default: the public names listed in __all__ are aliases of
+# the FFTW-based implementations
+ConvolutionOperator = _FFTWConvolutionOperator
+FFTOperator = _FFTWComplexForwardOperator
diff --git a/pyoperators/flags.py b/pyoperators/flags.py
new file mode 100644
index 0000000..3f3c8e2
--- /dev/null
+++ b/pyoperators/flags.py
@@ -0,0 +1,253 @@
+"""
+Define the Flags class and the decorators for Operator subclasses.
+These decorators update their 'flags' attribute to specify properties such as
+linear, square etc.
+
+"""
+from collections import namedtuple
+
+
+class Flags(namedtuple(
+        'Flags',
+        ['linear',
+         'square',      # shapein == shapeout
+         'real',        # o.C = o
+         'symmetric',   # o.T = o
+         'hermitian',   # o.H = o
+         'idempotent',  # o * o = o
+         'involutary',  # o * o = I
+         'orthogonal',  # o * o.T = I
+         'unitary',     # o * o.H = I
+         'separable',   # o*[B1...Bn] = [o*B1...o*Bn]
+         'aligned_input',      # aligned input requirement
+         'aligned_output',     # aligned output requirement
+         'contiguous_input',   # contiguous input requirement
+         'contiguous_output',  # contiguous output requirement
+         'inplace',            # handle in-place operation
+         'outplace',           # handle out-of-place operation
+         'update_output',      # handle operations on output
+         'destroy_input',      # input modification in out-of-place operation
+         'shape_input',
+         'shape_output'])):
+    """ Informative flags about the operator. """
+    def __new__(cls):
+        t = 15*(False,) + (True, False, False, '', '')
+        return super(Flags, cls).__new__(cls, *t)
+
+    def __str__(self):
+        n = max(len(f) for f in self._fields)
+        fields = ['  ' + f.upper().ljust(n) + ' : ' for f in self._fields]
+        return '\n'.join([f + str(v) for f, v in zip(fields, self)])
+
+    def __repr__(self):
+        n = max(len(f) for f in self._fields)
+        fields = [f.ljust(n) + '= ' for f in self._fields]
+        return type(self).__name__ + '(\n  ' + ',\n  '.join(
+            f + repr(v) for f, v in zip(fields, self)) + ')'
+
+
+def flags(cls, *arg, **keywords):
+    """
+    Decorator to set any flag.
+
+    """
+    base = cls.__mro__[-2]
+    base.__dict__['_set_flags'](cls, *arg, **keywords)
+    return cls
+
+
+def linear(cls):
+    """
+    Decorator for linear operators.
+    It sets the 'linear' flags.
+
+    """
+    return flags(cls, 'linear')
+
+
+def square(cls):
+    """
+    Decorator for square operators, i.e. operators whose input and output
+    shapes are identical.
+    It sets the 'square' flags.
+
+    """
+    return flags(cls, 'square')
+
+
+def real(cls):
+    """
+    Decorator for real operators, i.e. operators that are equal to
+    their conjugate.
+    It sets the 'real' flags.
+
+    """
+    return flags(cls, 'real')
+
+
+def symmetric(cls):
+    """
+    Decorator for symmetric operators, i.e. operators that are equal to their
+    transpose.
+    It sets the 'linear' and 'symmetric' flags.
+
+    """
+    return flags(cls, 'symmetric')
+
+
+def hermitian(cls):
+    """
+    Decorator for hermitian operators, i.e. operators that are equal to their
+    adjoint.
+    It sets the 'linear' and 'hermitian' flags.
+
+    """
+    return flags(cls, 'hermitian')
+
+
+def idempotent(cls):
+    """
+    Decorator for idempotent operators, i.e. operators whose composition
+    by themselves is equal to themselves.
+    It sets the 'idempotent' flag.
+
+    """
+    return flags(cls, 'idempotent')
+
+
+def involutary(cls):
+    """
+    Decorator for involutary operators, i.e. operators whose composition
+    by themselves is equal to the identity.
+    It sets the 'involutary' flag.
+
+    """
+    return flags(cls, 'involutary')
+
+
+def orthogonal(cls):
+    """
+    Decorator for orthogonal operators, i.e. real operators whose composition
+    by their transpose is equal to the identity.
+    It sets the 'real', 'linear' and 'orthogonal' flags.
+
+    """
+    return flags(cls, 'orthogonal')
+
+
+def unitary(cls):
+    """
+    Decorator for orthogonal operators, i.e. operators whose composition
+    by their adjoint is equal to the identity.
+    It sets the 'linear' and 'unitary' flags.
+
+    """
+    return flags(cls, 'unitary')
+
+
+def aligned(cls):
+    """
+    Decorator to ensure that both input and output of the operator are
+    aligned in memory. It sets the 'alignment_input' and 'alignment_output'
+    attributes to True.
+
+    """
+    return flags(cls, 'aligned_input,aligned_output')
+
+
+def aligned_input(cls):
+    """
+    Decorator to ensure that operator's input is aligned in memory.
+    It sets the 'alignment_input' attribute to True.
+
+    """
+    return flags(cls, 'aligned_input')
+
+
+def aligned_output(cls):
+    """
+    Decorator to ensure that operator's output is aligned in memory.
+    It sets the 'alignment_output' attribute to True.
+
+    """
+    return flags(cls, 'aligned_output')
+
+
+def contiguous(cls):
+    """
+    Decorator to ensure that both input and output of the operator are
+    C-contiguous in memory. It sets the 'contiguous_input' and
+    'contiguous_output' attributes to True.
+
+    """
+    return flags(cls, 'contiguous_input,contiguous_output')
+
+
+def contiguous_input(cls):
+    """
+    Decorator to ensure that operator's input is C-contiguous in memory.
+    It sets the 'contiguous_input' attribute to True.
+
+    """
+    return flags(cls, 'contiguous_input')
+
+
+def contiguous_output(cls):
+    """
+    Decorator to ensure that operator's output is C-contiguous in memory.
+    It sets the 'contiguous_output' attribute to True.
+
+    """
+    return flags(cls, 'contiguous_output')
+
+
+def destroy_input(cls):
+    """
+    Decorator specifying that during an out-of-place operation, the input
+    buffer may be altered. It sets the 'destroy_input' attribute to True
+
+    """
+    return flags(cls, 'destroy_input')
+
+
+def inplace(cls):
+    """
+    Decorator for inplace operators, i.e operators that can handle input and
+    output pointing to the same memory location (though the input and output
+    size may be different).
+    It sets the 'inplace' attribute to True.
+
+    """
+    return flags(cls, 'inplace')
+
+
+def inplace_only(cls):
+    """
+    Decorator for inplace operators, i.e operators that can handle input and
+    output pointing to the same memory location (though the input and output
+    size may be different).
+    It sets the 'inplace' attribute to True and the 'outplace' to False.
+
+    """
+    return flags(cls, inplace=True, outplace=False)
+
+
+def separable(cls):
+    """
+    Decorator for separable operators, i.e. operators P which satisfy for  any
+    block operator B = [B1, ..., Bn] the property:
+        P(B) = [P(B1), ..., P(Bn)] and
+        B(P) = [B1(P), ..., Bn(P)]
+    It sets the 'separable' flags.
+
+    """
+    return flags(cls, 'separable')
+
+
+def update_output(cls):
+    """
+    Decorator for operators that can update the output.
+    It sets the 'update_output' flag.
+
+    """
+    return flags(cls, 'update_output')
diff --git a/pyoperators/iterative/__init__.py b/pyoperators/iterative/__init__.py
new file mode 100644
index 0000000..5812ed0
--- /dev/null
+++ b/pyoperators/iterative/__init__.py
@@ -0,0 +1,21 @@
+"""
+Package defining algorithms working on Operators.
+Contains the following modules:
+
+- criterions: define criterions to be minimized by algorithms.
+
+- optimize: a wrapper for scipy.optimize "fmin" functions.
+
+- algorithms: defines iterative minimization algorithms working on criterions.
+
+- dli: Defines the Lanczos algorithm and the DoubleLoopInference algorithm.
+"""
+
+from .core import *
+from .stopconditions import *
+from .cg import *
+
+# these modules are WIP
+from . import algorithms, criterions, dli, optimize
+
+del core
diff --git a/pyoperators/iterative/algorithms.py b/pyoperators/iterative/algorithms.py
new file mode 100644
index 0000000..e456597
--- /dev/null
+++ b/pyoperators/iterative/algorithms.py
@@ -0,0 +1,434 @@
+"""
+Implements iterative algorithm class.
+"""
+import numpy as np
+from copy import copy
+try:
+    import pylab
+except:
+    pass
+
+from .linesearch import optimal_step
+from .criterions import norm2, quadratic_criterion, huber_criterion
+
+__all__ = [
+    'ConjugateGradient',
+    'HuberConjugateGradient',
+    'acg',
+    'hacg',
+    'fletcher_reeves',
+    'polak_ribiere',
+]
+
+# DEPRECATED
+class Algorithm(object):
+    """
+    Abstract class to define iterative algorithms.
+
+    Attributes
+    ----------
+
+    niterations : int
+        Current iteration number.
+
+    Methods
+    -------
+
+    initialize : Set variables to initial state.
+
+    run : performs the optimization until stop_condition is reached or
+          Ctrl-C is pressed.
+
+    next : perform one iteration and return current solution.
+
+    callback : user-defined function to print status or save variables.
+
+    cont : continue the optimization skipping initialiaztion.
+
+    """
+    def initialize(self):
+        print('The Algorithm class is deprecated.')
+        self.iter_ = 0
+        self.current_solution = None
+
+    def callback(self):
+        pass
+
+    def iterate(self, n=1):
+        """
+        Perform n iterations and return current solution.
+        """
+        for i in xrange(n):
+            self.iter_ += 1
+            self.callback(self)
+        return self.current_solution
+
+    def run(self):
+        """
+        Perform the optimization.
+        """
+        self.initialize()
+        self.iterate() # at least 1 iteration
+        self.cont()
+        self.at_exit()
+        return self.current_solution
+
+    def cont(self):
+        """
+        Continue an interrupted estimation (like call but avoid
+        initialization).
+        """
+        while not self.stop_condition(self):
+            self.iterate()
+        return self.current_solution
+
+    def at_exit(self):
+        """
+        Perform some task at exit.
+        Does nothing by default.
+        """
+        pass
+
+    def __call__(self):
+        print("Deprecation warning: use 'run' method instead.")
+        self.run()
+
+
+# defaults: thresholds from which the module-level 'default_stop' stop
+# condition is built (a None value disables the corresponding test)
+TOL = 1e-6
+GTOL = 1e-6
+MAXITER = None
+
+# stop conditions
+class StopCondition(object):
+    """
+    A class defining stop conditions for iterative algorithms.
+    Must be called with an Algorithm instance as argument.
+    """
+    def _test_maxiter(self, algo):
+        return algo.iter_ >= self.maxiter
+    def _test_tol(self, algo):
+        self.resid = np.abs(algo.last_criterion - algo.current_criterion)
+        self.resid /= algo.first_criterion
+        return self.resid < self.tol
+    def _test_gtol(self, algo):
+        return algo.current_gradient_norm < self.gtol
+    _all_tests = [_test_maxiter, _test_tol, _test_gtol]
+    def __init__(self, maxiter=None, tol=None, gtol=None, cond=np.any):
+        """
+        Generate a StopCondition instance.
+
+        Parameters
+        ----------
+        maxiter: int (None)
+            If not None, stops after a fixed number of iterations.
+        tol: float (None)
+            If not None, stops when the criterion decreases by less than
+            tol times the first criterion value.
+        gtol: float (None)
+            If not None, stops when the norm of the gradient falls below
+            gtol.
+        cond: np.any, np.all
+            If cond==np.any, stops when any of the above condition is True.
+            If cond==np.all, stops when all of the above condition is True.
+        """
+        self.cond = cond
+        self.maxiter = maxiter
+        self.tol = tol
+        self.gtol = gtol
+        self.all_val = [self.maxiter, self.tol, self.gtol]
+        # filter out tests with None values
+        self.tests_val = [val for val in self.all_val
+                          if val is not None]
+        self.tests = [test
+                      for test, val in zip(self._all_tests, self.all_val)
+                      if val is not None]
+        # store values for printing
+        self.resid = None
+    def __call__(self, algo):
+        return self.cond([test(self, algo) for test in self.tests])
+    def str(self, algo):
+        """
+        Returns a string with current condition values.
+        """
+        if self.resid is not None and self.tol is not None:
+            return "\t %1.2e / %1.2e" % (self.resid, self.tol)
+        else:
+            return "\t Residual"
+
+# stop condition built from the module defaults MAXITER, TOL and GTOL
+default_stop = StopCondition(maxiter=MAXITER, tol=TOL, gtol=GTOL)
+
+# update types
+
+def fletcher_reeves(algo):
+    """
+    Fletcher-Reeves descent direction update method.
+    """
+    return algo.current_gradient_norm / algo.last_gradient_norm
+
+def polak_ribiere(algo):
+    """
+    Polak-Ribiere descent direction update method.
+    """
+    b =  np.dot(algo.current_gradient.T,
+                (algo.current_gradient - algo.last_gradient))
+    b /= np.norm(algo.last_gradient)
+    return b
+
+# callback function
+
+class Callback(object):
+    """
+    A Callback instance is called by an Algorithm at each iteration
+    with the Algorithm instance as input. It can be used to display
+    convergence information at each iteration (iteration number,
+    criterion value), display the current solution or store it on
+    disk.
+    """
+    def __init__(self, verbose=False, savefile=None, shape=()):
+        """
+        Parameters
+        ----------
+        verbose: boolean (default False)
+            If True, iteration number and criterion value are displayed.
+        savefile: str or file
+            If not None, the current iteration, criterion value and solution
+            are stored with numpy savez function.
+        shape: tuple
+            Shape of the solution.
+            If it has length 1, pylab plot is called to display the current
+            solution; if it has length 2, pylab imshow is called. Nothing
+            is displayed for any other length (including the empty tuple)
+            or for None.
+
+        Returns
+        -------
+        None
+
+        """
+        self.verbose = verbose
+        self.savefile = savefile
+        self.shape = shape
+        # handle of the pylab artist, created when algo.iter_ == 1
+        self.im = None
+    def print_status(self, algo):
+        """
+        Print the iteration number and criterion value if verbose is set.
+        A header line is printed first when algo.iter_ == 1.
+        """
+        if self.verbose:
+            if algo.iter_ == 1:
+                print('Iteration \t Criterion')
+            print_str = "\t%i \t %e" % (algo.iter_, algo.current_criterion)
+            print_str += algo.stop_condition.str(algo)
+            print(print_str)
+    def save(self, algo):
+        """
+        Store iteration number, criterion value and current solution in
+        savefile (if not None) with numpy savez.
+        """
+        if self.savefile is not None:
+            var_dict = {
+                "iter":algo.iter_,
+                "criterion":algo.current_criterion,
+                "solution":algo.current_solution,
+                }
+            np.savez(self.savefile, **var_dict)
+    def imshow(self, algo):
+        """
+        Display the current solution, reshaped to self.shape, as an image.
+        """
+        # local import, as in plot(): pylab is only needed for display
+        import pylab
+        if algo.iter_ == 1:
+            self.im = pylab.imshow(algo.current_solution.reshape(self.shape))
+        else:
+            self.im.set_data(algo.current_solution.reshape(self.shape))
+        pylab.draw()
+        pylab.show()
+    def plot(self, algo):
+        """
+        Display the current solution as a curve.
+        """
+        import pylab
+        if algo.iter_ == 1:
+            self.im = pylab.plot(algo.current_solution)[0]
+        else:
+            y = algo.current_solution
+            self.im.set_ydata(y)
+            pylab.ylim((y.min(), y.max()))
+        pylab.draw()
+        pylab.show()
+    def __call__(self, algo):
+        """
+        Print, save and display according to the instance settings.
+        """
+        if self.verbose:
+            self.print_status(algo)
+        if self.savefile is not None:
+            self.save(algo)
+        if self.shape is not None:
+            if len(self.shape) == 1:
+                self.plot(algo)
+            elif len(self.shape) == 2:
+                self.imshow(algo)
+
+default_callback = Callback()
+
+# algorithms
+
+class ConjugateGradient(Algorithm):
+    """
+    Apply the conjugate gradient algorithm to a Criterion instance.
+
+    Parameters
+    ----------
+
+    criterion : Criterion
+        A Criterion instance. It should have following methods and attributes:
+            __call__ : returns criterion values at given point
+            diff : returns gradient (1st derivative) of criterion at given point
+            shapein: the shape of the input of criterion
+
+    x0 : ndarray (None)
+        The first guess of the algorithm. If None, the iterations start
+        from a zero vector of size prod(criterion.shapein).
+
+    callback : function (default_callback)
+        Perform some printing / saving operations at each iteration.
+
+    stop_condition : function (default_stop)
+        Defines when the iterations should stop
+
+    update_type : function (fletcher_reeves)
+        Type of descent direction update : e.g. fletcher_reeves, polak_ribiere
+
+    line_search : function (optimal step)
+        Line search method to find the minimum along each direction at each
+        iteration.
+
+    Returns
+    -------
+
+    Returns an algorithm instance. Optimization is performed by
+    calling this instance.
+
+    """
+    def __init__(self, criterion, x0=None,
+                 callback=default_callback,
+                 stop_condition=default_stop,
+                 update_type=fletcher_reeves,
+                 line_search=optimal_step, **kwargs):
+        self.criterion = criterion
+        self.gradient = criterion.diff
+        self.shapein = self.criterion.shapein
+        # keep the user-supplied first guess: it used to be discarded, so
+        # that the iterations always started from zero
+        self.x0 = x0
+        # functions
+        self.callback = callback
+        self.stop_condition = stop_condition
+        self.update_type = update_type
+        self.line_search = line_search
+        self.kwargs = kwargs
+        # to store values
+        self.current_criterion = np.inf
+        self.current_solution = None
+        self.current_gradient = None
+        self.current_gradient_norm = None
+        self.current_descent = None
+        self.last_criterion = np.inf
+        self.last_solution = None
+        self.last_gradient = None
+        self.last_gradient_norm = None
+        self.last_descent = None
+    def initialize(self):
+        """
+        Initialize required values.
+        """
+        Algorithm.initialize(self)
+        self.first_guess()
+        self.first_criterion = self.criterion(self.current_solution)
+        self.current_criterion = self.first_criterion
+    def first_guess(self, x0=None):
+        """
+        Sets current_solution attribute to initial value.
+
+        If x0 is None, the first guess given to the constructor is used.
+        If that one is None too, a zero vector is used.
+        """
+        if x0 is None:
+            x0 = getattr(self, 'x0', None)
+        if x0 is None:
+            self.current_solution = np.zeros(np.prod(self.shapein))
+        else:
+            # copy: update_solution modifies current_solution in place
+            self.current_solution = copy(x0)
+    # update_* functions encode the actual algorithm
+    def update_gradient(self):
+        self.last_gradient = copy(self.current_gradient)
+        self.current_gradient = self.gradient(self.current_solution)
+    def update_gradient_norm(self):
+        self.last_gradient_norm = copy(self.current_gradient_norm)
+        self.current_gradient_norm = norm2(self.current_gradient)
+    def update_descent(self):
+        if self.iter_ == 0:
+            # first iteration: steepest descent
+            self.current_descent = - self.current_gradient
+        else:
+            self.last_descent = copy(self.current_descent)
+            b = self.update_type(self)
+            self.current_descent = - self.current_gradient + b * self.last_descent
+    def update_solution(self):
+        self.last_solution = copy(self.current_solution)
+        a = self.line_search(self)
+        self.current_solution += a * self.current_descent
+    def update_criterion(self):
+        self.last_criterion = copy(self.current_criterion)
+        self.current_criterion = self.criterion(self.current_solution)
+    def iterate(self):
+        """
+        Update all values.
+        """
+        self.update_gradient()
+        self.update_gradient_norm()
+        self.update_descent()
+        self.update_solution()
+        self.update_criterion()
+        Algorithm.iterate(self)
+    def at_exit(self):
+        self.current_solution.resize(self.criterion.shapein)
+
+class QuadraticConjugateGradient(ConjugateGradient):
+    """
+    A subclass of ConjugateGradient using a QuadraticCriterion.
+
+    The criterion is built by quadratic_criterion from model, data, priors,
+    hypers and covariances. The remaining keywords are passed to
+    ConjugateGradient.
+    """
+    def __init__(self, model, data, priors=None, hypers=None,
+                 covariances=None, **kwargs):
+        # None instead of [] as default: no list shared between calls
+        priors = [] if priors is None else priors
+        hypers = [] if hypers is None else hypers
+        criterion = quadratic_criterion(model, data, hypers=hypers,
+                                       priors=priors, covariances=covariances)
+        ConjugateGradient.__init__(self, criterion, **kwargs)
+
+class HuberConjugateGradient(ConjugateGradient):
+    """
+    A subclass of ConjugateGradient using an HuberCriterion.
+
+    The criterion is built by huber_criterion from model, data, priors,
+    hypers and deltas. The remaining keywords are passed to
+    ConjugateGradient.
+    """
+    def __init__(self, model, data, priors=None, hypers=None, deltas=None,
+                 **kwargs):
+        # None instead of [] as default: no list shared between calls
+        priors = [] if priors is None else priors
+        hypers = [] if hypers is None else hypers
+        criterion = huber_criterion(model, data, hypers=hypers, priors=priors,
+                                    deltas=deltas)
+        ConjugateGradient.__init__(self, criterion, **kwargs)
+ 
+# for backward compatibility
+def define_stop_condition(**kwargs):
+    """
+    Return a StopCondition built from the keywords maxiter, tol, gtol and
+    cond. Missing keywords take their default value, others are ignored.
+    """
+    options = {}
+    for name, default in (('maxiter', None), ('tol', TOL), ('gtol', GTOL),
+                          ('cond', np.any)):
+        options[name] = kwargs.get(name, default)
+    return StopCondition(**options)
+
+def define_callback(**kwargs):
+    """
+    Return a Callback built from the keywords verbose, savefile and shape.
+    Missing keywords take their default value, others are ignored.
+    """
+    options = {}
+    for name, default in (('verbose', False), ('savefile', None),
+                          ('shape', ())):
+        options[name] = kwargs.get(name, default)
+    return Callback(**options)
+
+def acg(model, data, priors=None, hypers=None, covariances=None,
+        return_algo=False, **kwargs):
+    """
+    Run a QuadraticConjugateGradient and return its solution.
+
+    The stop condition and the callback are built from the keywords by
+    define_stop_condition and define_callback. If return_algo is true,
+    the tuple (solution, algorithm) is returned instead.
+    """
+    # None instead of [] as default: no list shared between calls
+    priors = [] if priors is None else priors
+    hypers = [] if hypers is None else hypers
+    stop_condition = define_stop_condition(**kwargs)
+    callback = define_callback(**kwargs)
+    algorithm = QuadraticConjugateGradient(model, data, priors=priors,
+                                           hypers=hypers,
+                                           covariances=covariances,
+                                           stop_condition=stop_condition,
+                                           callback=callback,
+                                           **kwargs)
+    sol = algorithm.run()
+    if return_algo:
+        return sol, algorithm
+    return sol
+
+def hacg(model, data, priors=None, hypers=None, deltas=None,
+         return_algo=False, **kwargs):
+    """
+    Run a HuberConjugateGradient and return its solution.
+
+    The stop condition and the callback are built from the keywords by
+    define_stop_condition and define_callback. If return_algo is true,
+    the tuple (solution, algorithm) is returned instead, as acg does.
+    """
+    # None instead of [] as default: no list shared between calls
+    priors = [] if priors is None else priors
+    hypers = [] if hypers is None else hypers
+    stop_condition = define_stop_condition(**kwargs)
+    callback = define_callback(**kwargs)
+    algorithm = HuberConjugateGradient(model, data, priors=priors,
+                                       hypers=hypers, deltas=deltas,
+                                       stop_condition=stop_condition,
+                                       callback=callback,
+                                       **kwargs)
+    sol = algorithm.run()
+    # return_algo used to be accepted but ignored
+    if return_algo:
+        return sol, algorithm
+    return sol
+
+# other
+
+def normalize_hyper(hyper, y, x):
+    """
+    Normalize hyperparameters so that they are independent of problem size.
+
+    Returns hyper, as an array, multiplied by y.size and divided by x.size.
+    """
+    scaled = np.asarray(hyper) * float(y.size)
+    return scaled / float(x.size)
diff --git a/pyoperators/iterative/cg.py b/pyoperators/iterative/cg.py
new file mode 100644
index 0000000..8a913e5
--- /dev/null
+++ b/pyoperators/iterative/cg.py
@@ -0,0 +1,215 @@
+from __future__ import absolute_import, division, print_function
+import numpy as np
+import time
+
+from ..core import IdentityOperator, asoperator
+from ..memory import empty, zeros
+from ..utils.mpi import MPI
+from .core import AbnormalStopIteration, IterativeAlgorithm
+from .stopconditions import MaxIterationStopCondition
+
+__all__ = ['pcg']
+
+
+class PCGAlgorithm(IterativeAlgorithm):
+    """
+    OpenMP/MPI Preconditioned conjugate gradient iteration to solve A x = b.
+
+    The recursion variable is 'x' and it is updated in place. The attribute
+    'error' holds the relative residual sqrt(r.r / b.b), where r = b - A x.
+
+    """
+    def __init__(self, A, b, x0=None, tol=1.e-5, maxiter=300, M=None,
+                 disp=False, callback=None, reuse_initial_state=False):
+        """
+        Parameters
+        ----------
+        A : {Operator, sparse matrix, dense matrix}
+            The real or complex N-by-N matrix of the linear system
+            ``A`` must represent a hermitian, positive definite matrix
+        b : {array, matrix}
+            Right hand side of the linear system. Has shape (N,) or (N,1).
+        x0  : {array, matrix}
+            Starting guess for the solution. If None, zeros are used.
+        tol : float, optional
+            Tolerance to achieve. The algorithm terminates when the
+            relative residual is below `tol`.
+        maxiter : integer, optional
+            Maximum number of iterations.  Iteration will stop after maxiter
+            steps even if the specified tolerance has not been achieved.
+        M : {Operator, sparse matrix, dense matrix}, optional
+            Preconditioner for A.  The preconditioner should approximate the
+            inverse of A.  Effective preconditioning dramatically improves the
+            rate of convergence, which implies that fewer iterations are needed
+            to reach a given error tolerance.
+        disp : boolean
+            Set to True to display convergence message
+        callback : function, optional
+            User-supplied function to call after each iteration.  It is called
+            as callback(self), where self is an instance of this class.
+        reuse_initial_state : boolean, optional
+            If set to True, the buffer initial guess (if provided) is reused
+            during the iterations. Beware of side effects!
+
+        Returns
+        -------
+        x : array
+            The converged solution.
+
+        Raises
+        ------
+        pyoperators.AbnormalStopIteration : if the solver reached the maximum
+            number of iterations without reaching specified tolerance.
+
+        """
+        dtype = A.dtype or np.dtype(float)
+        if dtype.kind == 'c':
+            raise TypeError('The complex case is not yet implemented.')
+        elif dtype.kind != 'f':
+            dtype = np.dtype(float)
+        # asarray only copies when it is required; np.array(..., copy=False)
+        # raises an error in that case with NumPy >= 2
+        b = np.asarray(b, dtype=dtype)
+
+        if x0 is None:
+            x0 = zeros(b.shape, dtype)
+
+        abnormal_stop_condition = MaxIterationStopCondition(
+            maxiter, 'Solver reached maximum number of iterations without reac'
+            'hing specified tolerance.')
+
+        IterativeAlgorithm.__init__(
+            self, x=x0, abnormal_stop_condition=abnormal_stop_condition,
+            disp=disp, dtype=dtype, reuse_initial_state=reuse_initial_state,
+            inplace_recursion=True, callback=callback)
+
+        A = asoperator(A)
+        if A.shapein is None:
+            raise ValueError('The operator input shape is not explicit.')
+        if A.shapein != b.shape:
+            raise ValueError(
+                "The operator input shape '{0}' is incompatible with that of t"
+                "he RHS '{1}'.".format(A.shapein, b.shape))
+        self.A = A
+        self.b = b
+        self.comm = A.commin
+        # 'norm' is the squared 2-norm (see _norm2)
+        self.norm = lambda x: _norm2(x, self.comm)
+        self.dot = lambda x, y: _dot(x, y, self.comm)
+
+        if M is None:
+            M = IdentityOperator()
+        self.M = asoperator(M)
+
+        self.tol = tol
+        self.b_norm = self.norm(b)
+        # work buffers: d (search direction), q (A d), r (residual) and
+        # s (preconditioned residual)
+        self.d = empty(b.shape, dtype)
+        self.q = empty(b.shape, dtype)
+        self.r = empty(b.shape, dtype)
+        self.s = empty(b.shape, dtype)
+
+    def initialize(self):
+        """
+        Set the initial residual, search direction and error. Raise
+        StopIteration if the RHS is zero or if the initial guess already
+        satisfies the tolerance.
+
+        """
+        IterativeAlgorithm.initialize(self)
+
+        if self.b_norm == 0:
+            self.error = 0
+            self.x[...] = 0
+            raise StopIteration('RHS is zero.')
+
+        self.r[...] = self.b
+        self.r -= self.A(self.x)
+        self.error = np.sqrt(self.norm(self.r) / self.b_norm)
+        if self.error < self.tol:
+            raise StopIteration('Solver reached maximum tolerance.')
+        self.M(self.r, self.d)
+        self.delta = self.dot(self.r, self.d)
+
+    def iteration(self):
+        """
+        Perform one iteration: update the solution x, the residual r and
+        the error, then the search direction d. Raise StopIteration when
+        the error is below the tolerance.
+
+        """
+        self.A(self.d, self.q)
+        alpha = self.delta / self.dot(self.d, self.q)
+        self.x += alpha * self.d
+        self.r -= alpha * self.q
+        self.error = np.sqrt(self.norm(self.r) / self.b_norm)
+        if self.error < self.tol:
+            raise StopIteration('Solver reached maximum tolerance.')
+
+        self.M(self.r, self.s)
+        delta_old = self.delta
+        self.delta = self.dot(self.r, self.s)
+        beta = self.delta / delta_old
+        self.d *= beta
+        self.d += self.s
+
+    @staticmethod
+    def callback(self):
+        """ Print the iteration number and the error if disp is true. """
+        if self.disp:
+            print('{0:4}: {1}'.format(self.niterations, self.error))
+
+
+def pcg(A, b, x0=None, tol=1.e-5, maxiter=300, M=None, disp=False,
+        callback=None, reuse_initial_state=False):
+    """
+    Solve A x = b with the preconditioned conjugate gradient PCGAlgorithm.
+
+    Parameters
+    ----------
+    A : {Operator, sparse matrix, dense matrix}
+        The real or complex N-by-N matrix of the linear system
+        ``A`` must represent a hermitian, positive definite matrix
+    b : {array, matrix}
+        Right hand side of the linear system. Has shape (N,) or (N,1).
+    x0  : {array, matrix}
+        Starting guess for the solution.
+    tol : float, optional
+        Tolerance to achieve. The algorithm terminates when the
+        relative residual is below `tol`.
+    maxiter : integer, optional
+        Maximum number of iterations.  Iteration will stop after maxiter
+        steps even if the specified tolerance has not been achieved.
+    M : {Operator, sparse matrix, dense matrix}, optional
+        Preconditioner for A.  The preconditioner should approximate the
+        inverse of A.  Effective preconditioning dramatically improves the
+        rate of convergence, which implies that fewer iterations are needed
+        to reach a given error tolerance.
+    disp : boolean
+        Set to True to display convergence message
+    callback : function, optional
+        User-supplied function to call after each iteration.  It is called
+        as callback(algo), where algo is the PCGAlgorithm instance.
+    reuse_initial_state : boolean, optional
+        If set to True, the buffer initial guess (if provided) is reused
+        during the iterations. Beware of side effects!
+
+    Returns
+    -------
+    output : dict whose keys are
+        'x' : the solution.
+        'success' : boolean, False if an AbnormalStopIteration was raised
+        'message' : string indicating cause of failure (empty on success)
+        'nit' : the value of the algorithm's niterations attribute
+        'error' : the value of the algorithm's error attribute
+        'time' : elapsed time in seconds
+
+    """
+    start = time.time()
+    solver = PCGAlgorithm(A, b, x0=x0, tol=tol, maxiter=maxiter, M=M,
+                          disp=disp, callback=callback,
+                          reuse_initial_state=reuse_initial_state)
+    success, message = True, ''
+    try:
+        solution = solver.run()
+    except AbnormalStopIteration as exc:
+        # the solver did not converge: report the last state reached
+        success, message = False, str(exc)
+        solution = solver.finalize()
+    result = {'x': solution,
+              'success': success,
+              'message': message,
+              'nit': solver.niterations,
+              'error': solver.error}
+    result['time'] = time.time() - start
+    return result
+
+
+def _norm2(x, comm):
+    """
+    Return the dot product of the flattened x with itself as a 0-d array,
+    reduced in place over the communicator comm if it is not None.
+    """
+    flat = x.ravel()
+    total = np.array(np.dot(flat, flat))
+    if comm is None:
+        return total
+    comm.Allreduce(MPI.IN_PLACE, total)
+    return total
+
+
+def _dot(x, y, comm):
+    """
+    Return the dot product of the flattened x and y as a 0-d array,
+    reduced in place over the communicator comm if it is not None.
+    """
+    total = np.array(np.dot(x.ravel(), y.ravel()))
+    if comm is None:
+        return total
+    comm.Allreduce(MPI.IN_PLACE, total)
+    return total
diff --git a/pyoperators/iterative/core.py b/pyoperators/iterative/core.py
new file mode 100644
index 0000000..b2a6668
--- /dev/null
+++ b/pyoperators/iterative/core.py
@@ -0,0 +1,414 @@
+from __future__ import absolute_import, division, print_function
+"""
+This module defines the base class IterativeAlgorithm.
+
+"""
+
+import collections
+import numpy as np
+import re
+
+from ..utils import strenum, uninterruptible_if
+from ..memory import empty
+from .stopconditions import NoStopCondition
+
+__all__ = ['AbnormalStopIteration', 'IterativeAlgorithm']
+
+NO_STOP_CONDITION = NoStopCondition()
+
+
+class AbnormalStopIteration(Exception):
+    """ Raised when the abnormal stop condition of an algorithm is met. """
+    pass
+
+
+class IterativeAlgorithm(object):
+    """
+    Abstract class for iterative algorithms.
+
+    In pseudo code, the flow of an IterativeAlgorithm is the following:
+
+        niterations = order of recursion minus one
+        try:
+            initialize
+            do infinite loop:
+                test stop conditions
+                niterations += 1
+                try:
+                    iteration defining the new state
+                except StopIteration:
+                    callback
+                    update state
+                    break
+                except AbnormalStopIteration:
+                    raise
+                callback
+                update state
+        except StopIteration:
+            pass
+        except AbnormalStopIteration:
+            raise
+        return the output of the finalize method.
+
+    The user can supply the following:
+    - an initialize method
+    - an iteration method
+    - a normal stop condition
+    - an abnormal stop condition
+    - a callback function
+
+    Attributes
+    ----------
+    info : dict
+        This dictionary contains the recursion names of the recursion
+        variables, their dtype and their shape.
+    niterations : int
+        Number of completed iterations. During the first pass, its value is
+        equal to the recursion order minus one when the stop conditions are
+        checked and to the recursion order in the iteration method and
+        the callback function.
+    order : int
+        Recursion order. For example:
+        order=1: x_new = f(x)
+        order=2: x_new = f(x, x_old)
+        order=3: x_new = f(x, x_old, x_old2)
+        order=4: x_new = f(x, x_old, x_old2, x_old3)
+        and so on.
+
+    Methods
+    -------
+    initialize : Set variables to initial state.
+    run : iterate until a stop condition is reached or Ctrl-C is pressed.
+    cont : continue the algorithm
+    restart : restart the algorithm if possible
+    next : perform one (or more) iterations.
+    callback : user-defined function to print status or save variables.
+
+    """
+    def __init__(self, allocate_new_state=True, callback=None,
+                 clean_interrupt=True, disp=False, dtype=float,
+                 inplace_recursion=False,
+                 normal_stop_condition=NO_STOP_CONDITION,
+                 abnormal_stop_condition=NO_STOP_CONDITION,
+                 reuse_initial_state=False, **keywords):
+        """
+        Parameters
+        ----------
+        allocate_new_state : boolean, optional
+            Tells if the buffers for the new state should be allocated
+            beforehand. If true, the iteration method should reuse these
+            buffers (ex: self.x_new[...] = ..) instead of creating new
+            references (ex: self.x_new = ..).
+        callback : callable, optional
+            User-defined function to do actions such as printing status or
+            plotting or saving variables. It is a callable with a single
+            argument, namely the IterativeAlgorithm instance.
+        clean_interrupt : boolean, optional
+            An IterativeAlgorithm instance can be interrupted by pressing
+            CTRL-C and still be restarted or iterated. There is a small
+            overhead associated to it. To disable this feature, set this
+            argument to False.
+        disp : boolean
+            If true, display iteration message
+        dtype : numpy.dtype, optional
+            Data type used to coerce the initial state variable to the same
+            precision. It does not alter the data kind: complex variables stay
+            complex.
+        inplace_recursion : boolean, optional
+            In some algorithms, it is not necessary to keep a copy of two
+            states. It is then advisable to do the update in-place. For a given
+            variable 'x', if the value of this argument is False, the variables
+            'x_new' and 'x' will be available. If the value is True, only 'x'
+            will be and the argument allocate_new_state has no effect.
+        normal_stop_condition : StopCondition, optional
+            The normal stop condition that will terminate the iteration.
+        abnormal_stop_condition : StopCondition, optional
+            The abnormal stop condition that will terminate the iteration.
+            If such stop condition is met, an AbnormalStopIteration is raised.
+        reuse_initial_state : boolean, optional
+            Tells whether or not the buffers of the input initial state
+            variables can be reused during these iterations. If True, beware
+            of the side effects. Besides, the algorithm could not be restarted,
+            as the initial state is lost.
+        **keywords
+            Initial values of the recursion variables (ex: x=.., x_old=..).
+            The number of '_old' suffixes sets the recursion order. The dtype
+            of a variable 'x' can be set with the keyword 'x_dtype'.
+
+        """
+        self.clean_interrupt = clean_interrupt
+        self.disp = disp
+        self._set_buffer_handling(inplace_recursion, allocate_new_state,
+                                  reuse_initial_state)
+        self._set_order(keywords)
+        self._set_variables(keywords)
+        self._set_initial_state(keywords, dtype)
+        self._set_callback(callback)
+        self._set_stop_conditions(normal_stop_condition,
+                                  abnormal_stop_condition)
+        # niterations is equal to order - 1 until the first iteration
+        self.niterations = self.order - 1
+
+    def __iter__(self):
+        return self
+
+    @staticmethod
+    def callback(self):
+        """
+        Default callback function, called after each iteration.
+
+        If disp is true, print the iteration number followed by the current
+        state: the output of finalize for an in-place recursion, the '_new'
+        value if there is a single recursion variable, or a dictionary of
+        the '_new' values keyed by variable name otherwise.
+
+        """
+        if not self.disp:
+            return
+        if self.inplace_recursion:
+            current = self.finalize()
+        elif len(self.variables) == 1:
+            current = getattr(self, self.variables[0] + '_new')
+        else:
+            # this dictionary used to be built without being bound to
+            # 'current', which made the print below fail
+            current = dict((v, getattr(self, v + '_new'))
+                           for v in self.variables)
+        print('{0:4}: {1}'.format(self.niterations, current))
+
+    def cont(self):
+        """
+        Continue an interrupted computation and return the output of the
+        finalize method.
+
+        Raises a RuntimeError if the algorithm is not yet started. An
+        AbnormalStopIteration raised during the iterations is propagated.
+
+        """
+        # niterations is equal to order - 1 (not 0, unless the order is 1)
+        # as long as no iteration has been performed
+        if self.niterations == self.order - 1:
+            raise RuntimeError("The iterative algorithm is not yet started. Us"
+                               "e the 'run' method.")
+        try:
+            return self.next(np.iinfo(int).max)
+        except StopIteration:
+            pass
+        return self.finalize()
+
+    def finalize(self):
+        """
+        Perform some task at exit and return the value of the variables as
+        a dictionary if there are more than one recursion variable and as
+        the variable's value otherwise.
+
+        """
+        if len(self.variables) == 1:
+            return getattr(self, self.variables[0])
+        return dict((v, getattr(self, v)) for v in self.variables)
+
+    def initialize(self):
+        """
+        Initialize the iterative algorithm by setting the initial values.
+
+        """
+        if self.niterations > self.order - 1 and self.reuse_initial_state:
+            raise RuntimeError(
+                'It is not possible to restart an algorithm for which the init'
+                'ial state has not been saved. Instantiate the algorithme with'
+                ' the keyword reuse_initial_state set to False.')
+        self.niterations = self.order - 1
+        self.success = True
+        skip_new = not self.inplace_recursion
+
+        # _set_buffer_handling scheme:
+        # 1) copy=False, 2) True, 3) False, 4) False, 5) True
+        copy = (self.inplace_recursion or self.allocate_new_state) and \
+               not self.reuse_initial_state
+        for var, info in self.info.items():
+            for n, b in zip(info['names'][skip_new:],
+                            self._initial_state[var]):
+                #XXX FIXME: b should be aligned...
+                b = np.array(b, info['dtype'], order='c', copy=copy)
+                setattr(self, n, b)
+
+    def next(self, n=1):
+        """ Perform n iterations and return current solution. """
+        if self.niterations == self.order - 1:
+            self.initialize()
+        for i in xrange(n):
+            with uninterruptible_if(self.clean_interrupt):
+                self._check_stop_conditions()
+                self.niterations += 1
+                try:
+                    self.iteration()
+                except StopIteration:
+                    self.callback(self)
+                    self._update_variables()
+                    raise
+                except AbnormalStopIteration:
+                    raise
+                self.callback(self)
+                self._update_variables()
+        return self.finalize()
+
+    def iteration(self):
+        """
+        Algorithm actual iteration, It defines the new state from the previous
+        ones.
+
+        """
+        raise NotImplementedError("The algorithm does not define an 'iteration"
+                                  "' method.")
+
+    def restart(self, n=None):
+        """ Restart the algorithm. """
+        self.initialize()
+        return self.run(n)
+
+    def run(self, n=None):
+        """ Run the algorithm. """
+        if self.niterations > self.order - 1:
+            raise RuntimeError("The iterative algorithm is already started. Us"
+                               "e the methods 'restart' or 'cont' instead.")
+        n = n or np.iinfo(int).max
+        try:
+            return self.next(n)
+        except StopIteration:
+            pass
+        except AbnormalStopIteration:
+            raise
+        return self.finalize()
+
+    def _check_stop_conditions(self):
+        """
+        Raise a StopIteration if the normal stop condition is met. Raise an
+        AbnormalStopIteration if the abnormal stop condition is met.
+
+        """
+        self.normal_stop_condition(self)
+        try:
+            self.abnormal_stop_condition(self)
+        except StopIteration as e:
+            raise AbnormalStopIteration(e)
+
+    def _get_suffix(self):
+        """
+        Return list of string ['_new', '', '_old', '_old2, ...] according
+        to recursion order.
+
+        """
+        if self.inplace_recursion:
+            return ['']
+        suffix = ['_new', '']
+        if self.order == 1:
+            return suffix
+        suffix += ['_old']
+        if self.order == 2:
+            return suffix
+        return suffix + ['_old{0}'.format(o-1) for o in range(3, self.order+1)]
+
+    def _set_buffer_handling(self, inplace_recursion, allocate_new_state,
+                             reuse_initial_state):
+        """
+        There are only 5 buffer handling schemes:
+        1) out-of-place recursion, pre-allocate new state, the initial state
+        buffers are reused during the iterations (IAR= False, True, True)
+        2) out-of-place recursion, pre-allocate new state, the initial state
+        is copied for the first iteration (IAR= False, True, False)
+        3) out-of-place recursion, do not pre-allocate new state, the initial
+        state is passed to the first iteration, where it should not be altered.
+        (IAR= False, False, False)
+        4) inplace recursion, reuse initial state (IAR= True, False, True)
+        5) inplace recursion, do not reuse initial state (True, False, False)
+
+        """
+        self.allocate_new_state = allocate_new_state
+        self.inplace_recursion = inplace_recursion
+        self.reuse_initial_state = reuse_initial_state
+        if inplace_recursion:
+            self.allocate_new_state = False
+        elif not allocate_new_state:
+            self.reuse_initial_state = False
+
+    def _set_callback(self, callback):
+        """ Set the callback function, if specified. """
+        if callback is None:
+            return
+        if not callable(callback):
+            raise TypeError('The callback function is not callable.')
+        self.callback = callback
+
+    def _set_initial_state(self, keywords, default_dtype):
+        # _initial_state contains references to the input initial state:
+        # no copy nor casting is done.
+        self.info = {}
+        self._initial_state = {}
+        self._buffers = {} if self.allocate_new_state else None
+        suffix = self._get_suffix()
+
+        for var in self.variables:
+            names = tuple(var + s for s in suffix)
+            shapes = []
+            initial_state = []
+            dtype_fixed = keywords.get(var + '_dtype', None)
+            dtype = np.dtype(dtype_fixed or default_dtype)
+            skip_new = not self.inplace_recursion
+            for n in names[skip_new:]:
+                val = keywords[n]
+                if not isinstance(val, np.ndarray):
+                    val = np.array(val)
+                # if the variable's dtype is not specified, we'll look for
+                # promotion to complex from the initial values
+                if dtype_fixed is None and dtype.kind == 'f' and \
+                   val.dtype.kind == 'c':
+                    dtype = np.dtype('complex' + str(2*int(dtype.name[5:])))
+                shapes.append(val.shape)
+                initial_state.append(keywords[n])
+
+            shape = shapes[0]
+            if any(s != shape for s in shapes[1:]):
+                raise ValueError("The shapes of the initial values of '{0}' ar"
+                                 "e incompatible: {1}.".format(var, shapes))
+
+            self.info[var] = {'names': names,
+                              'shape': shape,
+                              'dtype': dtype}
+            self._initial_state[var] = initial_state
+            if self.allocate_new_state:
+                setattr(self, var + '_new', empty(shape, dtype))
+
+        # make sure that the initial buffers don't point the same memory loc.
+        if self.reuse_initial_state:
+            names = []
+            addresses = []
+            for var in self.variables:
+                names += self.info[var]['names'][skip_new:]
+                addresses += [b.__array_interface__['data'][0]
+                              if isinstance(b, np.ndarray) else 0
+                              for b in self._initial_state[var]]
+            d = collections.defaultdict(list)
+            for n, a in zip(names, addresses):
+                d[a].append(n)
+            duplicates = [v for k, v in d.items() if len(v) > 1 and k != 0]
+            if len(duplicates) > 0:
+                raise ValueError(
+                    'Some initial values refer to the same buffer: {0}.'.
+                    format(strenum(('='.join(d) for d in duplicates), 'and')))
+
+    def _set_order(self, keywords):
+        """ Set the order of the recursion. """
+        order = 1
+        if any(k.endswith('_old') for k in keywords):
+            order += 1
+            while any(k.endswith('_old'+str(order)) for k in keywords):
+                order += 1
+        self.order = order
+
+    def _set_stop_conditions(self, normal_stop, abnormal_stop):
+        """ Set the stop conditions. """
+        if not callable(normal_stop) or not callable(abnormal_stop):
+            raise TypeError('The stop conditions must be callable.')
+        self.normal_stop_condition = normal_stop
+        self.abnormal_stop_condition = abnormal_stop
+
+    def _set_variables(self, keywords):
+        """ Set the variable names of the recursion. """
+        regex = re.compile(r'^((?!(_old[0-9]*|_new|_dtype)$).)*$')
+        variables = list(set(k for k in keywords if regex.match(k)))
+        variables.sort()
+
+        suffix = self._get_suffix()
+        for var in variables:
+            for s in suffix:
+                if s != '_new' and var + s not in keywords:
+                    raise ValueError("The initial value '{0}' is not specified"
+                                     ".".format(var + s))
+        self.variables = variables
+
+    def _update_variables(self):
+        """ Cyclic update of the variables. """
+        if self.inplace_recursion:
+            return
+        for var in self.variables:
+            names = self.info[var]['names']
+            buffers = tuple(getattr(self, n) for n in names)
+            setattr(self, names[0], buffers[-1])
+            for n, b in zip(names[1:], buffers[:-1]):
+                setattr(self, n, b)
diff --git a/pyoperators/iterative/criterions.py b/pyoperators/iterative/criterions.py
new file mode 100644
index 0000000..5181f50
--- /dev/null
+++ b/pyoperators/iterative/criterions.py
@@ -0,0 +1,349 @@
+"""
+This module is obsolete.
+It implements Criterions. Those are functions defined from
+Norms and Operators to be minimized by iterative algorithms (such as
+the conjugate-gradient).
+
+Interfaces with the scipy.optimize algorithms are defined through
+their __call__ and diff methods and their shapein attribute.
+"""
+
+import copy
+import numpy as np
+from ..core import Operator, IdentityOperator
+
+__all__ = ['norm2',
+           'dnorm2',
+           'normp',
+           'dnormp',
+           'huber',
+           'dhuber',
+           'hnorm',
+           'dhnorm',
+           'Norm2',
+           'Huber',
+           'Normp',
+           'Criterion',
+           'quadratic_criterion',
+           'huber_criterion',
+           'normp_criterion']
+
+# norms
+# =====
+
+# 2-norm
+def norm2(x):
+    return np.dot(x.ravel().T, x.ravel())
+
+def dnorm2(x):
+    return 2 * x
+
+# p-norm
+def normp(p=2):
+    def norm(t):
+        return np.sum(np.abs(t) ** p)
+    return norm
+
+def dnormp(p=2):
+    def norm(t):
+        return np.sign(t) * p * (np.abs(t) ** (p - 1))
+    return norm
+
+# huber norm
+def huber(t, delta=1):
+    """Apply the huber function to the vector t, with transition delta"""
+    t_out = t.flatten()
+    quadratic_index = np.where(np.abs(t_out) < delta)
+    linear_index = np.where(np.abs(t_out) >= delta)
+    t_out[quadratic_index] = np.abs(t_out[quadratic_index]) ** 2
+    t_out[linear_index] = 2 * delta * np.abs(t_out[linear_index]) - delta ** 2
+    return np.reshape(t_out, t.shape)
+
+def dhuber(t, delta=1):
+    """Apply the derivative of the Huber function to t, transition: delta"""
+    t_out = t.flatten()
+    quadratic_index = np.where(np.abs(t_out) < delta)
+    linear_index_positive = np.where(t_out >= delta)
+    linear_index_negative = np.where(t_out <= - delta)
+    t_out[quadratic_index] = 2 * t_out[quadratic_index]
+    t_out[linear_index_positive] = 2 * delta
+    t_out[linear_index_negative] = - 2 * delta
+    return np.reshape(t_out, t.shape)
+
+def hnorm(d=None):
+    if d is None:
+        return norm2
+    else:
+        def norm(t):
+            return np.sum(huber(t, d))
+        return norm
+
+def dhnorm(d=None):
+    if d is None:
+        return dnorm2
+    else:
+        def norm(t):
+            return dhuber(t, d)
+        return norm
+
+# for operations on norms
+def _scalar_mul(func1, scalar):
+    def func(x):
+        return scalar * func1(x)
+    return func
+
+# norm classes
+class Norm(object):
+    """
+    An abstract class to define norm classes.
+    """
+    def __call__(self, x):
+        return self._call(x)
+    def diff(self, x):
+        return self._diff(x)
+    def __mul__(self, x):
+        # returns a new norm whose _call, _diff (and _hessian) are scaled by x
+        if np.isscalar(x):
+            kwargs = dict((k,v) for k,v in self.__dict__.items() \
+                          if k[0] != '_')
+            N = type(self)(**kwargs)
+            N._call = _scalar_mul(self._call, x)
+            N._diff = _scalar_mul(self._diff, x)
+            if hasattr(self, "_hessian"):
+                N._hessian = _scalar_mul(self._hessian, x)
+        else:
+            raise ValueError("Expects only scalar multiplication")
+        return N
+    __imul__ = __mul__
+    __rmul__ = __mul__
+
+class Norm2(Norm):
+    """
+    A norm-2 class. Optionally accepts a covariance matrix C.
+    If C is given, the norm would be : np.dot(x.T, C * x).
+    Otherwise, it would be norm2(x).
+
+    Parameters
+    ----------
+
+    C : LinearOperator (None)
+        The covariance matrix of the norm.
+
+    Returns
+    -------
+    Returns a Norm2 instance with a __call__ and a diff method.
+    """
+    def __init__(self, C=None):
+        def call(x):
+            return norm2(x)
+        def diff(x):
+            return 2 * x
+        def hessian(x):
+            return 2 * IdentityOperator(shapein=x.size)
+        def c_call(x):
+            return np.dot(x.T, C * x)
+        def c_diff(x):
+            return 2 * C * x
+        def c_hessian(x):
+            return 2 * C
+        self.C = C
+        if C is None:
+            self._call = call
+            self._diff = diff
+            self._hessian = hessian
+        else:
+            self._call = c_call
+            self._diff = c_diff
+            self._hessian = c_hessian
+
+class Huber(Norm):
+    """
+    A Huber norm class.
+
+    Parameters
+    ----------
+
+    delta: float
+       The Huber parameter of the norm.
+       if abs(x_i) is below delta, returns x_i ** 2
+       else returns 2 * delta * abs(x_i) - delta ** 2
+
+    Returns
+    -------
+    Returns a Huber instance with a __call__ and a diff method.
+     """
+    def __init__(self, delta):
+        self.delta = delta
+        self._call = hnorm(d=delta)
+        self._diff = dhnorm(d=delta)
+
+class Normp(Norm):
+    """
+    A Norm-p class.
+
+    Parameters
+    ----------
+
+    p: float
+       The power of the norm.
+       The norm will be np.sum(np.abs(x) ** p)
+
+    Returns
+    -------
+    Returns a Normp instance with a __call__ and a diff method.
+     """
+    def __init__(self, p):
+        self.p = p
+        self._call = normp(p=p)
+        self._diff = dnormp(p=p)
+
+# criterion elements
+# ==================
+
+class CriterionElement(object):
+    def __init__(self, norm, op, data=None):
+        # test inputs; NOTE(review): data check only meaningful for 1-d data
+        if not isinstance(norm, Norm):
+            raise ValueError("First parameter should be a Norm instance")
+        self.norm = norm
+        if not isinstance(op, Operator):
+            raise ValueError("Second parameter should be an Operator instance")
+        self.op = op
+        self.shapein = op.shapein
+        if not (isinstance(data, np.ndarray) or data is None):
+            raise ValueError("data parameter should be ndarray or None")
+        if data is not None and not data.shape == np.prod(op.shapeout):
+            raise ValueError("data shape sould equal operator shapeout")
+        self.data = data
+
+        # cache of the last input x and of op * x (see _islastx / _storex)
+        self.last_x = None
+        self.last_ox = None
+
+        # define call and diff
+        def _call(x):
+            if not self._islastx(x):
+                self._storex(x)
+            return self.norm(self.last_ox)
+        def _diff(x):
+            if not self._islastx(x):
+                self._storex(x)
+            return self.op.T * self.norm.diff(self.last_ox)
+        def _data_call(x):
+            if not self._islastx(x):
+                self._storex(x)
+            return self.norm(self.last_ox - data)
+        def _data_diff(x):
+            if not self._islastx(x):
+                self._storex(x)
+            return self.op.T * self.norm.diff(self.last_ox - data)
+        if data is None:
+            self._call = _call
+            self._diff = _diff
+        else:
+            self._call = _data_call
+            self._diff = _data_diff
+
+    def _islastx(self, x):
+        return np.all(x == self.last_x)
+
+    def _storex(self, x):
+        self.last_x = copy.copy(x)
+        self.last_ox = self.op * x
+
+    def __call__(self, x):
+        return self._call(x)
+
+    def diff(self, x):
+        return self._diff(x)
+
+    def __mul__(self, x):
+        """returns a criterion element with modified norm"""
+        if np.isscalar(x):
+            new_norm = x * self.norm
+            return CriterionElement(new_norm, self.op, self.data)
+        else:
+            raise ValueError("Expects only scalar multiplication")
+    __imul__ = __mul__
+    __rmul__ = __mul__
+
+    def __add__(self, x):
+        """Returns a criterion"""
+        if isinstance(x, CriterionElement):
+            if self.shapein != x.shapein:
+                raise ValueError("CriterionElements should have the same shape.")
+            return Criterion([self, x])
+        elif isinstance(x, Criterion):
+            if self.shapein != x.shapein:
+                raise ValueError("CriterionElements should have the same shape.")
+            return Criterion([self, ] + x.elements)
+        elif x == 0.:
+            return Criterion([self,])
+        else:
+            raise ValueError("Expects Criterion or CriterionElement")
+    __radd__ = __add__
+    __iadd__ = __add__
+
+# criterions
+# ===========
+
+class Criterion(object):
+    def __init__(self, elements):
+        if np.any([el.shapein != elements[0].shapein for el in elements]):
+            raise ValueError("CriterionElements should have the same shape.")
+        self.elements = elements
+        self.shapein = elements[0].shapein
+    def __call__(self, x):
+        return sum([el(x) for el in self.elements])
+    def diff(self, x):
+        return sum([el.diff(x) for el in self.elements])
+
+    def __mul__(self, x):
+        """Returns a criterion whose elements are each multiplied by x"""
+        if np.isscalar(x):
+            return Criterion([x * e for e in self.elements])
+        else:
+            raise ValueError("Expects only scalar multiplication")
+    __imul__ = __mul__
+    __rmul__ = __mul__
+
+    def __add__(self, x):
+        """Returns a flat criterion (adding 0 copies it, so sum() works)"""
+        if isinstance(x, Criterion):
+            return Criterion(self.elements + x.elements)
+        elif isinstance(x, CriterionElement):
+            return Criterion(self.elements + [x,])
+        elif x == 0.:
+            return Criterion(list(self.elements))
+        else:
+            raise ValueError("Expects Criterion or scalar")
+    __radd__ = __add__
+    __iadd__ = __add__
+
+# generate criterions
+def quadratic_criterion(model, data, hypers=[], priors=[], covariances=None):
+    if covariances is None:
+        norms = [Norm2(), ] * (1 + len(hypers))
+    else:
+        norms = [Norm2(C) for C in covariances]
+    likelihood = CriterionElement(norms[0], model, data=data)
+    prior_elements = [CriterionElement(n, p) for n, p in zip(norms[1:], priors)]
+    prior = sum([h * p for h, p in zip(hypers, prior_elements)])
+    criterion = likelihood + prior
+    return criterion
+
+def huber_criterion(model, data, hypers=[], priors=[], deltas=[]):
+    norms = [Huber(d) for d in deltas]
+    likelihood = CriterionElement(norms[0], model, data=data)
+    prior_elements = [CriterionElement(n, p) for n, p in zip(norms[1:], priors)]
+    prior = sum([h *p for h, p in zip(hypers, prior_elements)])
+    criterion = likelihood + prior
+    return criterion
+
+def normp_criterion(model, data, hypers=[], priors=[], ps=[]):
+    norms = [Normp(p) for p in ps]
+    likelihood = CriterionElement(norms[0], model, data=data)
+    prior_elements = [CriterionElement(n, p) for n, p in zip(norms[1:], priors)]
+    prior = sum([h *p for h, p in zip(hypers, prior_elements)])
+    criterion = likelihood + prior
+    return criterion
diff --git a/pyoperators/iterative/dli.py b/pyoperators/iterative/dli.py
new file mode 100644
index 0000000..1d99136
--- /dev/null
+++ b/pyoperators/iterative/dli.py
@@ -0,0 +1,285 @@
+"""
+Implements Double loop inference algorithms.
+
+Reference
+---------
+
+Bayesian Inference and Optimal Design for the Sparse Linear Model,
+Matthias W. Seeger
+
+http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.165.8284&rep=rep1&type=pdf
+
+"""
+from copy import copy
+import numpy as np
+from .algorithms import Algorithm, default_callback, StopCondition
+from .criterions import Norm2
+from .lanczos import LanczosAlgorithm
+from .optimize import FminNCG
+from ..core import DiagonalOperator, IdentityOperator, asoperator, asoperator1d
+
+DEFAULT_STOP = StopCondition(maxiter=5)
+
+# the reference recommends this initial z value
+Z0 = 0.05
+
+__all__ = ['DoubleLoopAlgorithm']
+
+
+class Criterion(object):
+    def __init__(self, algo):
+        self.algo = algo
+        self.shapein = self.algo.model.shapein
+        # likelihood norm
+        self.norm = Norm2(C=algo.noise_covariance)
+        # storing
+        self.last_u = None
+        self.Xu = None
+        self.Bu = None
+
+    def islast(self, u):
+        return np.all(u == self.last_u)
+
+    def load_last(self):
+        return self.Xu, self.Bu
+
+    def get_projections(self, u):
+        if self.islast(u):
+            return self.load_last()
+        else:
+            self.last_u = copy(u)
+            X = self.algo.model
+            B = self.algo.prior
+            self.Xu = X * u
+            self.Bu = B * u
+            return self.Xu, self.Bu
+
+    def likelihood(self, u):
+        sigma = self.algo.sigma
+        y = self.algo.data
+        Xu, Bu = self.get_projections(u)
+        return sigma ** (-2) * self.norm(Xu - y)
+
+    def dlike(self, u):
+        sigma = self.algo.sigma
+        X = self.algo.model
+        y = self.algo.data
+        Xu, Bu = self.get_projections(u)
+        return sigma ** (-2) * X.T * self.norm.diff(Xu - y)
+
+    def d2like(self, u):
+        sigma = self.algo.sigma
+        X = self.algo.model
+        N = getattr(self.algo, "noise_covariance", None)
+        if N is None:
+            N = IdentityOperator()
+        return sigma ** (-2) * X.T * N * X
+
+    def d2lik_p(self, u, p):
+        return self.d2like(u) * p
+
+    def penalization(self, u):
+        sigma = self.algo.sigma
+        t = self.algo.tau
+        z = self.algo.z
+        Xu, Bu = self.get_projections(u)
+        e = t * np.sqrt(z + (np.abs(Bu) / sigma) ** 2)
+        #e = ne.evaluate("2 * t * sqrt(z + (abs(Bu) / sigma) ** 2)")
+        return e.sum()
+
+    def dpen(self, u):
+        sigma = self.algo.sigma
+        B = self.algo.prior
+        t = self.algo.tau
+        z = self.algo.z
+        Xu, Bu = self.get_projections(u)
+        e = 2 * (t * Bu) / np.sqrt(z + (Bu / sigma) ** 2)
+        #e = ne.evaluate("2 * (t * Bu) / sqrt(z + (Bu / sigma) ** 2)")
+        return (B.T * e) / (sigma ** 2)
+
+    def d2pen(self, u):
+        sigma = self.algo.sigma
+        B = self.algo.prior
+        t = self.algo.tau
+        z = self.algo.z
+        Xu, Bu = self.get_projections(u)
+        rho = (t * z) / ((z + (Bu / sigma) ** 2) ** (1.5) * sigma ** 2)
+        #rho = ne.evaluate("(t * z) / ((z + (Bu / sigma) ** 2) ** (1.5) * sigma ** 2)")
+        return B.T * DiagonalOperator(rho) * B
+
+    def d2pen_p(self, u, p):
+        return self.d2pen(u) * p
+
+    def __call__(self, u):
+        return (self.likelihood(u) + self.penalization(u)).view(np.ndarray)
+
+    def gradient(self, u):
+        return (self.dlike(u) + self.dpen(u)).view(np.ndarray)
+
+    def hessian(self, u):
+        return self.d2like(u) + self.d2pen(u)
+
+    def hessian_p(self, u, p):
+        return (self.hessian(u) * p).view(np.ndarray)
+
+
+class DoubleLoopAlgorithm(Algorithm):
+    """
+    A subclass of Algorithm implementing the double loop algorithm.
+
+    Parameters
+    ----------
+
+    model : LinearOperator
+        Linear model linking data and unknowns.
+    data : ndarray
+        Data.
+    prior : LinearOperator
+        Prior.
+    tau : ndarray (optional)
+        Parameters of the Laplace potential on priors coefficients.
+    sigma : float  (optional)
+        Likelihood standard deviation.
+    lanczos : dict
+        Keyword arguments of the Lanczos decomposition.
+    fmin_args : dict
+        Keyword arguments of the function minimization.
+
+    Notes
+    -----
+
+    An iteration of DoubleLoopAlgorithm consists in two steps, the
+    inner loop and the outer loop. The outer loop is the computation
+    of a Lanczos approximation of the posterior covariance.  The inner
+    loop is a Newton-Conjugate-Gradient minimization of a criterion
+    with penalty terms determined by the Lanczos step.
+
+    """
+    def __init__(self, model, data, prior, noise_covariance=None,
+                 tau=None, sigma=1., optimizer=FminNCG,
+                 lanczos={"maxiter": 300}, fmin_args={},
+                 callback=default_callback,
+                 stop_condition=DEFAULT_STOP):
+
+        model = asoperator(model)
+        self.shapein = model.shapein
+        self.model = asoperator1d(model)
+        self.data_shape = data.shape
+        self.data = data.ravel()
+        self.prior = asoperator1d(prior)
+        if noise_covariance is not None:
+            noise_covariance = asoperator1d(noise_covariance)
+        self.noise_covariance = noise_covariance
+        # tau can be None or scalar or vector
+        if tau is None:
+            self.tau = np.ones(prior.shape[0])
+        elif np.asarray(tau).size == prior.shape[0]:
+            self.tau = tau
+        else:
+            try:
+                if not np.isscalar(tau):
+                    tau = np.asarray(tau).item()  # np.asscalar is removed
+                self.tau = tau * np.ones(prior.shape[0])
+            except(ValueError):
+                raise ValueError("Incorrect shape for tau.")
+        self.sigma = sigma
+        self.optimizer = optimizer
+        self.lanczos = lanczos
+        self.fmin_args = fmin_args
+        #
+        self.callback = callback
+        self.stop_condition = stop_condition
+        # to store internal variables
+        self.z = None
+        self.gamma = None
+        self.inv_gamma = None
+        self.g_star = None
+        self.current_solution = None
+        self.last_solution = None
+        self.inv_cov = None
+        self.inv_cov_approx = None
+        self.criterion = None
+
+    def initialize(self):
+        """
+        Set parameters to initial values.
+        """
+        self.z = Z0 * np.ones(self.model.shape[1])
+        self.g_star = 0.
+        self.current_solution = np.zeros(self.model.shape[1])
+        self.iter_ = 0
+        self.gamma = np.ones(self.prior.shape[0])
+        self.update_inv_gamma()
+
+    def iterate(self):
+        print("Iteration %i / %i" %
+              (self.iter_ + 1, self.stop_condition.maxiter))
+        print("Outer loop")
+        self.outer()
+        print("Inner loop")
+        self.inner()
+        return Algorithm.iterate(self)
+
+    # outer loop
+    def outer(self):
+        """
+        Outer loop : Lanczos approximation.
+        """
+        self.update_inv_cov()
+        self.update_inv_cov_approx()
+        self.update_z()
+        self.update_g_star()
+
+    def update_inv_cov(self):
+        D = DiagonalOperator(self.gamma ** (-1), dtype=self.prior.dtype)
+        X = self.model
+        B = self.prior
+        N = self.noise_covariance
+        if N is None:
+            self.inv_cov = X.T * X + B.T * D * B
+        else:
+            self.inv_cov = X.T * N * X + B.T * D * B
+
+    def update_inv_cov_approx(self):
+        lanczos = LanczosAlgorithm(self.inv_cov, **self.lanczos)
+        self.inv_cov_approx = lanczos.run()
+
+    def update_z(self):
+        # get eigenvalues, eigenvectors
+        e = self.inv_cov_approx.eigenvalues
+        v = self.inv_cov_approx.eigenvectors
+        B = self.prior
+        self.z = sum([ei * (B * vi) ** 2 for ei, vi in zip(e, v.T)])
+
+    def update_g_star(self):
+        self.g_star = np.dot(self.z.T, self.inv_gamma)
+        self.g_star -= self.inv_cov_approx.logdet()
+
+    # inner loop
+    def inner(self):
+        """
+        Inner loop : Penalized minimization.
+        """
+        self.update_current_solution()
+        self.update_gamma()
+        self.update_inv_gamma()
+
+    def update_current_solution(self):
+        self.inner_criterion = Criterion(self)
+        self.last_solution = copy(self.current_solution)
+        self.inner_algo = self.optimizer(self.inner_criterion,
+                                         self.last_solution,
+                                         **self.fmin_args)
+        self.current_solution = self.inner_algo()
+
+    def update_gamma(self):
+        s = np.abs(self.prior * self.current_solution)
+        sn2 = (s / self.sigma) ** 2
+        self.gamma = np.sqrt(self.z + sn2) / self.tau
+
+    def update_inv_gamma(self):
+        self.inv_gamma = self.gamma ** (-1)
+
+    # at exit
+    def at_exit(self):
+        self.current_solution.resize(self.shapein)
diff --git a/pyoperators/iterative/lanczos.py b/pyoperators/iterative/lanczos.py
new file mode 100644
index 0000000..943b5e9
--- /dev/null
+++ b/pyoperators/iterative/lanczos.py
@@ -0,0 +1,99 @@
+from __future__ import absolute_import, division, print_function
+
+import numpy as np
+from ..core import asoperator
+from ..linear import EigendecompositionOperator, TridiagonalOperator
+from .core import IterativeAlgorithm
+from .stopconditions import MaxIterationStopCondition
+
+
+class LanczosAlgorithm(IterativeAlgorithm):
+    """
+    Tridiagonalization Lanczos step and eigendecomposition at exit.
+
+    http://en.wikipedia.org/wiki/Lanczos_algorithm
+    """
+    def __init__(self, A, v0=None, maxiter=300):
+        """
+        Use Lanczos algorithm to approximate a linear Operator.
+
+        Parameters
+        ----------
+        A: Operator
+            The Operator to be approximated.
+        maxiter: int or None (defaults 300)
+            Number of iteration (equals number of eigenvalues).
+            If set to None, stops at A.shape[0]
+
+        Returns
+        -------
+        A LanczosAlgorithm instance. To get the approximated Operator,
+        calling this instance is required.
+
+        Notes
+        -----
+        Starting point is a normalized random vector so results may
+        differ from one call to another with the same input parameters.
+
+        The Operator approximation is returned as a
+        EigendecompositionOperator which can be easily inverted.
+        """
+        self.A = asoperator(A)
+        self.n = self.A.shape[0]
+        self.maxiter = self.n if maxiter is None else maxiter
+        self.norm = lambda x: np.sqrt(np.dot(x, x)) #XXX //ise me
+        stop_condition = MaxIterationStopCondition(self.maxiter)
+
+        IterativeAlgorithm.__init__(self, normal_stop_condition=stop_condition)
+        self.v0 = v0
+        # tridiagonal matrix coefficients
+        self.alpha = np.zeros(self.maxiter)
+        self.beta = np.zeros(self.maxiter)
+        self.vectors = np.zeros((self.maxiter+1, self.n))
+
+    def initialize(self):
+        IterativeAlgorithm.initialize(self)
+        if self.v0 is None:
+            v0 = np.random.randn(self.n)
+        else:
+            v0 = self.v0.ravel()
+        # divide out of place: ravel() may return a view of the caller's v0
+        self.vectors[0] = v0 / self.norm(v0)
+        self.alpha[...] = 0
+        self.beta[...] = 0
+
+    def iteration(self):
+        n = self.niterations
+        v = self.vectors[n-1]
+        v_new = self.vectors[n]
+        self.A.matvec(v, out=v_new)
+
+        # orthogonalisation
+        if n > 1:
+            v_new -= self.beta[n-2] * self.vectors[n-2]
+
+        alpha = np.dot(v_new, v)
+        v_new -= alpha * v
+        beta = self.norm(v_new)
+        v_new /= beta
+
+        # update
+        self.alpha[n-1] = alpha
+        self.beta[n-1] = beta
+
+    def finalize(self):
+        """
+        Convert alpha and beta to a TridiagonalOperator and perform
+        eigendecomposition.
+
+        """
+        T = TridiagonalOperator(self.alpha, self.beta[:-1])
+        # use band matrix eigendecomposition as LAPACK's SEV routines (?STEV*)
+        # for symmetric tridiagonal matrices are not available in scipy 0.10
+        E = T.toband().eigen()
+
+        # multiply T eigenvectors with lanczos vectors
+        w = E.eigenvalues
+        v = np.dot(self.vectors[:-1, :].T, E.eigenvectors)
+
+        return EigendecompositionOperator(v=v, w=w)
diff --git a/pyoperators/iterative/linesearch.py b/pyoperators/iterative/linesearch.py
new file mode 100644
index 0000000..f007779
--- /dev/null
+++ b/pyoperators/iterative/linesearch.py
@@ -0,0 +1,159 @@
+"""
+Line searches: find minimum of a multivariate function. 
+
+Optionally depends on scipy.optimize for some line searches.
+
+Available:
+
+- optimal step (exact minimum if Criterion is quadratic (only Norm2
+  norms))
+
+- Backtracking : starts with optimal steps and reduces step until
+  criterion decreases.
+
+if scipy.optimize is in PYTHONPATH:
+
+- LineSearch, LineSearchArmijo, LineSearchWolfe1, LineSearchWolfe2
+"""
+import numpy as np
+from .criterions import Norm2
+
+__all__ = ['optimal_step',
+           'Backtracking',
+           'default_backtracking']
+
+
+def optimal_step(algo):
+    """
+    Finds quadratic optimal step of a criterion.
+
+    Arguments
+    ----------
+
+    algo: Algorithm instance with the following attributes:
+      current_descent, current_gradient, criterion. The criterion
+      attribute should be a Criterion instance with the following
+      attributes: model, priors, hypers, norms.
+
+    Returns
+    -------
+    a: float
+      The optimal step.
+    """
+    # get variables from criterion
+    d = algo.current_descent
+    g = algo.current_gradient
+    norms = [el.norm for el in algo.criterion.elements]
+    # replace norms by Norm2 if not a Norm2 instance
+    # to handle properly Norm2 with C covariance matrices ...
+    norms = [n if isinstance(n, Norm2) else Norm2() for n in norms]
+    ops = [el.op for el in algo.criterion.elements]
+    # compute quadratic optimal step
+    a = -.5 * np.dot(d.T, g)
+    a /= np.sum([N(O * d) for N, O in zip(norms, ops)])
+    return a
+
+
+class Backtracking(object):
+    def __init__(self, maxiter=10, tau=.5):
+        self.maxiter = maxiter
+        self.tau = tau
+
+    def __call__(self, algo):
+        x = algo.current_solution
+        d = algo.current_descent
+        a = optimal_step(algo)
+        i = 0
+        f0 = algo.current_criterion
+        fi = 2 * f0
+        while (i < self.maxiter) and (fi > f0):
+            i += 1
+            a *= self.tau
+            xi = x + a * d
+            fi = algo.criterion(xi)
+        return a
+
+default_backtracking = Backtracking()
+
+# if optimize package available wrap line search for use in algorithms
+try:
+    from scipy.optimize import linesearch
+except ImportError:
+    pass
+
+if 'linesearch' in locals():
+    class LineSearch(object):
+        """
+        Wraps scipy.optimize.linesearch.line_search
+        """
+        def __init__(self, args=(), **kwargs):
+            self.args = args
+            self.kwargs = kwargs
+            self.f = None
+            self.fprime = None
+            self.xk = None
+            self.pk = None
+            self.gfk = None
+            self.old_fval = None
+            self.old_old_fval = None
+            self.step = None
+
+        def get_values(self, algo):
+            self.f = algo.criterion
+            self.fprime = algo.gradient
+            self.xk = algo.current_solution
+            self.pk = algo.current_descent
+            self.gfk = algo.current_gradient
+            self.old_fval = algo.current_criterion
+            self.old_old_fval = algo.last_criterion
+
+        def _line_search(s):
+            line_search = linesearch.line_search
+            out = line_search(s.f, s.fprime, s.xk, s.pk, gfk=s.gfk,
+                              old_fval=s.old_fval,
+                              old_old_fval=s.old_old_fval,
+                              args=s.args, **s.kwargs)
+            s.step = out[0]
+
+        def __call__(self, algo):
+            # get values
+            self.get_values(algo)
+            # perform line search
+            self._line_search()
+            # if no output given, fallback to optimal step ...
+            if self.step is None:
+                self.step = optimal_step(algo)
+            return self.step
+
+
+    class LineSearchArmijo(LineSearch):
+        """
+        Wraps scipy.optimize.linesearch.line_search_armijo.
+        """
+        def _line_search(s):
+            armijo = linesearch.line_search_armijo
+            out = armijo(s.f, s.xk, s.pk, s.gfk, s.old_fval, args=s.args,
+                         **s.kwargs)
+            s.step = out[0]
+
+
+    class LineSearchWolfe1(LineSearch):
+        """
+        Wraps scipy.optimize.linesearch.line_search_wolfe1
+        """
+        def _line_search(s):
+            wolfe1 = linesearch.line_search_wolfe1
+            out = wolfe1(s.f, s.fprime, s.xk, s.pk, s.gfk, s.old_fval,
+                         s.old_old_fval, args=s.args, **s.kwargs)
+            s.step = out[0]
+
+
+    class LineSearchWolfe2(LineSearch):
+        """
+        Wraps scipy.optimize.linesearch.line_search_wolfe2
+        """
+        def _line_search(s):
+            wolfe2 = linesearch.line_search_wolfe2
+            out = wolfe2(s.f, s.fprime, s.xk, s.pk, s.gfk, s.old_fval,
+                         s.old_old_fval, args=s.args, **s.kwargs)
+            s.step = out[0]
diff --git a/pyoperators/iterative/optimize.py b/pyoperators/iterative/optimize.py
new file mode 100644
index 0000000..51fae5a
--- /dev/null
+++ b/pyoperators/iterative/optimize.py
@@ -0,0 +1,214 @@
+"""
+Wraps scipy.optimize.fmin_* algorithms using Criterion instances.
+"""
+import numpy as np
+import scipy.optimize as opt
+
+__all__ = [
+    'FminBFGS',
+    'FminCG',
+    'FminCOBYLA',
+    'FminLBFGSB',
+    'FminNCG',
+    'FminPowell',
+    'FminSLSQP',
+    'FminTNC',
+]
+
+
+class FminWrapper(object):
+    """
+    Abstract class to generate wrappers around scipy.optimize fmin_*
+    functions.
+
+    Parameters
+    -----------
+
+    criterion : Criterion
+        A criterion function with __call__ and gradient methods.
+    x0 : ndarray (None)
+        First guess
+    args=() : tuple
+        Extra arguments for the criterion function
+    kwargs : dict
+        Parameters of the fmin_function
+
+    fmin function docstring
+    ------------------------
+    """
+    def __init__(self, criterion, x0=None, *args, **kwargs):
+        self.criterion = criterion
+        self.gradient = getattr(criterion, "gradient", None)
+        self.hessian = getattr(criterion, "hessian", None)
+        self.hessian_p = getattr(criterion, "hessian_p", None)
+        self.shapein = criterion.shapein
+        self.args = args
+        self.kwargs = kwargs
+        self.first_guess(x0)
+        # to store solution
+        self.current_solution = None
+        self.optimizer_output = None
+
+    def first_guess(self, x0=None):
+        """
+        Sets current_solution attribute to initial value.
+        """
+        if x0 is None:
+            self.current_solution = np.zeros(self.shapein)
+        else:
+            self.current_solution = np.array(x0)
+
+
+class FminCOBYLA(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_cobyla.__doc__
+
+    def __init__(self, criterion, cons, x0=None, *args, **kwargs):
+        self.cons = cons
+        FminWrapper.__init__(self, criterion, x0=x0, *args, **kwargs)
+
+    def __call__(self):
+        self.first_guess()
+        self.current_solution = opt.fmin_cobyla(self.criterion,
+                                                self.current_solution,
+                                                self.cons,
+                                                args=self.args,
+                                                **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminPowell(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_powell.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_powell(self.criterion,
+                                                self.current_solution,
+                                                args=self.args,
+                                                **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminCG(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_cg.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_cg(self.criterion,
+                                            self.current_solution,
+                                            fprime=self.gradient,
+                                            args=self.args,
+                                            **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminTNC(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_tnc.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_tnc(self.criterion,
+                                             self.current_solution,
+                                             fprime=self.gradient,
+                                             args=self.args,
+                                             **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminNCG(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_ncg.__doc__
+
+    def __init__(self, criterion, x0=None, *args, **kwargs):
+        super(FminNCG, self).__init__(criterion, x0=x0, *args, **kwargs)
+        if self.hessian_p is None:
+            raise ValueError(
+                "Cannot run FminNCG if the criterion has no hessian.")
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_ncg(self.criterion,
+                                             self.current_solution,
+                                             fprime=self.gradient,
+                                             fhess_p=self.hessian_p,
+                                             args=self.args,
+                                             **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminLBFGSB(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_l_bfgs_b.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_l_bfgs_b(self.criterion,
+                                                  self.current_solution,
+                                                  fprime=self.gradient,
+                                                  args=self.args,
+                                                  **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminSLSQP(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_slsqp.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_slsqp(self.criterion,
+                                               self.current_solution,
+                                               fprime=self.gradient,
+                                               args=self.args,
+                                               **self.kwargs)
+
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
+
+
+class FminBFGS(FminWrapper):
+    __doc__ = FminWrapper.__doc__ + opt.fmin_bfgs.__doc__
+
+    def __call__(self):
+        self.first_guess()
+        self.optimizer_output = opt.fmin_bfgs(self.criterion,
+                                              self.current_solution,
+                                              fprime=self.gradient,
+                                              args=self.args,
+                                              **self.kwargs)
+        # output depends on kwargs ...
+        if isinstance(self.optimizer_output, tuple):
+            self.current_solution = self.optimizer_output[0]
+        else:
+            self.current_solution = self.optimizer_output
+        return self.current_solution
diff --git a/pyoperators/iterative/stopconditions.py b/pyoperators/iterative/stopconditions.py
new file mode 100644
index 0000000..979dfd3
--- /dev/null
+++ b/pyoperators/iterative/stopconditions.py
@@ -0,0 +1,81 @@
+"""
+Module defining stop conditions for iterative algorithms.
+
+"""
+
+from __future__ import absolute_import, division, print_function
+
+__all__ = ['StopCondition',
+           'MaxErrorStopCondition',
+           'MaxIterationStopCondition']
+
+
+class StopCondition(object):
+    """
+    A class defining stop conditions for iterative algorithms. It must be
+    called with an Algorithm instance as argument. To stop the iterations,
+    the instance must raise a StopIteration instance.
+
+    """
+    def __init__(self, condition, message):
+        self.condition = condition
+        self.message = message
+
+    def __call__(self, s):
+        if self.condition(s):
+            raise StopIteration(self.message)
+
+    def __or__(self, other):
+        return OrStopCondition([self, other])
+
+    def __str__(self):
+        return self.message
+
+
+class OrStopCondition(StopCondition):
+    def __init__(self, stop_conditions):
+        self.operands = tuple(stop_conditions)
+
+    def __call__(self, s):
+        for c in self.operands:
+            c(s)
+
+    def __str__(self):
+        ' or '.join(str(c) for c in self.operands)
+
+
+class NoStopCondition(StopCondition):
+    def __init__(self):
+        StopCondition.__init__(self, lambda s: False, 'no stop condition')
+
+    def __or__(self, other):
+        return other
+
+
+class MaxErrorStopCondition(StopCondition):
+    """
+    Stop if the 'error' attribute is less than the specified maximum tolerance.
+
+    """
+    def __init__(self, maxerror, message='The maximum error is reached.'):
+        self.maxerror = maxerror
+        StopCondition.__init__(self, lambda s: s.error <= maxerror, message)
+
+    def __str__(self):
+        return 'maxerror={0}'.format(self.maxerror)
+
+
+class MaxIterationStopCondition(StopCondition):
+    """
+    Stop if the 'niterations' attribute is equal to the specified maximum
+    number of iterations.
+
+    """
+    def __init__(self, maxiteration, message='The maximum number of iterations'
+                 ' is reached.'):
+        self.maxiteration = maxiteration
+        StopCondition.__init__(self, lambda s: s.niterations == maxiteration,
+                               message)
+
+    def __str__(self):
+        return 'maxiteration={0}'.format(self.maxiteration)
diff --git a/pyoperators/linear.py b/pyoperators/linear.py
new file mode 100644
index 0000000..b98a743
--- /dev/null
+++ b/pyoperators/linear.py
@@ -0,0 +1,1582 @@
+from __future__ import absolute_import, division, print_function
+import multiprocessing
+import numexpr
+import numpy as np
+import operator
+try:
+    import pyfftw
+except:
+    pass
+import scipy.sparse as sp
+import sys
+from itertools import izip
+from scipy.sparse.linalg import eigsh
+from .core import (
+    BlockRowOperator, BroadcastingBase, CompositionOperator, ConstantOperator,
+    DiagonalBase, DiagonalOperator, HomothetyOperator, IdentityOperator,
+    Operator, ReductionOperator, ZeroOperator, operation_assignment, _pool)
+from .flags import (
+    contiguous, idempotent, inplace, linear, real, square, symmetric,
+    update_output)
+from .memory import empty
+from .utils import (
+    broadcast_shapes, cast, complex_dtype, float_dtype, inspect_special_values,
+    isalias, izip_broadcast, pi, product, strshape, tointtuple, ufuncs)
+from .warnings import warn, PyOperatorsWarning
+
+__all__ = [
+    'BandOperator',
+    'DegreesOperator',
+    'DenseOperator',
+    'DenseBlockDiagonalOperator',
+    'DiagonalNumexprOperator',
+    'DifferenceOperator',
+    'EigendecompositionOperator',
+    'IntegrationTrapezeOperator',
+    'MaskOperator',
+    'PackOperator',
+    'RadiansOperator',
+    'Rotation2dOperator',
+    'Rotation3dOperator',
+    'SparseOperator',
+    'SumOperator',
+    'SymmetricBandOperator',
+    'SymmetricBandToeplitzOperator',
+    'TridiagonalOperator',
+    'UnpackOperator',
+]
+
+
+class DegreesOperator(HomothetyOperator):
+    """
+    Convert angles from radians to degrees.
+
+    Example
+    -------
+    >>> d = DegreesOperator()
+    >>> d(np.pi/2)
+    array(90.0)
+
+    """
+    def __init__(self, dtype=float, **keywords):
+        HomothetyOperator.__init__(self, 180 / pi(dtype), **keywords)
+        self.set_rule('I', lambda s: RadiansOperator(s.dtype))
+
+
+ at linear
+class DenseBase(Operator):
+    def __init__(self, data, naxes=None, naxesin=None, naxesout=None,
+                 naxesextra=None, dtype=None, issquare=None, **keywords):
+        data = np.asarray(data)
+        if data.ndim == 0:
+            self.__class__ = HomothetyOperator
+            self.__init__(data, dtype=dtype, **keywords)
+            return
+        data = np.atleast_2d(data)
+        if naxes is not None and (naxes < 1 or 2 * naxes > data.ndim):
+            raise ValueError('Invalid naxes keyword.')
+        if naxesin is None and naxesout is None:
+            if naxes is None:
+                naxes = 1
+            naxesin = naxes
+            naxesout = naxes
+        elif naxesin is None:
+            if naxesout < 1 or naxesout >= data.ndim:
+                raise ValueError('Invalid naxesout keyword.')
+            if naxesextra is not None:
+                naxesin = data.ndim - naxesextra - naxesout
+        elif naxesout is None:
+            if naxesin < 1 or naxesin >= data.ndim:
+                raise ValueError('Invalid naxesin keyword.')
+            if naxesextra is not None:
+                naxesout = data.ndim - naxesextra - naxesin
+        if naxesin is None or naxesout is None:
+            raise ValueError('The keywords naxesin and naxesout must be both s'
+                             'pecified.')
+        if naxesextra is None:
+            naxesextra = data.ndim - naxesin - naxesout
+            if naxesextra == 0 and not isinstance(self, DenseOperator):
+                self.__class__ = DenseOperator
+                self.__init__(data, naxesin=naxesin, dtype=None, **keywords)
+                return
+            if naxesextra < 0:
+                raise ValueError(
+                    "The number of input and output dimensions ('{0}' and '{1}"
+                    "') exceeds the number of dimensions of the input array {2"
+                    "}.".format(naxesin, naxesout, data.ndim))
+            naxesextra = data.ndim - naxesin - naxesout
+        if naxesin + naxesout + naxesextra != data.ndim:
+            raise ValueError(
+                "The number of dimensions of the input array '{0}' is too larg"
+                "e. The expected number is '{1}'. To disambiguate the handling"
+                " of the extra dimension(s), use the operators DenseBlockColum"
+                "nOperator, DenseBlockDiagonalOperator or DenseBlockRowOperato"
+                "r.".format(data.ndim, naxesin + naxesout + naxesextra))
+        if dtype is None:
+            dtype = float_dtype(data.dtype)
+        else:
+            dtype = np.dtype(dtype)
+        data = np.array(data, dtype=dtype, copy=False)
+
+        self.data = data
+        self.naxesin = int(naxesin)
+        self.naxesout = int(naxesout)
+        self.naxesextra = int(naxesextra)
+        self._sl = data.shape[:naxesextra]
+        self._sm = data.shape[-naxesin-naxesout:-naxesin]
+        self._sn = data.shape[-naxesin:]
+        self._l = product(self._sl)
+        self._m = product(self._sm)
+        self._n = product(self._sn)
+        _data = data.reshape(self._sl + (self._m, self._n))
+        if not isalias(_data, data):
+            # this warning only happens if naxesin or naxesout > 1
+            warn('The input array could not be reshaped without making a copy.'
+                 ' To avoid potential duplication of the data in memory, consi'
+                 'der giving a contiguous data argument.', PyOperatorsWarning)
+            data = _data.reshape(data.shape)
+        self._data = _data
+        keywords['flags'] = self.validate_flags(
+            keywords.get('flags', {}),
+            real=dtype.kind != 'c',
+#            square=self._sm == self._sm if issquare is None else issquare,
+            contiguous_input=self.naxesin > 1,
+            contiguous_output=self.naxesout > 1)
+        Operator.__init__(self, dtype=dtype, **keywords)
+        self.set_rule('T', self._rule_transpose)
+        self.set_rule(('.', HomothetyOperator), self._rule_homothety,
+                      CompositionOperator)
+
+    @property
+    def nbytes(self):
+        return self.data.nbytes
+
+    def validatein(self, shape):
+        if len(shape) < self.naxesin or shape[-self.naxesin:] != self._sn:
+            return ValueError(
+                "The input shape '{0}' is invalid. The last dimension(s) shoul"
+                "d be '{1}'.".format(shape, self._sn))
+
+    def validateout(self, shape):
+        if len(shape) < self.naxesout or shape[-self.naxesout:] != self._sm:
+            return ValueError(
+                "The output shape '{0}' is invalid. The last dimension(s) shou"
+                "ld be '{1}'.".format(shape, self._sm))
+
+    @staticmethod
+    def _rule_homothety(self, other):
+        return type(self)(other.data * self.data, naxesin=self.naxesin,
+                          naxesout=self.naxesout)
+
+
+class DenseBlockDiagonalOperator(DenseBase):
+    """
+    Operator with broadcastable same dimension diagonal dense blocks.
+
+    If the array used to store the diagonal blocks has a shape (L, M, N),
+    the shape of the output of the operator applied over an input of shape:
+        - (N,) will be (L, M)
+        - (L, N) will be (L, M)
+        - (P, 1, N) will be (P, L, M)
+    Broadcasting the input along an axis (when 1 are prepended to the input or
+    when the input axis length is 1) stacks as columns the operator blocks
+    along this axis.
+
+    Example
+    -------
+    >>> data = [[[1, 1, 1]], [[1, -1, 1]]]
+    >>> np.shape(data)
+    (2, 1, 3)
+    >>> d = DenseBlockDiagonalOperator(data, dtype=int)
+    >>> print(d(np.ones(3)).shape)  # the input is broadcast
+    (2, 1)
+    >>> print(d.todense(shapein=3))
+    [[ 1  1  1]
+     [ 1 -1  1]]
+    >>> print(d(np.ones([2, 3])).shape)
+    (2, 1)
+    >>> print(d.todense(shapein=(2, 3)))
+    [[ 1  1  1  0  0  0]
+     [ 0  0  0  1 -1  1]]
+    >>> print(d(np.ones([3, 2, 3])).shape)
+    (3, 2, 1)
+    >>> print(d.todense(shapein=(3, 2, 3)))
+    [[ 1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
+     [ 0  0  0  1 -1  1  0  0  0  0  0  0  0  0  0  0  0  0]
+     [ 0  0  0  0  0  0  1  1  1  0  0  0  0  0  0  0  0  0]
+     [ 0  0  0  0  0  0  0  0  0  1 -1  1  0  0  0  0  0  0]
+     [ 0  0  0  0  0  0  0  0  0  0  0  0  1  1  1  0  0  0]
+     [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1 -1  1]]
+
+    """
+    def __init__(self, data, naxes=None, naxesin=None, naxesout=None,
+                 **keywords):
+        DenseBase.__init__(self, data, naxes=naxes, naxesin=naxesin,
+                           naxesout=naxesout, **keywords)
+        if not isinstance(self, DenseBase):
+            return
+        if self.shapein is not None:
+            extrashapein = self.shapein[:-self.naxesin]
+            if broadcast_shapes(extrashapein, self._sl) != extrashapein:
+                raise NotImplementedError(
+                    'With this explicit input shape, the operator is not diago'
+                    'nal block anymore. Its transpose would not be incorrect a'
+                    'nd DenseBlockColumnOperator is not yet implemented.')
+        self.set_rule(('.', DenseBlockDiagonalOperator), self._rule_mul,
+                      CompositionOperator)
+
+    def direct(self, input, output):
+        # L, M, N * L', N -> L", M
+        if self.naxesin > 1:
+            input = input.reshape(input.shape[:-self.naxesin] + (self._n,))
+        if self.naxesout > 1:
+            output = output.reshape(output.shape[:-self.naxesout] + (self._m,))
+        np.einsum('...mn,...n->...m', self._data, input, out=output)
+
+    def reshapein(self, shape):
+        # L', N -> L", M
+        return broadcast_shapes(self._sl, shape[:-self.naxesin]) + self._sm
+
+    def validatein(self, shape):
+        # L', N
+        DenseBase.validatein(self, shape)
+        broadcast_shapes(shape[:-self.naxesin], self._sl)
+
+    def validateout(self, shape):
+        # L", M
+        DenseBase.validateout(self, shape)
+        broadcast_shapes(shape[:-self.naxesout], self._sl)
+
+    @staticmethod
+    def _rule_transpose(self):
+        data = self.data
+        for i in range(self.naxesin):
+            data = np.rollaxis(data, -1, self.naxesextra)
+        return DenseBlockDiagonalOperator(data, naxesin=self.naxesout,
+                                          naxesout=self.naxesin)
+
+    @staticmethod
+    def _rule_mul(self, other):
+        if self._sn != other._sm:
+            raise ValueError('Incompatible shape in composition.')
+        if other.naxesextra == 0:
+            _data = np.dot(self._data, other._data)
+        else:
+            _data = np.einsum('...ij,...jk->...ik', self._data, other._data)
+        data = _data.reshape(_data.shape[:-2] + self._sm + other._sn)
+        return DenseBlockDiagonalOperator(
+            data, naxesin=other.naxesin, naxesout=self.naxesout)
+
+
+class DenseOperator(DenseBlockDiagonalOperator):
+    """
+    Dense operator. The operator can be broadcast over the inputs.
+
+    If the dense array is a matrix of shape (M, N), the application of
+    the operator over an input of shape (P, N) will result in an output
+    of shape (P, M).
+
+    Example
+    -------
+    >>> m = np.array([[1., 2., 3.],
+    ...               [2., 3., 4.]])
+    >>> d = DenseOperator(m)
+    >>> d([1, 0, 0])
+    array([ 1.,  2.])
+
+    >>> theta = np.pi / 4
+    >>> m = [[np.cos(theta), -np.sin(theta)],
+    ...      [np.sin(theta),  np.cos(theta)]]
+    >>> input = [[1, 0], [0, 1], [-1, 0], [0, -1]]
+    >>> op = DenseOperator(m)
+    >>> print(op(input))
+    [[ 0.70710678  0.70710678]
+     [-0.70710678  0.70710678]
+     [-0.70710678 -0.70710678]
+     [ 0.70710678 -0.70710678]]
+    >>> print(op.T(op(input)))
+    [[ 1.  0.]
+     [ 0.  1.]
+     [-1.  0.]
+     [ 0. -1.]]
+
+    """
+    def __init__(self, data, naxes=None, naxesin=None, naxesout=None,
+                 **keywords):
+        DenseBlockDiagonalOperator.__init__(
+            self, data, naxes=naxes, naxesin=naxesin, naxesout=naxesout,
+            naxesextra=0, **keywords)
+
+    def direct(self, input, output):
+        # M, N * P, N -> P, M
+        if self.naxesin > 1:
+            input = input.reshape(input.shape[:-self.naxesin] + (self._n,))
+        if self.naxesout > 1:
+            output = output.reshape(output.shape[:-self.naxesout] + (self._m,))
+        np.dot(input, self._data.T, output)
+
+    def reshapeout(self, shape):
+        # P, M -> P, N
+        return shape[:-self.naxesout] + self._sn
+
+
+ at linear
+ at contiguous
+ at update_output
+class SparseBase(Operator):
+    def __init__(self, matrix, dtype=None, shapein=None, shapeout=None,
+                 **keywords):
+        if dtype is None:
+            dtype = matrix.dtype
+        if shapein is None:
+            shapein = matrix.shape[1]
+        elif product(shapein) != matrix.shape[1]:
+            raise ValueError(
+                "The input shape '{0}' is incompatible with the sparse matrix "
+                "shape {1}.".format(shapein, matrix.shape))
+        if shapeout is None:
+            shapeout = matrix.shape[0]
+        elif product(shapeout) != matrix.shape[0]:
+            raise ValueError(
+                "The output shape '{0}' is incompatible with the sparse matrix"
+                " shape {1}.".format(shapeout, matrix.shape))
+        self.matrix = matrix
+        Operator.__init__(self, shapein=shapein, shapeout=shapeout,
+                          dtype=dtype, **keywords)
+
+    @property
+    def nbytes(self):
+        m = self.matrix
+        if hasattr(m, 'nbytes'):
+            return m.nbytes
+        if isinstance(m, (sp.csc_matrix, sp.csr_matrix, sp.bsr_matrix)):
+            return m.data.nbytes + m.indices.nbytes + m.indptr.nbytes
+        if isinstance(m, sp.coo_matrix):
+            return m.data.nbytes + 2 * m.row.nbytes
+        if isinstance(m, sp.dia_matrix):
+            return m.data.nbytes + m.offsets.nbytes
+        if isinstance(m, sp.dok_matrix):
+            sizeoftuple = sys.getsizeof(())
+            return (24 * m.ndim + m.dtype.itemsize +
+                    2 * sizeoftuple + 24) * len(m.viewitems())
+        try:
+            return m.data.nbytes
+        except AttributeError:
+            pass
+        raise TypeError("The sparse format '{0}' is not handled."
+                        .format(type(m)))
+
+
+class SparseOperator(SparseBase):
+    """
+    Operator handling sparse matrix storages.
+
+    The sparse storage can be anyone from the scipy.sparse package (except
+    the LIL format, which is not suited for matrix-vector multiplication):
+        - bsr_matrix: Block Sparse Row matrix
+        - coo_matrix: A sparse matrix in COOrdinate format
+        - csc_matrix: Compressed Sparse Column matrix
+        - csr_matrix: Compressed Sparse Row matrix
+        - dia_matrix: Sparse matrix with DIAgonal storage
+        - dok_matrix: Dictionary Of Keys based sparse matrix
+
+    Example
+    -------
+    >>> from scipy.sparse import csr_matrix
+    >>> sm = csr_matrix([[1, 0, 2, 0],
+    ...                  [0, 0, 3, 0],
+    ...                  [4, 5, 6, 0],
+    ...                  [1, 0, 0, 1]])
+    >>> so = SparseOperator(sm)
+    >>> so([1, 0, 0, 0])
+    array([1, 0, 4, 1])
+    >>> so.T([1, 0, 0, 0])
+    array([1, 0, 2, 0])
+
+    """
+    def __init__(self, matrix, dtype=None, shapein=None, shapeout=None,
+                 **keywords):
+        """
+        matrix : sparse matrix from scipy.sparse
+           The sparse matrix to be wrapped into an Operator.
+
+        """
+        if not sp.issparse(matrix):
+            raise TypeError('The input sparse matrix type is not recognised.')
+        if isinstance(matrix, sp.lil_matrix):
+            raise TypeError('The LIL format is not suited for arithmetic opera'
+                            'tions.')
+        SparseBase.__init__(self, matrix, dtype=dtype, shapein=shapein,
+                            shapeout=shapeout, **keywords)
+        self.set_rule('T', lambda s: SparseOperator(s.matrix.transpose()))
+        self.set_rule(('.', HomothetyOperator), lambda s, o:
+                      SparseOperator(o * s.matrix), CompositionOperator)
+
+    def direct(self, input, output, operation=operation_assignment):
+        input = input.ravel().astype(output.dtype)
+        output = output.ravel()
+        if operation is operation_assignment:
+            output[...] = 0
+        elif operation is not operator.iadd:
+            raise ValueError('Invalid reduction operation.')
+        m = self.matrix
+        if isinstance(m, sp.dok_matrix):
+            for (i, j), v in m.iteritems():
+                output[i] += v * input[j]
+            return
+        M, N = m.shape
+        fn = getattr(sp.sparsetools, m.format + '_matvec')
+        if isinstance(m, (sp.csr_matrix, sp.csc_matrix)):
+            fn(M, N, m.indptr, m.indices, m.data, input, output)
+        elif isinstance(m, sp.coo_matrix):
+            fn(m.nnz, m.row, m.col, m.data, input, output)
+        elif isinstance(m, sp.bsr_matrix):
+            R, C = m.blocksize
+            fn(M // R, N // C, R, C, m.indptr, m.indices, m.data.ravel(),
+               input, output)
+        elif isinstance(m, sp.dia_matrix):
+            fn(M, N, len(m.offsets), m.data.shape[1], m.offsets, m.data,
+               input, output)
+        else:
+            raise NotImplementedError()
+
+    def todense(self, shapein=None, shapeout=None, inplace=False):
+        return self.matrix.toarray()
+
+
+ at inplace
+class DiagonalNumexprOperator(DiagonalBase):
+    """
+    DiagonalOperator whose diagonal elements are calculated on the fly using
+    the numexpr package and that can be separated when added or multiplied
+    to a block operator.
+
+    Note
+    ----
+    When such instance is added or multiplied to another DiagonalOperator
+    (or subclass, such as an instance of this class), an algebraic
+    simplification takes place, which results in a regular (dense) diagonal
+    operator.
+
+    Example
+    -------
+    >>> alpha = np.arange(100.)
+    >>> d = DiagonalNumexprOperator(alpha, '(x/x0)**data', {'x':1.2, 'x0':1.})
+
+    """
+    def __init__(self, data, expr, global_dict=None, var='data',
+                 broadcast=None, dtype=None, **keywords):
+        if not isinstance(expr, str):
+            raise TypeError('The second argument is not a string expression.')
+        if numexpr.__version__ < '2.0.2':
+            keywords['flags'] = self.validate_flags(keywords.get('flags', {}),
+                                                    inplace=False)
+        data = np.asarray(data)
+        if broadcast is None:
+            broadcast = 'scalar' if data.ndim == 0 else 'disabled'
+        if broadcast == 'disabled':
+            keywords['shapein'] = data.shape
+        if dtype is None:
+            dtype = float_dtype(data.dtype)
+
+        self.expr = expr
+        self.var = var
+        self.global_dict = global_dict
+        self._global_dict = {} if global_dict is None else global_dict.copy()
+        self._global_dict[var] = data.T \
+            if broadcast is not None and broadcast.lower() == 'rightward' \
+            else data
+        DiagonalBase.__init__(self, data, broadcast, dtype=dtype, **keywords)
+
+    def direct(self, input, output):
+        if self.broadcast == 'rightward':
+            input = input.T
+            output = output.T
+        numexpr.evaluate('(' + self.expr + ') * input',
+                         global_dict=self._global_dict, out=output)
+
+    def get_data(self):
+        local_dict = {self.var: self.data}
+        return numexpr.evaluate(self.expr, local_dict=local_dict,
+                                global_dict=self.global_dict)
+
+    @staticmethod
+    def _rule_block(self, op, shape, partition, axis, new_axis,
+                    func_operation):
+        if type(self) is not DiagonalNumexprOperator:
+            return None
+        return DiagonalOperator._rule_block(
+            self, op, shape, partition, axis, new_axis, func_operation,
+            self.expr, global_dict=self.global_dict, var=self.var)
+
+
+ at real
+class IntegrationTrapezeOperator(BlockRowOperator):
+    """
+    Return a block row operator whose blocks are the weights to perform
+    a trapeze integration.
+
+    This operator can be used to integrate over X the bivariate function
+        f = f(X,Y).
+    Let's assume f is sampled at n abscissa x_n not necessarily equally spaced
+        f_i(Y) = f(x_i, Y).
+    The operator IntegrationTrapezeOperator returns a block row operator
+        W = [ w_1 I ... w_n I]
+    such that, given the block column operator
+            [ f_1 ]
+        F = [ ... ]
+            [ f_n ],
+    the product
+        W * F = w_1 * f_1 + ... + w_n * f_n
+    performs a trapeze integration of f(X,Y) over the bins [x_i,x_(i+1)]
+    for i in 1..n-1.
+
+    Parameters
+    ----------
+    x : array-like
+        The abscissa, at least two of them. They must be monotonous
+        (increasing or decreasing, not both).
+    new_axisin : integer, optional
+        Passed to BlockRowOperator.
+
+    Raises
+    ------
+    ValueError
+        If less than two abscissa are given or if they are not monotonous.
+
+    Example
+    -------
+    >>> from pyoperators import BlockColumnOperator
+    >>> f = np.power
+    >>> x = [0.5,1,2,4]
+    >>> F = BlockColumnOperator(
+    ...         [Operator(lambda i, o, v=v: f(v, i, o), flags='square')
+    ...          for v in x], new_axisout=0)
+    >>> W = IntegrationTrapezeOperator(x)
+    >>> int_f = W(F)
+    >>> int_f([0,1,2])
+    array([  3.5   ,   7.875 ,  22.8125])
+    >>> [np.trapz(f(x, a), x) for a in [0, 1, 2]]
+    [3.5, 7.875, 22.8125]
+
+    """
+    def __init__(self, x, new_axisin=0, **keywords):
+        x = np.asarray(x)
+        if x.size < 2:
+            raise ValueError('At least two abscissa are required.')
+        steps = np.diff(x)
+        if np.any(steps < 0) and np.any(steps > 0):
+            raise ValueError('The abscissa are not monotonous.')
+
+        # The weights must be stored with a floating point dtype: with
+        # np.empty_like(x), integer abscissa would yield an integer array
+        # and the half-steps would be silently truncated (0.5 -> 0).
+        half_steps = 0.5 * steps
+        w = np.empty(x.shape, dtype=half_steps.dtype)
+        w[0] = half_steps[0]
+        w[1:-1] = 0.5 * (x[2:] - x[:-2])
+        w[-1] = half_steps[-1]
+        ops = [HomothetyOperator(_) for _ in w]
+        BlockRowOperator.__init__(self, ops, new_axisin=new_axisin, **keywords)
+
+
+@real
+@idempotent
+@inplace
+class MaskOperator(DiagonalBase):
+    """
+    A diagonal operator with 0 (True) and 1 (False) on the diagonal.
+
+    Parameters
+    ----------
+    data : array-like of bool
+        The mask. True means that the element is masked (zeroed).
+    broadcast : string, optional
+        Broadcasting mode. By default, it is 'scalar' for a 0-dimensional
+        mask and 'disabled' otherwise.
+
+    Example
+    -------
+    >>> M = MaskOperator([True, False])
+    >>> M.todense()
+    array([[0, 0],
+           [0, 1]])
+
+    Notes
+    -----
+    We follow the convention of MaskedArray, where True means masked.
+
+    """
+    def __init__(self, data, broadcast=None, **keywords):
+        # np.asarray only copies when needed, like np.array(..., copy=False)
+        # used to do. With NumPy 2, copy=False raises an error when a copy
+        # cannot be avoided (for instance when the input is a list).
+        data = np.asarray(data, dtype=bool)
+        if broadcast is None:
+            broadcast = 'scalar' if data.ndim == 0 else 'disabled'
+        if broadcast == 'disabled':
+            keywords['shapein'] = data.shape
+        _, nzeros, nones, _, _ = inspect_special_values(data)
+        if nzeros == data.size:
+            # nothing is masked: the operator is the identity
+            self.__class__ = IdentityOperator
+            self.__init__(**keywords)
+            return
+        if nones == data.size:
+            # everything is masked: the operator is a square zero operator
+            keywords['flags'] = Operator.validate_flags(
+                keywords.get('flags', {}), square=True)
+            self.__class__ = ZeroOperator
+            self.__init__(**keywords)
+            return
+        DiagonalBase.__init__(self, data, broadcast, **keywords)
+
+    def direct(self, input, output):
+        # for a rightward broadcast, the transposed arrays are handed over
+        # to the masking ufunc
+        if self.broadcast == 'rightward':
+            ufuncs.masking(input.T, self.data.T, output.T)
+        else:
+            ufuncs.masking(input, self.data, output)
+
+    def get_data(self):
+        # the diagonal values are the logical negation of the mask
+        return ~self.data
+
+
+@real
+@linear
+class PackBase(BroadcastingBase):
+    """
+    Base class of the PackOperator and UnpackOperator.
+
+    It stores in the attribute `n` the sum of the mask (the number of True
+    elements for a boolean mask) and provides the shape conversion and
+    validation helpers between the packed and unpacked layouts.
+
+    """
+    def __init__(self, data, broadcast, **keywords):
+        self.n = np.sum(data)
+        BroadcastingBase.__init__(self, data, broadcast, **keywords)
+
+    def _reshape_packed(self, shape):
+        # packed shape -> unpacked shape: the packed dimension is replaced
+        # by the dimensions of the mask
+        mask_shape = self.data.shape
+        if self.broadcast != 'rightward':
+            return shape[:-1] + mask_shape
+        return mask_shape + shape[1:]
+
+    def _reshape_unpacked(self, shape):
+        # unpacked shape -> packed shape: the dimensions of the mask are
+        # replaced by a single dimension of length n
+        ndim = self.data.ndim
+        if self.broadcast != 'rightward':
+            return shape[:-ndim] + (self.n,)
+        return (self.n,) + shape[ndim:]
+
+    def _validate_packed(self, shape):
+        # the packed dimension is the first one for a rightward broadcast
+        # and the last one otherwise
+        index = 0 if self.broadcast == 'rightward' else -1
+        if shape[index] == self.n:
+            return
+        expected = strshape((self.n,), broadcast=self.broadcast)
+        raise ValueError(
+            "The shape '{0}' is incompatible with that expected '{1}'.".
+            format(strshape(shape), expected))
+
+    def _validate_unpacked(self, shape):
+        # the dimensions of the mask are the leading ones for a rightward
+        # broadcast and the trailing ones otherwise
+        ndim = self.data.ndim
+        actual = shape[:ndim] if self.broadcast == 'rightward' \
+            else shape[-ndim:]
+        if actual == self.data.shape:
+            return
+        expected = strshape(self.data.shape, broadcast=self.broadcast)
+        raise ValueError(
+            "The shape '{0}' is incompatible with that expected '{1}'.".
+            format(strshape(shape), expected))
+
+
+class PackOperator(PackBase):
+    """
+    Pack an ndarray into a vector under the control of a boolean mask.
+    The value True means that the element is kept.
+
+    Parameters
+    ----------
+    data : array-like of bool
+        The mask selecting the elements to be kept.
+    broadcast : string, optional
+        Broadcasting mode. If it is 'disabled' (the default), the input
+        shape is the shape of the mask and the output shape is the number
+        of True elements.
+
+    """
+    def __init__(self, data, broadcast='disabled', **keywords):
+        # np.asarray only copies when needed, like np.array(..., copy=False)
+        # used to do. With NumPy 2, copy=False raises an error when a copy
+        # cannot be avoided (for instance when the input is a list).
+        data = np.asarray(data, dtype=bool)
+        # A non-empty mask keeping all the elements is handed over to
+        # DiagonalOperator. The size is checked first because the first
+        # element of an empty mask cannot be inspected.
+        if data.size > 0 and data.all():
+            self.__class__ = DiagonalOperator
+            self.__init__(data, broadcast=broadcast, **keywords)
+            return
+        if broadcast.lower() == 'disabled':
+            keywords['shapein'] = data.shape
+            keywords['shapeout'] = np.sum(data)
+        PackBase.__init__(self, data, broadcast,
+                          reshapein=self._reshape_unpacked,
+                          reshapeout=self._reshape_packed,
+                          validatein=self._validate_unpacked,
+                          validateout=self._validate_packed, **keywords)
+        self.set_rule('T', lambda s: UnpackOperator(s.data,
+                                                    broadcast=s.broadcast))
+        # NOTE(review): this rule declares the composition of the transpose
+        # and the operator to be the identity. Packing after unpacking is
+        # the identity, but unpacking after packing zeroes the discarded
+        # elements -- confirm that the subjects 'T,.' denote the former.
+        self.set_rule('T,.', '1', CompositionOperator)
+
+    def direct(self, input, output):
+        # the mask indexes the leading dimensions for a rightward broadcast
+        # and the trailing ones otherwise
+        if self.broadcast == 'rightward':
+            output[...] = input[self.data, ...]
+        else:
+            output[...] = input[..., self.data]
+
+    @staticmethod
+    def _rule_left_block(op, self):
+        # always returns None
+        return
+
+
+class UnpackOperator(PackBase):
+    """
+    Unpack a vector into an ndarray under the control of a mask.
+    The value True means that the element is kept.
+
+    Parameters
+    ----------
+    data : array-like of bool
+        The mask locating the elements of the input vector in the output.
+        The output elements for which the mask is False are set to zero.
+    broadcast : string, optional
+        Broadcasting mode. If it is 'disabled' (the default), the input
+        shape is the number of True elements and the output shape is the
+        shape of the mask.
+
+    """
+    def __init__(self, data, broadcast='disabled', **keywords):
+        # np.asarray only copies when needed, like np.array(..., copy=False)
+        # used to do. With NumPy 2, copy=False raises an error when a copy
+        # cannot be avoided (for instance when the input is a list).
+        data = np.asarray(data, dtype=bool)
+        # A non-empty mask keeping all the elements is handed over to
+        # DiagonalOperator. The size is checked first because the first
+        # element of an empty mask cannot be inspected.
+        if data.size > 0 and data.all():
+            self.__class__ = DiagonalOperator
+            self.__init__(data, broadcast=broadcast, **keywords)
+            return
+        if broadcast.lower() == 'disabled':
+            keywords['shapein'] = np.sum(data)
+            keywords['shapeout'] = data.shape
+        PackBase.__init__(self, data, broadcast,
+                          reshapein=self._reshape_packed,
+                          reshapeout=self._reshape_unpacked,
+                          validatein=self._validate_packed,
+                          validateout=self._validate_unpacked, **keywords)
+        self.set_rule('T', lambda s: PackOperator(s.data,
+                                                  broadcast=s.broadcast))
+        self.set_rule('T,.', '1', CompositionOperator)
+
+    def direct(self, input, output):
+        # the elements that are not selected by the mask are zeroed
+        output[...] = 0
+        if self.broadcast == 'rightward':
+            output[self.data, ...] = input
+        else:
+            output[..., self.data] = input
+
+    @staticmethod
+    def _rule_right_block(self, op, cls):
+        # no rule is returned for compositions; the other cases are
+        # delegated to BroadcastingBase
+        if cls is CompositionOperator:
+            return
+        return BroadcastingBase._rule_right_block(self, op, cls)
+
+
+class RadiansOperator(HomothetyOperator):
+    """
+    Homothety converting angles from degrees to radians.
+
+    The scaling factor is pi / 180, with pi obtained from `pi(dtype)`.
+    The inverse rule returns a DegreesOperator of the same dtype.
+
+    Example
+    -------
+    >>> r = RadiansOperator()
+    >>> r(180)
+    array(3.141592653589793)
+
+    """
+    def __init__(self, dtype=float, **keywords):
+        degrees_to_radians = pi(dtype) / 180
+        HomothetyOperator.__init__(self, degrees_to_radians, **keywords)
+        self.set_rule('I', lambda s: DegreesOperator(s.dtype))
+
+
+@real
+class Rotation2dOperator(DenseBlockDiagonalOperator):
+    """
+    2-d rotation operator.
+
+    Parameters
+    ----------
+    angle : float, array-like
+        The counter-clockwise rotation angle, in radians.
+    degrees : bool, optional
+        If set, the angle input is in degrees, instead of radians.
+    dtype : dtype, optional
+        The data type of the rotation matrices. By default, it is derived
+        from the dtype of the angles using float_dtype.
+
+    Example
+    -------
+    >>> r = Rotation2dOperator([45, 90], degrees=True)
+    >>> print(r([1, 0]))
+    [[  7.07106781e-01   7.07106781e-01]
+     [  6.12323400e-17   1.00000000e+00]]
+
+    """
+    def __init__(self, angle, degrees=False, dtype=None, **keywords):
+        angle = np.asarray(angle)
+        if dtype is None:
+            dtype = float_dtype(angle.dtype)
+        angle = np.asarray(angle, dtype)
+        if degrees:
+            angle = np.radians(angle)
+        cosine, sine = np.cos(angle), np.sin(angle)
+        matrix = np.array([[cosine, -sine],
+                           [sine, cosine]], dtype=dtype)
+        # the array has the shape (2, 2) + angle.shape: the dimensions of
+        # the angles are rolled one by one in front of the 2x2 matrices
+        for _ in range(angle.ndim):
+            matrix = np.rollaxis(matrix, -1)
+
+        flags = self.validate_flags(keywords.get('flags', {}),
+                                    orthogonal=True)
+        keywords['flags'] = flags
+        DenseBlockDiagonalOperator.__init__(self, matrix, **keywords)
+
+
+@real
+class Rotation3dOperator(DenseBlockDiagonalOperator):
+    """
+    Operator for 3-d active rotations about 1, 2 or 3 axes.
+
+    The rotation axes are specified one by one by selecting a convention.
+
+    For intrinsic rotations (in which the coordinate system changes with
+    the rotation), the following conventions are possible:
+        X, Y, Z,
+        XY', XZ', YX', YZ', ZX', ZY',
+        XZ'X'', XZ'Y'', XY'X'', XY'Z'',  YX'Y'', YX'Z'',
+        YZ'Y'', YZ'X'', ZY'Z'', ZY'X'', ZX'Z'' and ZX'Y''.
+    The primes denote the rotated axes after the first elemental rotation and
+    the double primes the rotated axes after the second one.
+
+    And for the extrinsic rotations (in which the original coordinate system
+    remains motionless):
+        X, Y, Z,
+        XY, XZ, YX, YZ, ZX, ZY,
+        XZX, XZY, XYX, XYZ, YXY, YXZ, YZY, YZX, ZYZ, ZYX, ZXZ and ZXY.
+
+    Parameters
+    ----------
+    convention : string
+        Specify from left to right the axes about which the elemental rotations
+        are performed.
+    a1 : float, array-like
+        Rotation angle about the first axis, in radians.
+    a2 : float, array-like
+        Rotation angle about the second axis, in radians.
+    a3 : float, array-like
+        Rotation angle about the third axis, in radians.
+    degrees : bool, optional
+        If set, the angle inputs are in degrees, instead of radians.
+    dtype : dtype, optional
+        The data type of the rotation matrices. By default, it is the
+        common floating point type of the three angles.
+
+    Raises
+    ------
+    TypeError
+        If the convention is not a string.
+    ValueError
+        If the convention is not one of those listed above.
+
+    Example
+    -------
+    >>> r1 = Rotation3dOperator("Y", 90, degrees=True)
+    >>> print(r1([1, 0, 0]))
+    [  6.12323400e-17   0.00000000e+00  -1.00000000e+00]
+    >>> r2 = Rotation3dOperator("XYZ", 30, 40, 50, degrees=True)
+    >>> print(r2([1, 0, 0]))
+    [ 0.49240388  0.58682409 -0.64278761]
+    >>> r3 = Rotation3dOperator("ZY'X''", 50, 40, 30, degrees=True)
+    >>> print(r3([1, 0, 0]))
+    [ 0.49240388  0.58682409 -0.64278761]
+
+    """
+    def __init__(self, convention, a1, a2=0, a3=0, degrees=False, dtype=None,
+                 **keywords):
+        if not isinstance(convention, str):
+            raise TypeError('Invalid type for the input convention.')
+        convention = convention.upper()
+        if any(l not in "XYZ'" for l in convention):
+            raise ValueError("Invalid convention '{0}'.".format(convention))
+        a1 = np.asarray(a1)
+        a2 = np.asarray(a2)
+        a3 = np.asarray(a3)
+        if dtype is None:
+            # np.result_type applied to dtypes performs the same promotion
+            # as np.find_common_type(dtypes, []), which is deprecated and
+            # has been removed from NumPy 2.
+            dtype = np.result_type(*[float_dtype(a.dtype)
+                                     for a in (a1, a2, a3)])
+        a1 = np.asarray(a1, dtype)
+        a2 = np.asarray(a2, dtype)
+        a3 = np.asarray(a3, dtype)
+        if degrees:
+            a1 = np.radians(a1)
+            a2 = np.radians(a2)
+            a3 = np.radians(a3)
+        naxes = len(convention.replace("'", ''))
+
+        # Extrinsic to intrinsic rotation conversion: the order of the axes
+        # and of the angles is reversed (no prime means extrinsic)
+        if naxes == 2 and len(convention) == 2:
+            convention = convention[1] + convention[0] + "'"
+            a1, a2 = a2, a1
+        elif naxes == 3 and len(convention) == 3:
+            convention = convention[2:0:-1] + "'" + convention[0] + "''"
+            a1, a3 = a3, a1
+
+        c1 = np.cos(a1)
+        s1 = np.sin(a1)
+        c2 = np.cos(a2)
+        s2 = np.sin(a2)
+        c3 = np.cos(a3)
+        s3 = np.sin(a3)
+        # gm assembles the 9 coefficients, given row by row, into matrices
+        gm = lambda *args: self._get_matrix(*(args + (dtype,)))
+        if convention == 'X':
+            m = gm(1,  0,  0,
+                   0, c1,-s1,
+                   0, s1, c1)
+        elif convention == 'Y':
+            m = gm( c1, 0, s1,
+                     0, 1,  0,
+                   -s1, 0, c1)
+        elif convention == 'Z':
+            m = gm(c1,-s1, 0,
+                   s1, c1, 0,
+                    0,  0, 1)
+        elif convention == "XZ'":
+            m = gm(c2, -s2, 0,
+                   c1*s2, c1*c2, -s1,
+                   s1*s2, c2*s1, c1)
+        elif convention == "XY'":
+            m = gm(c2, 0, s2,
+                   s1*s2, c1, -c2*s1,
+                   -c1*s2, s1, c1*c2)
+        elif convention == "YX'":
+            m = gm(c1, s1*s2, c2*s1,
+                   0, c2, -s2,
+                   -s1, c1*s2, c1*c2)
+        elif convention == "YZ'":
+            m = gm(c1*c2, -c1*s2, s1,
+                   s2, c2, 0,
+                   -c2*s1, s1*s2, c1)
+        elif convention == "ZY'":
+            m = gm(c1*c2,  -s1, c1*s2,
+                   c2*s1, c1, s1*s2,
+                   -s2, 0, c2)
+        elif convention == "ZX'":
+            m = gm(c1, -c2*s1, s1*s2,
+                   s1, c1*c2, -c1*s2,
+                   0, s2, c2)
+        elif convention == "XZ'X''":
+            m = gm(c2, -c3*s2, s2*s3,
+                   c1*s2, c1*c2*c3 - s1*s3,  -c3*s1 - c1*c2*s3,
+                   s1*s2, c1*s3 + c2*c3*s1, c1*c3 - c2*s1*s3)
+        elif convention == "XZ'Y''":
+            m = gm(c2*c3, -s2, c2*s3,
+                   s1*s3 + c1*c3*s2, c1*c2, c1*s2*s3 - c3*s1,
+                   c3*s1*s2 - c1*s3, c2*s1, c1*c3 + s1*s2*s3)
+        elif convention == "XY'X''":
+            m = gm(c2, s2*s3, c3*s2,
+                   s1*s2, c1*c3 - c2*s1*s3, -c1*s3 - c2*c3*s1,
+                   -c1*s2, c3*s1 + c1*c2*s3, c1*c2*c3 - s1*s3)
+        elif convention == "XY'Z''":
+            m = gm(c2*c3, -c2*s3, s2,
+                   c1*s3 + c3*s1*s2, c1*c3 - s1*s2*s3, -c2*s1,
+                   s1*s3 - c1*c3*s2, c3*s1 + c1*s2*s3, c1*c2)
+        elif convention == "YX'Y''":
+            m = gm(c1*c3 - c2*s1*s3, s1*s2, c1*s3 + c2*c3*s1,
+                   s2*s3, c2, -c3*s2,
+                   -c3*s1 - c1*c2*s3, c1*s2, c1*c2*c3 - s1*s3)
+        elif convention == "YX'Z''":
+            m = gm(c1*c3 + s1*s2*s3, c3*s1*s2 - c1*s3, c2*s1,
+                   c2*s3, c2*c3, -s2,
+                   c1*s2*s3 - c3*s1, s1*s3 + c1*c3*s2, c1*c2)
+        elif convention == "YZ'Y''":
+            m = gm(c1*c2*c3 - s1*s3, - c1*s2, c3*s1 + c1*c2*s3,
+                   c3*s2, c2, s2*s3,
+                   -c1*s3 - c2*c3*s1, s1*s2, c1*c3 - c2*s1*s3)
+        elif convention == "YZ'X''":
+            m = gm(c1*c2, s1*s3 - c1*c3*s2, c3*s1 + c1*s2*s3,
+                   s2, c2*c3, -c2*s3,
+                   -c2*s1, c1*s3 + c3*s1*s2, c1*c3 - s1*s2*s3)
+        elif convention == "ZY'Z''":
+            m = gm(c1*c2*c3 - s1*s3,  -c3*s1 - c1*c2*s3, c1*s2,
+                   c1*s3 + c2*c3*s1, c1*c3 - c2*s1*s3, s1*s2,
+                   -c3*s2, s2*s3, c2)
+        elif convention == "ZY'X''":
+            m = gm(c1*c2, c1*s2*s3 - c3*s1, s1*s3 + c1*c3*s2,
+                   c2*s1, c1*c3 + s1*s2*s3, c3*s1*s2 - c1*s3,
+                   -s2, c2*s3, c2*c3)
+        elif convention == "ZX'Z''":
+            m = gm(c1*c3 - c2*s1*s3, -c1*s3 - c2*c3*s1, s1*s2,
+                   c3*s1 + c1*c2*s3, c1*c2*c3 - s1*s3, - c1*s2,
+                   s2*s3, c3*s2, c2)
+        elif convention == "ZX'Y''":
+            m = gm(c1*c3 - s1*s2*s3, -c2*s1, c1*s3 + c3*s1*s2,
+                   c3*s1 + c1*s2*s3, c1*c2, s1*s3 - c1*c3*s2,
+                   -c2*s3, s2, c2*c3)
+        else:
+            raise ValueError(
+                "Invalid rotation convention {0}.".format(convention))
+
+        keywords['flags'] = self.validate_flags(
+            keywords.get('flags', {}), orthogonal=True)
+        DenseBlockDiagonalOperator.__init__(self, m, **keywords)
+
+    @staticmethod
+    def _get_matrix(a11, a12, a13,
+                    a21, a22, a23,
+                    a31, a32, a33, dtype):
+        """
+        Return an array of shape (..., 3, 3) whose trailing 3x3 matrices
+        are filled with the 9 coefficients, after they have been broadcast
+        against each other.
+
+        """
+        a11, a12, a13, a21, a22, a23, a31, a32, a33 = np.broadcast_arrays(
+            a11, a12, a13, a21, a22, a23, a31, a32, a33)
+        m = np.empty(a11.shape + (3, 3), dtype)
+        m[..., 0, 0] = a11
+        m[..., 0, 1] = a12
+        m[..., 0, 2] = a13
+        m[..., 1, 0] = a21
+        m[..., 1, 1] = a22
+        m[..., 1, 2] = a23
+        m[..., 2, 0] = a31
+        m[..., 2, 1] = a32
+        m[..., 2, 2] = a33
+        return m
+
+
+@real
+@linear
+class SumOperator(ReductionOperator):
+    """
+    Sum-along-axis operator.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    dtype : dtype, optional
+        Reduction data type.
+    skipna : boolean, optional
+        If this is set to True (the default), the reduction is done as if
+        any NA elements were not counted in the array. If it is set to
+        False, the NA values propagate, so if any element in a set of
+        elements being reduced is NA, the result will be NA.
+
+    Example
+    -------
+    >>> op = SumOperator()
+    >>> op([1,2,3])
+    array(6)
+
+    """
+    def __init__(self, axis=None, dtype=None, skipna=True, **keywords):
+        # NOTE(review): the versions are compared as strings. The second
+        # branch presumably anticipates a NumPy 2 whose reductions accept
+        # the keyword skipna -- confirm against ReductionOperator.
+        if np.__version__ < '2':
+            func = np.nansum if skipna else np.add
+        else:
+            func = np.add
+        ReductionOperator.__init__(self, func, axis=axis, dtype=dtype,
+                                   skipna=skipna, **keywords)
+
+    def transpose(self, input, output):
+        if self.axis is None:
+            # all the dimensions have been collapsed: the input is
+            # broadcast over the whole output
+            output[...] = input
+            return
+        # The reduced dimension is re-inserted with a length of one, so
+        # that the input is broadcast along it. A negative axis refers to
+        # the output, which has one more dimension than the input: it has
+        # to be converted before slicing the input shape (otherwise only
+        # the axis -1 would be handled correctly).
+        axis = self.axis
+        if axis < 0:
+            axis += input.ndim + 1
+        shape = input.shape[:axis] + (1,) + input.shape[axis:]
+        output[...] = input.reshape(shape)
+
+
+@linear
+@square
+class TridiagonalOperator(Operator):
+    def __init__(self, diagonal, subdiagonal, superdiagonal=None, dtype=None,
+                 **keywords):
+        """
+        Store a tridiagonal operator in the form of 3 arrays.
+
+        TODO: there is no such gtmv in BLAS. Implement fast (r)matvec or
+              investigate making it a BandOperator subclass
+        =====
+
+        Parameters
+        ----------
+        diagonal : ndarray of size N
+            The diagonal of the matrix.
+
+        subdiagonal : ndarray of size N-1
+            The subdiagonal of the matrix.
+
+        superdiagonal : ndarray of size N-1
+            The superdiagonal of the matrix. If it is None, the superdiagonal
+            is assumed to be the conjugate of the subdiagonal.
+
+        Raises
+        ------
+        ValueError
+            If one of the diagonals is not 1-dimensional, or if the size of
+            the sub or super diagonal is neither 1 nor N-1.
+
+        Example
+        -------
+        >>> T = TridiagonalOperator([1, 2, 3], [4, 5], [6, 7])
+        >>> T.todense()
+        array([[1, 6, 0],
+               [4, 2, 7],
+               [0, 5, 3]])
+
+        """
+        diagonal, subdiagonal, superdiagonal = cast(
+            [diagonal, subdiagonal, superdiagonal], dtype=dtype)
+        dtype = diagonal.dtype
+
+        if diagonal.ndim != 1:
+            raise ValueError('The diagonal must be a 1-dimensional array.')
+        if subdiagonal.ndim != 1:
+            raise ValueError('The sub diagonal must be a 1-dimensional array.')
+        if superdiagonal is not None and superdiagonal.ndim != 1:
+            raise ValueError(
+                'The super diagonal must be a 1-dimensional array.')
+
+        shapein = diagonal.size
+        if subdiagonal.size not in (1, shapein - 1):
+            raise ValueError(
+                'The sub diagonal should be the length of the diagonal minus o'
+                'ne or a scalar.')
+        if superdiagonal is not None and \
+           superdiagonal.size not in (1, shapein - 1):
+            raise ValueError('The super diagonal should be the length of the d'
+                             'iagonal minus one or a scalar.')
+
+        if superdiagonal is None:
+            superdiagonal = subdiagonal.conj()
+
+        self.diagonal = diagonal
+        self.subdiagonal = subdiagonal
+        self.superdiagonal = superdiagonal
+
+        flags = {'real': dtype.kind != 'c',
+                 'symmetric': np.allclose(self.subdiagonal,
+                                          self.superdiagonal),
+                 'hermitian': np.allclose(self.diagonal.imag, 0) and
+                              np.allclose(self.subdiagonal,
+                                          self.superdiagonal.conj())}
+        keywords['flags'] = flags
+        keywords['shapein'] = shapein
+
+        Operator.__init__(self, dtype=dtype, **keywords)
+        # transpose: the sub and super diagonals are swapped
+        self.set_rule('T', lambda s: TridiagonalOperator(
+            s.diagonal, s.superdiagonal, s.subdiagonal))
+        # conjugate: the three diagonals are conjugated
+        self.set_rule('C', lambda s: TridiagonalOperator(
+            s.diagonal.conj(), s.subdiagonal.conj(), s.superdiagonal.conj()))
+        # adjoint: both of the above
+        self.set_rule('H', lambda s: TridiagonalOperator(
+            s.diagonal.conj(), s.superdiagonal.conj(), s.subdiagonal.conj()))
+
+    def direct(self, input, output):
+        output[:] = self.diagonal * input
+        output[:-1] += self.superdiagonal * input[1:]
+        output[1:] += self.subdiagonal * input[:-1]
+
+    def transpose(self, input, output):
+        # The output buffer must be written in place: a plain assignment
+        # to the name 'output' would only rebind the local variable and
+        # the result would never reach the caller.
+        output[:] = self.diagonal * input
+        output[:-1] += self.subdiagonal * input[1:]
+        output[1:] += self.superdiagonal * input[:-1]
+
+    @property
+    def nbytes(self):
+        return self.diagonal.nbytes + self.subdiagonal.nbytes + \
+               self.superdiagonal.nbytes
+
+    def todense(self):
+        #XXX optimize me
+        out = np.zeros(self.shape, dtype=self.dtype)
+        out += np.diag(self.diagonal)
+        out += np.diag(self.subdiagonal, -1)
+        out += np.diag(self.superdiagonal, 1)
+        return out
+
+    def toband(self):
+        """
+        Convert the TridiagonalOperator into a BandOperator, or into a
+        SymmetricBandOperator (lower form) if the operator is symmetric.
+        """
+        if not self.flags.symmetric:
+            kl, ku = 1, 1
+            n = self.shape[1]
+            ab = np.zeros((kl + ku + 1, n), self.dtype)
+            diags = (self.subdiagonal, self.diagonal, self.superdiagonal)
+            for i, d in zip((-1, 0, 1), diags):
+                ab[_band_diag(ku, i)] = d
+            return BandOperator(ab, kl, ku)
+        else:
+            u = 2  # tridiagonal
+            n = self.shape[0]
+            # convert to ab format (lower)
+            ab = np.zeros((u, n), self.dtype)
+            ab[0] = self.diagonal
+            ab[1, :-1] = self.subdiagonal
+            return SymmetricBandOperator(ab, lower=True)
+
+
+@linear
+@square
+class BandOperator(Operator):
+    """
+    Store a band matrix in ab format as defined in LAPACK
+    documentation.
+
+    TODO:direct and transpose methods should call BLAS2 gbmv (not yet in scipy)
+    =====
+
+    With 0-based indices, a[i, j] is stored in ab[ku + i - j, j]
+
+    for max(0, j - ku) <= i <= min(n - 1, j + kl)
+
+    Band storage of A (5, 5), kl = 2, ku = 1 :
+
+     *  a01 a12 a23 a34
+    a00 a11 a22 a33 a44
+    a10 a21 a32 a43  *
+    a20 a31 a42  *   *
+
+    Arguments
+    ----------
+    ab : ndarray with ndim == 2
+        Store the bands of the matrix using LAPACK storage scheme.
+    kl : int
+        Number of subdiagonals
+    ku : int
+        Number of superdiagonals
+
+    Notes
+    -----
+    For a description of band matrices see LAPACK doc :
+
+    http://www.netlib.org/lapack/lug/node124.html
+
+    """
+    def __init__(self, ab, kl, ku, **kwargs):
+        """
+        Generate a BandOperator instance
+
+        Arguments
+        ---------
+        ab : ndarray with ndim == 2
+           Store the bands of the matrix using LAPACK storage scheme.
+        kl : int
+            Number of subdiagonals
+        ku : int
+            Number of superdiagonals
+        """
+        shapein = ab.shape[1]
+        self.ab = ab
+        self.kl = kl
+        self.ku = ku
+        self.kwargs = kwargs
+
+        return Operator.__init__(self, shapein=shapein, **kwargs)
+
+    @staticmethod
+    def _matvec(ab, kl, ku, x, out):
+        """
+        Write into out the product by x of the band matrix stored in ab,
+        which has kl subdiagonals and ku superdiagonals.
+        """
+        # diag
+        out[:] = ab[ku] * x
+        # upper part: the k-th superdiagonal is stored in ab[ku - k, k:]
+        for k in range(ku, 0, -1):
+            out[:-k] += ab[ku - k, k:] * x[k:]
+        # lower part: the k-th subdiagonal is stored in ab[ku + k, :-k]
+        for k in range(1, kl + 1):
+            out[k:] += ab[ku + k, :-k] * x[:-k]
+
+    def direct(self, x, out):
+        self._matvec(self.ab, self.kl, self.ku, x, out)
+
+    def transpose(self, x, out):
+        # the transpose has ku subdiagonals and kl superdiagonals
+        self._matvec(self.rab, self.ku, self.kl, x, out)
+
+    @property
+    def nbytes(self):
+        return self.ab.nbytes
+
+    def diag(self, i=0):
+        """
+        Returns the i-th diagonal (subdiagonal if i < 0, superdiagonal
+        if i >0).
+        """
+        return self.ab[_band_diag(self.ku, i)]
+
+    @property
+    def rab(self):
+        """
+        Output the ab form of the transpose operator.
+        """
+        ab = self.ab
+        kl, ku = self.kl, self.ku
+        # the number of superdiagonals of the transpose is the number of
+        # subdiagonals of the operator
+        rku = kl
+        rab = np.zeros(ab.shape, dtype=ab.dtype)
+        for i in range(- kl, ku + 1):
+            rab[_band_diag(rku, -i)] = self.diag(i)
+        return rab
+
+
+def _band_diag(ku, i=0):
+    """
+    Return the index selecting the i-th diagonal of a band operator stored
+    in ab format with ku superdiagonals.
+
+    For the main diagonal (i == 0), a single slice selecting the row ku is
+    returned. Otherwise, a (row slice, column slice) pair is returned.
+    """
+    if i == 0:
+        # diagonal: the whole row ku
+        return slice(ku, ku + 1)
+    row = slice(ku - i, ku - i + 1, None)
+    if i > 0:
+        # superdiagonal: the first i columns are skipped
+        columns = slice(i, None, None)
+    else:
+        # subdiagonal: the last -i columns are skipped
+        columns = slice(None, i, None)
+    return (row, columns)
+
+
+class LowerTriangularOperator(BandOperator):
+    """
+    A BandOperator without superdiagonals (ku=0). The number of
+    subdiagonals is the number of rows of ab minus one.
+    """
+    def __init__(self, ab, **kwargs):
+        nsubdiagonals = ab.shape[0] - 1
+        BandOperator.__init__(self, ab, nsubdiagonals, 0, **kwargs)
+
+
+class UpperTriangularOperator(BandOperator):
+    """
+    A BandOperator without subdiagonals (kl=0). The number of
+    superdiagonals is the number of rows of ab minus one.
+    """
+    def __init__(self, ab, **kwargs):
+        nsuperdiagonals = ab.shape[0] - 1
+        BandOperator.__init__(self, ab, 0, nsuperdiagonals, **kwargs)
+
+
+@symmetric
+class SymmetricBandOperator(Operator):
+    """
+    SymmetricBandOperator does not store the diagonals in the same
+    format as BandOperator does. This is not a subclass of
+    BandOperator.
+
+    The bands are stored in the symmetric banded format accepted by
+    scipy.linalg.eig_banded and scipy.linalg.cholesky_banded:
+        - lower form: ab[i - j, j] == a[i, j] (i >= j)
+        - upper form: ab[u + i - j, j] == a[i, j] (i <= j),
+    where u is the number of bands above (or below) the diagonal.
+
+    TODO: direct method should call BLAS2 sbmv (not yet in scipy)
+    =====
+
+    Arguments
+    ---------
+    ab : ndarray with ndim == 2
+        The bands of the matrix.
+    lower : boolean, optional
+        True if ab is in the lower form (the default).
+
+    """
+    def __init__(self, ab, lower=True, **kwargs):
+        kwargs['shapein'] = ab.shape[1]
+        self.ab = ab
+        self.lower = lower
+        self.kwargs = kwargs
+        return Operator.__init__(self, **kwargs)
+
+    def direct(self, x, out):
+        ab = self.ab
+        u = ab.shape[0] - 1
+        # the diagonal is the first row in the lower form and the last one
+        # in the upper form
+        out[:] = (ab[0] if self.lower else ab[u]) * x
+        for i in range(1, u + 1):
+            # i-th off-diagonal, shared by the upper and lower parts
+            band = ab[i, :-i] if self.lower else ab[u - i, i:]
+            # upper part
+            out[:-i] += band * x[i:]
+            # lower part
+            out[i:] += band * x[:-i]
+
+    @property
+    def nbytes(self):
+        return self.ab.nbytes
+
+    @property
+    def rab(self):
+        return self.ab
+
+    def eigen(self, eigvals_only=False, overwrite_a_band=False, select='a',
+              select_range=None, max_ev=0):
+        """
+        Solve real symmetric or complex hermitian band matrix
+        eigenvalue problem.
+
+        Uses scipy.linalg.eig_banded function.
+
+        Returns an EigendecompositionOperator or, if eigvals_only is True,
+        the array of eigenvalues.
+        """
+        from scipy.linalg import eig_banded
+
+        result = eig_banded(self.ab, lower=self.lower,
+                            eigvals_only=eigvals_only,
+                            overwrite_a_band=overwrite_a_band,
+                            select=select,
+                            select_range=select_range,
+                            max_ev=max_ev)
+        if eigvals_only:
+            # eig_banded only returns the eigenvalues: there is no pair to
+            # unpack and no decomposition to build
+            return result
+        w, v = result
+        return EigendecompositionOperator(w=w, v=v, **self.kwargs)
+
+    def cholesky(self, overwrite_ab=False):
+        """
+        Cholesky decomposition.
+        Operator needs to be positive-definite.
+
+        Uses scipy.linalg.cholesky_banded.
+
+        Returns a LowerTriangularOperator (lower form) or an
+        UpperTriangularOperator (upper form).
+        """
+        from scipy.linalg import cholesky_banded
+
+        ab_chol = cholesky_banded(self.ab, overwrite_ab=overwrite_ab,
+                                  lower=self.lower)
+        # The triangular operators only take ab as positional argument and
+        # BandOperator sets shapein itself: passing it again would raise
+        # a TypeError (multiple values for a keyword argument).
+        kwargs = dict(self.kwargs)
+        kwargs.pop('shapein', None)
+        if self.lower:
+            out = LowerTriangularOperator(ab_chol, **kwargs)
+        else:
+            out = UpperTriangularOperator(ab_chol, **kwargs)
+        return out
+
+
+@real
+@linear
+@symmetric
+@inplace
+class SymmetricBandToeplitzOperator(Operator):
+    """
+    The SymmetricBandToeplitz operator for symmetric band Toeplitz matrices.
+
+    The vector product is implemented using the FFTW library, so it scales
+    as O(nlogn) operations.
+
+    Parameters
+    ----------
+    shapein : tuple of int
+        The input shape. Its last element is the number of samples along
+        which the Toeplitz matrix is applied.
+    firstrow : array-like
+        The non-zero elements of the first row of the matrix, starting
+        with the diagonal one. If it has a single element along its last
+        dimension, the operator is turned into a DiagonalOperator.
+    dtype : dtype, optional
+        The operator data type (default: float).
+    fftw_flag : string, optional
+        The flag passed to the FFTW plans.
+    nthreads : int, optional
+        The number of threads passed to the FFTW plans. By default, it is
+        the number of CPUs returned by multiprocessing.cpu_count.
+
+    Example
+    -------
+    >>> N = SymmetricBandToeplitzOperator(5, [3, 2, 1, 1])
+    >>> print(N.todense().astype(int))
+    [[3 2 1 1 0]
+     [2 3 2 1 1]
+     [1 2 3 2 1]
+     [1 1 2 3 2]
+     [0 1 1 2 3]]
+
+    """
+    def __init__(self, shapein, firstrow, dtype=None, fftw_flag='FFTW_MEASURE',
+                 nthreads=None, **keywords):
+        shapein = tointtuple(shapein)
+        if dtype is None:
+            dtype = float
+        if nthreads is None:
+            nthreads = multiprocessing.cpu_count()
+        firstrow = np.asarray(firstrow, dtype)
+        # a band restricted to the diagonal is a diagonal operator
+        if firstrow.shape[-1] == 1:
+            self.__class__ = DiagonalOperator
+            self.__init__(firstrow[..., 0], broadcast='rightward',
+                          shapein=shapein, **keywords)
+            return
+        nsamples = shapein[-1]
+        bandwidth = 2 * firstrow.shape[-1] - 1
+        # number of non-zero elements on each side of the diagonal
+        ncorr = firstrow.shape[-1] - 1
+        # the FFT size is the smallest power of two, at least equal to 2,
+        # that is not less than nsamples + ncorr
+        fftsize = 2
+        while fftsize < nsamples + ncorr:
+            fftsize *= 2
+        # The forward (real to complex) and backward plans are created on
+        # buffers obtained from the pool. These buffers are only held
+        # inside the 'with' blocks: the plans are re-targeted to other
+        # buffers in the method direct.
+        with _pool.get(fftsize, dtype, aligned=True,
+                       contiguous=True) as rbuffer:
+            with _pool.get(fftsize // 2 + 1, complex_dtype(dtype),
+                           aligned=True, contiguous=True) as cbuffer:
+                fplan = pyfftw.FFTW(
+                    rbuffer, cbuffer, fftw_flags=[fftw_flag], threads=nthreads)
+                bplan = pyfftw.FFTW(
+                    cbuffer, rbuffer, direction='FFTW_BACKWARD',
+                    fftw_flags=[fftw_flag], threads=nthreads)
+                kernel = self._get_kernel(firstrow, fplan, rbuffer, cbuffer,
+                                          ncorr, fftsize, dtype)
+        Operator.__init__(self, shapein=shapein, dtype=dtype, **keywords)
+        self.nsamples = nsamples
+        self.fftsize = fftsize
+        self.bandwidth = bandwidth
+        self.ncorr = ncorr
+        self.fplan = fplan
+        self.bplan = bplan
+        self.kernel = kernel
+        self.fftw_flag = fftw_flag
+        self.nthreads = nthreads
+
+    def direct(self, x, out):
+        # Each row of the input (reshaped as a 2-d array with nsamples
+        # columns) is zero-padded, transformed, multiplied by the kernel
+        # and transformed back.
+        with _pool.get(self.fftsize, self.dtype, aligned=True,
+                       contiguous=True) as rbuffer:
+            with _pool.get(self.fftsize // 2 + 1, complex_dtype(self.dtype),
+                           aligned=True, contiguous=True) as cbuffer:
+                # number of zeros inserted before the samples; it is equal
+                # to ncorr since bandwidth is 2 * ncorr + 1
+                lpad = (self.bandwidth - 1) // 2
+                x = x.reshape((-1, self.nsamples))
+                out = out.reshape((-1, self.nsamples))
+                # make the plans operate on the buffers that have just
+                # been obtained
+                self.fplan.update_arrays(rbuffer, cbuffer)
+                self.bplan.update_arrays(cbuffer, rbuffer)
+
+                for x_, out_, kernel in izip_broadcast(x, out, self.kernel):
+                    rbuffer[:lpad] = 0
+                    rbuffer[lpad:lpad+self.nsamples] = x_
+                    rbuffer[lpad+self.nsamples:] = 0
+                    self.fplan.execute()
+                    cbuffer *= kernel
+                    self.bplan.execute()
+                    out_[...] = rbuffer[lpad:lpad+self.nsamples]
+
+    @property
+    def nbytes(self):
+        return self.kernel.nbytes
+
+    def _get_kernel(self, firstrow, fplan, rbuffer, cbuffer, ncorr, fftsize,
+                    dtype):
+        # Return one kernel per first row (firstrow is reshaped as a 2-d
+        # array with ncorr + 1 columns): the real part of the transform of
+        # the first row wrapped around a buffer of length fftsize, divided
+        # by fftsize.
+        firstrow = firstrow.reshape((-1, ncorr + 1))
+        kernel = empty((firstrow.shape[0], fftsize // 2 + 1), dtype)
+        for f, k in izip(firstrow, kernel):
+            # the first row is written at the beginning of the buffer and
+            # its elements past the diagonal one are mirrored at the end
+            rbuffer[:ncorr+1] = f
+            rbuffer[ncorr+1:-ncorr] = 0
+            rbuffer[-ncorr:] = f[:0:-1]
+            fplan.execute()
+            k[...] = cbuffer.real / fftsize
+        return kernel
+
+
+@real
+@linear
+class DifferenceOperator(Operator):
+    """
+    Non-optimised difference operator.
+
+    The direct operation is `np.diff` along `axis`, so the output has one
+    element less than the input along that axis.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the differences are taken (default: last axis).
+
+    """
+    def __init__(self, axis=-1, **keywords):
+        self.axis = axis
+        Operator.__init__(self, **keywords)
+
+    def direct(self, input, output):
+        output[...] = np.diff(input, axis=self.axis)
+
+    def transpose(self, input, output):
+        # Pad the input with one zero on each side along the axis, then take
+        # the negated differences of the padded array.
+        slices = [slice(None)] * input.ndim
+        slices[self.axis] = slice(1, -1)
+        shapetmp = list(input.shape)
+        shapetmp[self.axis] += 2
+        tmp = np.zeros(shapetmp)
+        # A multidimensional index must be a tuple: recent numpy versions no
+        # longer accept a list of slices.
+        tmp[tuple(slices)] = input
+        output[...] = -np.diff(tmp, axis=self.axis)
+
+    def reshapein(self, shapein):
+        # output shape from input shape: one element less along the axis
+        shape = list(shapein)
+        shape[self.axis] -= 1
+        return tuple(shape)
+
+    def reshapeout(self, shapeout):
+        # input shape from output shape: one element more along the axis
+        shape = list(shapeout)
+        shape[self.axis] += 1
+        return tuple(shape)
+
+
+@symmetric
+class EigendecompositionOperator(CompositionOperator):
+    """
+    Define a symmetric Operator from the eigendecomposition of another
+    symmetric Operator. This can be used as an approximation for the
+    operator.
+
+    Inputs
+    -------
+
+    A: Operator (default: None)
+      The linear operator to approximate.
+    v: 2d ndarray (default: None)
+      The eigenvectors as given by arpack.eigsh
+    w: 1d ndarray (default: None)
+      The eigenvalues as given by arpack.eigsh
+    **kwargs: keyword arguments
+      Passed to the arpack.eigsh function.
+
+    You need to specify either A or v and w.
+
+    Returns
+    -------
+
+    An EigendecompositionOperator instance, which is a subclass of
+    Operator.
+
+    Raises
+    ------
+
+    ValueError
+      If neither A nor both v and w are specified.
+
+    Notes
+    -----
+
+    This is really a wrapper for
+    scipy.sparse.linalg.eigen.arpack.eigsh
+    """
+    def __init__(self, A=None, v=None, w=None, **kwargs):
+        if v is None or w is None:
+            if A is None:
+                raise ValueError(
+                    'Either the operator A or both the eigenvectors v and th'
+                    'e eigenvalues w must be specified.')
+            w, v = eigsh(A, return_eigenvectors=True, **kwargs)
+        W = DiagonalOperator(w)
+        V = DenseOperator(v)
+        V.set_rule('T,.', '1', CompositionOperator)
+        self.eigenvalues = w
+        self.eigenvectors = v
+        # NOTE(review): the keywords given to eigsh are also forwarded to
+        # CompositionOperator.__init__ -- confirm that this is intended.
+        CompositionOperator.__init__(self, [V, W, V.T], **kwargs)
+        self.set_rule('I', lambda s: s ** -1)
+
+    @property
+    def nbytes(self):
+        return self.eigenvalues.nbytes + self.eigenvectors.nbytes
+
+    def det(self):
+        """
+        Output an approximation of the determinant from the
+        eigenvalues.
+        """
+        return np.prod(self.eigenvalues)
+
+    def logdet(self):
+        """
+        Output the log of the determinant. Useful as the determinant
+        of large matrices can exceed floating point capabilities.
+        """
+        return np.sum(np.log(self.eigenvalues))
+
+    def __pow__(self, n):
+        """
+        Raising an eigendecomposition to an integer power requires
+        only raising the eigenvalues to this power.
+        """
+        return EigendecompositionOperator(v=self.eigenvectors,
+                                          w=self.eigenvalues ** n)
+
+    def trace(self):
+        """
+        Output the sum of the eigenvalues.
+        """
+        return np.sum(self.eigenvalues)
+
+    def cond(self):
+        """
+        Output an approximation of the condition number by taking the
+        ratio of the maximum over the minimum eigenvalues, removing
+        the zeros.
+
+        For better approximation of the condition number, one should
+        consider generating the eigendecomposition with the keyword
+        which='BE', in order to have a correct estimate of the small
+        eigenvalues.
+        """
+        nze = self.eigenvalues[self.eigenvalues != 0]
+        return nze.max() / nze.min()
diff --git a/pyoperators/memory.py b/pyoperators/memory.py
new file mode 100644
index 0000000..626b6ae
--- /dev/null
+++ b/pyoperators/memory.py
@@ -0,0 +1,326 @@
+"""
+This module handles the allocation of memory.
+
+The stack is by construction a list of contiguous int8 vectors.
+In addition to temporary arrays that are used for intermediate operations,
+the stack may contain the array that will be the output of the operator.
+Care has been taken to ensure that the latter is released from the stack
+to avoid side effects.
+"""
+from __future__ import absolute_import, division, print_function
+
+import gc
+import inspect
+import numpy as np
+from contextlib import contextmanager
+from . import config
+from . import utils
+from .utils import ifirst, product, strshape, tointtuple
+
+__all__ = ['empty', 'ones', 'zeros']
+
+# Force a garbage collection when the accumulated nbytes of the deleted
+# operators reach this threshold (see garbage_collect).
+GC_NBYTES_THRESHOLD = 1e8
+
+# Alignment, in bytes, of the arrays returned by empty, ones and zeros.
+MEMORY_ALIGNMENT = 32
+
+# A pool buffer is reused only if its size does not exceed the requested
+# size by more than 20%.
+MEMORY_TOLERANCE = 1.2
+
+# Number of bytes accumulated by garbage_collect since the last collection.
+_gc_nbytes_counter = 0
+
+
+def empty(shape, dtype=float, order='c', description=None, verbose=None):
+    """
+    Return a new aligned and contiguous array of given shape and type, without
+    initializing entries.
+
+    Parameters
+    ----------
+    shape : integer or tuple of integers
+        Shape of the array.
+    dtype : dtype, optional
+        Data type of the array. The default is the builtin float, which is
+        what the removed `np.float` alias used to refer to.
+    order : string, optional
+        Memory layout passed to the final reshape.
+    description : string, optional
+        Text appended to the verbose message. If None, it is derived from
+        the first calling frame that is outside this module.
+    verbose : boolean, optional
+        If true, print a message describing the allocation. If None, the
+        value of config.PYOPERATORS_VERBOSE is used.
+
+    """
+    shape = tointtuple(shape)
+    dtype = np.dtype(dtype)
+    if verbose is None:
+        verbose = config.PYOPERATORS_VERBOSE
+
+    requested = product(shape) * dtype.itemsize
+    if requested == 0:
+        return np.empty(shape, dtype, order)
+
+    if verbose:
+        if description is None:
+            # walk up the call stack until a frame outside this file is found
+            frames = inspect.getouterframes(inspect.currentframe())
+            i = 1
+            while True:
+                if frames[i][1].replace('.pyc', '.py') != \
+                   __file__.replace('.pyc', '.py'):
+                    break
+                i += 1
+            description = frames[i][3].replace('<module>', 'session')
+            if 'self' in frames[i][0].f_locals:
+                cls = type(frames[i][0].f_locals['self']).__name__
+                description = cls + '.' + description
+            description = 'in ' + description
+        print(utils.strinfo('Allocating ' + strshape(shape) +
+              ' ' + (str(dtype) if dtype.kind != 'V' else 'elements') +
+              ' = ' + utils.strnbytes(requested) + ' ' + description))
+    # Over-allocate by MEMORY_ALIGNMENT bytes so that an aligned window of
+    # `requested` bytes always fits in the buffer. The allocation is retried
+    # once after a garbage collection if memory is exhausted.
+    try:
+        buf = np.empty(requested + MEMORY_ALIGNMENT, np.int8)
+    except MemoryError:
+        gc.collect()
+        buf = np.empty(requested + MEMORY_ALIGNMENT, np.int8)
+
+    address = buf.__array_interface__['data'][0]
+    # offset is in [1, MEMORY_ALIGNMENT]: address + offset is aligned
+    offset = MEMORY_ALIGNMENT - address % MEMORY_ALIGNMENT
+
+    return np.frombuffer(buf.data, np.int8, count=requested, offset=offset) \
+             .view(dtype).reshape(shape, order=order)
+
+
+def ones(shape, dtype=float, order='c', description=None, verbose=None):
+    """
+    Return a new aligned and contiguous array of given shape and type, filled
+    with ones.
+
+    The arguments are those of `empty`, which performs the allocation. The
+    default dtype is the builtin float (the removed `np.float` alias used to
+    refer to it).
+
+    """
+    a = empty(shape, dtype, order, description, verbose)
+    a[...] = 1
+    return a
+
+
+def zeros(shape, dtype=float, order='c', description=None, verbose=None):
+    """
+    Return a new aligned and contiguous array of given shape and type, filled
+    with zeros.
+
+    The arguments are those of `empty`, which performs the allocation. The
+    default dtype is the builtin float (the removed `np.float` alias used to
+    refer to it).
+
+    """
+    a = empty(shape, dtype, order, description, verbose)
+    a[...] = 0
+    return a
+
+
+def iscompatible(array, shape, dtype, aligned=False, contiguous=False,
+                 tolerance=np.inf):
+    """
+    Tell whether a buffer of given shape and dtype can be taken from an array.
+
+    The array is rejected if `aligned` is true and its address is not a
+    multiple of MEMORY_ALIGNMENT. A contiguous array is accepted if its size
+    in bytes is at least the non-zero requested size and at most `tolerance`
+    times that size. A non-contiguous array is accepted only if `contiguous`
+    is false and its shape and item size are the requested ones.
+
+    """
+    shape = tointtuple(shape)
+    dtype = np.dtype(dtype)
+    if aligned:
+        address = array.__array_interface__['data'][0]
+        if address % MEMORY_ALIGNMENT != 0:
+            return False
+    if array.flags.contiguous:
+        requested = product(shape) * dtype.itemsize
+        if requested == 0:
+            return False
+        return requested <= array.nbytes <= tolerance * requested
+    if contiguous:
+        return False
+    return array.shape == shape and array.itemsize == dtype.itemsize
+
+
+class MemoryPool(object):
+    """
+    Class implementing a pool of buffers.
+
+    The buffers are kept in a list ordered by increasing size in bytes
+    (see `add`), so that `extract` returns the smallest compatible one.
+
+    """
+
+    def __init__(self):
+        self._buffers = []
+
+    def add(self, v):
+        """
+        Add a numpy array to the pool.
+
+        A contiguous array is stored as a flat int8 view. A TypeError is
+        raised if the input is not an ndarray, and a ValueError if an entry
+        of the pool already starts at the same memory address.
+
+        """
+        if not isinstance(v, np.ndarray):
+            raise TypeError('The input is not an ndarray.')
+        if v.flags.contiguous:
+            v = v.ravel().view(np.int8)
+        a = v.__array_interface__['data'][0]
+        if any(_.__array_interface__['data'][0] == a for _ in self._buffers):
+            raise ValueError('There already is an entry in the pool pointing t'
+                             'o this memory location.')
+        # insert before the first entry that is at least as large
+        try:
+            i = ifirst(self._buffers, lambda x: x.nbytes >= v.nbytes)
+        except ValueError:
+            i = len(self._buffers)
+        self._buffers.insert(i, v)
+
+    def clear(self):
+        """
+        Clear the pool.
+
+        """
+        self._buffers = []
+        gc.collect()
+
+    @contextmanager
+    def copy_if(self, v, aligned=False, contiguous=False):
+        """
+        Return a context manager which may copy the input array into
+        a buffer from the pool to ensure alignment and contiguity requirements.
+
+        When a copy is made, the content of the buffer is copied back into
+        the input array on normal exit.
+
+        """
+        if not isinstance(v, np.ndarray):
+            raise TypeError('The input is not an ndarray.')
+        alignment = MEMORY_ALIGNMENT if aligned else 1
+        if v.__array_interface__['data'][0] % alignment != 0 or \
+           contiguous and not v.flags.contiguous:
+            # The requirements are forwarded: otherwise the pool could hand
+            # out a buffer that is itself misaligned or non-contiguous.
+            with self.get(v.shape, v.dtype, aligned=aligned,
+                          contiguous=contiguous) as buf:
+                buf[...] = v
+                yield buf
+                v[...] = buf
+        else:
+            yield v
+
+    def extract(self, shape, dtype, aligned=False, contiguous=False,
+                description=None, verbose=None):
+        """
+        Extract a buffer from the pool given the following requirements:
+        shape, dtype, alignment, contiguity.
+
+        If no entry of the pool is compatible, a new array is allocated.
+
+        """
+        shape = tointtuple(shape)
+        dtype = np.dtype(dtype)
+        compatible = lambda x: iscompatible(x, shape, dtype, aligned,
+                                            contiguous, MEMORY_TOLERANCE)
+        try:
+            i = ifirst(self._buffers, compatible)
+            v = self._buffers.pop(i)
+        except ValueError:
+            v = empty(shape, dtype, description=description, verbose=verbose)
+        return v
+
+    @contextmanager
+    def get(self, shape, dtype, aligned=False, contiguous=False,
+            description=None, verbose=None):
+        """
+        Return a context manager which retrieves a buffer from the pool
+        on enter, and set it back in the pool on exit.
+
+        The buffer is set back in the pool even if the body of the with
+        statement raises an exception.
+
+        """
+        v_ = self.extract(shape, dtype, aligned, contiguous, description,
+                          verbose)
+        v = self.view(v_, shape, dtype)
+
+        try:
+            yield v
+        finally:
+            self.add(v_)
+
+    @contextmanager
+    def get_if(self, condition, shape, dtype, description=None, verbose=None):
+        """
+        Return a context manager which conditionally retrieves a buffer
+        from the pool on enter, and set it back in the pool on exit.
+
+        If the condition is false, None is yielded.
+
+        """
+        if not condition:
+            yield None
+        else:
+            with self.get(shape, dtype, description=description,
+                          verbose=verbose) as v:
+                yield v
+
+    def remove(self, v):
+        """
+        Remove an entry from the pool.
+
+        The entry is identified by its memory address.
+
+        """
+        address = v.__array_interface__['data'][0]
+        i = ifirst((_.__array_interface__['data'][0] for _ in self._buffers),
+                   lambda x: x == address)
+        self._buffers.pop(i)
+
+    @contextmanager
+    def set(self, v):
+        """
+        Return a context manager that adds a buffer on enter, and remove it
+        on exit.
+
+        The buffer is removed even if the body of the with statement raises
+        an exception, so that the pool does not keep a reference to it.
+
+        """
+        self.add(v)
+        try:
+            yield
+        finally:
+            self.remove(v)
+
+    @contextmanager
+    def set_if(self, condition, v):
+        """
+        Return a context manager that conditionally adds a buffer on enter,
+        and remove it on exit.
+
+        """
+        if not condition:
+            yield
+        else:
+            with self.set(v):
+                yield
+
+    @staticmethod
+    def view(buf, shape, dtype):
+        """
+        Return a view of given shape and dtype from a buffer.
+
+        A non-contiguous buffer is returned as is if its shape and item size
+        are the requested ones, otherwise a ValueError is raised.
+
+        """
+        shape = tointtuple(shape)
+        dtype = np.dtype(dtype)
+        if buf.flags.contiguous:
+            buf = buf.ravel().view(np.int8)
+        elif buf.shape == shape and buf.itemsize == dtype.itemsize:
+            return buf
+        else:
+            raise ValueError('Shape mismatch.')
+        required = dtype.itemsize * product(shape)
+        return buf[:required].view(dtype).reshape(shape)
+
+    def __contains__(self, v):
+        """ Return True if an entry starts at the address of the input. """
+        if not isinstance(v, np.ndarray):
+            raise TypeError('The input is not an ndarray.')
+        address = v.__array_interface__['data'][0]
+        try:
+            ifirst((_.__array_interface__['data'][0] for _ in self._buffers),
+                   lambda x: x == address)
+        except ValueError:
+            return False
+        return True
+
+    def __getitem__(self, index):
+        """ Return pool entry by index. """
+        return self._buffers[index]
+
+    def __len__(self):
+        """ Return the number of entries in the pool. """
+        return len(self._buffers)
+
+    def __str__(self, names=None):
+        """
+        Print the stack.
+        A dict of ndarray addresses can be used to name the stack elements.
+
+        Example
+        -------
+        print(pool.__str__({'output':myoutput}))
+
+        """
+        if len(self) == 0:
+            return 'The memory stack is empty.'
+        if names is None:
+            names = {}
+        # map memory addresses to the names given by the caller
+        d = dict((v.__array_interface__['data'][0] if isinstance(v, np.ndarray)
+                 else v, k) for k, v in names.items())
+        result = []
+        for i, s in enumerate(self._buffers):
+            res = '{0:<2}: '.format(i)
+            address = s.__array_interface__['data'][0]
+            if address in d:
+                strid = d[address] + ' '
+            else:
+                strid = ''
+            strid += hex(address)
+            res += '{1}\t({2} bytes)'.format(i, strid, s.nbytes)
+            result.append(res)
+        return '\n'.join(result)
+
+
+def garbage_collect(nbytes=None):
+    """
+    Account for `nbytes` bytes and collect garbage once the threshold is hit.
+
+    The module counter is increased by `nbytes`, or by GC_NBYTES_THRESHOLD
+    if `nbytes` is None. When the counter reaches GC_NBYTES_THRESHOLD,
+    gc.collect is called and the counter is reset to zero.
+
+    """
+    global _gc_nbytes_counter
+    increment = GC_NBYTES_THRESHOLD if nbytes is None else nbytes
+    _gc_nbytes_counter += increment
+    if _gc_nbytes_counter < GC_NBYTES_THRESHOLD:
+        return
+    gc.collect()
+    _gc_nbytes_counter = 0
diff --git a/pyoperators/nonlinear.py b/pyoperators/nonlinear.py
new file mode 100644
index 0000000..77885ea
--- /dev/null
+++ b/pyoperators/nonlinear.py
@@ -0,0 +1,923 @@
+#coding: utf-8
+from __future__ import absolute_import, division, print_function
+import numexpr
+# numexpr.__version__ is a string: its major component is compared as an
+# integer, since ordering a string against the float 2.0 is meaningless
+# (and an error on Python 3).
+if int(numexpr.__version__.split('.')[0]) < 2:
+    raise ImportError('Please update numexpr to a newer version >= 2.0.')
+
+import numpy as np
+import pyoperators as po
+from .core import (
+    BlockColumnOperator, CompositionOperator, ConstantOperator, DiagonalBase,
+    IdentityOperator, MultiplicationOperator, Operator, ReductionOperator)
+from .flags import (
+    idempotent, inplace, real, separable, square, update_output)
+from .utils import (
+    operation_assignment, operation_symbol, pi, strenum, tointtuple)
+from .utils.ufuncs import hard_thresholding, soft_thresholding
+
+__all__ = ['Cartesian2SphericalOperator',
+           'ClipOperator',
+           'HardThresholdingOperator',
+           'MaxOperator',
+           'MinOperator',
+           'MinMaxOperator',
+           'MaximumOperator',
+           'MinimumOperator',
+           'NormalizeOperator',
+           'NumexprOperator',
+           'PowerOperator',
+           'ProductOperator',
+           'ReciprocalOperator',
+           'RoundOperator',
+           'SoftThresholdingOperator',
+           'Spherical2CartesianOperator',
+           'SqrtOperator',
+           'SquareOperator',
+           'To1dOperator',
+           'ToNdOperator']
+
+
+@real
+class _CartesianSpherical(Operator):
+    """
+    Base class of the cartesian <-> spherical conversion operators.
+
+    It validates and stores the spherical convention and provides the shape
+    helpers used by the subclasses: a cartesian shape ends with 3 and a
+    spherical shape ends with 2.
+
+    """
+    CONVENTIONS = ('zenith,azimuth',
+                   'azimuth,zenith',
+                   'elevation,azimuth',
+                   'azimuth,elevation')
+
+    def __init__(self, convention, dtype=float, **keywords):
+        """
+        convention : string
+            One of the entries of CONVENTIONS. Spaces and letter case are
+            ignored. A TypeError is raised if it is not a string and a
+            ValueError if it is not a known convention.
+        dtype : dtype, optional
+            Operator data type.
+
+        """
+        if not isinstance(convention, str):
+            raise TypeError("The input convention '{0}' is not a string.".
+                            format(convention))
+        convention_ = convention.replace(' ', '').lower()
+        if convention_ not in self.CONVENTIONS:
+            raise ValueError(
+                "Invalid spherical convention '{0}'. Expected values are {1}.".
+                format(convention, strenum(self.CONVENTIONS)))
+        self.convention = convention_
+        Operator.__init__(self, dtype=dtype, **keywords)
+
+    @staticmethod
+    def _reshapecartesian(shape):
+        # spherical shape associated with a cartesian shape
+        return shape[:-1] + (2,)
+
+    @staticmethod
+    def _reshapespherical(shape):
+        # cartesian shape associated with a spherical shape
+        return shape[:-1] + (3,)
+
+    @staticmethod
+    def _validatecartesian(shape):
+        if len(shape) == 0 or shape[-1] != 3:
+            raise ValueError('Invalid cartesian shape.')
+
+    @staticmethod
+    def _validatespherical(shape):
+        if len(shape) == 0 or shape[-1] != 2:
+            raise ValueError('Invalid spherical shape.')
+
+    @staticmethod
+    def _rule_identity(s, o):
+        # Two operators with the same convention simplify to the identity;
+        # otherwise None is returned implicitly.
+        if s.convention == o.convention:
+            return IdentityOperator()
+
+
+class Cartesian2SphericalOperator(_CartesianSpherical):
+    """
+    Convert cartesian unit vectors into spherical coordinates in radians
+    or degrees.
+
+    The spherical coordinate system is defined by:
+       - the zenith direction of coordinate (0, 0, 1)
+       - the azimuthal reference of coordinate (1, 0, 0)
+       - the azimuth signedness: it is counted positively from the X axis
+    to the Y axis.
+
+    The last dimension of the operator's output is 2 and it encodes
+    the two spherical angles. Four conventions define what these angles are:
+       - 'zenith,azimuth': (theta, phi) angles commonly used
+       in physics or the (colatitude, longitude) angles used
+       in the celestial and geographical coordinate systems
+       - 'azimuth,zenith': (longitude, colatitude) convention
+       - 'elevation,azimuth': (latitude, longitude) convention
+       - 'azimuth,elevation': (longitude, latitude) convention
+
+    """
+    def __init__(self, convention, degrees=False, **keywords):
+        """
+        convention : string
+            One of the following spherical coordinate conventions:
+            'zenith,azimuth', 'azimuth,zenith', 'elevation,azimuth' and
+            'azimuth,elevation'.
+        degrees : boolean, optional
+            If true, the spherical coordinates are returned in degrees.
+
+        """
+        # With degrees, the instance is turned into the composition of a
+        # DegreesOperator and a Cartesian2SphericalOperator working in
+        # radians.
+        if degrees:
+            self.__class__ = CompositionOperator
+            self.__init__(
+                [po.linear.DegreesOperator(),
+                 Cartesian2SphericalOperator(convention, **keywords)])
+            return
+        self.degrees = False
+
+        _CartesianSpherical.__init__(
+            self, convention,
+            reshapein=self._reshapecartesian,
+            reshapeout=self._reshapespherical,
+            validatein=self._validatecartesian,
+            validateout=self._validatespherical,
+            **keywords)
+        # the inverse is the opposite conversion with the same convention
+        self.set_rule('I',
+                      lambda s: Spherical2CartesianOperator(s.convention))
+        self.set_rule(('.', Spherical2CartesianOperator), self._rule_identity,
+                      CompositionOperator)
+
+    def direct(self, input, output):
+        # o1 receives the zenith (or elevation) angle, o2 the azimuth
+        if self.convention.startswith('azimuth'):
+            o1, o2 = output[..., 1], output[..., 0]
+        else:
+            o1, o2 = output[..., 0], output[..., 1]
+        # zenith angle from the z component
+        np.arccos(input[..., 2], o1)
+        if 'elevation' in self.convention:
+            # elevation = pi / 2 - zenith, computed in place
+            np.subtract(pi(self.dtype) / 2, o1, o1)
+        # azimuth from the y and x components
+        np.arctan2(input[..., 1], input[..., 0], o2)
+        # negative azimuths are shifted by 2 pi
+        if o2.ndim > 0:
+            o2[o2 < 0] += 2 * pi(self.dtype)
+        elif o2 < 0:
+            o2 += 2 * pi(self.dtype)
+
+
+class Spherical2CartesianOperator(_CartesianSpherical):
+    """
+    Convert spherical coordinates in radians or degrees into unit cartesian
+    vectors.
+
+    The spherical coordinate system is defined by:
+       - the zenith direction of coordinate (0, 0, 1)
+       - the azimuthal reference of coordinate (1, 0, 0)
+       - the azimuth signedness: it is counted positively from the X axis
+    to the Y axis.
+
+    The last dimension of the operator's input is 2 and it encodes
+    the two spherical angles. Four conventions define what these angles are:
+       - 'zenith,azimuth': (theta, phi) angles commonly used
+       in physics or the (colatitude, longitude) angles used
+       in the celestial and geographical coordinate systems
+       - 'azimuth,zenith': (longitude, colatitude) convention
+       - 'elevation,azimuth': (latitude, longitude) convention
+       - 'azimuth,elevation': (longitude, latitude) convention
+
+    """
+    def __init__(self, convention, degrees=False, **keywords):
+        """
+        convention : string
+            One of the following spherical coordinate conventions:
+            'zenith,azimuth', 'azimuth,zenith', 'elevation,azimuth' and
+            'azimuth,elevation'.
+        degrees : boolean, optional
+            If true, the input spherical coordinates are assumed to be in
+            degrees.
+
+        """
+        # With degrees, the instance is turned into the composition of a
+        # Spherical2CartesianOperator working in radians and a
+        # RadiansOperator.
+        if degrees:
+            self.__class__ = CompositionOperator
+            self.__init__(
+                [Spherical2CartesianOperator(convention, **keywords),
+                 po.linear.RadiansOperator()])
+            return
+        self.degrees = False
+
+        _CartesianSpherical.__init__(
+            self, convention,
+            reshapein=self._reshapespherical,
+            reshapeout=self._reshapecartesian,
+            validatein=self._validatespherical,
+            validateout=self._validatecartesian,
+            **keywords)
+        # the inverse is the opposite conversion with the same convention
+        self.set_rule('I',
+                      lambda s: Cartesian2SphericalOperator(s.convention))
+        self.set_rule(('.', Cartesian2SphericalOperator), self._rule_identity,
+                      CompositionOperator)
+
+    def direct(self, input, output):
+        # theta is the zenith (or elevation) angle, phi the azimuth
+        if self.convention.startswith('azimuth'):
+            theta, phi = input[..., 1], input[..., 0]
+        else:
+            theta, phi = input[..., 0], input[..., 1]
+        if 'elevation' in self.convention:
+            # zenith = pi / 2 - elevation (a new array, the input is kept)
+            theta = 0.5 * pi(self.dtype) - theta
+        sintheta = np.sin(theta)
+        # x, y and z components are written in the output in place
+        np.multiply(sintheta, np.cos(phi), output[..., 0])
+        np.multiply(sintheta, np.sin(phi), output[..., 1])
+        np.cos(theta, output[..., 2])
+
+
+@square
+@inplace
+@separable
+class ClipOperator(Operator):
+    """
+    Clip (limit) the values in an array.
+
+    Given an interval, values outside the interval are clipped to
+    the interval edges.  For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Arguments
+    ---------
+    minvalue: scalar or array_like
+        The minimum limit below which all input values are set to minvalue.
+    maxvalue: scalar or array_like
+        The maximum limit above which all input values are set to maxvalue.
+
+    Examples
+    --------
+    >>> C = ClipOperator(0, 1)
+    >>> x = np.linspace(-2, 2, 5)
+    >>> x
+    array([-2., -1.,  0.,  1.,  2.])
+    >>> C(x)
+    array([ 0.,  0.,  0.,  1.,  1.])
+
+    See also
+    --------
+    MaximumOperator, MinimumOperator, np.clip
+
+    """
+    def __init__(self, minvalue, maxvalue, **keywords):
+        # stored as arrays so that nbytes is defined for scalar limits too
+        self.minvalue = np.asarray(minvalue)
+        self.maxvalue = np.asarray(maxvalue)
+        Operator.__init__(self, **keywords)
+
+    def direct(self, input, output):
+        np.clip(input, self.minvalue, self.maxvalue, out=output)
+
+    @property
+    def nbytes(self):
+        return self.minvalue.nbytes + self.maxvalue.nbytes
+
+    def __str__(self):
+        return 'clip(..., {0}, {1})'.format(self.minvalue, self.maxvalue)
+
+
+@square
+@inplace
+@separable
+class PowerOperator(Operator):
+    """
+    X -> X**n
+
+    For the exponents -1, 0, 0.5, 1 and 2, the instance is turned into a
+    ReciprocalOperator, a ConstantOperator of value 1, a SqrtOperator, an
+    IdentityOperator and a SquareOperator, respectively.
+
+    Parameters
+    ----------
+    n : scalar or array_like
+        The exponent.
+    dtype : dtype, optional
+        Operator data type.
+
+    """
+    def __init__(self, n, dtype=float, **keywords):
+        if np.allclose(n, -1) and not isinstance(self, ReciprocalOperator):
+            self.__class__ = ReciprocalOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+        # np.all makes the test well-defined for an array exponent, like the
+        # np.allclose tests on the other special values.
+        if np.all(n == 0):
+            self.__class__ = ConstantOperator
+            self.__init__(1, dtype=dtype, **keywords)
+            return
+        if np.allclose(n, 0.5) and not isinstance(self, SqrtOperator):
+            self.__class__ = SqrtOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+        if np.allclose(n, 1):
+            self.__class__ = IdentityOperator
+            self.__init__(**keywords)
+            return
+        if np.allclose(n, 2) and not isinstance(self, SquareOperator):
+            self.__class__ = SquareOperator
+            self.__init__(dtype=dtype, **keywords)
+            return
+        self.n = n
+        Operator.__init__(self, dtype=dtype, **keywords)
+        # inverse: (x**n)**(1/n) = x
+        self.set_rule('I', lambda s: PowerOperator(1/s.n))
+        # composition: (x**m)**n = x**(n*m)
+        self.set_rule(('.', PowerOperator),
+                      lambda s, o: PowerOperator(s.n * o.n),
+                      CompositionOperator)
+        # multiplication: x**n * x**m = x**(n+m)
+        self.set_rule(('.', PowerOperator),
+                      lambda s, o: PowerOperator(s.n + o.n),
+                      MultiplicationOperator)
+        # multiplication by a diagonal operator: x**n * (d*x) = d * x**(n+1)
+        self.set_rule(('.', DiagonalBase),
+                      lambda s, o: MultiplicationOperator(
+                          [ConstantOperator(o.get_data(),
+                                            broadcast=o.broadcast),
+                           PowerOperator(s.n + 1)]),
+                      MultiplicationOperator)
+
+    def direct(self, input, output):
+        np.power(input, self.n, output)
+
+    @property
+    def nbytes(self):
+        # the exponent may be a Python scalar, which has no nbytes attribute
+        return np.asarray(self.n).nbytes
+
+    def __str__(self):
+        return '...**{0}'.format(self.n)
+
+
+class ReciprocalOperator(PowerOperator):
+    """
+    X -> 1 / X
+
+    PowerOperator of exponent -1, evaluated with np.reciprocal.
+
+    """
+    def __init__(self, **keywords):
+        exponent = -1
+        PowerOperator.__init__(self, exponent, **keywords)
+
+    def direct(self, input, output):
+        np.reciprocal(input, out=output)
+
+    def __str__(self):
+        return '1/...'
+
+
+class SqrtOperator(PowerOperator):
+    """
+    X -> sqrt(X)
+
+    PowerOperator of exponent 0.5, evaluated with np.sqrt.
+
+    """
+    def __init__(self, **keywords):
+        exponent = 0.5
+        PowerOperator.__init__(self, exponent, **keywords)
+
+    def direct(self, input, output):
+        np.sqrt(input, out=output)
+
+
+class SquareOperator(PowerOperator):
+    """
+    X -> X**2
+
+    PowerOperator of exponent 2, evaluated with np.square.
+
+    """
+    def __init__(self, **keywords):
+        PowerOperator.__init__(self, 2, **keywords)
+
+    def direct(self, input, output):
+        np.square(input, output)
+
+    def __str__(self):
+        label = u'...²'
+        # __str__ must return the native str type: UTF-8 encoded bytes on
+        # Python 2 (where str is bytes), text on Python 3.
+        return label.encode('utf-8') if str is bytes else label
+
+
+class ProductOperator(ReductionOperator):
+    """
+    Product-along-axis operator.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    dtype : dtype, optional
+        Reduction data type.
+    skipna : boolean, optional
+        If this is set to True (the default for this operator), the reduction
+        is done as if any NA elements were not counted in the array. If it is
+        set to False, the NA values propagate, so if any element in a set of
+        elements being reduced is NA, the result will be NA.
+
+    Example
+    -------
+    >>> op = ProductOperator()
+    >>> op([1,2,3])
+    array(6)
+
+    """
+    def __init__(self, axis=None, dtype=None, skipna=True, **keywords):
+        # the reduction is built on the np.multiply ufunc
+        ReductionOperator.__init__(self, np.multiply, axis=axis, dtype=dtype,
+                                   skipna=skipna, **keywords)
+
+    def __str__(self):
+        return 'product' if self.axis is None \
+                         else 'product(..., axis={0})'.format(self.axis)
+
+
+class MaxOperator(ReductionOperator):
+    """
+    Max-along-axis operator.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    dtype : dtype, optional
+        Reduction data type.
+    skipna : boolean, optional
+        If this is set to True, the reduction is done as if any NA elements
+        were not counted in the array. The default, False, causes the NA values
+        to propagate, so if any element in a set of elements being reduced is
+        NA, the result will be NA.
+
+    Example
+    -------
+    >>> op = MaxOperator()
+    >>> op([1,2,3])
+    array(3)
+
+    """
+    def __init__(self, axis=None, dtype=None, skipna=False, **keywords):
+        # np.nanmax is only selected when skipna is set and the numpy
+        # version string compares lower than '2'.
+        use_nanmax = skipna and np.__version__ < '2'
+        func = np.nanmax if use_nanmax else np.max
+        ReductionOperator.__init__(self, func, axis=axis, dtype=dtype,
+                                   skipna=skipna, **keywords)
+
+    def __str__(self):
+        if self.axis is None:
+            return 'max'
+        return 'max(..., axis={0})'.format(self.axis)
+
+
+class MinOperator(ReductionOperator):
+    """
+    Min-along-axis operator.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    dtype : dtype, optional
+        Reduction data type.
+    skipna : boolean, optional
+        If this is set to True, the reduction is done as if any NA elements
+        were not counted in the array. The default, False, causes the NA values
+        to propagate, so if any element in a set of elements being reduced is
+        NA, the result will be NA.
+
+    Example
+    -------
+    >>> op = MinOperator()
+    >>> op([1,2,3])
+    array(1)
+
+    """
+    def __init__(self, axis=None, dtype=None, skipna=False, **keywords):
+        # np.nanmin is only selected when skipna is set and the numpy
+        # version string compares lower than '2'.
+        use_nanmin = skipna and np.__version__ < '2'
+        func = np.nanmin if use_nanmin else np.min
+        ReductionOperator.__init__(self, func, axis=axis, dtype=dtype,
+                                   skipna=skipna, **keywords)
+
+    def __str__(self):
+        if self.axis is None:
+            return 'min'
+        return 'min(..., axis={0})'.format(self.axis)
+
+
+class MinMaxOperator(BlockColumnOperator):
+    """
+    MinMax-along-axis operator.
+
+    It is the block column operator made of a MinOperator and a MaxOperator
+    sharing the same arguments.
+
+    Parameters
+    ----------
+    axis : integer, optional
+        Axis along which the reduction is performed. If None, all dimensions
+        are collapsed.
+    new_axisout : integer, optional
+        Axis in which the minimum and maximum values are set.
+    dtype : dtype, optional
+        Operator data type.
+    skipna : boolean, optional
+        If this is set to True, the reduction is done as if any NA elements
+        were not counted in the array. The default, False, causes the NA values
+        to propagate, so if any element in a set of elements being reduced is
+        NA, the result will be NA.
+
+    Example
+    -------
+    >>> op = MinMaxOperator()
+    >>> op([1,2,3])
+    array([1, 3])
+    >>> op = MinMaxOperator(axis=0, new_axisout=0)
+    >>> op([[1,2,3],[2,1,4],[0,1,8]])
+    array([[0, 1, 3],
+           [2, 2, 8]])
+
+    """
+    def __init__(self, axis=None, dtype=None, skipna=False, new_axisout=-1,
+                 **keywords):
+        operands = [MinOperator(axis=axis, dtype=dtype, skipna=skipna),
+                    MaxOperator(axis=axis, dtype=dtype, skipna=skipna)]
+        BlockColumnOperator.__init__(self, operands, new_axisout=new_axisout,
+                                     **keywords)
+        # __str__ reads the reduction axis, which has to be stored on the
+        # instance.
+        # NOTE(review): assumes BlockColumnOperator does not itself define a
+        # conflicting `axis` attribute -- confirm against core.py.
+        self.axis = axis
+
+    def __str__(self):
+        return 'minmax' if self.axis is None \
+                        else 'minmax(..., axis={0})'.format(self.axis)
+
+
+@square
+@inplace
+@separable
+class MaximumOperator(Operator):
+    """
+    Set all input array values below a given value to this value.
+
+    The output is the element-wise maximum of the input and the threshold,
+    as computed by np.maximum.
+
+    Arguments
+    ---------
+    value: scalar or array_like
+        Threshold value to which the input array is compared.
+
+    Example
+    -------
+    >>> M = MaximumOperator(1)
+    >>> x = np.linspace(-2, 2, 5)
+    >>> x
+    array([-2., -1.,  0.,  1.,  2.])
+    >>> M(x)
+    array([ 1.,  1.,  1.,  1.,  2.])
+
+    See also
+    --------
+    ClipOperator, MinimumOperator, np.maximum
+
+    """
+    def __init__(self, value, **keywords):
+        # stored as an ndarray so that 'nbytes' is always available
+        self.value = np.asarray(value)
+        Operator.__init__(self, **keywords)
+
+    def direct(self, input, output):
+        np.maximum(input, self.value, output)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes used by the threshold array. """
+        return self.value.nbytes
+
+    def __str__(self):
+        return 'maximum(..., {0})'.format(self.value)
+
+
+@square
+@inplace
+@separable
+class MinimumOperator(Operator):
+    """
+    Set all input array values above a given value to this value.
+
+    The output is the element-wise minimum of the input and the threshold,
+    as computed by np.minimum.
+
+    Arguments
+    ---------
+    value: scalar, broadcastable array
+        The value to which the input array is compared.
+
+    Example
+    -------
+    >>> M = MinimumOperator(1)
+    >>> x = np.linspace(-2, 2, 5)
+    >>> x
+    array([-2., -1.,  0.,  1.,  2.])
+    >>> M(x)
+    array([-2., -1.,  0.,  1.,  1.])
+
+    See also
+    --------
+    ClipOperator, MaximumOperator, np.minimum
+
+    """
+    def __init__(self, value, **keywords):
+        # stored as an ndarray so that 'nbytes' is always available
+        self.value = np.asarray(value)
+        Operator.__init__(self, **keywords)
+
+    def direct(self, input, output):
+        np.minimum(input, self.value, output)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes used by the threshold array. """
+        return self.value.nbytes
+
+    def __str__(self):
+        return 'minimum(..., {0})'.format(self.value)
+
+
+@square
+@inplace
+class NormalizeOperator(Operator):
+    """
+    Normalize a cartesian vector.
+
+    The vector components are stored along the last dimension of the input,
+    which is divided by the square root of the sum of its squared components
+    along that dimension.
+
+    Parameters
+    ----------
+    dtype : dtype, optional
+        Operator data type (default: float).
+
+    Example
+    -------
+    >>> n = NormalizeOperator()
+    >>> n([1, 1])
+    array([ 0.70710678,  0.70710678])
+
+    """
+    def __init__(self, dtype=float, **keywords):
+        Operator.__init__(self, dtype=dtype, **keywords)
+
+    def direct(self, input, output):
+        # the norm keeps a trailing dimension of length one, so that it
+        # broadcasts against the input
+        norm = np.sqrt(np.sum(input**2, axis=-1))[..., None]
+        np.divide(input, norm, output)
+
+
+@square
+@inplace
+@update_output
+class NumexprOperator(Operator):
+    """
+    Return an operator evaluating an expression using numexpr.
+
+    Parameters
+    ----------
+    expr : string
+        The numexpr expression to be evaluated. It must contain the 'input'
+        variable name.
+    global_dict : dict
+        A dictionary of global variables that are passed to numexpr's
+        'evaluate' method.
+    dtype : dtype, optional
+        Operator data type (default: float).
+
+    Example
+    -------
+    >>> k = 1.2
+    >>> op = NumexprOperator('exp(input+k)', {'k':k})
+    >>> print(op(1) == np.exp(2.2))
+    True
+
+    """
+    def __init__(self, expr, global_dict=None, dtype=float, **keywords):
+        self.expr = expr
+        self.global_dict = global_dict
+        # The 'update_output' flag is switched off for numexpr < 2.1. The
+        # version is compared numerically: comparing the strings would
+        # misorder releases such as '10.0' and '2.1'.
+        if self._version_info(numexpr.__version__) < (2, 1):
+            keywords['flags'] = self.validate_flags(
+                keywords.get('flags', {}), update_output=False)
+        Operator.__init__(self, dtype=dtype, **keywords)
+
+    @staticmethod
+    def _version_info(version):
+        """ Return the leading numeric components of a version string. """
+        info = []
+        for part in version.split('.'):
+            digits = ''
+            for char in part:
+                if not char.isdigit():
+                    break
+                digits += char
+            if not digits:
+                break
+            info.append(int(digits))
+        return tuple(info)
+
+    def direct(self, input, output, operation=operation_assignment):
+        if operation is operation_assignment:
+            expr = self.expr
+        else:
+            # in-place update of the output, e.g. 'output+=(<expr>)'
+            op = operation_symbol[operation]
+            expr = 'output' + op + '(' + self.expr + ')'
+        # No 'local_dict' is given: numexpr looks the 'input' and 'output'
+        # variables up in the locals of this frame, so these two argument
+        # names must not be changed.
+        numexpr.evaluate(expr, global_dict=self.global_dict, out=output)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes used by the arrays stored in 'global_dict'. """
+        if self.global_dict is None:
+            return 0
+        # plain 'sum': calling np.sum on a generator is deprecated
+        return sum(v.nbytes for v in self.global_dict.values()
+                   if hasattr(v, 'nbytes'))
+
+    def __str__(self):
+        return 'numexpr({0}, ...)'.format(self.expr)
+
+
+@square
+@idempotent
+@inplace
+@separable
+class RoundOperator(Operator):
+    """
+    Rounding operator.
+
+    The rounding method may be one of the following:
+        - rtz : round towards zero (truncation)
+        - rti : round towards infinity (Not implemented)
+        - rtmi : round towards minus infinity (floor)
+        - rtpi : round towards positive infinity (ceil)
+        - rhtz : round half towards zero (Not implemented)
+        - rhti : round half towards infinity (Not implemented)
+        - rhtmi : round half towards minus infinity
+        - rhtpi : round half towards positive infinity
+        - rhte : round half to even (numpy's round),
+        - rhto : round half to odd (Not implemented)
+        - rhs : round half stochastically (Not implemented)
+
+    Parameters
+    ----------
+    method : string, optional
+        The case-insensitive name of the rounding method (default: 'rhte').
+        A ValueError is raised if the method is not implemented.
+
+    """
+    def __init__(self, method='rhte', **keywords):
+        method = method.lower()
+        # rounding method -> function (input, output) doing the rounding
+        table = {'rtz': np.trunc,
+                 #'rti'
+                 'rtmi': np.floor,
+                 'rtpi': np.ceil,
+                 #'rhtz'
+                 #'rhti'
+                 'rhtmi': self._direct_rhtmi,
+                 'rhtpi': self._direct_rhtpi,
+                 'rhte': lambda i, o: np.round(i, 0, o),
+                 #'rhs'
+                 }
+        if method not in table:
+            raise ValueError(
+                'Invalid rounding method. Expected values are {0}.'.format(
+                strenum(table.keys())))
+        Operator.__init__(self, table[method], **keywords)
+        self.method = method
+
+    @staticmethod
+    def _direct_rhtmi(input, output):
+        """ Round half to -inf. """
+        # ceil(x + 0.5) - 1
+        np.add(input, 0.5, output)
+        np.ceil(output, output)
+        np.add(output, -1, output)
+
+    @staticmethod
+    def _direct_rhtpi(input, output):
+        """ Round half to +inf. """
+        # floor(x - 0.5) + 1
+        np.add(input, -0.5, output)
+        np.floor(output, output)
+        np.add(output, 1, output)
+
+    def __str__(self):
+        # the leading 'r' (for 'round') is dropped from the method name
+        method = self.method[1:]
+        if method == 'tmi':
+            method = 'floor'
+        elif method == 'tpi':
+            method = 'ceil'
+        elif method == 'tz':
+            method = 'trunc'
+        return 'round_{0}'.format(method)
+
+
+@square
+@idempotent
+@inplace
+@separable
+class HardThresholdingOperator(Operator):
+    """
+    Hard thresholding operator.
+
+    Ha(x) = x if |x| > a,
+            0 otherwise.
+
+    If the threshold is zero everywhere, the instance is turned into an
+    IdentityOperator. The composition of two hard thresholding operators is
+    simplified into a single one, whose threshold is the element-wise maximum
+    of the two thresholds.
+
+    Parameter
+    ---------
+    a : positive float or array
+        The hard threshold. If it is an array, its shape sets the operator's
+        input shape. A ValueError is raised if any threshold is negative.
+
+    """
+    def __init__(self, a, **keywords):
+        a = np.asarray(a)
+        if np.any(a < 0):
+            raise ValueError('Negative hard threshold.')
+        if a.ndim > 0:
+            keywords['shapein'] = a.shape
+        if 'dtype' not in keywords:
+            keywords['dtype'] = float
+        if np.all(a == 0):
+            # nothing is thresholded: morph into the identity
+            self.__class__ = IdentityOperator
+            self.__init__(**keywords)
+            return
+        Operator.__init__(self, **keywords)
+        self.a = a
+        self.set_rule(('.', HardThresholdingOperator), lambda s, o:
+                      HardThresholdingOperator(np.maximum(s.a, o.a)),
+                      CompositionOperator)
+
+    def direct(self, input, output):
+        hard_thresholding(input, self.a, output)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes used by the threshold array. """
+        return self.a.nbytes
+
+    def __str__(self):
+        return 'hardthreshold(..., {0})'.format(self.a)
+
+
+@square
+@inplace
+@separable
+class SoftThresholdingOperator(Operator):
+    """
+    Soft thresholding operator.
+
+    Sa(x) = sign(x) [|x| - a]+
+
+    If the threshold is zero everywhere, the instance is turned into an
+    IdentityOperator.
+
+    Parameter
+    ---------
+    a : positive float or array
+        The soft threshold. If it is an array, its shape sets the operator's
+        input shape. A ValueError is raised if any threshold is negative.
+
+    """
+    def __init__(self, a, **keywords):
+        a = np.asarray(a)
+        if np.any(a < 0):
+            raise ValueError('Negative soft threshold.')
+        if a.ndim > 0:
+            keywords['shapein'] = a.shape
+        if 'dtype' not in keywords:
+            keywords['dtype'] = float
+        if np.all(a == 0):
+            # nothing is thresholded: morph into the identity
+            self.__class__ = IdentityOperator
+            self.__init__(**keywords)
+            return
+        Operator.__init__(self, **keywords)
+        self.a = a
+
+    def direct(self, input, output):
+        soft_thresholding(input, self.a, output)
+
+    @property
+    def nbytes(self):
+        """ Number of bytes used by the threshold array. """
+        return self.a.nbytes
+
+    def __str__(self):
+        return 'softthreshold(..., {0})'.format(self.a)
+
+
+@separable
+class _1dNdOperator(Operator):
+    """
+    Base class for 1d-Nd coordinate mappings.
+
+    Parameters
+    ----------
+    shape_ : tuple of int
+        The shape of the array whose elements are indexed. One-dimensional
+        shapes are not implemented.
+    order : str
+        'C' for row-major and 'F' for column-major 1-d indexing
+        (case-insensitive).
+
+    """
+    def __init__(self, shape_, order='C', **keywords):
+        shape_ = tointtuple(shape_)
+        ndim = len(shape_)
+        if ndim == 1:
+            raise NotImplementedError('ndim == 1 is not implemented.')
+        if order.upper() not in ('C', 'F'):
+            raise ValueError("Invalid order '{0}'. Expected order is 'C' or 'F"
+                             "'".format(order))
+        order = order.upper()
+
+        Operator.__init__(self, **keywords)
+        self.shape_ = shape_
+        self.order = order
+        self.ndim = ndim
+        # 'coefs' holds the 1-d stride (in elements) of each dimension.
+        # np.cumprod is used since np.cumproduct was removed in NumPy 2.0.
+        if order == 'C':
+            self.coefs = np.cumprod((1,) + shape_[:0:-1])[::-1]
+        elif order == 'F':
+            self.coefs = np.cumprod((1,) + shape_[:-1])
+
+    def _reshape_to1d(self, shape):
+        # Nd coordinates -> 1d indices: the last dimension is dropped
+        return shape[:-1]
+
+    def _reshape_tond(self, shape):
+        # 1d indices -> Nd coordinates: a dimension of length ndim is appended
+        return shape + (self.ndim,)
+
+    def _validate_to1d(self, shape):
+        if shape[-1] != self.ndim:
+            raise ValueError("Invalid shape '{0}'. The expected last dimension"
+                             " is '{1}'.".format(shape, self.ndim))
+
+
+class To1dOperator(_1dNdOperator):
+    """
+    Convert an N-dimensional indexing to a 1-dimensional indexing.
+
+    C order:
+    -------------------------      -------------
+    | (0,0) | (0,1) | (0,2) |      | 0 | 1 | 2 |
+    -------------------------  =>  -------------
+    | (1,0) | (1,1) | (1,2) |      | 3 | 4 | 5 |
+    -------------------------      -------------
+
+    Fortran order:
+    -------------------------      -------------
+    | (0,0) | (0,1) | (0,2) |      | 0 | 2 | 4 |
+    -------------------------  =>  -------------
+    | (1,0) | (1,1) | (1,2) |      | 1 | 3 | 5 |
+    -------------------------      -------------
+
+    The inverse of this operator is a ToNdOperator with the same shape and
+    order.
+
+    Parameters
+    ----------
+    shape_ : tuple of int
+        The shape of the array whose elements' multi-dimensional coordinates
+        will be converted into 1-d coordinates.
+    order : str
+        'C' for row-major and 'F' for column-major 1-d indexing.
+
+    """
+    def __init__(self, shape_, order='C', **keywords):
+        # default shape handlers, unless the caller supplies its own
+        keywords.setdefault('reshapein', self._reshape_to1d)
+        keywords.setdefault('reshapeout', self._reshape_tond)
+        keywords.setdefault('validatein', self._validate_to1d)
+        _1dNdOperator.__init__(self, shape_, order=order, **keywords)
+        self.set_rule('I', lambda s: ToNdOperator(s.shape_, order=s.order))
+
+    def direct(self, input, output):
+        # the 1-d index is the dot product of the coordinates and the strides
+        np.dot(input, self.coefs, out=output)
+
+
+class ToNdOperator(_1dNdOperator):
+    """
+    Convert a 1-dimensional indexing to an N-dimensional indexing.
+
+    C order:
+    -------------      -------------------------
+    | 0 | 1 | 2 |      | (0,0) | (0,1) | (0,2) |
+    -------------  =>  -------------------------
+    | 3 | 4 | 5 |      | (1,0) | (1,1) | (1,2) |
+    -------------      -------------------------
+
+    Fortran order:
+    -------------      -------------------------
+    | 0 | 2 | 4 |      | (0,0) | (0,1) | (0,2) |
+    -------------  =>  -------------------------
+    | 1 | 3 | 5 |      | (1,0) | (1,1) | (1,2) |
+    -------------      -------------------------
+
+    The inverse of this operator is a To1dOperator with the same shape and
+    order.
+
+    Parameters
+    ----------
+    shape_ : tuple of int
+        The shape of the array whose elements' 1-d coordinates will be
+        converted into multi-dimensional coordinates.
+    order : str
+        'C' for row-major and 'F' for column-major 1-d indexing.
+
+    """
+    def __init__(self, shape_, order='C', **keywords):
+        # default shape handlers, unless the caller supplies its own
+        keywords.setdefault('reshapein', self._reshape_tond)
+        keywords.setdefault('reshapeout', self._reshape_to1d)
+        keywords.setdefault('validateout', self._validate_to1d)
+        _1dNdOperator.__init__(self, shape_, order=order, **keywords)
+        self.set_rule('I', lambda s: To1dOperator(s.shape_, order=s.order))
+
+    def direct(self, input, output):
+        # coordinate along a dimension: (index // stride) % dimension length
+        indices = input[..., None]
+        np.floor_divide(indices, self.coefs, out=output)
+        np.mod(output, self.shape_, out=output)
+
+    def __str__(self):
+        return 'toNd'
diff --git a/pyoperators/norms.py b/pyoperators/norms.py
new file mode 100644
index 0000000..45d7ef4
--- /dev/null
+++ b/pyoperators/norms.py
@@ -0,0 +1,41 @@
+from __future__ import absolute_import, division, print_function
+import numpy as np
+from .core import (
+    BlockOperator, BlockRowOperator, CompositionOperator, Operator)
+from .flags import real
+from .utils import MPI
+
+# These functions may be overridden. Note that 'sum' shadows the builtin of
+# the same name inside this module.
+sum = np.sum
+dot = np.dot
+
+
+@real
+class NormOperator(Operator):
+    """
+    Base class for norm operators, whose output is a scalar.
+
+    When a norm operator is composed with a block operator that has an
+    output partition, the composition is rewritten by '_rule_block' as a
+    block row operator with the 'sum' operation.
+
+    """
+    # class-level defaults: no input communicator, scalar output
+    commin = None
+    commout = MPI.COMM_SELF
+    shapeout = ()
+    operation = 'sum'
+
+    def __init__(self, **keywords):
+        Operator.__init__(self, **keywords)
+        self.set_rule(('.', BlockOperator), lambda s, b: s._rule_block(s, b),
+                      CompositionOperator)
+
+    @staticmethod
+    def _rule_block(self, b):
+        """
+        Rule for the composition of a norm and the block operator 'b'.
+
+        This is a static method: the norm operator is explicitly passed as
+        the first argument. None is returned, i.e. the rule does not apply,
+        if the block operator has no output partition.
+
+        """
+        if b.partitionout is None:
+            return
+        s = self.copy()
+        s.commin = None
+        # NOTE(review): 'len(b.partitionout) * s' multiplies an integer by
+        # an operator; a list of operands such as 'len(...) * [s]' may have
+        # been intended -- confirm.
+        return BlockRowOperator(len(b.partitionout) * s, operation='sum',
+                                partitionin=b.partitionout, axisin=b.axisout,
+                                new_axisin=b.new_axisout, commin=b.commout,
+                                commout=self.commout) * b
+
+
+class Norm2Operator(NormOperator):
+    """ Dot product of the input with itself, reduced over 'commin'. """
+    def direct(self, input, output):
+        output[...] = dot(input, input)
+        comm = self.commin
+        if comm is None:
+            return
+        # combine the local results of the processes, in place
+        comm.Allreduce(MPI.IN_PLACE, output)
diff --git a/pyoperators/operators_mpi.py b/pyoperators/operators_mpi.py
new file mode 100644
index 0000000..9b290e9
--- /dev/null
+++ b/pyoperators/operators_mpi.py
@@ -0,0 +1,154 @@
+from __future__ import absolute_import, division, print_function
+import numpy as np
+from .core import IdentityOperator, Operator
+from .flags import real, linear, square, inplace
+from .utils import isalias, split
+from .utils.mpi import MPI, as_mpi, distribute_shape, timer_mpi
+
+__all__ = ['MPIDistributionGlobalOperator',
+           'MPIDistributionIdentityOperator']
+
+
+@real
+@linear
+class MPIDistributionGlobalOperator(Operator):
+    """
+    Distribute sections of a global map to different MPI processes.
+
+    It is a block column operator, whose blocks are distributed across the MPI
+    processes.
+
+    MPI rank 0 --> |I O O|
+                   +-----+
+    MPI rank 1 --> |O I O|
+                   +-----+
+    MPI rank 2 --> |O O I|
+
+    Parameters
+    ----------
+    shapein : tuple of int
+        The shape of the global map, which is split along its first
+        dimension. A ValueError is raised if it is None.
+    commout : MPI communicator, optional
+        The communicator across which the map is distributed
+        (default: MPI.COMM_WORLD).
+
+    Example
+    -------
+    Given the file 'example_dgo.py':
+
+    import numpy as np
+    from pyoperators import MPIDistributionGlobalOperator
+    from mpi4py import MPI
+    x_global = np.array([1,2,3])
+    d = MPIDistributionGlobalOperator(x_global.shape)
+    x_local = d(x_global)
+    print(MPI.COMM_WORLD.rank, ':', x_local, np.all(d.T(x_local) == x_global))
+
+    the following command:
+    $ mpirun -n 3 python example_dgo.py
+
+    will output (in random rank order):
+    0 : [1] True
+    1 : [2] True
+    2 : [3] True
+
+    """
+
+    def __init__(self, shapein, commout=None, **keywords):
+
+        if shapein is None:
+            raise ValueError('The input shape is None.')
+        commout = commout or MPI.COMM_WORLD
+
+        shapeout = distribute_shape(shapein, comm=commout)
+        slice_ = split(shapein[0], commout.size, commout.rank)
+
+        # Number of elements in one section along the first dimension. The
+        # integer dtype matters: the product of an empty sequence would
+        # otherwise be the float 1.0 and the MPI counts would be floats.
+        # (np.prod is used since np.product was removed in NumPy 2.0.)
+        nitems = int(np.prod(shapein[1:], dtype=int))
+
+        # number of elements held by each process and their offsets in the
+        # global map
+        counts = []
+        offsets = [0]
+        for s in split(shapein[0], commout.size):
+            n = (s.stop - s.start) * nitems
+            counts.append(n)
+            offsets.append(offsets[-1] + n)
+        offsets.pop()
+        Operator.__init__(self, commin=MPI.COMM_SELF, commout=commout,
+                          shapein=shapein, shapeout=shapeout, **keywords)
+        self.slice = slice_
+        self.counts = counts
+        self.offsets = offsets
+
+    def direct(self, input, output):
+        # keep the section of the global map assigned to this process
+        output[:] = input[self.slice.start:self.slice.stop]
+
+    def transpose(self, input, output):
+        if input.itemsize != output.itemsize:
+            input = input.astype(output.dtype)
+        nbytes = output.itemsize
+        # the arrays are gathered as raw bytes: counts and offsets are
+        # converted from numbers of elements to numbers of bytes
+        with timer_mpi:
+            self.commout.Allgatherv(
+                input.view(np.byte), [output.view(np.byte),
+                                      ([c * nbytes for c in self.counts],
+                                       [o * nbytes for o in self.offsets])])
+
+
+@real
+@linear
+@square
+@inplace
+class MPIDistributionIdentityOperator(Operator):
+    """
+    Distribute a global map, of which each MPI process has a copy, to the
+    MPI processes.
+
+    It is a block column operator whose blocks are identities distributed
+    across the MPI processes.
+
+                   |1   O|
+    MPI rank 0 --> |  .  |
+                   |O   1|
+                   +-----+
+                   |1   O|
+    MPI rank 1 --> |  .  |
+                   |O   1|
+                   +-----+
+                   |1   O|
+    MPI rank 2 --> |  .  |
+                   |O   1|
+
+    For an MPI process, the direct method is the Identity and the transpose
+    method is a reduction. If the communicator has a single process, the
+    instance is turned into an IdentityOperator.
+
+    Parameters
+    ----------
+    commout : MPI communicator, optional
+        The communicator across which the map is distributed
+        (default: MPI.COMM_WORLD).
+
+    Example
+    -------
+    Given the file 'example_dio.py':
+
+    import numpy as np
+    from pyoperators import MPIDistributionIdentityOperator
+    from mpi4py import MPI
+    x_global = np.array([1,1,1])
+    d = MPIDistributionIdentityOperator()
+    x_local = x_global * (MPI.COMM_WORLD.rank + 1)
+    print(MPI.COMM_WORLD.rank, ':', np.all(d(x_global)==x_global), d.T(x_local))
+
+    the following command:
+    $ mpirun -n 3 python example_dio.py
+
+    will output (in random rank order):
+    0 : True [6 6 6]
+    1 : True [6 6 6]
+    2 : True [6 6 6]
+
+    """
+
+    def __init__(self, commout=None, **keywords):
+        if commout is None:
+            commout = MPI.COMM_WORLD
+        if commout.size == 1:
+            # a single process: there is nothing to distribute
+            self.__class__ = IdentityOperator
+            self.__init__(**keywords)
+            return
+
+        Operator.__init__(self, commin=MPI.COMM_SELF, commout=commout,
+                          **keywords)
+
+    def direct(self, input, output):
+        if isalias(input, output):
+            return
+        output[...] = input
+
+    def transpose(self, input, output):
+        if not isalias(input, output):
+            output[...] = input
+        # sum the local maps of all the processes, in place
+        with timer_mpi:
+            self.commout.Allreduce(MPI.IN_PLACE, as_mpi(output), op=MPI.SUM)
diff --git a/pyoperators/operators_pywt.py b/pyoperators/operators_pywt.py
new file mode 100644
index 0000000..c328304
--- /dev/null
+++ b/pyoperators/operators_pywt.py
@@ -0,0 +1,165 @@
+""" Wrap PyWavelets wavelet transforms into Operators.
+
+For now only 1D and 2D wavelets are available.
+
+"""
+from __future__ import absolute_import, division, print_function
+import numpy as np
+import pywt
+from .core import Operator, CompositionOperator
+from .flags import linear, real
+
+__all__ = ['WaveletOperator', 'Wavelet2dOperator']
+
+# Dict mapping each wavelet name to the name of the wavelet that the
+# operators below use in their transpose: the 'bior' and 'rbio' families are
+# swapped (the last three characters of the name are kept), any other
+# wavelet is mapped to itself.
+rwavelist = {}
+for l in pywt.wavelist():
+    if 'bior' in l:
+        rwavelist[l] = 'rbio' + l[-3:]
+    elif 'rbio' in l:
+        rwavelist[l] = 'bior' + l[-3:]
+    else:
+        rwavelist[l] = l
+
+
+@real
+@linear
+class WaveletOperator(Operator):
+    def __init__(self, wavelet, mode='zpd', level=None, shapein=None,
+                 **keywords):
+        """
+        1D wavelet decomposition and reconstruction. Wavelet coefficients
+        are stored in a vector (ndarray with ndim=1).
+
+        Parameters
+        ----------
+        wavelet : pywt.Wavelet or string
+            The wavelet or its name.
+        mode : string, optional
+            The signal extension mode passed to pywt.
+        level : int, optional
+            The decomposition level passed to pywt.
+        shapein : int or tuple of int
+            The shape of the input. It is required to compute the sizes of
+            the wavelet coefficients.
+
+        Examples
+        --------
+        >>> W = WaveletOperator("haar", level=1, shapein=2)
+        >>> W.todense()
+        array([[ 0.70710678,  0.70710678],
+               [ 0.70710678, -0.70710678]])
+
+        See Also
+        --------
+        See pywt.MODES docstring for available modes.
+        See pywt.wavelist() for available wavelets.
+        See pywt.wavedec for the operation performed on input arrays.
+
+        Notes
+        -----
+        Wrapping around PyWavelets
+
+        """
+        if not isinstance(wavelet, pywt.Wavelet):
+            wavelet = pywt.Wavelet(wavelet)
+        self.wavelet = wavelet
+        self.rwavelet = rwavelist[wavelet.name]
+        self.mode = mode
+        self.level = level
+        # needed to get sizes of all coefficients
+        a = np.zeros(shapein)
+        b = pywt.wavedec(a, wavelet, mode=mode, level=level)
+        self.sizes = [bi.size for bi in b]
+        # integer dtype: the cumulative sizes are used as slice bounds
+        self.cumsizes = np.zeros(len(self.sizes) + 1, dtype=int)
+        np.cumsum(self.sizes, out=self.cumsizes[1:])
+        shapeout = sum(self.sizes)
+        Operator.__init__(self, shapein=shapein, shapeout=shapeout, **keywords)
+        if self.wavelet.orthogonal:
+            # for orthogonal wavelets, the transpose times the operator is
+            # the identity
+            self.set_rule('T,.', '1', CompositionOperator)
+
+    def direct(self, x, out):
+        coeffs = pywt.wavedec(x, self.wavelet, mode=self.mode,
+                              level=self.level)
+        out[:] = self._coeffs2vect(coeffs)
+
+    def transpose(self, x, out):
+        coeffs = self._vect2coeffs(x)
+        # the reconstruction is truncated to the input length
+        out[:] = pywt.waverec(coeffs, self.rwavelet,
+                              mode=self.mode)[:self.shapein[0]]
+
+    def _coeffs2vect(self, coeffs):
+        """ Concatenate the list of coefficient arrays into a vector. """
+        return np.concatenate(coeffs)
+
+    def _vect2coeffs(self, vect):
+        """ Split a vector into the list of coefficient arrays. """
+        # 'range' instead of 'xrange', which does not exist in Python 3
+        return [vect[self.cumsizes[i]:self.cumsizes[i + 1]]
+                for i in range(len(self.sizes))]
+
+
+@real
+@linear
+class Wavelet2dOperator(Operator):
+    def __init__(self, wavelet, mode='zpd', level=None, shapein=None,
+                 **keywords):
+        """
+        2D wavelet decomposition and reconstruction. Wavelet coefficients
+        are stored in a vector (ndarray with ndim=1).
+
+        Parameters
+        ----------
+        wavelet : pywt.Wavelet or string
+            The wavelet or its name.
+        mode : string, optional
+            The signal extension mode passed to pywt.
+        level : int, optional
+            The decomposition level passed to pywt.
+        shapein : tuple of int
+            The shape of the input. It is required to compute the shapes
+            and sizes of the wavelet coefficients.
+
+        Example
+        -------
+        >>> W = Wavelet2dOperator("haar", level=1, shapein=(2, 2))
+        >>> W.todense()
+        array([[ 0.5,  0.5,  0.5,  0.5],
+               [ 0.5,  0.5, -0.5, -0.5],
+               [ 0.5, -0.5,  0.5, -0.5],
+               [ 0.5, -0.5, -0.5,  0.5]])
+
+        See Also
+        --------
+        See pywt.MODES docstring for available modes.
+        See pywt.wavelist() for available wavelets.
+        See pywt.wavedec2 for the operation performed on input arrays.
+
+        Notes
+        -----
+        Wrapping around PyWavelets
+
+        """
+        if not isinstance(wavelet, pywt.Wavelet):
+            wavelet = pywt.Wavelet(wavelet)
+        self.wavelet = wavelet
+        self.rwavelet = rwavelist[wavelet.name]
+        self.mode = mode
+        self.level = level
+        # compute shapes and sizes
+        a = np.zeros(shapein)
+        coeffs = pywt.wavedec2(a, wavelet, mode=mode, level=level)
+        approx = coeffs[0]
+        details = coeffs[1:]
+        # one shape for the approximation, then three per detail level
+        self.shapes = [approx.shape]
+        self.shapes += [d[i].shape for d in details for i in range(3)]
+        self.sizes = [np.prod(s) for s in self.shapes]
+        # integer dtype: the cumulative sizes are used as slice bounds
+        self.cumsizes = np.zeros(len(self.sizes) + 1, dtype=int)
+        np.cumsum(self.sizes, out=self.cumsizes[1:])
+        shapeout = sum(self.sizes)
+
+        Operator.__init__(self, shapein=shapein, shapeout=shapeout, **keywords)
+        if self.wavelet.orthogonal:
+            # for orthogonal wavelets, the transpose times the operator is
+            # the identity
+            self.set_rule('T,.', '1', CompositionOperator)
+
+    def direct(self, x, out):
+        coeffs = pywt.wavedec2(x, self.wavelet, mode=self.mode,
+                               level=self.level)
+        out[:] = self._coeffs2vect(coeffs)
+
+    def transpose(self, x, out):
+        coeffs = self._vect2coeffs(x)
+        rec = pywt.waverec2(coeffs, self.rwavelet, mode=self.mode)
+        # the reconstruction is truncated to the input shape
+        out[:] = rec[:self.shapein[0], :self.shapein[1]]
+
+    def _coeffs2vect(self, coeffs):
+        """ Flatten the approximation and detail arrays into a vector. """
+        # distinguish between approximation and details
+        approx = coeffs[0]
+        details = coeffs[1:]
+        # transform 2d arrays into vectors
+        # ('range' instead of 'xrange', which does not exist in Python 3)
+        vect_coeffs = [approx.ravel()]
+        vect_coeffs += [d[i].ravel() for d in details for i in range(3)]
+        # put everything into a single coefficient
+        return np.concatenate(vect_coeffs)
+
+    def _vect2coeffs(self, vect):
+        """ Split a vector into the approximation and the detail arrays. """
+        cs = self.cumsizes
+        approx = [vect[:self.sizes[0]].reshape(self.shapes[0])]
+        # the details are grouped by level, three arrays per level
+        details = [[vect[cs[i + j]:cs[i + j + 1]].reshape(self.shapes[i + j])
+                    for j in range(3)] for i in range(1, len(self.sizes), 3)]
+        return approx + details
diff --git a/pyoperators/proxy.py b/pyoperators/proxy.py
new file mode 100644
index 0000000..e9c486e
--- /dev/null
+++ b/pyoperators/proxy.py
@@ -0,0 +1,242 @@
+from __future__ import absolute_import, division, print_function
+import inspect
+from . import core
+from .utils import operation_assignment, strshape
+
+__all__ = ['proxy_group']
+
+
+class ProxyBase(core.Operator):
+    """
+    Base class for the members of a proxy group.
+
+    Parameters
+    ----------
+    number : int
+        The number identifying the proxy, passed to the callback.
+    common : list of two elements
+        The object shared by the members of the group: the number of the
+        cached operator and the cached operator itself.
+    callback : function
+        The function returning the actual operator, given a proxy number.
+    flags : Flags
+        The operator flags.
+
+    """
+    def __init__(self, number, common, callback, flags, **keywords):
+        if len(common) != 2:
+            raise ValueError('Invalid common list for on-fly operators.')
+        self.number = number
+        self.common = common
+        self.callback = callback
+        core.Operator.__init__(self, flags=flags, **keywords)
+
+    def get_operator(self):
+        """
+        Return the actual operator, calling the callback if it is not the
+        cached one.
+
+        """
+        if self.common[0] != self.number:
+            # another operator is cached: delete it and replace it in place,
+            # so that all the members of the group see the new one
+            self.common[1].delete()
+            self.common[:] = self.number, self.callback(self.number)
+        return self.common[1]
+
+    def direct(self, x, out, operation=operation_assignment):
+        op = self.get_operator()
+        if op.flags.update_output:
+            op.direct(x, out, operation=operation)
+        else:
+            op.direct(x, out)
+
+    # The methods below delegate to the currently cached operator, without
+    # calling the callback.
+
+    def reshapein(self, shape):
+        return self.common[1].reshapein(shape)
+
+    def reshapeout(self, shape):
+        return self.common[1].reshapeout(shape)
+
+    def toshapein(self, x):
+        return self.common[1].toshapein(x)
+
+    def toshapeout(self, x):
+        return self.common[1].toshapeout(x)
+
+    def validatein(self, shape):
+        self.common[1].validatein(shape)
+
+    def validateout(self, shape):
+        self.common[1].validateout(shape)
+
+    def __repr__(self):
+        number = self.number
+        cls = self.common[1].__name__
+        # only the first two lines of the callback source are displayed
+        source = '\n'.join(inspect.getsource(self.callback).split('\n')[:2])
+        if self.shapein is not None:
+            sin = ', shapein={0}'.format(strshape(self.shapein))
+        else:
+            sin = ''
+        # the output shape is shown when it is known, whether or not the
+        # input shape is
+        if self.shapeout is not None:
+            sout = ', shapeout={0}'.format(strshape(self.shapeout))
+        else:
+            sout = ''
+        return '{0}({1}, {2}, {3!r}{4}{5})'.format(
+            type(self).__name__, number, cls, source, sin, sout)
+
+    __str__ = __repr__
+
+
+class ProxyReverseBase(ProxyBase):
+    """
+    Base class for the proxies whose input and output are swapped with
+    respect to those of the cached operator: each input-side method
+    delegates to the output-side method of the cached operator, and
+    conversely.
+
+    """
+    def reshapein(self, shape):
+        cached = self.common[1]
+        return cached.reshapeout(shape)
+
+    def reshapeout(self, shape):
+        cached = self.common[1]
+        return cached.reshapein(shape)
+
+    def toshapein(self, x):
+        cached = self.common[1]
+        return cached.toshapeout(x)
+
+    def toshapeout(self, x):
+        cached = self.common[1]
+        return cached.toshapein(x)
+
+    def validatein(self, shape):
+        cached = self.common[1]
+        cached.validateout(shape)
+
+    def validateout(self, shape):
+        cached = self.common[1]
+        cached.validatein(shape)
+
+
+class ProxyOperator(ProxyBase):
+    """
+    Proxy operators, for on-the-fly computations.
+
+    This operator is meant to be used in a group of proxy operators. When
+    a member of such a group is called, a callback function is used to access
+    the actual operator. This operator is then cached and subsequent uses of
+    this operator (including the associated operators, such as conjugate,
+    transpose, etc.) will not require another call to the potentially expensive
+    callback function. For example, given the group of proxy operators
+    [o1, o2, o3], the sum o1.T * o1 + o2.T * o2 + o3.T * o3 only makes three
+    calls to the callback function.
+
+    The 'flags_*' arguments are the flags given to the proxies of the
+    associated operators (conjugate, transpose, adjoint, inverse, etc.),
+    which are created by the rules set in the constructor. The other
+    arguments are those of ProxyBase.
+
+    """
+    def __init__(self, number, common, callback, flags, flags_conjugate=None,
+                 flags_transpose=None, flags_adjoint=None, flags_inverse=None,
+                 flags_inverse_conjugate=None, flags_inverse_transpose=None,
+                 flags_inverse_adjoint=None, **keywords):
+        ProxyBase.__init__(self, number, common, callback, flags, **keywords)
+        self.flags_conjugate = flags_conjugate
+        self.flags_transpose = flags_transpose
+        self.flags_adjoint = flags_adjoint
+        self.flags_inverse = flags_inverse
+        self.flags_inverse_conjugate = flags_inverse_conjugate
+        self.flags_inverse_transpose = flags_inverse_transpose
+        self.flags_inverse_adjoint = flags_inverse_adjoint
+        # each associated operator is a proxy sharing the same number, common
+        # object and callback as this proxy
+        self.set_rule('C', lambda s: ProxyConjugateOperator(
+            s.number, s.common, s.callback, s.flags_conjugate))
+        self.set_rule('T', lambda s: ProxyTransposeOperator(
+            s.number, s.common, s.callback, s.flags_transpose))
+        self.set_rule('H', lambda s: ProxyAdjointOperator(
+            s.number, s.common, s.callback, s.flags_adjoint))
+        self.set_rule('I', lambda s: ProxyInverseOperator(
+            s.number, s.common, s.callback, s.flags_inverse))
+        self.set_rule('IC', lambda s: ProxyInverseConjugateOperator(
+            s.number, s.common, s.callback, s.flags_inverse_conjugate))
+        self.set_rule('IT', lambda s: ProxyInverseTransposeOperator(
+            s.number, s.common, s.callback, s.flags_inverse_transpose))
+        self.set_rule('IH', lambda s: ProxyInverseAdjointOperator(
+            s.number, s.common, s.callback, s.flags_inverse_adjoint))
+
+    def __getattr__(self, name):
+        # Only called when the normal attribute lookup fails: the attribute
+        # is then looked up in the actual operator, which may trigger a call
+        # to the callback.
+        return getattr(self.get_operator(), name)
+
+
+class ProxyConjugateOperator(ProxyBase):
+    """ Proxy of the conjugate ('C') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.C
+
+
+class ProxyTransposeOperator(ProxyReverseBase):
+    """ Proxy of the transpose ('T') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.T
+
+
+class ProxyAdjointOperator(ProxyReverseBase):
+    """ Proxy of the adjoint ('H') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.H
+
+
+class ProxyInverseOperator(ProxyReverseBase):
+    """ Proxy of the inverse ('I') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.I
+
+
+class ProxyInverseConjugateOperator(ProxyReverseBase):
+    """ Proxy of the inverse conjugate ('I.C') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.I.C
+
+
+class ProxyInverseTransposeOperator(ProxyBase):
+    """ Proxy of the inverse transpose ('I.T') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.I.T
+
+
+class ProxyInverseAdjointOperator(ProxyBase):
+    """ Proxy of the inverse adjoint ('I.H') of the actual operator. """
+    def get_operator(self):
+        actual = ProxyBase.get_operator(self)
+        return actual.I.H
+
+
+def proxy_group(n, callback, shapeins=None, shapeouts=None):
+    """
+    Return a group of proxy operators, for on-the-fly computations.
+
+    When a member of such a group is called, a callback function is used
+    to access the actual operator. This operator is then cached and subsequent
+    uses of this operator (including the associated operators, such as
+    conjugate, transpose, etc.) will not require another call to the
+    potentially expensive callback function. In a proxy group, only one
+    instance of actual operator is kept in an object that is common to all
+    members of the group. For example, given the group of proxy operators
+    [o1, o2, o3], the sum o1.T * o1 + o2.T * o2 + o3.T * o3 only calls three
+    times the callback function.
+
+    Note
+    ----
+    By default, it is assumed that the proxies have the same input and output
+    shape. If it is not the case, all the shapes should be specified with the
+    'shapeins' and 'shapeouts' keywords.
+    It is also assumed that all the proxies have the same flags: they are
+    taken from the operator returned by callback(0).
+
+    Parameters
+    ----------
+    n : int
+        The number of elements in the proxy group.
+
+    callback : function
+        A function with a single integer argument that ranges from 0 to n-1.
+        Its output is an Operator, and its class and flags should be the same.
+
+    shapeins : sequence of shapes, optional
+        The input shapes of the proxies. By default, the input shape of the
+        operator returned by callback(0) is used for the n proxies.
+
+    shapeouts : sequence of shapes, optional
+        The output shapes of the proxies. By default, the output shape of the
+        operator returned by callback(0) is used for the n proxies.
+
+    Returns
+    -------
+    ops : list of ProxyOperator
+        The proxies, one per pair of input and output shapes.
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> from pyoperators import BlockColumnOperator, DenseOperator, proxy_group
+    >>> NPROXIES = 3
+    >>> N = 1000
+    >>> counter = 0
+    >>> def callback(number):
+    ...     global counter
+    ...     counter += 1
+    ...     np.random.seed(number)
+    ...     return DenseOperator(np.random.standard_normal((N, N)))
+    >>> group = proxy_group(NPROXIES, callback)
+    >>> op = BlockColumnOperator(group, new_axisout=0)
+    >>> opTop = op.T * op
+    >>> y = opTop(np.ones(N))
+    >>> print(counter)
+    3
+
+    """
+    # the first operator is the template for the flags of the whole group
+    first = callback(0)
+    flags = first.flags
+    associated_flags = dict(
+        flags_conjugate=first.C.flags,
+        flags_transpose=first.T.flags,
+        flags_adjoint=first.H.flags,
+        flags_inverse=first.I.flags,
+        flags_inverse_conjugate=first.I.C.flags,
+        flags_inverse_transpose=first.I.T.flags,
+        flags_inverse_adjoint=first.I.H.flags)
+    if shapeins is None:
+        shapeins = (first.shapein,) * n
+    if shapeouts is None:
+        shapeouts = (first.shapeout,) * n
+
+    # object shared by the proxies: [number of the cached operator, operator]
+    common = [0, first]
+    ops = []
+    for number, (shapein, shapeout) in enumerate(zip(shapeins, shapeouts)):
+        ops.append(ProxyOperator(number, common, callback, flags,
+                                 dtype=first.dtype, shapein=shapein,
+                                 shapeout=shapeout, **associated_flags))
+    return ops
diff --git a/pyoperators/rules.py b/pyoperators/rules.py
new file mode 100644
index 0000000..941a099
--- /dev/null
+++ b/pyoperators/rules.py
@@ -0,0 +1,426 @@
+from __future__ import absolute_import, division, print_function
+
+import inspect
+import types
+import os
+from . import config
+from .core import HomothetyOperator, IdentityOperator, Operator, ZeroOperator
+from .warnings import warn, PyOperatorsWarning
+
+__all__ = ['rule_manager']
+_triggers = {}  # global rule name -> value dictionary behind RuleManager
+_default_triggers = {  # values loaded when the dictionary is empty
+    'inplace': False,
+    'none': False}
+_description_triggers = {  # texts shown after '#' by RuleManager.__str__
+    'inplace': 'Allow inplace simplifications',
+    'none': 'Inhibit all rule simplifications'}
+
+
+class Rule(object):
+    """
+    Abstract class for operator rules.
+
+    An operator rule is a relation that can be expressed by the sentence
+    "'subjects' are 'predicate'". An instance of this class, when called,
+    checks if the inputs are subject to the rule, and returns the predicate
+    if it is the case. Otherwise, it returns None. Instantiating Rule itself
+    yields a UnaryRule or a BinaryRule, depending on the number of subjects.
+    """
+    def __init__(self, subjects, predicate):
+        # Validate the subjects, then dispatch on how many there are.
+        if not isinstance(subjects, (list, str, tuple)):
+            raise TypeError("The input {0} is invalid.".format(subjects))
+
+        subjects_ = self._split_subject(subjects)
+        if any(not isinstance(s, str) and (not isinstance(s, type) or
+               not issubclass(s, Operator)) for s in subjects_):
+            raise TypeError("The subjects {0} are invalid.".format(subjects))
+        if len(subjects_) == 0:
+            raise ValueError('No rule subject is specified.')
+        if len(subjects_) > 2:
+            raise ValueError('No more than 2 subjects can be specified.')
+        if not isinstance(self, UnaryRule) and len(subjects_) == 1:
+            self.__class__ = UnaryRule
+            self.__init__(subjects, predicate)
+            return
+        if not isinstance(self, BinaryRule) and len(subjects_) == 2:
+            self.__class__ = BinaryRule
+            self.__init__(subjects, predicate)
+            return
+        # From here on, self is already an instance of the matching subclass.
+        if '1' in subjects_:
+            raise ValueError("'1' cannot be a subject.")
+        if not isinstance(predicate, (str, types.FunctionType)):
+            raise TypeError('Invalid predicate.')
+        if isinstance(predicate, str) and '{' in predicate:
+            raise ValueError("Predicate cannot be a subclass.")
+
+        self.subjects = subjects_
+        self.predicate = predicate
+
+    def __eq__(self, other):
+        if not isinstance(other, Rule):
+            return NotImplemented
+        if self.subjects != other.subjects:
+            return False
+        if isinstance(self.predicate, types.FunctionType):
+            # __code__ (unlike func_code) exists on Python 2.6+ and Python 3
+            return (type(self.predicate) is type(other.predicate) and
+                    self.predicate.__code__ is other.predicate.__code__)
+        if isinstance(self.predicate, str):
+            return self.predicate == other.predicate
+        return self.predicate is other.predicate
+
+    @staticmethod
+    def _symbol2operator(op, symbol):
+        if not isinstance(symbol, str):
+            return symbol
+        if symbol == '1':
+            return IdentityOperator()
+        if symbol == '.':
+            return op
+        try:
+            return {'C': op._C,
+                    'T': op._T,
+                    'H': op._H,
+                    'I': op._I}[symbol]
+        except KeyError:
+            raise ValueError("Invalid symbol: '{0}'.".format(symbol))
+
+    @classmethod
+    def _split_subject(cls, subject):
+        if isinstance(subject, str):
+            subject = subject.split(',')
+        if not isinstance(subject, (list, tuple)):
+            raise TypeError('The rule subject is invalid.')
+        subject = tuple(s.replace(' ', '') if isinstance(s, str) else s
+                        for s in subject)
+        valid = '.,C,T,H,I,IC,IT,IH'.split(',')
+        if any((not isinstance(s, str) or s not in valid) and
+               (not isinstance(s, type) or not issubclass(s, Operator))
+               for s in subject):
+            raise ValueError('The rule subject is invalid.')
+        return subject
+
+    def __str__(self):
+        subjects = [s if isinstance(s, str) else s.__name__
+                    for s in self.subjects]
+        spredicate = ' '.join(s.strip() for s in inspect.getsource(
+            self.predicate).split('\n')) \
+            if isinstance(self.predicate, types.LambdaType) \
+            else self.predicate
+        return '{0} = {1}'.format(','.join(subjects), spredicate)
+
+    __repr__ = __str__
+
+
+class UnaryRule(Rule):
+    """
+    Unary rule on operators.
+
+    An operator unary rule is a relation that can be expressed by the sentence
+    "'subject' is 'predicate'".
+
+    Parameters
+    ----------
+    subject : str
+        It defines the property of the operator for which the predicate holds:
+            'C' : the operator conjugate
+            'T' : the operator transpose
+            'H' : the operator adjoint
+            'I' : the operator inverse
+            'IC' : the operator inverse-conjugate
+            'IT' : the operator inverse-transpose
+            'IH' : the operator inverse-adjoint
+
+    predicate : function or str
+        What is returned by the rule when it applies. It can be:
+            '1' : the identity operator
+            '.' : the operator itself
+            or a callable of one argument.
+
+    Example
+    -------
+    >>> rule = UnaryRule('T', '.')
+    >>> o = Operator()
+    >>> oT = rule(o)
+    >>> oT is o
+    True
+
+    """
+    def __init__(self, subjects, predicate):
+        super(UnaryRule, self).__init__(subjects, predicate)
+        if len(self.subjects) != 1:
+            raise ValueError('This is not a unary rule.')
+        if self.subjects[0] == '.':
+            raise ValueError('The subject cannot be the operator itself.')
+        if callable(predicate) or predicate in ('.', '1'):
+            return
+        raise ValueError("Invalid predicate: '{0}'.".format(predicate))
+
+    def __call__(self, reference):
+        predicate = self._symbol2operator(reference, self.predicate)
+        if predicate is None:
+            return None
+        if not isinstance(predicate, Operator) and callable(predicate):
+            predicate = predicate(reference)
+        if not isinstance(predicate, Operator):
+            raise TypeError('The predicate is not an operator.')
+        return predicate
+
+
+class BinaryRule(Rule):
+    """
+    Binary rule on operators.
+
+    An operator rule is a relation that can be expressed by the sentence
+    "'subjects' are 'predicate'". An instance of this class, when called with
+    two input arguments checks if the inputs are subjects to the rule, and
+    returns the predicate if it is the case. Otherwise, it returns None.
+
+    Parameters
+    ----------
+    subjects : str
+        It defines the relationship between the two subjects that must be
+        verified for the rule to apply. It is a pair of two
+        expressions. One has to be '.' and stands for the reference subject.
+        It determines if the reference operator is on the right or left hand
+        side of the operator pair. The other expression constrains the other
+        subject, which must be:
+            '.' : the reference operator itself.
+            'C' : the conjugate of the reference object
+            'T' : the transpose of the reference object
+            'H' : the adjoint of the reference object
+            or an Operator subclass.
+        For instance, given a string 'C,.', the rule will apply to the inputs
+        o1 and o2 if o1 is o2.C. For a condition ('.', DiagonalOperator), the
+        rule will apply if o2 is a DiagonalOperator instance.
+
+    predicate : function or str
+        If the two objects o1, o2, are subjects of the rule, the predicate
+        will be returned. The predicate can be '.', '1' or a callable
+        of two arguments.
+
+    Example
+    -------
+    >>> rule = BinaryRule('.,.', '.')
+    >>> o = Operator()
+    >>> rule(o, o) is o
+    True
+    >>> rule(o, IdentityOperator()) is None
+    True
+
+    """
+    def __init__(self, subjects, predicate):
+        super(BinaryRule, self).__init__(subjects, predicate)
+        if len(self.subjects) != 2:
+            raise ValueError('This is not a binary rule.')
+        self.reference = 1 if self.subjects[1] == '.' else 0
+        self.other = self.subjects[1-self.reference]
+
+    def __call__(self, o1, o2):
+        # The position of '.' in the subjects selects the reference operand.
+        reference, other = (o1, o2) if self.reference == 0 else (o2, o1)
+        subother = self._symbol2operator(reference, self.other)
+        # Class constraints use isinstance; symbol constraints use equality.
+        if isinstance(subother, (type, tuple)):
+            if subother is HomothetyOperator:
+                subother = (HomothetyOperator, ZeroOperator)
+            if not isinstance(other, subother):
+                return None
+        elif other != subother:
+            return None
+
+        predicate = self._symbol2operator(reference, self.predicate)
+        if predicate is None:
+            return None
+        # A callable predicate gets the operands in their original order.
+        if not isinstance(predicate, Operator) and callable(predicate):
+            predicate = predicate(o1, o2)
+        if predicate is None:
+            return None
+        if isinstance(predicate, (list, tuple)) and len(predicate) == 1:
+            predicate = predicate[0]
+        if not isinstance(predicate, Operator) \
+           and not (isinstance(predicate, (list, tuple))
+                    and all(isinstance(o, Operator)
+                            for o in predicate)):
+            raise TypeError("The predicate '{0}' is not an operator.".format(
+                            predicate))
+        return predicate
+
+
+class RuleManager(object):
+    """
+    Manage a set of rule prescriptions.
+
+    It is a proxy for the global dictionary that contains the rule names
+    and values. It also provides a context manager to change the rules inside
+    a with statement.
+    Rule defaults can be stored in a file 'rules.txt' in the user directory
+    pyoperators.config.LOCAL_PATH.
+
+    Examples
+    --------
+    To prevent rule simplifications:
+    >>> from pyoperators.rules import rule_manager
+    >>> rule_manager['none'] = True
+    or:
+    >>> with rule_manager(none=True):
+    ...     print(rule_manager['none'])
+    ...     # in this context, operator simplification rules are inhibited
+    >>> print(rule_manager['none'])
+    True
+    False
+
+    It is possible to nest contexts:
+    >>> print(rule_manager['none'])
+    >>> with rule_manager(none=True) as new_rule_manager:
+    ...     print(rule_manager['none'])
+    ...     with new_rule_manager(none=False):
+    ...         print(rule_manager['none'])
+    ...     print(rule_manager['none'])
+    >>> print(rule_manager['none'])
+    False
+    True
+    False
+    True
+    False
+
+    """
+    def __init__(self):
+        self._deferred_triggers = {}
+        if len(self) == 0:
+            self.update(_default_triggers)
+            self._update_user_default_triggers()
+
+    def __call__(self, **keywords):
+        for key in keywords:
+            if key not in self:
+                raise KeyError('Unknown rule: {!r}'.format(key))
+        self._deferred_triggers = keywords
+        return self
+
+    def __enter__(self):
+        self._old_triggers = self.copy()
+        self.update(self._deferred_triggers)
+        return RuleManager()  # fresh proxy, usable for a nested context
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        global _triggers
+        _triggers = self._old_triggers  # rebind the global to the saved copy
+        return False
+
+    def __getitem__(self, key):
+        return _triggers[key]
+
+    def __setitem__(self, key, value):
+        if key not in self:
+            raise KeyError('Unknown rule: {!r}'.format(key))
+        _triggers[key] = value
+
+    def __contains__(self, key):
+        return key in _triggers
+
+    def __iter__(self):
+        return iter(sorted(_triggers.keys()))
+
+    def __len__(self):
+        return len(_triggers)
+
+    def __str__(self):
+        nk = max(len(k) for k in self)
+        nv = max(len(repr(v)) for v in self.values())
+        s = '{0:' + str(nk) + '} = {1!r:' + str(nv) + '}  # {2}'
+        return '\n'.join(s.format(k, self[k], _description_triggers.get(k, ''))
+                         for k in self)
+
+    def clear(self):
+        """ Clear the global rule dictionary. """
+        _triggers.clear()
+
+    def copy(self):
+        """ Copy the global rule dictionary. """
+        return _triggers.copy()
+
+    def get(self, k, *args):
+        """ Get a rule value in the global rule dictionary. """
+        return _triggers.get(k, *args)
+
+    def items(self):
+        """ Return the global rule items. """
+        return _triggers.items()
+
+    def keys(self):
+        """ Return the global rule names. """
+        return _triggers.keys()
+
+    def pop(self, k, *args):
+        """ Pop a given item from the global rule dictionary. """
+        return _triggers.pop(k, *args)
+
+    def popitem(self):
+        """ Pop any item from the global rule dictionary. """
+        return _triggers.popitem()
+
+    def register(self, rule, default, description):
+        """ Add a new rule. """
+        # should not be called in a managed context
+        if not isinstance(rule, str):
+            raise TypeError('The rule is not a string.')
+        if not isinstance(description, str):
+            raise TypeError('The rule description is not a string.')
+        rule = rule.lower()
+        _triggers[rule] = default
+        _description_triggers[rule] = description
+
+    def update(self, *args, **keywords):
+        """ Update the global rule dictionary. """
+        _triggers.update(*args, **keywords)
+
+    def values(self):
+        """ Return the global rule values. """
+        return _triggers.values()
+
+    def _update_user_default_triggers(self):
+        # read user 'rules.txt' to update defaults; values go through eval()
+        path = os.path.join(config.LOCAL_PATH, 'rules.txt')
+        if not os.path.exists(path):
+            return
+        if not os.access(path, os.R_OK):
+            warn('The file {0!r} cannot be read.'.format(path),
+                 PyOperatorsWarning)
+            return
+        with open(path) as f:
+            for iline, line in enumerate(f.readlines()):
+                line = line.strip()
+                line_orig = line
+                try:
+                    index = line.index('#')
+                except ValueError:
+                    pass
+                else:
+                    line = line[:index].rstrip()
+                try:
+                    index = line.index('=')
+                except ValueError:
+                    if len(line) == 0:
+                        continue
+                    warn('In file {0!r}, line {1} does not define a rule: {2!r'
+                         '}.'.format(path, iline + 1, line_orig),
+                         PyOperatorsWarning)
+                    continue
+                key = line[:index].rstrip().lower()
+                value = line[index+1:].lstrip()
+                try:
+                    value = eval(value, {})  # NOTE(review): trusted file only
+                except Exception:
+                    warn('In file {0!r}, line {1}: {2!r} cannot be evaluated'.
+                         format(path, iline+1, value), PyOperatorsWarning)
+                    continue
+                _triggers[key] = value
+
+    __repr__ = __str__
+
+rule_manager = RuleManager()  # module-level instance exported via __all__
diff --git a/pyoperators/utils/__init__.py b/pyoperators/utils/__init__.py
new file mode 100644
index 0000000..1c2b790
--- /dev/null
+++ b/pyoperators/utils/__init__.py
@@ -0,0 +1,8 @@
+import ufuncs
+from . import mpi
+from . import testing
+from .cythonutils import *
+from .misc import *
+
+__all__ = [ 'operation_assignment' ]
+
diff --git a/pyoperators/utils/cythonutils.c b/pyoperators/utils/cythonutils.c
new file mode 100644
index 0000000..e3bb344
--- /dev/null
+++ b/pyoperators/utils/cythonutils.c
@@ -0,0 +1,7374 @@
+/* Generated by Cython 0.16 on Fri Jul 13 10:54:11 2012 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#ifndef Py_PYTHON_H
+    #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02040000
+    #error Cython requires Python 2.4+.
+#else
+#include <stddef.h> /* For offsetof */
+#ifndef offsetof
+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+  #ifndef __stdcall
+    #define __stdcall
+  #endif
+  #ifndef __cdecl
+    #define __cdecl
+  #endif
+  #ifndef __fastcall
+    #define __fastcall
+  #endif
+#endif
+
+#ifndef DL_IMPORT
+  #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+  #define DL_EXPORT(t) t
+#endif
+
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+
+#ifndef Py_HUGE_VAL
+  #define Py_HUGE_VAL HUGE_VAL
+#endif
+
+#ifdef PYPY_VERSION
+#define CYTHON_COMPILING_IN_PYPY 1
+#define CYTHON_COMPILING_IN_CPYTHON 0
+#else
+#define CYTHON_COMPILING_IN_PYPY 0
+#define CYTHON_COMPILING_IN_CPYTHON 1
+#endif
+
+#if CYTHON_COMPILING_IN_PYPY
+  #define __Pyx_PyCFunction_Call PyObject_Call
+#else
+  #define __Pyx_PyCFunction_Call PyCFunction_Call
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+  typedef int Py_ssize_t;
+  #define PY_SSIZE_T_MAX INT_MAX
+  #define PY_SSIZE_T_MIN INT_MIN
+  #define PY_FORMAT_SIZE_T ""
+  #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+  #define PyInt_AsSsize_t(o)   __Pyx_PyInt_AsInt(o)
+  #define PyNumber_Index(o)    PyNumber_Int(o)
+  #define PyIndex_Check(o)     PyNumber_Check(o)
+  #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
+  #define __PYX_BUILD_PY_SSIZE_T "i"
+#else
+  #define __PYX_BUILD_PY_SSIZE_T "n"
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+  #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
+  #define Py_TYPE(ob)   (((PyObject*)(ob))->ob_type)
+  #define Py_SIZE(ob)   (((PyVarObject*)(ob))->ob_size)
+  #define PyVarObject_HEAD_INIT(type, size) \
+          PyObject_HEAD_INIT(type) size,
+  #define PyType_Modified(t)
+
+  typedef struct {
+     void *buf;
+     PyObject *obj;
+     Py_ssize_t len;
+     Py_ssize_t itemsize;
+     int readonly;
+     int ndim;
+     char *format;
+     Py_ssize_t *shape;
+     Py_ssize_t *strides;
+     Py_ssize_t *suboffsets;
+     void *internal;
+  } Py_buffer;
+
+  #define PyBUF_SIMPLE 0
+  #define PyBUF_WRITABLE 0x0001
+  #define PyBUF_FORMAT 0x0004
+  #define PyBUF_ND 0x0008
+  #define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+  #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+  #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+  #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+  #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+  #define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
+  #define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
+
+  typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
+  typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
+#endif
+
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+          PyCode_New(a, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#else
+  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+          PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+
+#if PY_MAJOR_VERSION < 3 && PY_MINOR_VERSION < 6
+  #define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+  #define Py_TPFLAGS_CHECKTYPES 0
+  #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+
+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
+  #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+
+
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_GET_LENGTH)
+  #define CYTHON_PEP393_ENABLED 1
+  #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+  #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+#else
+  #define CYTHON_PEP393_ENABLED 0
+  #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+  #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+  #define PyBaseString_Type            PyUnicode_Type
+  #define PyStringObject               PyUnicodeObject
+  #define PyString_Type                PyUnicode_Type
+  #define PyString_Check               PyUnicode_Check
+  #define PyString_CheckExact          PyUnicode_CheckExact
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+  #define PyBytesObject                PyStringObject
+  #define PyBytes_Type                 PyString_Type
+  #define PyBytes_Check                PyString_Check
+  #define PyBytes_CheckExact           PyString_CheckExact
+  #define PyBytes_FromString           PyString_FromString
+  #define PyBytes_FromStringAndSize    PyString_FromStringAndSize
+  #define PyBytes_FromFormat           PyString_FromFormat
+  #define PyBytes_DecodeEscape         PyString_DecodeEscape
+  #define PyBytes_AsString             PyString_AsString
+  #define PyBytes_AsStringAndSize      PyString_AsStringAndSize
+  #define PyBytes_Size                 PyString_Size
+  #define PyBytes_AS_STRING            PyString_AS_STRING
+  #define PyBytes_GET_SIZE             PyString_GET_SIZE
+  #define PyBytes_Repr                 PyString_Repr
+  #define PyBytes_Concat               PyString_Concat
+  #define PyBytes_ConcatAndDel         PyString_ConcatAndDel
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+  #define PySet_Check(obj)             PyObject_TypeCheck(obj, &PySet_Type)
+  #define PyFrozenSet_Check(obj)       PyObject_TypeCheck(obj, &PyFrozenSet_Type)
+#endif
+#ifndef PySet_CheckExact
+  #define PySet_CheckExact(obj)        (Py_TYPE(obj) == &PySet_Type)
+#endif
+
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+
+#if PY_MAJOR_VERSION >= 3
+  #define PyIntObject                  PyLongObject
+  #define PyInt_Type                   PyLong_Type
+  #define PyInt_Check(op)              PyLong_Check(op)
+  #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
+  #define PyInt_FromString             PyLong_FromString
+  #define PyInt_FromUnicode            PyLong_FromUnicode
+  #define PyInt_FromLong               PyLong_FromLong
+  #define PyInt_FromSize_t             PyLong_FromSize_t
+  #define PyInt_FromSsize_t            PyLong_FromSsize_t
+  #define PyInt_AsLong                 PyLong_AsLong
+  #define PyInt_AS_LONG                PyLong_AS_LONG
+  #define PyInt_AsSsize_t              PyLong_AsSsize_t
+  #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
+  #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+  #define PyBoolObject                 PyLongObject
+#endif
+
+#if PY_VERSION_HEX < 0x03020000
+  typedef long Py_hash_t;
+  #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+  #define __Pyx_PyInt_AsHash_t   PyInt_AsLong
+#else
+  #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+  #define __Pyx_PyInt_AsHash_t   PyInt_AsSsize_t
+#endif
+
+#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
+  #define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
+  #define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
+  #define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
+#else
+  #define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
+        (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \
+        (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \
+            (PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0)))
+  #define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
+        (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
+        (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \
+            (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1)))
+  #define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
+        (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
+        (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \
+            (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1)))
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+  #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+  #define __Pyx_GetAttrString(o,n)   PyObject_GetAttrString((o),((char *)(n)))
+  #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
+  #define __Pyx_DelAttrString(o,n)   PyObject_DelAttrString((o),((char *)(n)))
+#else
+  #define __Pyx_GetAttrString(o,n)   PyObject_GetAttrString((o),(n))
+  #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
+  #define __Pyx_DelAttrString(o,n)   PyObject_DelAttrString((o),(n))
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+  #define __Pyx_NAMESTR(n) ((char *)(n))
+  #define __Pyx_DOCSTR(n)  ((char *)(n))
+#else
+  #define __Pyx_NAMESTR(n) (n)
+  #define __Pyx_DOCSTR(n)  (n)
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceTrueDivide(x,y)
+#else
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceTrueDivide(x,y)
+#endif
+
+#ifndef __PYX_EXTERN_C
+  #ifdef __cplusplus
+    #define __PYX_EXTERN_C extern "C"
+  #else
+    #define __PYX_EXTERN_C extern
+  #endif
+#endif
+
+#if defined(WIN32) || defined(MS_WINDOWS)
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+#define __PYX_HAVE__pyoperators__utils__cythonutils
+#define __PYX_HAVE_API__pyoperators__utils__cythonutils
+#include "stdio.h"
+#include "stdlib.h"
+#include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#ifdef PYREX_WITHOUT_ASSERTIONS
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+
+/* inline attribute */
+#ifndef CYTHON_INLINE
+  #if defined(__GNUC__)
+    #define CYTHON_INLINE __inline__
+  #elif defined(_MSC_VER)
+    #define CYTHON_INLINE __inline
+  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define CYTHON_INLINE inline
+  #else
+    #define CYTHON_INLINE
+  #endif
+#endif
+
+/* unused attribute */
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#     define CYTHON_UNUSED __attribute__ ((__unused__))
+#   else
+#     define CYTHON_UNUSED
+#   endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+#   define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+#   define CYTHON_UNUSED
+# endif
+#endif
+
+typedef struct {PyObject **p; char *s; const long n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
+
+
+/* Type Conversion Predeclarations */
+
+#define __Pyx_PyBytes_FromUString(s) PyBytes_FromString((char*)s)
+#define __Pyx_PyBytes_AsUString(s)   ((unsigned char*) PyBytes_AsString(s))
+
+#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
+
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
+
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+
+#ifdef __GNUC__
+  /* Test for GCC > 2.95 */
+  #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
+    #define likely(x)   __builtin_expect(!!(x), 1)
+    #define unlikely(x) __builtin_expect(!!(x), 0)
+  #else /* __GNUC__ > 2 ... */
+    #define likely(x)   (x)
+    #define unlikely(x) (x)
+  #endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+  #define likely(x)   (x)
+  #define unlikely(x) (x)
+#endif /* __GNUC__ */
+    
+static PyObject *__pyx_m;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+#if !defined(CYTHON_CCOMPLEX)
+  #if defined(__cplusplus)
+    #define CYTHON_CCOMPLEX 1
+  #elif defined(_Complex_I)
+    #define CYTHON_CCOMPLEX 1
+  #else
+    #define CYTHON_CCOMPLEX 0
+  #endif
+#endif
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    #include <complex>
+  #else
+    #include <complex.h>
+  #endif
+#endif
+#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
+  #undef _Complex_I
+  #define _Complex_I 1.0fj
+#endif
+
+
+static const char *__pyx_f[] = {
+  "cythonutils.pyx",
+  "numpy.pxd",
+};
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+struct __Pyx_StructField_;
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+typedef struct {
+  const char* name; /* for error messages only */
+  struct __Pyx_StructField_* fields;
+  size_t size;     /* sizeof(type) */
+  size_t arraysize[8]; /* length of array in each dimension */
+  int ndim;
+  char typegroup; /* _R_eal, _C_omplex, Signed _I_nt, _U_nsigned int, _S_truct, _P_ointer, _O_bject */
+  char is_unsigned;
+  int flags;
+} __Pyx_TypeInfo;
+typedef struct __Pyx_StructField_ {
+  __Pyx_TypeInfo* type;
+  const char* name;
+  size_t offset;
+} __Pyx_StructField;
+typedef struct {
+  __Pyx_StructField* field;
+  size_t parent_offset;
+} __Pyx_BufFmt_StackElem;
+typedef struct {
+  __Pyx_StructField root;
+  __Pyx_BufFmt_StackElem* head;
+  size_t fmt_offset;
+  size_t new_count, enc_count;
+  size_t struct_alignment;
+  int is_complex;
+  char enc_type;
+  char new_packmode;
+  char enc_packmode;
+  char is_valid_array;
+} __Pyx_BufFmt_Context;
+
+
+/* "numpy.pxd":722
+ * # in Cython to enable them only on the right systems.
+ * 
+ * ctypedef npy_int8       int8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t
+ */
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+/* "numpy.pxd":723
+ * 
+ * ctypedef npy_int8       int8_t
+ * ctypedef npy_int16      int16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int32      int32_t
+ * ctypedef npy_int64      int64_t
+ */
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+/* "numpy.pxd":724
+ * ctypedef npy_int8       int8_t
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int64      int64_t
+ * #ctypedef npy_int96      int96_t
+ */
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+/* "numpy.pxd":725
+ * ctypedef npy_int16      int16_t
+ * ctypedef npy_int32      int32_t
+ * ctypedef npy_int64      int64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_int96      int96_t
+ * #ctypedef npy_int128     int128_t
+ */
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+/* "numpy.pxd":729
+ * #ctypedef npy_int128     int128_t
+ * 
+ * ctypedef npy_uint8      uint8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t
+ */
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+/* "numpy.pxd":730
+ * 
+ * ctypedef npy_uint8      uint8_t
+ * ctypedef npy_uint16     uint16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint32     uint32_t
+ * ctypedef npy_uint64     uint64_t
+ */
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+/* "numpy.pxd":731
+ * ctypedef npy_uint8      uint8_t
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint64     uint64_t
+ * #ctypedef npy_uint96     uint96_t
+ */
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+/* "numpy.pxd":732
+ * ctypedef npy_uint16     uint16_t
+ * ctypedef npy_uint32     uint32_t
+ * ctypedef npy_uint64     uint64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_uint96     uint96_t
+ * #ctypedef npy_uint128    uint128_t
+ */
+typedef npy_uint64 __pyx_t_5numpy_uint64_t;
+
+/* "numpy.pxd":736
+ * #ctypedef npy_uint128    uint128_t
+ * 
+ * ctypedef npy_float32    float32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_float64    float64_t
+ * #ctypedef npy_float80    float80_t
+ */
+typedef npy_float32 __pyx_t_5numpy_float32_t;
+
+/* "numpy.pxd":737
+ * 
+ * ctypedef npy_float32    float32_t
+ * ctypedef npy_float64    float64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_float80    float80_t
+ * #ctypedef npy_float128   float128_t
+ */
+typedef npy_float64 __pyx_t_5numpy_float64_t;
+
+/* "numpy.pxd":746
+ * # The int types are mapped a bit surprising --
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long       int_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong   long_t
+ * ctypedef npy_longlong   longlong_t
+ */
+typedef npy_long __pyx_t_5numpy_int_t;
+
+/* "numpy.pxd":747
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long       int_t
+ * ctypedef npy_longlong   long_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong   longlong_t
+ * 
+ */
+typedef npy_longlong __pyx_t_5numpy_long_t;
+
+/* "numpy.pxd":748
+ * ctypedef npy_long       int_t
+ * ctypedef npy_longlong   long_t
+ * ctypedef npy_longlong   longlong_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_ulong      uint_t
+ */
+typedef npy_longlong __pyx_t_5numpy_longlong_t;
+
+/* "numpy.pxd":750
+ * ctypedef npy_longlong   longlong_t
+ * 
+ * ctypedef npy_ulong      uint_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong  ulong_t
+ * ctypedef npy_ulonglong  ulonglong_t
+ */
+typedef npy_ulong __pyx_t_5numpy_uint_t;
+
+/* "numpy.pxd":751
+ * 
+ * ctypedef npy_ulong      uint_t
+ * ctypedef npy_ulonglong  ulong_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong  ulonglong_t
+ * 
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
+
+/* "numpy.pxd":752
+ * ctypedef npy_ulong      uint_t
+ * ctypedef npy_ulonglong  ulong_t
+ * ctypedef npy_ulonglong  ulonglong_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_intp       intp_t
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
+
+/* "numpy.pxd":754
+ * ctypedef npy_ulonglong  ulonglong_t
+ * 
+ * ctypedef npy_intp       intp_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uintp      uintp_t
+ * 
+ */
+typedef npy_intp __pyx_t_5numpy_intp_t;
+
+/* "numpy.pxd":755
+ * 
+ * ctypedef npy_intp       intp_t
+ * ctypedef npy_uintp      uintp_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_double     float_t
+ */
+typedef npy_uintp __pyx_t_5numpy_uintp_t;
+
+/* "numpy.pxd":757
+ * ctypedef npy_uintp      uintp_t
+ * 
+ * ctypedef npy_double     float_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_double     double_t
+ * ctypedef npy_longdouble longdouble_t
+ */
+typedef npy_double __pyx_t_5numpy_float_t;
+
+/* "numpy.pxd":758
+ * 
+ * ctypedef npy_double     float_t
+ * ctypedef npy_double     double_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_longdouble longdouble_t
+ * 
+ */
+typedef npy_double __pyx_t_5numpy_double_t;
+
+/* "numpy.pxd":759
+ * ctypedef npy_double     float_t
+ * ctypedef npy_double     double_t
+ * ctypedef npy_longdouble longdouble_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_cfloat      cfloat_t
+ */
+typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
+#if CYTHON_CCOMPLEX /* Choose the representation of a double-precision complex number. */
+  #ifdef __cplusplus
+    typedef ::std::complex< double > __pyx_t_double_complex; /* C++ build: std::complex */
+  #else
+    typedef double _Complex __pyx_t_double_complex; /* C build: the compiler's _Complex type */
+  #endif
+#else
+    typedef struct { double real, imag; } __pyx_t_double_complex; /* fallback: plain struct of two doubles */
+#endif
+
+#if CYTHON_CCOMPLEX /* Choose the representation of a single-precision complex number. */
+  #ifdef __cplusplus
+    typedef ::std::complex< float > __pyx_t_float_complex; /* C++ build: std::complex */
+  #else
+    typedef float _Complex __pyx_t_float_complex; /* C build: the compiler's _Complex type */
+  #endif
+#else
+    typedef struct { float real, imag; } __pyx_t_float_complex; /* fallback: plain struct of two floats */
+#endif
+
+
+/*--- Type declarations ---*/
+
+/* "numpy.pxd":761
+ * ctypedef npy_longdouble longdouble_t
+ * 
+ * ctypedef npy_cfloat      cfloat_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_cdouble     cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t
+ */
+typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
+
+/* "numpy.pxd":762
+ * 
+ * ctypedef npy_cfloat      cfloat_t
+ * ctypedef npy_cdouble     cdouble_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_clongdouble clongdouble_t
+ * 
+ */
+typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
+
+/* "numpy.pxd":763
+ * ctypedef npy_cfloat      cfloat_t
+ * ctypedef npy_cdouble     cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t             # <<<<<<<<<<<<<<
+ * 
+ * ctypedef npy_cdouble     complex_t
+ */
+typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
+
+/* "numpy.pxd":765
+ * ctypedef npy_clongdouble clongdouble_t
+ * 
+ * ctypedef npy_cdouble     complex_t             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):
+ */
+typedef npy_cdouble __pyx_t_5numpy_complex_t;
+#ifndef CYTHON_REFNANNY /* Refcount-tracking hooks ("refnanny") are off unless the build defines this. */
+  #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+  typedef struct { /* function table through which every __Pyx_*REF macro in this branch is routed */
+    void (*INCREF)(void*, PyObject*, int);
+    void (*DECREF)(void*, PyObject*, int);
+    void (*GOTREF)(void*, PyObject*, int);
+    void (*GIVEREF)(void*, PyObject*, int);
+    void* (*SetupContext)(const char*, int, const char*);
+    void (*FinishContext)(void**);
+  } __Pyx_RefNannyAPIStruct;
+  static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+  static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
+  #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; /* per-function context handle */
+#ifdef WITH_THREAD /* threaded build: SetupContext can be wrapped in a GIL acquire/release */
+  #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+          if (acquire_gil) { \
+              PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
+              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+              PyGILState_Release(__pyx_gilstate_save); \
+          } else { \
+              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+          }
+#else
+  #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+          __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+  #define __Pyx_RefNannyFinishContext() \
+          __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+  #define __Pyx_INCREF(r)  __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) /* each hook also receives the C line of the call */
+  #define __Pyx_DECREF(r)  __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_GOTREF(r)  __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+  #define __Pyx_XINCREF(r)  do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) /* X variants skip NULL pointers */
+  #define __Pyx_XDECREF(r)  do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+  #define __Pyx_XGOTREF(r)  do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+  #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else /* refnanny disabled: plain CPython refcounting; the context and GOTREF/GIVEREF macros expand to nothing */
+  #define __Pyx_RefNannyDeclarations
+  #define __Pyx_RefNannySetupContext(name, acquire_gil)
+  #define __Pyx_RefNannyFinishContext()
+  #define __Pyx_INCREF(r) Py_INCREF(r)
+  #define __Pyx_DECREF(r) Py_DECREF(r)
+  #define __Pyx_GOTREF(r)
+  #define __Pyx_GIVEREF(r)
+  #define __Pyx_XINCREF(r) Py_XINCREF(r)
+  #define __Pyx_XDECREF(r) Py_XDECREF(r)
+  #define __Pyx_XGOTREF(r)
+  #define __Pyx_XGIVEREF(r)
+#endif /* CYTHON_REFNANNY */
+#define __Pyx_CLEAR(r)    do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) /* set r to NULL first, then drop the reference it held */
+#define __Pyx_XCLEAR(r)   do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) /* same as __Pyx_CLEAR, but does nothing when r is already NULL */
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+    const char *name, int exact); /*proto*/
+
+static CYTHON_INLINE int  __Pyx_GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
+    __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
+static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);
+
+#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) /* pointer to element i0 of a 1-D buffer: buf advanced by i0 * s0 bytes, cast to type; arguments are not parenthesized, so pass simple expressions only */
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
+
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
+
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
+
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
+
+static void __Pyx_UnpackTupleError(PyObject *, Py_ssize_t index); /*proto*/
+
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
+
+typedef struct { /* Shape, stride and suboffset of a single buffer dimension. */
+  Py_ssize_t shape, strides, suboffsets;
+} __Pyx_Buf_DimInfo;
+typedef struct { /* A Py_buffer paired with a counter. */
+  size_t refcount; /* set to 0 by the functions in this chunk before the buffer is acquired */
+  Py_buffer pybuffer;
+} __Pyx_Buffer;
+typedef struct { /* Per-argument buffer bookkeeping used by the functions typed as np.ndarray[...]. */
+  __Pyx_Buffer *rcbuffer; /* the underlying __Pyx_Buffer (a local variable in the functions in this chunk) */
+  char *data;
+  __Pyx_Buf_DimInfo diminfo[8]; /* room for up to 8 dimensions */
+} __Pyx_LocalBuf_ND;
+
+#if PY_MAJOR_VERSION < 3 /* Python 2: the module supplies its own get/release buffer functions (defined outside this chunk) */
+    static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+    static void __Pyx_ReleaseBuffer(Py_buffer *view);
+#else /* Python 3: use the C-API buffer functions directly */
+    #define __Pyx_GetBuffer PyObject_GetBuffer
+    #define __Pyx_ReleaseBuffer PyBuffer_Release
+#endif
+
+static Py_ssize_t __Pyx_zeros[] = {0, 0, 0, 0, 0, 0, 0, 0}; /* eight zeros (same length as __Pyx_LocalBuf_ND.diminfo); users are outside this chunk */
+static Py_ssize_t __Pyx_minusones[] = {-1, -1, -1, -1, -1, -1, -1, -1}; /* eight -1 entries; users are outside this chunk */
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level); /*proto*/
+
+#if CYTHON_CCOMPLEX /* Accessors for the real and imaginary parts, one variant per complex representation. */
+  #ifdef __cplusplus
+    #define __Pyx_CREAL(z) ((z).real()) /* std::complex member functions */
+    #define __Pyx_CIMAG(z) ((z).imag())
+  #else
+    #define __Pyx_CREAL(z) (__real__(z)) /* compiler extension operators on _Complex values */
+    #define __Pyx_CIMAG(z) (__imag__(z))
+  #endif
+#else
+    #define __Pyx_CREAL(z) ((z).real) /* fields of the fallback struct */
+    #define __Pyx_CIMAG(z) ((z).imag)
+#endif
+#if defined(_WIN32) && defined(__cplusplus) && CYTHON_CCOMPLEX /* Setters: on Windows C++ builds go through the std::complex setter overloads */
+    #define __Pyx_SET_CREAL(z,x) ((z).real(x))
+    #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
+#else /* everywhere else: assign through the accessor macros */
+    #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
+    #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
+#endif
+
+static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
+
+#if CYTHON_CCOMPLEX /* Double-complex arithmetic: native operators when the compiler has a complex type */
+    #define __Pyx_c_eq(a, b)   ((a)==(b))
+    #define __Pyx_c_sum(a, b)  ((a)+(b))
+    #define __Pyx_c_diff(a, b) ((a)-(b))
+    #define __Pyx_c_prod(a, b) ((a)*(b))
+    #define __Pyx_c_quot(a, b) ((a)/(b))
+    #define __Pyx_c_neg(a)     (-(a))
+  #ifdef __cplusplus
+    #define __Pyx_c_is_zero(z) ((z)==(double)0)
+    #define __Pyx_c_conj(z)    (::std::conj(z))
+    #if 1
+        #define __Pyx_c_abs(z)     (::std::abs(z))
+        #define __Pyx_c_pow(a, b)  (::std::pow(a, b))
+    #endif
+  #else
+    #define __Pyx_c_is_zero(z) ((z)==0)
+    #define __Pyx_c_conj(z)    (conj(z))
+    #if 1
+        #define __Pyx_c_abs(z)     (cabs(z))
+        #define __Pyx_c_pow(a, b)  (cpow(a, b))
+    #endif
+ #endif
+#else /* struct fallback: the same operations are functions, only declared here */
+    static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex, __pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex);
+    static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex);
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex);
+    #if 1
+        static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex);
+        static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex, __pyx_t_double_complex);
+    #endif
+#endif
+
+static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
+
+#if CYTHON_CCOMPLEX /* Float-complex arithmetic: native operators when the compiler has a complex type */
+    #define __Pyx_c_eqf(a, b)   ((a)==(b))
+    #define __Pyx_c_sumf(a, b)  ((a)+(b))
+    #define __Pyx_c_difff(a, b) ((a)-(b))
+    #define __Pyx_c_prodf(a, b) ((a)*(b))
+    #define __Pyx_c_quotf(a, b) ((a)/(b))
+    #define __Pyx_c_negf(a)     (-(a))
+  #ifdef __cplusplus
+    #define __Pyx_c_is_zerof(z) ((z)==(float)0)
+    #define __Pyx_c_conjf(z)    (::std::conj(z))
+    #if 1
+        #define __Pyx_c_absf(z)     (::std::abs(z))
+        #define __Pyx_c_powf(a, b)  (::std::pow(a, b))
+    #endif
+  #else
+    #define __Pyx_c_is_zerof(z) ((z)==0)
+    #define __Pyx_c_conjf(z)    (conjf(z))
+    #if 1
+        #define __Pyx_c_absf(z)     (cabsf(z))
+        #define __Pyx_c_powf(a, b)  (cpowf(a, b))
+    #endif
+ #endif
+#else /* struct fallback: the same operations are functions, only declared here */
+    static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex, __pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex);
+    static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex);
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex);
+    #if 1
+        static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex);
+        static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex, __pyx_t_float_complex);
+    #endif
+#endif
+
+static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject *);
+
+static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject *);
+
+static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject *);
+
+static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject *);
+
+static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject *);
+
+static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject *);
+
+static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject *);
+
+static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject *);
+
+static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject *);
+
+static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject *);
+
+static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject *);
+
+static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject *);
+
+static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject *);
+
+static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject *);
+
+static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject *);
+
+static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject *);
+
+static int __Pyx_check_binary_version(void);
+
+#if !defined(__Pyx_PyIdentifier_FromString) /* Build an identifier object from a C string, unless already defined. */
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s) /* Python 2: byte string */
+#else
+  #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s) /* Python 3: unicode string */
+#endif
+#endif
+
+static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict);  /*proto*/
+
+static PyObject *__Pyx_ImportModule(const char *name); /*proto*/
+
+typedef struct { /* One cache entry: a code object stored under an integer line key. */
+    int code_line;
+    PyCodeObject* code_object;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache { /* Array of entries plus its used and allocated sizes. */
+    int count;
+    int max_count;
+    __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; /* module-wide cache, initially empty */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); /* the bisect name suggests entries are kept sorted by code_line -- confirm in the definition */
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+                               int py_line, const char *filename); /*proto*/
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
+
+
+/* Module declarations from 'cpython.buffer' */
+
+/* Module declarations from 'cpython.ref' */
+
+/* Module declarations from 'libc.stdio' */
+
+/* Module declarations from 'cpython.object' */
+
+/* Module declarations from 'libc.stdlib' */
+
+/* Module declarations from 'numpy' */
+
+/* Module declarations from 'numpy' */
+static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
+static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
+static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/
+
+/* Module declarations from 'cython' */
+
+/* Module declarations from 'pyoperators.utils.cythonutils' */
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_uint8_t = { "uint8_t", NULL, sizeof(__pyx_t_5numpy_uint8_t), { 0 }, 0, 'U', IS_UNSIGNED(__pyx_t_5numpy_uint8_t), 0 }; /* element-type descriptors handed to __Pyx_GetBufferAndValidate; kind letter as used here: 'U' for the unsigned ints, 'I' for int64, 'R' for float64, 'C' for double complex */
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_uint64_t = { "uint64_t", NULL, sizeof(__pyx_t_5numpy_uint64_t), { 0 }, 0, 'U', IS_UNSIGNED(__pyx_t_5numpy_uint64_t), 0 };
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int64_t = { "int64_t", NULL, sizeof(__pyx_t_5numpy_int64_t), { 0 }, 0, 'I', IS_UNSIGNED(__pyx_t_5numpy_int64_t), 0 };
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t = { "float64_t", NULL, sizeof(__pyx_t_5numpy_float64_t), { 0 }, 0, 'R', 0, 0 };
+static __Pyx_TypeInfo __Pyx_TypeInfo___pyx_t_double_complex = { "double complex", NULL, sizeof(__pyx_t_double_complex), { 0 }, 0, 'C', 0, 0 };
+#define __Pyx_MODULE_NAME "pyoperators.utils.cythonutils"
+int __pyx_module_is_main_pyoperators__utils__cythonutils = 0;
+
+/* Implementation of 'pyoperators.utils.cythonutils' */
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_ValueError;
+static PyObject *__pyx_builtin_RuntimeError;
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_inspect_special_values_bool8(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v); /* proto */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_2inspect_special_values_uint64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v); /* proto */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_4inspect_special_values_int64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v); /* proto */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_6inspect_special_values_float64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v); /* proto */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_8inspect_special_values_complex128(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v); /* proto */
+static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
+static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */
+static char __pyx_k_1[] = "ndarray is not C contiguous";
+static char __pyx_k_3[] = "ndarray is not Fortran contiguous";
+static char __pyx_k_5[] = "Non-native byte order not supported";
+static char __pyx_k_7[] = "unknown dtype code in numpy.pxd (%d)";
+static char __pyx_k_8[] = "Format string allocated too short, see comment in numpy.pxd";
+static char __pyx_k_11[] = "Format string allocated too short.";
+static char __pyx_k_15[] = "inspect_special_values_bool8";
+static char __pyx_k_16[] = "/home/pchanial/work/tamasis/pyoperators-dev/pyoperators/utils/cythonutils.pyx";
+static char __pyx_k_17[] = "pyoperators.utils.cythonutils";
+static char __pyx_k_20[] = "inspect_special_values_uint64";
+static char __pyx_k_23[] = "inspect_special_values_int64";
+static char __pyx_k_26[] = "inspect_special_values_float64";
+static char __pyx_k_29[] = "inspect_special_values_complex128";
+static char __pyx_k__B[] = "B";
+static char __pyx_k__H[] = "H";
+static char __pyx_k__I[] = "I";
+static char __pyx_k__L[] = "L";
+static char __pyx_k__O[] = "O";
+static char __pyx_k__Q[] = "Q";
+static char __pyx_k__b[] = "b";
+static char __pyx_k__d[] = "d";
+static char __pyx_k__f[] = "f";
+static char __pyx_k__g[] = "g";
+static char __pyx_k__h[] = "h";
+static char __pyx_k__i[] = "i";
+static char __pyx_k__l[] = "l";
+static char __pyx_k__n[] = "n";
+static char __pyx_k__q[] = "q";
+static char __pyx_k__v[] = "v";
+static char __pyx_k__Zd[] = "Zd";
+static char __pyx_k__Zf[] = "Zf";
+static char __pyx_k__Zg[] = "Zg";
+static char __pyx_k__np[] = "np";
+static char __pyx_k__same[] = "same";
+static char __pyx_k__size[] = "size";
+static char __pyx_k__nones[] = "nones";
+static char __pyx_k__numpy[] = "numpy";
+static char __pyx_k__other[] = "other";
+static char __pyx_k__range[] = "range";
+static char __pyx_k__value[] = "value";
+static char __pyx_k__nmones[] = "nmones";
+static char __pyx_k__nzeros[] = "nzeros";
+static char __pyx_k__value0[] = "value0";
+static char __pyx_k____all__[] = "__all__";
+static char __pyx_k____main__[] = "__main__";
+static char __pyx_k____test__[] = "__test__";
+static char __pyx_k__ValueError[] = "ValueError";
+static char __pyx_k__RuntimeError[] = "RuntimeError";
+static PyObject *__pyx_kp_u_1;
+static PyObject *__pyx_kp_u_11;
+static PyObject *__pyx_n_s_15;
+static PyObject *__pyx_kp_s_16;
+static PyObject *__pyx_n_s_17;
+static PyObject *__pyx_n_s_20;
+static PyObject *__pyx_n_s_23;
+static PyObject *__pyx_n_s_26;
+static PyObject *__pyx_n_s_29;
+static PyObject *__pyx_kp_u_3;
+static PyObject *__pyx_kp_u_5;
+static PyObject *__pyx_kp_u_7;
+static PyObject *__pyx_kp_u_8;
+static PyObject *__pyx_n_s__RuntimeError;
+static PyObject *__pyx_n_s__ValueError;
+static PyObject *__pyx_n_s____all__;
+static PyObject *__pyx_n_s____main__;
+static PyObject *__pyx_n_s____test__;
+static PyObject *__pyx_n_s__i;
+static PyObject *__pyx_n_s__n;
+static PyObject *__pyx_n_s__nmones;
+static PyObject *__pyx_n_s__nones;
+static PyObject *__pyx_n_s__np;
+static PyObject *__pyx_n_s__numpy;
+static PyObject *__pyx_n_s__nzeros;
+static PyObject *__pyx_n_s__other;
+static PyObject *__pyx_n_s__range;
+static PyObject *__pyx_n_s__same;
+static PyObject *__pyx_n_s__size;
+static PyObject *__pyx_n_s__v;
+static PyObject *__pyx_n_s__value;
+static PyObject *__pyx_n_s__value0;
+static PyObject *__pyx_int_0;
+static PyObject *__pyx_int_15;
+static PyObject *__pyx_k_tuple_2;
+static PyObject *__pyx_k_tuple_4;
+static PyObject *__pyx_k_tuple_6;
+static PyObject *__pyx_k_tuple_9;
+static PyObject *__pyx_k_tuple_10;
+static PyObject *__pyx_k_tuple_12;
+static PyObject *__pyx_k_tuple_13;
+static PyObject *__pyx_k_tuple_18;
+static PyObject *__pyx_k_tuple_21;
+static PyObject *__pyx_k_tuple_24;
+static PyObject *__pyx_k_tuple_27;
+static PyObject *__pyx_k_codeobj_14;
+static PyObject *__pyx_k_codeobj_19;
+static PyObject *__pyx_k_codeobj_22;
+static PyObject *__pyx_k_codeobj_25;
+static PyObject *__pyx_k_codeobj_28;
+
+/* Python wrapper */
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_1inspect_special_values_bool8(PyObject *__pyx_self, PyObject *__pyx_v_v); /*proto*/
+static PyMethodDef __pyx_mdef_11pyoperators_5utils_11cythonutils_1inspect_special_values_bool8 = {__Pyx_NAMESTR("inspect_special_values_bool8"), (PyCFunction)__pyx_pw_11pyoperators_5utils_11cythonutils_1inspect_special_values_bool8, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_1inspect_special_values_bool8(PyObject *__pyx_self, PyObject *__pyx_v_v) { /* Python-callable entry point (METH_O): type-checks v, then delegates to the implementation function */
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("inspect_special_values_bool8 (wrapper)", 0);
+  __pyx_self = __pyx_self; /* self-assignment; presumably silences an unused-parameter warning */
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_v), __pyx_ptype_5numpy_ndarray, 1, "v", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* v is checked against numpy.ndarray with none_allowed=1, exact=0 */
+  __pyx_r = __pyx_pf_11pyoperators_5utils_11cythonutils_inspect_special_values_bool8(__pyx_self, ((PyArrayObject *)__pyx_v_v));
+  goto __pyx_L0;
+  __pyx_L1_error:; /* argument check failed: return NULL */
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "pyoperators/utils/cythonutils.pyx":10
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_bool8(np.ndarray[np.uint8_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size
+ */
+
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_inspect_special_values_bool8(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v) { /* Counts the zero elements of a 1-D uint8 array and returns the tuple (0, nzeros, n - nzeros, False, nzeros in (0, n)); returns NULL with an exception set on failure */
+  int __pyx_v_nzeros;
+  unsigned int __pyx_v_n;
+  unsigned int __pyx_v_i;
+  __Pyx_LocalBuf_ND __pyx_pybuffernd_v;
+  __Pyx_Buffer __pyx_pybuffer_v;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  unsigned int __pyx_t_2;
+  unsigned int __pyx_t_3;
+  unsigned int __pyx_t_4;
+  int __pyx_t_5;
+  PyObject *__pyx_t_6 = NULL;
+  PyObject *__pyx_t_7 = NULL;
+  int __pyx_t_8;
+  int __pyx_t_9;
+  int __pyx_t_10;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("inspect_special_values_bool8", 0);
+  __pyx_pybuffer_v.pybuffer.buf = NULL; /* start with no buffer attached */
+  __pyx_pybuffer_v.refcount = 0;
+  __pyx_pybuffernd_v.data = NULL;
+  __pyx_pybuffernd_v.rcbuffer = &__pyx_pybuffer_v;
+  { /* acquire a 1-D uint8 buffer on v, requesting format and stride information */
+    __Pyx_BufFmt_StackElem __pyx_stack[1];
+    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_v.rcbuffer->pybuffer, (PyObject*)__pyx_v_v, &__Pyx_TypeInfo_nn___pyx_t_5numpy_uint8_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
+  __pyx_pybuffernd_v.diminfo[0].strides = __pyx_pybuffernd_v.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_v.diminfo[0].shape = __pyx_pybuffernd_v.rcbuffer->pybuffer.shape[0]; /* cache stride and shape of dimension 0 */
+
+  /* "pyoperators/utils/cythonutils.pyx":11
+ * @cython.boundscheck(False)
+ * def inspect_special_values_bool8(np.ndarray[np.uint8_t, ndim=1] v):
+ *     cdef int nzeros = 0             # <<<<<<<<<<<<<<
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ */
+  __pyx_v_nzeros = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":12
+ * def inspect_special_values_bool8(np.ndarray[np.uint8_t, ndim=1] v):
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size             # <<<<<<<<<<<<<<
+ *     cdef unsigned int i
+ * 
+ */
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_v), __pyx_n_s__size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_AsUnsignedInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_n = __pyx_t_2; /* NOTE(review): n is an unsigned int, so sizes above UINT_MAX depend on how __Pyx_PyInt_AsUnsignedInt reports overflow -- confirm */
+
+  /* "pyoperators/utils/cythonutils.pyx":15
+ *     cdef unsigned int i
+ * 
+ *     for i in range(n):             # <<<<<<<<<<<<<<
+ *         if v[i] == 0:
+ *             nzeros += 1
+ */
+  __pyx_t_2 = __pyx_v_n;
+  for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
+    __pyx_v_i = __pyx_t_3;
+
+    /* "pyoperators/utils/cythonutils.pyx":16
+ * 
+ *     for i in range(n):
+ *         if v[i] == 0:             # <<<<<<<<<<<<<<
+ *             nzeros += 1
+ *     return 0, nzeros, n - nzeros, False, nzeros in (0, n)
+ */
+    __pyx_t_4 = __pyx_v_i;
+    __pyx_t_5 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_uint8_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_4, __pyx_pybuffernd_v.diminfo[0].strides)) == 0);
+    if (__pyx_t_5) {
+
+      /* "pyoperators/utils/cythonutils.pyx":17
+ *     for i in range(n):
+ *         if v[i] == 0:
+ *             nzeros += 1             # <<<<<<<<<<<<<<
+ *     return 0, nzeros, n - nzeros, False, nzeros in (0, n)
+ * 
+ */
+      __pyx_v_nzeros = (__pyx_v_nzeros + 1);
+      goto __pyx_L5; /* jump to the label right after the if body; no effect on the result */
+    }
+    __pyx_L5:;
+  }
+
+  /* "pyoperators/utils/cythonutils.pyx":18
+ *         if v[i] == 0:
+ *             nzeros += 1
+ *     return 0, nzeros, n - nzeros, False, nzeros in (0, n)             # <<<<<<<<<<<<<<
+ * 
+ * @cython.boundscheck(False)
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point, so nothing is released */
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_nzeros); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_6 = PyLong_FromUnsignedLong((__pyx_v_n - __pyx_v_nzeros)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  __pyx_t_8 = __pyx_v_nzeros; /* evaluate "nzeros in (0, n)" as two equality tests */
+  __pyx_t_5 = ((int)(__pyx_t_8 == 0));
+  if (!__pyx_t_5) {
+    __pyx_t_9 = ((int)(__pyx_t_8 == __pyx_v_n)); /* int compared with unsigned int: the comparison is done in unsigned arithmetic */
+    __pyx_t_10 = __pyx_t_9;
+  } else {
+    __pyx_t_10 = __pyx_t_5;
+  }
+  __pyx_t_11 = __Pyx_PyBool_FromLong(__pyx_t_10); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_11);
+  __pyx_t_12 = PyTuple_New(5); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_12);
+  __Pyx_INCREF(__pyx_int_0); /* the tuple slot takes over a reference, so the shared constant 0 needs one of its own */
+  PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_int_0);
+  __Pyx_GIVEREF(__pyx_int_0);
+  PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_12, 2, __pyx_t_6);
+  __Pyx_GIVEREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_12, 3, __pyx_t_7);
+  __Pyx_GIVEREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_12, 4, __pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_11);
+  __pyx_t_1 = 0; /* temporaries now belong to the tuple; clear them so the error path cannot release them again */
+  __pyx_t_6 = 0;
+  __pyx_t_7 = 0;
+  __pyx_t_11 = 0;
+  __pyx_r = ((PyObject *)__pyx_t_12);
+  __pyx_t_12 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:; /* failure path: drop any live temporaries */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_XDECREF(__pyx_t_12);
+  { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; /* save the pending exception while the buffer is released, then restore it */
+    __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+    __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+  __Pyx_AddTraceback("pyoperators.utils.cythonutils.inspect_special_values_bool8", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  goto __pyx_L2;
+  __pyx_L0:; /* success path: release the buffer before returning */
+  __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __pyx_L2:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_3inspect_special_values_uint64(PyObject *__pyx_self, PyObject *__pyx_v_v); /*proto*/
+static PyMethodDef __pyx_mdef_11pyoperators_5utils_11cythonutils_3inspect_special_values_uint64 = {__Pyx_NAMESTR("inspect_special_values_uint64"), (PyCFunction)__pyx_pw_11pyoperators_5utils_11cythonutils_3inspect_special_values_uint64, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_3inspect_special_values_uint64(PyObject *__pyx_self, PyObject *__pyx_v_v) { /* Python-callable entry point (METH_O): type-checks v, then delegates to the implementation function */
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("inspect_special_values_uint64 (wrapper)", 0);
+  __pyx_self = __pyx_self; /* self-assignment; presumably silences an unused-parameter warning */
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_v), __pyx_ptype_5numpy_ndarray, 1, "v", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* v is checked against numpy.ndarray with none_allowed=1, exact=0 */
+  __pyx_r = __pyx_pf_11pyoperators_5utils_11cythonutils_2inspect_special_values_uint64(__pyx_self, ((PyArrayObject *)__pyx_v_v));
+  goto __pyx_L0;
+  __pyx_L1_error:; /* argument check failed: return NULL */
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "pyoperators/utils/cythonutils.pyx":21
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_uint64(np.ndarray[np.uint64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_2inspect_special_values_uint64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v) { /* Implementation of inspect_special_values_uint64: scans the 1-D uint64 buffer of v and returns a new 5-tuple (0, nzeros, nones, other-flag, same-flag) as in the .pyx code quoted below, or NULL with an exception set. */
+  int __pyx_v_nones; /* number of elements equal to 1 */
+  int __pyx_v_nzeros; /* number of elements equal to 0 */
+  unsigned int __pyx_v_n; /* v.size converted to unsigned int */
+  unsigned int __pyx_v_i; /* loop index */
+  __pyx_t_5numpy_uint64_t __pyx_v_value; /* element currently being inspected */
+  __pyx_t_5numpy_uint64_t __pyx_v_value0; /* first element, v[0] */
+  int __pyx_v_same; /* 1 while every element seen so far equals value0 */
+  int __pyx_v_other; /* becomes 1 once an element that is neither 0 nor 1 is seen */
+  __Pyx_LocalBuf_ND __pyx_pybuffernd_v;
+  __Pyx_Buffer __pyx_pybuffer_v;
+  PyObject *__pyx_r = NULL; /* return value */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* __pyx_t_N are compiler temporaries; the PyObject* ones are decref'ed on the error path */
+  unsigned int __pyx_t_2;
+  long __pyx_t_3;
+  unsigned int __pyx_t_4;
+  unsigned int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  int __pyx_lineno = 0; /* .pyx line recorded when an error is raised */
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0; /* C line (__LINE__) recorded when an error is raised */
+  __Pyx_RefNannySetupContext("inspect_special_values_uint64", 0);
+  __pyx_pybuffer_v.pybuffer.buf = NULL; /* buffer bookkeeping starts out empty before acquisition */
+  __pyx_pybuffer_v.refcount = 0;
+  __pyx_pybuffernd_v.data = NULL;
+  __pyx_pybuffernd_v.rcbuffer = &__pyx_pybuffer_v;
+  {
+    __Pyx_BufFmt_StackElem __pyx_stack[1];
+    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_v.rcbuffer->pybuffer, (PyObject*)__pyx_v_v, &__Pyx_TypeInfo_nn___pyx_t_5numpy_uint64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* request a strided buffer with format info on v, validated against the uint64 type info; -1 means failure */
+  }
+  __pyx_pybuffernd_v.diminfo[0].strides = __pyx_pybuffernd_v.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_v.diminfo[0].shape = __pyx_pybuffernd_v.rcbuffer->pybuffer.shape[0]; /* cache stride and extent of dimension 0 */
+
+  /* "pyoperators/utils/cythonutils.pyx":22
+ * @cython.boundscheck(False)
+ * def inspect_special_values_uint64(np.ndarray[np.uint64_t, ndim=1] v):
+ *     cdef int nones = 0             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_v_nones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":23
+ * def inspect_special_values_uint64(np.ndarray[np.uint64_t, ndim=1] v):
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0             # <<<<<<<<<<<<<<
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ */
+  __pyx_v_nzeros = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":24
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size             # <<<<<<<<<<<<<<
+ *     cdef unsigned int i
+ *     cdef np.uint64_t value, value0 = v[0]
+ */
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_v), __pyx_n_s__size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* v.size is obtained through a generic Python attribute lookup */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_AsUnsignedInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* convert to unsigned int; (unsigned int)-1 together with a pending exception signals a failed conversion */
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_n = __pyx_t_2;
+
+  /* "pyoperators/utils/cythonutils.pyx":26
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ *     cdef np.uint64_t value, value0 = v[0]             # <<<<<<<<<<<<<<
+ *     cdef int same = 1
+ *     cdef int other = 0
+ */
+  __pyx_t_3 = 0;
+  if (__pyx_t_3 < 0) __pyx_t_3 += __pyx_pybuffernd_v.diminfo[0].shape; /* negative-index wraparound; never taken here because the index is the constant 0 */
+  __pyx_v_value0 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_uint64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_3, __pyx_pybuffernd_v.diminfo[0].strides)); /* NOTE(review): v[0] is read without a bounds check and before n is examined - confirm callers never pass an empty array */
+
+  /* "pyoperators/utils/cythonutils.pyx":27
+ *     cdef unsigned int i
+ *     cdef np.uint64_t value, value0 = v[0]
+ *     cdef int same = 1             # <<<<<<<<<<<<<<
+ *     cdef int other = 0
+ * 
+ */
+  __pyx_v_same = 1;
+
+  /* "pyoperators/utils/cythonutils.pyx":28
+ *     cdef np.uint64_t value, value0 = v[0]
+ *     cdef int same = 1
+ *     cdef int other = 0             # <<<<<<<<<<<<<<
+ * 
+ *     for i in range(n):
+ */
+  __pyx_v_other = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":30
+ *     cdef int other = 0
+ * 
+ *     for i in range(n):             # <<<<<<<<<<<<<<
+ *         value = v[i]
+ *         if value == 0:
+ */
+  __pyx_t_2 = __pyx_v_n; /* loop bound is copied to a temporary once, before the loop starts */
+  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_2; __pyx_t_4+=1) {
+    __pyx_v_i = __pyx_t_4;
+
+    /* "pyoperators/utils/cythonutils.pyx":31
+ * 
+ *     for i in range(n):
+ *         value = v[i]             # <<<<<<<<<<<<<<
+ *         if value == 0:
+ *             nzeros += 1
+ */
+    __pyx_t_5 = __pyx_v_i;
+    __pyx_v_value = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_uint64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_5, __pyx_pybuffernd_v.diminfo[0].strides)); /* strided element read with no bounds check emitted */
+
+    /* "pyoperators/utils/cythonutils.pyx":34
+ *         if value == 0:
+ *             nzeros += 1
+ *         elif value == 1:             # <<<<<<<<<<<<<<
+ *             nones += 1
+ *         else:
+ */
+    switch (__pyx_v_value) { /* the if/elif/else chain on value from the .pyx is emitted as a switch */
+
+      /* "pyoperators/utils/cythonutils.pyx":32
+ *     for i in range(n):
+ *         value = v[i]
+ *         if value == 0:             # <<<<<<<<<<<<<<
+ *             nzeros += 1
+ *         elif value == 1:
+ */
+      case 0:
+
+      /* "pyoperators/utils/cythonutils.pyx":33
+ *         value = v[i]
+ *         if value == 0:
+ *             nzeros += 1             # <<<<<<<<<<<<<<
+ *         elif value == 1:
+ *             nones += 1
+ */
+      __pyx_v_nzeros = (__pyx_v_nzeros + 1);
+      break;
+
+      /* "pyoperators/utils/cythonutils.pyx":34
+ *         if value == 0:
+ *             nzeros += 1
+ *         elif value == 1:             # <<<<<<<<<<<<<<
+ *             nones += 1
+ *         else:
+ */
+      case 1:
+
+      /* "pyoperators/utils/cythonutils.pyx":35
+ *             nzeros += 1
+ *         elif value == 1:
+ *             nones += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             other = 1
+ */
+      __pyx_v_nones = (__pyx_v_nones + 1);
+      break;
+      default:
+
+      /* "pyoperators/utils/cythonutils.pyx":37
+ *             nones += 1
+ *         else:
+ *             other = 1             # <<<<<<<<<<<<<<
+ *         if same == 1 and value != value0:
+ *             same = 0
+ */
+      __pyx_v_other = 1;
+      break;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":38
+ *         else:
+ *             other = 1
+ *         if same == 1 and value != value0:             # <<<<<<<<<<<<<<
+ *             same = 0
+ *         if same == 0 and other == 1:
+ */
+    __pyx_t_6 = (__pyx_v_same == 1); /* short-circuit "and": the right operand is evaluated only when the left one is true */
+    if (__pyx_t_6) {
+      __pyx_t_7 = (__pyx_v_value != __pyx_v_value0);
+      __pyx_t_8 = __pyx_t_7;
+    } else {
+      __pyx_t_8 = __pyx_t_6;
+    }
+    if (__pyx_t_8) {
+
+      /* "pyoperators/utils/cythonutils.pyx":39
+ *             other = 1
+ *         if same == 1 and value != value0:
+ *             same = 0             # <<<<<<<<<<<<<<
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ */
+      __pyx_v_same = 0;
+      goto __pyx_L5; /* jump to the label immediately after this if; no statements are skipped */
+    }
+    __pyx_L5:;
+
+    /* "pyoperators/utils/cythonutils.pyx":40
+ *         if same == 1 and value != value0:
+ *             same = 0
+ *         if same == 0 and other == 1:             # <<<<<<<<<<<<<<
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ */
+    __pyx_t_8 = (__pyx_v_same == 0); /* short-circuit "and" again, reusing the int temporaries */
+    if (__pyx_t_8) {
+      __pyx_t_6 = (__pyx_v_other == 1);
+      __pyx_t_7 = __pyx_t_6;
+    } else {
+      __pyx_t_7 = __pyx_t_8;
+    }
+    if (__pyx_t_7) { /* early exit from the loop: elements differ and a value other than 0/1 was seen */
+
+      /* "pyoperators/utils/cythonutils.pyx":41
+ *             same = 0
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False             # <<<<<<<<<<<<<<
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ */
+      __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point (only assigned right before a jump to __pyx_L0) */
+      __pyx_t_1 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __pyx_t_9 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_10 = PyTuple_New(5); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_INCREF(__pyx_int_0); /* one extra reference to the shared int 0 per tuple slot, since PyTuple_SET_ITEM takes over a reference (CPython C API) */
+      PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 2, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 3, __pyx_t_1);
+      __Pyx_GIVEREF(__pyx_t_1);
+      PyTuple_SET_ITEM(__pyx_t_10, 4, __pyx_t_9);
+      __Pyx_GIVEREF(__pyx_t_9);
+      __pyx_t_1 = 0; /* references now belong to the tuple; zero the temporaries so the error path cannot decref them */
+      __pyx_t_9 = 0;
+      __pyx_r = ((PyObject *)__pyx_t_10);
+      __pyx_t_10 = 0;
+      goto __pyx_L0;
+      goto __pyx_L6; /* unreachable: follows an unconditional goto */
+    }
+    __pyx_L6:;
+  }
+
+  /* "pyoperators/utils/cythonutils.pyx":42
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ *     if other == 1:             # <<<<<<<<<<<<<<
+ *         return 0, 0, 0, True, True
+ *     return 0, nzeros, nones, False, same == 1
+ */
+  __pyx_t_7 = (__pyx_v_other == 1); /* reaching here with other == 1 means the early exit never fired, i.e. same is still 1 */
+  if (__pyx_t_7) {
+
+    /* "pyoperators/utils/cythonutils.pyx":43
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ *         return 0, 0, 0, True, True             # <<<<<<<<<<<<<<
+ *     return 0, nzeros, nones, False, same == 1
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_10 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_10);
+    __pyx_t_9 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_9);
+    __pyx_t_1 = PyTuple_New(5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_t_10);
+    __Pyx_GIVEREF(__pyx_t_10);
+    PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_t_9);
+    __Pyx_GIVEREF(__pyx_t_9);
+    __pyx_t_10 = 0; /* ownership moved into the tuple */
+    __pyx_t_9 = 0;
+    __pyx_r = ((PyObject *)__pyx_t_1);
+    __pyx_t_1 = 0;
+    goto __pyx_L0;
+    goto __pyx_L7; /* unreachable: follows an unconditional goto */
+  }
+  __pyx_L7:;
+
+  /* "pyoperators/utils/cythonutils.pyx":44
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ *     return 0, nzeros, nones, False, same == 1             # <<<<<<<<<<<<<<
+ * 
+ * @cython.boundscheck(False)
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_nzeros); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* box the counters as Python ints */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_9 = PyInt_FromLong(__pyx_v_nones); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_9);
+  __pyx_t_10 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_10);
+  __pyx_t_11 = __Pyx_PyBool_FromLong((__pyx_v_same == 1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_11);
+  __pyx_t_12 = PyTuple_New(5); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_12);
+  __Pyx_INCREF(__pyx_int_0); /* slot 0 is the constant 0 in the uint64 variant */
+  PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_int_0);
+  __Pyx_GIVEREF(__pyx_int_0);
+  PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_12, 2, __pyx_t_9);
+  __Pyx_GIVEREF(__pyx_t_9);
+  PyTuple_SET_ITEM(__pyx_t_12, 3, __pyx_t_10);
+  __Pyx_GIVEREF(__pyx_t_10);
+  PyTuple_SET_ITEM(__pyx_t_12, 4, __pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_11);
+  __pyx_t_1 = 0; /* ownership moved into the tuple */
+  __pyx_t_9 = 0;
+  __pyx_t_10 = 0;
+  __pyx_t_11 = 0;
+  __pyx_r = ((PyObject *)__pyx_t_12);
+  __pyx_t_12 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable implicit "return None": preceded by an unconditional goto */
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: drop whatever temporaries are still held */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_XDECREF(__pyx_t_12);
+  { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; /* the pending exception is fetched before and restored after the buffer release */
+    __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+    __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+  __Pyx_AddTraceback("pyoperators.utils.cythonutils.inspect_special_values_uint64", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  goto __pyx_L2; /* skip the normal-path buffer release; it was already done above */
+  __pyx_L0:; /* normal exit: release the buffer view */
+  __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __pyx_L2:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper: METH_O entry point for inspect_special_values_int64 (see the PyMethodDef below); it type-tests the single argument and forwards it to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_5inspect_special_values_int64(PyObject *__pyx_self, PyObject *__pyx_v_v); /*proto*/
+static PyMethodDef __pyx_mdef_11pyoperators_5utils_11cythonutils_5inspect_special_values_int64 = {__Pyx_NAMESTR("inspect_special_values_int64"), (PyCFunction)__pyx_pw_11pyoperators_5utils_11cythonutils_5inspect_special_values_int64, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_5inspect_special_values_int64(PyObject *__pyx_self, PyObject *__pyx_v_v) {
+  PyObject *__pyx_r = 0; /* return value; stays NULL on the error path */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("inspect_special_values_int64 (wrapper)", 0);
+  __pyx_self = __pyx_self; /* self-assignment with no effect; presumably emitted to silence unused-parameter warnings */
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_v), __pyx_ptype_5numpy_ndarray, 1, "v", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* type test of "v" against numpy.ndarray failed: record the .pyx position (variables not declared in this function, presumably file-level) and bail out */
+  __pyx_r = __pyx_pf_11pyoperators_5utils_11cythonutils_4inspect_special_values_int64(__pyx_self, ((PyArrayObject *)__pyx_v_v)); /* delegate to the implementation; its result is returned unchanged */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __pyx_r = NULL; /* error exit: return NULL to the caller */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "pyoperators/utils/cythonutils.pyx":47
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_int64(np.ndarray[np.int64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_4inspect_special_values_int64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v) { /* Implementation of inspect_special_values_int64: scans the 1-D int64 buffer of v and returns a new 5-tuple (nmones, nzeros, nones, other-flag, same-flag) as in the .pyx code quoted below, or NULL with an exception set. */
+  int __pyx_v_nones; /* number of elements equal to 1 */
+  int __pyx_v_nzeros; /* number of elements equal to 0 */
+  int __pyx_v_nmones; /* number of elements equal to -1 */
+  unsigned int __pyx_v_n; /* v.size converted to unsigned int */
+  unsigned int __pyx_v_i; /* loop index */
+  __pyx_t_5numpy_int64_t __pyx_v_value; /* element currently being inspected */
+  __pyx_t_5numpy_int64_t __pyx_v_value0; /* first element, v[0] */
+  int __pyx_v_same; /* 1 while every element seen so far equals value0 */
+  int __pyx_v_other; /* becomes 1 once an element that is none of 0, 1, -1 is seen */
+  __Pyx_LocalBuf_ND __pyx_pybuffernd_v;
+  __Pyx_Buffer __pyx_pybuffer_v;
+  PyObject *__pyx_r = NULL; /* return value */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* __pyx_t_N are compiler temporaries; the PyObject* ones are decref'ed on the error path */
+  unsigned int __pyx_t_2;
+  long __pyx_t_3;
+  unsigned int __pyx_t_4;
+  unsigned int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  PyObject *__pyx_t_13 = NULL;
+  int __pyx_lineno = 0; /* .pyx line recorded when an error is raised */
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0; /* C line (__LINE__) recorded when an error is raised */
+  __Pyx_RefNannySetupContext("inspect_special_values_int64", 0);
+  __pyx_pybuffer_v.pybuffer.buf = NULL; /* buffer bookkeeping starts out empty before acquisition */
+  __pyx_pybuffer_v.refcount = 0;
+  __pyx_pybuffernd_v.data = NULL;
+  __pyx_pybuffernd_v.rcbuffer = &__pyx_pybuffer_v;
+  {
+    __Pyx_BufFmt_StackElem __pyx_stack[1];
+    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_v.rcbuffer->pybuffer, (PyObject*)__pyx_v_v, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* request a strided buffer with format info on v, validated against the int64 type info; -1 means failure */
+  }
+  __pyx_pybuffernd_v.diminfo[0].strides = __pyx_pybuffernd_v.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_v.diminfo[0].shape = __pyx_pybuffernd_v.rcbuffer->pybuffer.shape[0]; /* cache stride and extent of dimension 0 */
+
+  /* "pyoperators/utils/cythonutils.pyx":48
+ * @cython.boundscheck(False)
+ * def inspect_special_values_int64(np.ndarray[np.int64_t, ndim=1] v):
+ *     cdef int nones = 0             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ */
+  __pyx_v_nones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":49
+ * def inspect_special_values_int64(np.ndarray[np.int64_t, ndim=1] v):
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0             # <<<<<<<<<<<<<<
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_v_nzeros = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":50
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0             # <<<<<<<<<<<<<<
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ */
+  __pyx_v_nmones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":51
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size             # <<<<<<<<<<<<<<
+ *     cdef unsigned int i
+ *     cdef np.int64_t value, value0 = v[0]
+ */
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_v), __pyx_n_s__size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* v.size is obtained through a generic Python attribute lookup */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_AsUnsignedInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* convert to unsigned int; (unsigned int)-1 together with a pending exception signals a failed conversion */
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_n = __pyx_t_2;
+
+  /* "pyoperators/utils/cythonutils.pyx":53
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ *     cdef np.int64_t value, value0 = v[0]             # <<<<<<<<<<<<<<
+ *     cdef int same = 1
+ *     cdef int other = 0
+ */
+  __pyx_t_3 = 0;
+  if (__pyx_t_3 < 0) __pyx_t_3 += __pyx_pybuffernd_v.diminfo[0].shape; /* negative-index wraparound; never taken here because the index is the constant 0 */
+  __pyx_v_value0 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_3, __pyx_pybuffernd_v.diminfo[0].strides)); /* NOTE(review): v[0] is read without a bounds check and before n is examined - confirm callers never pass an empty array */
+
+  /* "pyoperators/utils/cythonutils.pyx":54
+ *     cdef unsigned int i
+ *     cdef np.int64_t value, value0 = v[0]
+ *     cdef int same = 1             # <<<<<<<<<<<<<<
+ *     cdef int other = 0
+ * 
+ */
+  __pyx_v_same = 1;
+
+  /* "pyoperators/utils/cythonutils.pyx":55
+ *     cdef np.int64_t value, value0 = v[0]
+ *     cdef int same = 1
+ *     cdef int other = 0             # <<<<<<<<<<<<<<
+ * 
+ *     for i in range(n):
+ */
+  __pyx_v_other = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":57
+ *     cdef int other = 0
+ * 
+ *     for i in range(n):             # <<<<<<<<<<<<<<
+ *         value = v[i]
+ *         if value == 0:
+ */
+  __pyx_t_2 = __pyx_v_n; /* loop bound is copied to a temporary once, before the loop starts */
+  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_2; __pyx_t_4+=1) {
+    __pyx_v_i = __pyx_t_4;
+
+    /* "pyoperators/utils/cythonutils.pyx":58
+ * 
+ *     for i in range(n):
+ *         value = v[i]             # <<<<<<<<<<<<<<
+ *         if value == 0:
+ *             nzeros += 1
+ */
+    __pyx_t_5 = __pyx_v_i;
+    __pyx_v_value = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_5, __pyx_pybuffernd_v.diminfo[0].strides)); /* strided element read with no bounds check emitted */
+
+    /* "pyoperators/utils/cythonutils.pyx":63
+ *         elif value == 1:
+ *             nones += 1
+ *         elif value == -1:             # <<<<<<<<<<<<<<
+ *             nmones += 1
+ *         else:
+ */
+    switch (__pyx_v_value) { /* the if/elif/else chain on value from the .pyx is emitted as a switch */
+
+      /* "pyoperators/utils/cythonutils.pyx":59
+ *     for i in range(n):
+ *         value = v[i]
+ *         if value == 0:             # <<<<<<<<<<<<<<
+ *             nzeros += 1
+ *         elif value == 1:
+ */
+      case 0:
+
+      /* "pyoperators/utils/cythonutils.pyx":60
+ *         value = v[i]
+ *         if value == 0:
+ *             nzeros += 1             # <<<<<<<<<<<<<<
+ *         elif value == 1:
+ *             nones += 1
+ */
+      __pyx_v_nzeros = (__pyx_v_nzeros + 1);
+      break;
+
+      /* "pyoperators/utils/cythonutils.pyx":61
+ *         if value == 0:
+ *             nzeros += 1
+ *         elif value == 1:             # <<<<<<<<<<<<<<
+ *             nones += 1
+ *         elif value == -1:
+ */
+      case 1:
+
+      /* "pyoperators/utils/cythonutils.pyx":62
+ *             nzeros += 1
+ *         elif value == 1:
+ *             nones += 1             # <<<<<<<<<<<<<<
+ *         elif value == -1:
+ *             nmones += 1
+ */
+      __pyx_v_nones = (__pyx_v_nones + 1);
+      break;
+
+      /* "pyoperators/utils/cythonutils.pyx":63
+ *         elif value == 1:
+ *             nones += 1
+ *         elif value == -1:             # <<<<<<<<<<<<<<
+ *             nmones += 1
+ *         else:
+ */
+      case -1:
+
+      /* "pyoperators/utils/cythonutils.pyx":64
+ *             nones += 1
+ *         elif value == -1:
+ *             nmones += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             other = 1
+ */
+      __pyx_v_nmones = (__pyx_v_nmones + 1);
+      break;
+      default:
+
+      /* "pyoperators/utils/cythonutils.pyx":66
+ *             nmones += 1
+ *         else:
+ *             other = 1             # <<<<<<<<<<<<<<
+ *         if same == 1 and value != value0:
+ *             same = 0
+ */
+      __pyx_v_other = 1;
+      break;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":67
+ *         else:
+ *             other = 1
+ *         if same == 1 and value != value0:             # <<<<<<<<<<<<<<
+ *             same = 0
+ *         if same == 0 and other == 1:
+ */
+    __pyx_t_6 = (__pyx_v_same == 1); /* short-circuit "and": the right operand is evaluated only when the left one is true */
+    if (__pyx_t_6) {
+      __pyx_t_7 = (__pyx_v_value != __pyx_v_value0);
+      __pyx_t_8 = __pyx_t_7;
+    } else {
+      __pyx_t_8 = __pyx_t_6;
+    }
+    if (__pyx_t_8) {
+
+      /* "pyoperators/utils/cythonutils.pyx":68
+ *             other = 1
+ *         if same == 1 and value != value0:
+ *             same = 0             # <<<<<<<<<<<<<<
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ */
+      __pyx_v_same = 0;
+      goto __pyx_L5; /* jump to the label immediately after this if; no statements are skipped */
+    }
+    __pyx_L5:;
+
+    /* "pyoperators/utils/cythonutils.pyx":69
+ *         if same == 1 and value != value0:
+ *             same = 0
+ *         if same == 0 and other == 1:             # <<<<<<<<<<<<<<
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ */
+    __pyx_t_8 = (__pyx_v_same == 0); /* short-circuit "and" again, reusing the int temporaries */
+    if (__pyx_t_8) {
+      __pyx_t_6 = (__pyx_v_other == 1);
+      __pyx_t_7 = __pyx_t_6;
+    } else {
+      __pyx_t_7 = __pyx_t_8;
+    }
+    if (__pyx_t_7) { /* early exit from the loop: elements differ and a value other than 0/1/-1 was seen */
+
+      /* "pyoperators/utils/cythonutils.pyx":70
+ *             same = 0
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False             # <<<<<<<<<<<<<<
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ */
+      __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point (only assigned right before a jump to __pyx_L0) */
+      __pyx_t_1 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __pyx_t_9 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_10 = PyTuple_New(5); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_INCREF(__pyx_int_0); /* one extra reference to the shared int 0 per tuple slot, since PyTuple_SET_ITEM takes over a reference (CPython C API) */
+      PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 2, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 3, __pyx_t_1);
+      __Pyx_GIVEREF(__pyx_t_1);
+      PyTuple_SET_ITEM(__pyx_t_10, 4, __pyx_t_9);
+      __Pyx_GIVEREF(__pyx_t_9);
+      __pyx_t_1 = 0; /* references now belong to the tuple; zero the temporaries so the error path cannot decref them */
+      __pyx_t_9 = 0;
+      __pyx_r = ((PyObject *)__pyx_t_10);
+      __pyx_t_10 = 0;
+      goto __pyx_L0;
+      goto __pyx_L6; /* unreachable: follows an unconditional goto */
+    }
+    __pyx_L6:;
+  }
+
+  /* "pyoperators/utils/cythonutils.pyx":71
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ *     if other == 1:             # <<<<<<<<<<<<<<
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1
+ */
+  __pyx_t_7 = (__pyx_v_other == 1); /* reaching here with other == 1 means the early exit never fired, i.e. same is still 1 */
+  if (__pyx_t_7) {
+
+    /* "pyoperators/utils/cythonutils.pyx":72
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ *         return 0, 0, 0, True, True             # <<<<<<<<<<<<<<
+ *     return nmones, nzeros, nones, False, same == 1
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_10 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_10);
+    __pyx_t_9 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_9);
+    __pyx_t_1 = PyTuple_New(5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_t_10);
+    __Pyx_GIVEREF(__pyx_t_10);
+    PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_t_9);
+    __Pyx_GIVEREF(__pyx_t_9);
+    __pyx_t_10 = 0; /* ownership moved into the tuple */
+    __pyx_t_9 = 0;
+    __pyx_r = ((PyObject *)__pyx_t_1);
+    __pyx_t_1 = 0;
+    goto __pyx_L0;
+    goto __pyx_L7; /* unreachable: follows an unconditional goto */
+  }
+  __pyx_L7:;
+
+  /* "pyoperators/utils/cythonutils.pyx":73
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1             # <<<<<<<<<<<<<<
+ * 
+ * @cython.boundscheck(False)
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_nmones); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* box the three counters as Python ints */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_9 = PyInt_FromLong(__pyx_v_nzeros); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_9);
+  __pyx_t_10 = PyInt_FromLong(__pyx_v_nones); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_10);
+  __pyx_t_11 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_11);
+  __pyx_t_12 = __Pyx_PyBool_FromLong((__pyx_v_same == 1)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_12);
+  __pyx_t_13 = PyTuple_New(5); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_13);
+  PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); /* slot 0 carries the -1 count in the int64 variant */
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_9);
+  __Pyx_GIVEREF(__pyx_t_9);
+  PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10);
+  __Pyx_GIVEREF(__pyx_t_10);
+  PyTuple_SET_ITEM(__pyx_t_13, 3, __pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_11);
+  PyTuple_SET_ITEM(__pyx_t_13, 4, __pyx_t_12);
+  __Pyx_GIVEREF(__pyx_t_12);
+  __pyx_t_1 = 0; /* ownership moved into the tuple */
+  __pyx_t_9 = 0;
+  __pyx_t_10 = 0;
+  __pyx_t_11 = 0;
+  __pyx_t_12 = 0;
+  __pyx_r = ((PyObject *)__pyx_t_13);
+  __pyx_t_13 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable implicit "return None": preceded by an unconditional goto */
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: drop whatever temporaries are still held */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_XDECREF(__pyx_t_12);
+  __Pyx_XDECREF(__pyx_t_13);
+  { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; /* the pending exception is fetched before and restored after the buffer release */
+    __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+    __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+  __Pyx_AddTraceback("pyoperators.utils.cythonutils.inspect_special_values_int64", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  goto __pyx_L2; /* skip the normal-path buffer release; it was already done above */
+  __pyx_L0:; /* normal exit: release the buffer view */
+  __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __pyx_L2:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper: METH_O entry point for inspect_special_values_float64 (see the PyMethodDef below); it type-tests the single argument and forwards it to the __pyx_pf_ implementation. */
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_7inspect_special_values_float64(PyObject *__pyx_self, PyObject *__pyx_v_v); /*proto*/
+static PyMethodDef __pyx_mdef_11pyoperators_5utils_11cythonutils_7inspect_special_values_float64 = {__Pyx_NAMESTR("inspect_special_values_float64"), (PyCFunction)__pyx_pw_11pyoperators_5utils_11cythonutils_7inspect_special_values_float64, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_7inspect_special_values_float64(PyObject *__pyx_self, PyObject *__pyx_v_v) {
+  PyObject *__pyx_r = 0; /* return value; stays NULL on the error path */
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("inspect_special_values_float64 (wrapper)", 0);
+  __pyx_self = __pyx_self; /* self-assignment with no effect; presumably emitted to silence unused-parameter warnings */
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_v), __pyx_ptype_5numpy_ndarray, 1, "v", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* type test of "v" against numpy.ndarray failed: record the .pyx position (variables not declared in this function, presumably file-level) and bail out */
+  __pyx_r = __pyx_pf_11pyoperators_5utils_11cythonutils_6inspect_special_values_float64(__pyx_self, ((PyArrayObject *)__pyx_v_v)); /* delegate to the implementation; its result is returned unchanged */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __pyx_r = NULL; /* error exit: return NULL to the caller */
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "pyoperators/utils/cythonutils.pyx":76
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_float64(np.ndarray[np.float64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+/* Implementation body. Returns a new 5-tuple (nmones, nzeros, nones, other, same): the counts of elements equal to -1, 0 and 1, whether some element was none of those, and whether every inspected element equalled v[0]. When other is set the three counts are reported as 0. Returns NULL with an exception set on failure. */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_6inspect_special_values_float64(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v) {
+  int __pyx_v_nones;
+  int __pyx_v_nzeros;
+  int __pyx_v_nmones;
+  unsigned int __pyx_v_n;
+  unsigned int __pyx_v_i;
+  __pyx_t_5numpy_float64_t __pyx_v_value;
+  __pyx_t_5numpy_float64_t __pyx_v_value0;
+  int __pyx_v_same;
+  int __pyx_v_other;
+  __Pyx_LocalBuf_ND __pyx_pybuffernd_v;
+  __Pyx_Buffer __pyx_pybuffer_v;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  unsigned int __pyx_t_2;
+  long __pyx_t_3;
+  unsigned int __pyx_t_4;
+  unsigned int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  PyObject *__pyx_t_13 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("inspect_special_values_float64", 0);
+  __pyx_pybuffer_v.pybuffer.buf = NULL;
+  __pyx_pybuffer_v.refcount = 0;
+  __pyx_pybuffernd_v.data = NULL;
+  __pyx_pybuffernd_v.rcbuffer = &__pyx_pybuffer_v;
+  { /* acquire a buffer view of v, validated against the float64 type info; -1 signals failure */
+    __Pyx_BufFmt_StackElem __pyx_stack[1];
+    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_v.rcbuffer->pybuffer, (PyObject*)__pyx_v_v, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float64_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
+  __pyx_pybuffernd_v.diminfo[0].strides = __pyx_pybuffernd_v.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_v.diminfo[0].shape = __pyx_pybuffernd_v.rcbuffer->pybuffer.shape[0];
+  /* strides[0] and shape[0] are cached above for the strided element accesses below */
+  /* "pyoperators/utils/cythonutils.pyx":77
+ * @cython.boundscheck(False)
+ * def inspect_special_values_float64(np.ndarray[np.float64_t, ndim=1] v):
+ *     cdef int nones = 0             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ */
+  __pyx_v_nones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":78
+ * def inspect_special_values_float64(np.ndarray[np.float64_t, ndim=1] v):
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0             # <<<<<<<<<<<<<<
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_v_nzeros = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":79
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0             # <<<<<<<<<<<<<<
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ */
+  __pyx_v_nmones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":80
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size             # <<<<<<<<<<<<<<
+ *     cdef unsigned int i
+ *     cdef np.float64_t value, value0 = v[0]
+ */
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_v), __pyx_n_s__size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_AsUnsignedInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 80; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_n = __pyx_t_2;
+  /* NOTE(review): v.size is narrowed to unsigned int here; what happens for sizes that do not fit depends on __Pyx_PyInt_AsUnsignedInt, which is not shown -- confirm. */
+  /* "pyoperators/utils/cythonutils.pyx":82
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ *     cdef np.float64_t value, value0 = v[0]             # <<<<<<<<<<<<<<
+ *     cdef int same = 1
+ *     cdef int other = 0
+ */
+  __pyx_t_3 = 0;
+  if (__pyx_t_3 < 0) __pyx_t_3 += __pyx_pybuffernd_v.diminfo[0].shape; /* never taken: the index is the constant 0 */
+  __pyx_v_value0 = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_3, __pyx_pybuffernd_v.diminfo[0].strides));
+  /* NOTE(review): element 0 is read above before n is examined and without any bounds check, so a zero-length v is dereferenced as well -- confirm callers never pass an empty array, or guard this in the .pyx source. */
+  /* "pyoperators/utils/cythonutils.pyx":83
+ *     cdef unsigned int i
+ *     cdef np.float64_t value, value0 = v[0]
+ *     cdef int same = 1             # <<<<<<<<<<<<<<
+ *     cdef int other = 0
+ * 
+ */
+  __pyx_v_same = 1;
+
+  /* "pyoperators/utils/cythonutils.pyx":84
+ *     cdef np.float64_t value, value0 = v[0]
+ *     cdef int same = 1
+ *     cdef int other = 0             # <<<<<<<<<<<<<<
+ * 
+ *     for i in range(n):
+ */
+  __pyx_v_other = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":86
+ *     cdef int other = 0
+ * 
+ *     for i in range(n):             # <<<<<<<<<<<<<<
+ *         value = v[i]
+ *         if value == 0:
+ */
+  __pyx_t_2 = __pyx_v_n;
+  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_2; __pyx_t_4+=1) {
+    __pyx_v_i = __pyx_t_4;
+
+    /* "pyoperators/utils/cythonutils.pyx":87
+ * 
+ *     for i in range(n):
+ *         value = v[i]             # <<<<<<<<<<<<<<
+ *         if value == 0:
+ *             nzeros += 1
+ */
+    __pyx_t_5 = __pyx_v_i;
+    __pyx_v_value = (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_float64_t *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_5, __pyx_pybuffernd_v.diminfo[0].strides));
+    /* classify the element: exactly one of nzeros / nones / nmones is incremented, or other is set */
+    /* "pyoperators/utils/cythonutils.pyx":88
+ *     for i in range(n):
+ *         value = v[i]
+ *         if value == 0:             # <<<<<<<<<<<<<<
+ *             nzeros += 1
+ *         elif value == 1:
+ */
+    __pyx_t_6 = (__pyx_v_value == 0.0);
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":89
+ *         value = v[i]
+ *         if value == 0:
+ *             nzeros += 1             # <<<<<<<<<<<<<<
+ *         elif value == 1:
+ *             nones += 1
+ */
+      __pyx_v_nzeros = (__pyx_v_nzeros + 1);
+      goto __pyx_L5;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":90
+ *         if value == 0:
+ *             nzeros += 1
+ *         elif value == 1:             # <<<<<<<<<<<<<<
+ *             nones += 1
+ *         elif value == -1:
+ */
+    __pyx_t_6 = (__pyx_v_value == 1.0);
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":91
+ *             nzeros += 1
+ *         elif value == 1:
+ *             nones += 1             # <<<<<<<<<<<<<<
+ *         elif value == -1:
+ *             nmones += 1
+ */
+      __pyx_v_nones = (__pyx_v_nones + 1);
+      goto __pyx_L5;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":92
+ *         elif value == 1:
+ *             nones += 1
+ *         elif value == -1:             # <<<<<<<<<<<<<<
+ *             nmones += 1
+ *         else:
+ */
+    __pyx_t_6 = (__pyx_v_value == -1.0);
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":93
+ *             nones += 1
+ *         elif value == -1:
+ *             nmones += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             other = 1
+ */
+      __pyx_v_nmones = (__pyx_v_nmones + 1);
+      goto __pyx_L5;
+    }
+    /*else*/ {
+
+      /* "pyoperators/utils/cythonutils.pyx":95
+ *             nmones += 1
+ *         else:
+ *             other = 1             # <<<<<<<<<<<<<<
+ *         if same == 1 and value != value0:
+ *             same = 0
+ */
+      __pyx_v_other = 1;
+    }
+    __pyx_L5:;
+
+    /* "pyoperators/utils/cythonutils.pyx":96
+ *         else:
+ *             other = 1
+ *         if same == 1 and value != value0:             # <<<<<<<<<<<<<<
+ *             same = 0
+ *         if same == 0 and other == 1:
+ */
+    __pyx_t_6 = (__pyx_v_same == 1);
+    if (__pyx_t_6) {
+      __pyx_t_7 = (__pyx_v_value != __pyx_v_value0);
+      __pyx_t_8 = __pyx_t_7;
+    } else {
+      __pyx_t_8 = __pyx_t_6;
+    }
+    if (__pyx_t_8) {
+
+      /* "pyoperators/utils/cythonutils.pyx":97
+ *             other = 1
+ *         if same == 1 and value != value0:
+ *             same = 0             # <<<<<<<<<<<<<<
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ */
+      __pyx_v_same = 0;
+      goto __pyx_L6;
+    }
+    __pyx_L6:;
+
+    /* "pyoperators/utils/cythonutils.pyx":98
+ *         if same == 1 and value != value0:
+ *             same = 0
+ *         if same == 0 and other == 1:             # <<<<<<<<<<<<<<
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ */
+    __pyx_t_8 = (__pyx_v_same == 0);
+    if (__pyx_t_8) {
+      __pyx_t_6 = (__pyx_v_other == 1);
+      __pyx_t_7 = __pyx_t_6;
+    } else {
+      __pyx_t_7 = __pyx_t_8;
+    }
+    if (__pyx_t_7) {
+
+      /* "pyoperators/utils/cythonutils.pyx":99
+ *             same = 0
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False             # <<<<<<<<<<<<<<
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ */
+      __Pyx_XDECREF(__pyx_r);
+      __pyx_t_1 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __pyx_t_9 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_10 = PyTuple_New(5); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 2, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 3, __pyx_t_1);
+      __Pyx_GIVEREF(__pyx_t_1);
+      PyTuple_SET_ITEM(__pyx_t_10, 4, __pyx_t_9);
+      __Pyx_GIVEREF(__pyx_t_9);
+      __pyx_t_1 = 0; /* the references now belong to the tuple (PyTuple_SET_ITEM steals them), so the temporaries are cleared */
+      __pyx_t_9 = 0;
+      __pyx_r = ((PyObject *)__pyx_t_10);
+      __pyx_t_10 = 0;
+      goto __pyx_L0; /* early return: abandons the loop and releases the buffer at __pyx_L0 */
+      goto __pyx_L7; /* unreachable: follows an unconditional goto */
+    }
+    __pyx_L7:;
+  }
+  /* The loop ended without the early return, so either no "other" value was seen or every element equalled value0. */
+  /* "pyoperators/utils/cythonutils.pyx":100
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ *     if other == 1:             # <<<<<<<<<<<<<<
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1
+ */
+  __pyx_t_7 = (__pyx_v_other == 1);
+  if (__pyx_t_7) {
+
+    /* "pyoperators/utils/cythonutils.pyx":101
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ *         return 0, 0, 0, True, True             # <<<<<<<<<<<<<<
+ *     return nmones, nzeros, nones, False, same == 1
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_10 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_10);
+    __pyx_t_9 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_9);
+    __pyx_t_1 = PyTuple_New(5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_t_10);
+    __Pyx_GIVEREF(__pyx_t_10);
+    PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_t_9);
+    __Pyx_GIVEREF(__pyx_t_9);
+    __pyx_t_10 = 0;
+    __pyx_t_9 = 0;
+    __pyx_r = ((PyObject *)__pyx_t_1);
+    __pyx_t_1 = 0;
+    goto __pyx_L0;
+    goto __pyx_L8; /* unreachable: follows an unconditional goto */
+  }
+  __pyx_L8:;
+
+  /* "pyoperators/utils/cythonutils.pyx":102
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1             # <<<<<<<<<<<<<<
+ * 
+ * @cython.boundscheck(False)
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_nmones); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_9 = PyInt_FromLong(__pyx_v_nzeros); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_9);
+  __pyx_t_10 = PyInt_FromLong(__pyx_v_nones); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_10);
+  __pyx_t_11 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_11);
+  __pyx_t_12 = __Pyx_PyBool_FromLong((__pyx_v_same == 1)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_12);
+  __pyx_t_13 = PyTuple_New(5); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_13);
+  PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_9);
+  __Pyx_GIVEREF(__pyx_t_9);
+  PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10);
+  __Pyx_GIVEREF(__pyx_t_10);
+  PyTuple_SET_ITEM(__pyx_t_13, 3, __pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_11);
+  PyTuple_SET_ITEM(__pyx_t_13, 4, __pyx_t_12);
+  __Pyx_GIVEREF(__pyx_t_12);
+  __pyx_t_1 = 0;
+  __pyx_t_9 = 0;
+  __pyx_t_10 = 0;
+  __pyx_t_11 = 0;
+  __pyx_t_12 = 0;
+  __pyx_r = ((PyObject *)__pyx_t_13);
+  __pyx_t_13 = 0;
+  goto __pyx_L0;
+  /* not reached: the return above always jumps to __pyx_L0 */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: drop whichever temporaries are still live */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_XDECREF(__pyx_t_12);
+  __Pyx_XDECREF(__pyx_t_13);
+  { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; /* release the buffer with the pending exception fetched before and restored after the call */
+    __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+    __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+  __Pyx_AddTraceback("pyoperators.utils.cythonutils.inspect_special_values_float64", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  goto __pyx_L2; /* the buffer was already released above, so skip the release at __pyx_L0 */
+  __pyx_L0:; /* normal exit: release the buffer view */
+  __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __pyx_L2:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper: METH_O entry point for inspect_special_values_complex128. Runs __Pyx_ArgTypeTest on the single argument v against numpy.ndarray, then forwards to the implementation function; returns NULL if the type test fails. */
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_9inspect_special_values_complex128(PyObject *__pyx_self, PyObject *__pyx_v_v); /*proto*/
+static PyMethodDef __pyx_mdef_11pyoperators_5utils_11cythonutils_9inspect_special_values_complex128 = {__Pyx_NAMESTR("inspect_special_values_complex128"), (PyCFunction)__pyx_pw_11pyoperators_5utils_11cythonutils_9inspect_special_values_complex128, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_11pyoperators_5utils_11cythonutils_9inspect_special_values_complex128(PyObject *__pyx_self, PyObject *__pyx_v_v) {
+  PyObject *__pyx_r = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("inspect_special_values_complex128 (wrapper)", 0);
+  __pyx_self = __pyx_self; /* self-assignment with no effect; presumably silences an unused-parameter warning */
+  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_v), __pyx_ptype_5numpy_ndarray, 1, "v", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_r = __pyx_pf_11pyoperators_5utils_11cythonutils_8inspect_special_values_complex128(__pyx_self, ((PyArrayObject *)__pyx_v_v));
+  goto __pyx_L0;
+  __pyx_L1_error:; /* type test failed: this path adds no traceback entry, it only returns NULL */
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "pyoperators/utils/cythonutils.pyx":105
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_complex128(np.ndarray[np.complex128_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+/* Implementation body. Returns a new 5-tuple (nmones, nzeros, nones, other, same): the counts of elements equal to -1, 0 and 1, whether some element was none of those, and whether every inspected element equalled v[0]. When other is set the three counts are reported as 0. Returns NULL with an exception set on failure. */
+static PyObject *__pyx_pf_11pyoperators_5utils_11cythonutils_8inspect_special_values_complex128(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_v) {
+  int __pyx_v_nones;
+  int __pyx_v_nzeros;
+  int __pyx_v_nmones;
+  unsigned int __pyx_v_n;
+  unsigned int __pyx_v_i;
+  __pyx_t_double_complex __pyx_v_value;
+  __pyx_t_double_complex __pyx_v_value0;
+  int __pyx_v_same;
+  int __pyx_v_other;
+  __Pyx_LocalBuf_ND __pyx_pybuffernd_v;
+  __Pyx_Buffer __pyx_pybuffer_v;
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  unsigned int __pyx_t_2;
+  long __pyx_t_3;
+  unsigned int __pyx_t_4;
+  unsigned int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  PyObject *__pyx_t_9 = NULL;
+  PyObject *__pyx_t_10 = NULL;
+  PyObject *__pyx_t_11 = NULL;
+  PyObject *__pyx_t_12 = NULL;
+  PyObject *__pyx_t_13 = NULL;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("inspect_special_values_complex128", 0);
+  __pyx_pybuffer_v.pybuffer.buf = NULL;
+  __pyx_pybuffer_v.refcount = 0;
+  __pyx_pybuffernd_v.data = NULL;
+  __pyx_pybuffernd_v.rcbuffer = &__pyx_pybuffer_v;
+  { /* acquire a buffer view of v, validated against the double-complex type info; -1 signals failure */
+    __Pyx_BufFmt_StackElem __pyx_stack[1];
+    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_v.rcbuffer->pybuffer, (PyObject*)__pyx_v_v, &__Pyx_TypeInfo___pyx_t_double_complex, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  }
+  __pyx_pybuffernd_v.diminfo[0].strides = __pyx_pybuffernd_v.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_v.diminfo[0].shape = __pyx_pybuffernd_v.rcbuffer->pybuffer.shape[0];
+  /* strides[0] and shape[0] are cached above for the strided element accesses below */
+  /* "pyoperators/utils/cythonutils.pyx":106
+ * @cython.boundscheck(False)
+ * def inspect_special_values_complex128(np.ndarray[np.complex128_t, ndim=1] v):
+ *     cdef int nones = 0             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ */
+  __pyx_v_nones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":107
+ * def inspect_special_values_complex128(np.ndarray[np.complex128_t, ndim=1] v):
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0             # <<<<<<<<<<<<<<
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_v_nzeros = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":108
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0             # <<<<<<<<<<<<<<
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ */
+  __pyx_v_nmones = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":109
+ *     cdef int nzeros = 0
+ *     cdef int nmones = 0
+ *     cdef unsigned int n = v.size             # <<<<<<<<<<<<<<
+ *     cdef unsigned int i
+ *     cdef np.complex128_t value, value0 = v[0]
+ */
+  __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_v), __pyx_n_s__size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyInt_AsUnsignedInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_n = __pyx_t_2;
+  /* NOTE(review): v.size is narrowed to unsigned int here; what happens for sizes that do not fit depends on __Pyx_PyInt_AsUnsignedInt, which is not shown -- confirm. */
+  /* "pyoperators/utils/cythonutils.pyx":111
+ *     cdef unsigned int n = v.size
+ *     cdef unsigned int i
+ *     cdef np.complex128_t value, value0 = v[0]             # <<<<<<<<<<<<<<
+ *     cdef int same = 1
+ *     cdef int other = 0
+ */
+  __pyx_t_3 = 0;
+  if (__pyx_t_3 < 0) __pyx_t_3 += __pyx_pybuffernd_v.diminfo[0].shape; /* never taken: the index is the constant 0 */
+  __pyx_v_value0 = (*__Pyx_BufPtrStrided1d(__pyx_t_double_complex *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_3, __pyx_pybuffernd_v.diminfo[0].strides));
+  /* NOTE(review): element 0 is read above before n is examined and without any bounds check, so a zero-length v is dereferenced as well -- confirm callers never pass an empty array, or guard this in the .pyx source. */
+  /* "pyoperators/utils/cythonutils.pyx":112
+ *     cdef unsigned int i
+ *     cdef np.complex128_t value, value0 = v[0]
+ *     cdef int same = 1             # <<<<<<<<<<<<<<
+ *     cdef int other = 0
+ * 
+ */
+  __pyx_v_same = 1;
+
+  /* "pyoperators/utils/cythonutils.pyx":113
+ *     cdef np.complex128_t value, value0 = v[0]
+ *     cdef int same = 1
+ *     cdef int other = 0             # <<<<<<<<<<<<<<
+ * 
+ *     for i in range(n):
+ */
+  __pyx_v_other = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":115
+ *     cdef int other = 0
+ * 
+ *     for i in range(n):             # <<<<<<<<<<<<<<
+ *         value = v[i]
+ *         if value == 0:
+ */
+  __pyx_t_2 = __pyx_v_n;
+  for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_2; __pyx_t_4+=1) {
+    __pyx_v_i = __pyx_t_4;
+
+    /* "pyoperators/utils/cythonutils.pyx":116
+ * 
+ *     for i in range(n):
+ *         value = v[i]             # <<<<<<<<<<<<<<
+ *         if value == 0:
+ *             nzeros += 1
+ */
+    __pyx_t_5 = __pyx_v_i;
+    __pyx_v_value = (*__Pyx_BufPtrStrided1d(__pyx_t_double_complex *, __pyx_pybuffernd_v.rcbuffer->pybuffer.buf, __pyx_t_5, __pyx_pybuffernd_v.diminfo[0].strides));
+    /* classify the element via __Pyx_c_eq against constants built by from_parts(x, 0) (second argument presumably the imaginary part): one counter is incremented, or other is set */
+    /* "pyoperators/utils/cythonutils.pyx":117
+ *     for i in range(n):
+ *         value = v[i]
+ *         if value == 0:             # <<<<<<<<<<<<<<
+ *             nzeros += 1
+ *         elif value == 1:
+ */
+    __pyx_t_6 = (__Pyx_c_eq(__pyx_v_value, __pyx_t_double_complex_from_parts(0, 0)));
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":118
+ *         value = v[i]
+ *         if value == 0:
+ *             nzeros += 1             # <<<<<<<<<<<<<<
+ *         elif value == 1:
+ *             nones += 1
+ */
+      __pyx_v_nzeros = (__pyx_v_nzeros + 1);
+      goto __pyx_L5;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":119
+ *         if value == 0:
+ *             nzeros += 1
+ *         elif value == 1:             # <<<<<<<<<<<<<<
+ *             nones += 1
+ *         elif value == -1:
+ */
+    __pyx_t_6 = (__Pyx_c_eq(__pyx_v_value, __pyx_t_double_complex_from_parts(1, 0)));
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":120
+ *             nzeros += 1
+ *         elif value == 1:
+ *             nones += 1             # <<<<<<<<<<<<<<
+ *         elif value == -1:
+ *             nmones += 1
+ */
+      __pyx_v_nones = (__pyx_v_nones + 1);
+      goto __pyx_L5;
+    }
+
+    /* "pyoperators/utils/cythonutils.pyx":121
+ *         elif value == 1:
+ *             nones += 1
+ *         elif value == -1:             # <<<<<<<<<<<<<<
+ *             nmones += 1
+ *         else:
+ */
+    __pyx_t_6 = (__Pyx_c_eq(__pyx_v_value, __pyx_t_double_complex_from_parts(-1, 0)));
+    if (__pyx_t_6) {
+
+      /* "pyoperators/utils/cythonutils.pyx":122
+ *             nones += 1
+ *         elif value == -1:
+ *             nmones += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             other = 1
+ */
+      __pyx_v_nmones = (__pyx_v_nmones + 1);
+      goto __pyx_L5;
+    }
+    /*else*/ {
+
+      /* "pyoperators/utils/cythonutils.pyx":124
+ *             nmones += 1
+ *         else:
+ *             other = 1             # <<<<<<<<<<<<<<
+ *         if same == 1 and value != value0:
+ *             same = 0
+ */
+      __pyx_v_other = 1;
+    }
+    __pyx_L5:;
+
+    /* "pyoperators/utils/cythonutils.pyx":125
+ *         else:
+ *             other = 1
+ *         if same == 1 and value != value0:             # <<<<<<<<<<<<<<
+ *             same = 0
+ *         if same == 0 and other == 1:
+ */
+    __pyx_t_6 = (__pyx_v_same == 1);
+    if (__pyx_t_6) {
+      __pyx_t_7 = (!__Pyx_c_eq(__pyx_v_value, __pyx_v_value0));
+      __pyx_t_8 = __pyx_t_7;
+    } else {
+      __pyx_t_8 = __pyx_t_6;
+    }
+    if (__pyx_t_8) {
+
+      /* "pyoperators/utils/cythonutils.pyx":126
+ *             other = 1
+ *         if same == 1 and value != value0:
+ *             same = 0             # <<<<<<<<<<<<<<
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ */
+      __pyx_v_same = 0;
+      goto __pyx_L6;
+    }
+    __pyx_L6:;
+
+    /* "pyoperators/utils/cythonutils.pyx":127
+ *         if same == 1 and value != value0:
+ *             same = 0
+ *         if same == 0 and other == 1:             # <<<<<<<<<<<<<<
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ */
+    __pyx_t_8 = (__pyx_v_same == 0);
+    if (__pyx_t_8) {
+      __pyx_t_6 = (__pyx_v_other == 1);
+      __pyx_t_7 = __pyx_t_6;
+    } else {
+      __pyx_t_7 = __pyx_t_8;
+    }
+    if (__pyx_t_7) {
+
+      /* "pyoperators/utils/cythonutils.pyx":128
+ *             same = 0
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False             # <<<<<<<<<<<<<<
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ */
+      __Pyx_XDECREF(__pyx_r);
+      __pyx_t_1 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_1);
+      __pyx_t_9 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_9);
+      __pyx_t_10 = PyTuple_New(5); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_10);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      __Pyx_INCREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 2, __pyx_int_0);
+      __Pyx_GIVEREF(__pyx_int_0);
+      PyTuple_SET_ITEM(__pyx_t_10, 3, __pyx_t_1);
+      __Pyx_GIVEREF(__pyx_t_1);
+      PyTuple_SET_ITEM(__pyx_t_10, 4, __pyx_t_9);
+      __Pyx_GIVEREF(__pyx_t_9);
+      __pyx_t_1 = 0; /* the references now belong to the tuple (PyTuple_SET_ITEM steals them), so the temporaries are cleared */
+      __pyx_t_9 = 0;
+      __pyx_r = ((PyObject *)__pyx_t_10);
+      __pyx_t_10 = 0;
+      goto __pyx_L0; /* early return: abandons the loop and releases the buffer at __pyx_L0 */
+      goto __pyx_L7; /* unreachable: follows an unconditional goto */
+    }
+    __pyx_L7:;
+  }
+  /* The loop ended without the early return, so either no "other" value was seen or every element equalled value0. */
+  /* "pyoperators/utils/cythonutils.pyx":129
+ *         if same == 0 and other == 1:
+ *             return 0, 0, 0, True, False
+ *     if other == 1:             # <<<<<<<<<<<<<<
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1
+ */
+  __pyx_t_7 = (__pyx_v_other == 1);
+  if (__pyx_t_7) {
+
+    /* "pyoperators/utils/cythonutils.pyx":130
+ *             return 0, 0, 0, True, False
+ *     if other == 1:
+ *         return 0, 0, 0, True, True             # <<<<<<<<<<<<<<
+ *     return nmones, nzeros, nones, False, same == 1
+ * 
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_10 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_10);
+    __pyx_t_9 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_9);
+    __pyx_t_1 = PyTuple_New(5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_1);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    __Pyx_INCREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_int_0);
+    __Pyx_GIVEREF(__pyx_int_0);
+    PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_t_10);
+    __Pyx_GIVEREF(__pyx_t_10);
+    PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_t_9);
+    __Pyx_GIVEREF(__pyx_t_9);
+    __pyx_t_10 = 0;
+    __pyx_t_9 = 0;
+    __pyx_r = ((PyObject *)__pyx_t_1);
+    __pyx_t_1 = 0;
+    goto __pyx_L0;
+    goto __pyx_L8; /* unreachable: follows an unconditional goto */
+  }
+  __pyx_L8:;
+
+  /* "pyoperators/utils/cythonutils.pyx":131
+ *     if other == 1:
+ *         return 0, 0, 0, True, True
+ *     return nmones, nzeros, nones, False, same == 1             # <<<<<<<<<<<<<<
+ * 
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_nmones); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_9 = PyInt_FromLong(__pyx_v_nzeros); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_9);
+  __pyx_t_10 = PyInt_FromLong(__pyx_v_nones); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_10);
+  __pyx_t_11 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_11);
+  __pyx_t_12 = __Pyx_PyBool_FromLong((__pyx_v_same == 1)); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_12);
+  __pyx_t_13 = PyTuple_New(5); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_13);
+  PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_9);
+  __Pyx_GIVEREF(__pyx_t_9);
+  PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10);
+  __Pyx_GIVEREF(__pyx_t_10);
+  PyTuple_SET_ITEM(__pyx_t_13, 3, __pyx_t_11);
+  __Pyx_GIVEREF(__pyx_t_11);
+  PyTuple_SET_ITEM(__pyx_t_13, 4, __pyx_t_12);
+  __Pyx_GIVEREF(__pyx_t_12);
+  __pyx_t_1 = 0;
+  __pyx_t_9 = 0;
+  __pyx_t_10 = 0;
+  __pyx_t_11 = 0;
+  __pyx_t_12 = 0;
+  __pyx_r = ((PyObject *)__pyx_t_13);
+  __pyx_t_13 = 0;
+  goto __pyx_L0;
+  /* not reached: the return above always jumps to __pyx_L0 */
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:; /* error exit: drop whichever temporaries are still live */
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_9);
+  __Pyx_XDECREF(__pyx_t_10);
+  __Pyx_XDECREF(__pyx_t_11);
+  __Pyx_XDECREF(__pyx_t_12);
+  __Pyx_XDECREF(__pyx_t_13);
+  { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; /* release the buffer with the pending exception fetched before and restored after the call */
+    __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+    __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+  __Pyx_AddTraceback("pyoperators.utils.cythonutils.inspect_special_values_complex128", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  goto __pyx_L2; /* the buffer was already released above, so skip the release at __pyx_L0 */
+  __pyx_L0:; /* normal exit: release the buffer view */
+  __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_v.rcbuffer->pybuffer);
+  __pyx_L2:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper for numpy.ndarray.__getbuffer__: casts self to PyArrayObject* and passes info and flags straight through to the implementation function, returning its int result unchanged. No argument checks are performed here. */
+static int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); /* the casts on info and flags are to their own declared types, i.e. no-ops */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":193
+ *         # experimental exception made for __getbuffer__ and __releasebuffer__
+ *         # -- the details of this may change.
+ *         def __getbuffer__(ndarray self, Py_buffer* info, int flags):             # <<<<<<<<<<<<<<
+ *             # This implementation of getbuffer is geared towards Cython
+ *             # requirements, and does not yet fullfill the PEP.
+ */
+
+/* Implementation of numpy.ndarray.__getbuffer__.
+ *
+ * Fills *info (buf, ndim, strides, shape, suboffsets, itemsize, readonly,
+ * format, obj) from the array `self`.
+ *
+ * Parameters:
+ *   __pyx_v_self   array being exported.
+ *   __pyx_v_info   view to fill; when NULL the function returns 0 at once.
+ *   __pyx_v_flags  request flags; only PyBUF_C_CONTIGUOUS and
+ *                  PyBUF_F_CONTIGUOUS are examined here.
+ *
+ * Returns 0 on success, or -1 with a Python exception set: ValueError for a
+ * contiguity mismatch, a non-native byte order or an unknown dtype code,
+ * MemoryError when an allocation fails, or the error reported by
+ * _util_dtypestring.
+ *
+ * Ownership on success: info->obj is `self` (new reference) when the view
+ * refers to memory malloc'ed here (structured dtype, or
+ * sizeof(npy_intp) != sizeof(Py_ssize_t)); otherwise it is left NULL.
+ * On failure info->obj is NULL, so the caller is given no owner through which
+ * the view could be released; every block allocated by this call is therefore
+ * freed here before returning.
+ */
+static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+  int __pyx_v_copy_shape;
+  int __pyx_v_i;
+  int __pyx_v_ndim;
+  int __pyx_v_endian_detector;
+  int __pyx_v_little_endian;
+  int __pyx_v_t;
+  char *__pyx_v_f;
+  PyArray_Descr *__pyx_v_descr = 0;
+  int __pyx_v_offset;
+  int __pyx_v_hasfields;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_t_5;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  PyObject *__pyx_t_8 = NULL;
+  char *__pyx_t_9;
+  Py_ssize_t *__pyx_owned_strides = NULL; /* strides+shape block malloc'ed below, if any */
+  char *__pyx_owned_format = NULL; /* format buffer malloc'ed below, if any */
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__getbuffer__", 0);
+  if (__pyx_v_info != NULL) {
+    /* Placeholder owner: replaced further down, or dropped again at the exit labels. */
+    __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None);
+    __Pyx_GIVEREF(__pyx_v_info->obj);
+  }
+
+  /* "numpy.pxd":199
+ *             # of flags
+ * 
+ *             if info == NULL: return             # <<<<<<<<<<<<<<
+ * 
+ *             cdef int copy_shape, i, ndim
+ */
+  __pyx_t_1 = (__pyx_v_info == NULL);
+  if (__pyx_t_1) {
+    __pyx_r = 0;
+    goto __pyx_L0;
+    goto __pyx_L3; /* unreachable; kept so the label below stays referenced */
+  }
+  __pyx_L3:;
+
+  /* "numpy.pxd":202
+ * 
+ *             cdef int copy_shape, i, ndim
+ *             cdef int endian_detector = 1             # <<<<<<<<<<<<<<
+ *             cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+ * 
+ */
+  __pyx_v_endian_detector = 1;
+
+  /* "numpy.pxd":203
+ *             cdef int copy_shape, i, ndim
+ *             cdef int endian_detector = 1
+ *             cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)             # <<<<<<<<<<<<<<
+ * 
+ *             ndim = PyArray_NDIM(self)
+ */
+  __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0);
+
+  /* "numpy.pxd":205
+ *             cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+ * 
+ *             ndim = PyArray_NDIM(self)             # <<<<<<<<<<<<<<
+ * 
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ */
+  __pyx_v_ndim = PyArray_NDIM(__pyx_v_self);
+
+  /* "numpy.pxd":207
+ *             ndim = PyArray_NDIM(self)
+ * 
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):             # <<<<<<<<<<<<<<
+ *                 copy_shape = 1
+ *             else:
+ */
+  __pyx_t_1 = ((sizeof(npy_intp)) != (sizeof(Py_ssize_t)));
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":208
+ * 
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ *                 copy_shape = 1             # <<<<<<<<<<<<<<
+ *             else:
+ *                 copy_shape = 0
+ */
+    __pyx_v_copy_shape = 1;
+    goto __pyx_L4;
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":210
+ *                 copy_shape = 1
+ *             else:
+ *                 copy_shape = 0             # <<<<<<<<<<<<<<
+ * 
+ *             if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ */
+    __pyx_v_copy_shape = 0;
+  }
+  __pyx_L4:;
+
+  /* "numpy.pxd":212
+ *                 copy_shape = 0
+ * 
+ *             if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)             # <<<<<<<<<<<<<<
+ *                 and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not C contiguous")
+ */
+  __pyx_t_1 = ((__pyx_v_flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS);
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":213
+ * 
+ *             if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):             # <<<<<<<<<<<<<<
+ *                 raise ValueError(u"ndarray is not C contiguous")
+ * 
+ */
+    __pyx_t_2 = (!PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS));
+    __pyx_t_3 = __pyx_t_2;
+  } else {
+    __pyx_t_3 = __pyx_t_1;
+  }
+  if (__pyx_t_3) {
+
+    /* "numpy.pxd":214
+ *             if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not C contiguous")             # <<<<<<<<<<<<<<
+ * 
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ */
+    __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    {__pyx_filename = __pyx_f[1]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    goto __pyx_L5;
+  }
+  __pyx_L5:;
+
+  /* "numpy.pxd":216
+ *                 raise ValueError(u"ndarray is not C contiguous")
+ * 
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)             # <<<<<<<<<<<<<<
+ *                 and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not Fortran contiguous")
+ */
+  __pyx_t_3 = ((__pyx_v_flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS);
+  if (__pyx_t_3) {
+
+    /* "numpy.pxd":217
+ * 
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):             # <<<<<<<<<<<<<<
+ *                 raise ValueError(u"ndarray is not Fortran contiguous")
+ * 
+ */
+    __pyx_t_1 = (!PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS));
+    __pyx_t_2 = __pyx_t_1;
+  } else {
+    __pyx_t_2 = __pyx_t_3;
+  }
+  if (__pyx_t_2) {
+
+    /* "numpy.pxd":218
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not Fortran contiguous")             # <<<<<<<<<<<<<<
+ * 
+ *             info.buf = PyArray_DATA(self)
+ */
+    __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_4), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    goto __pyx_L6;
+  }
+  __pyx_L6:;
+
+  /* "numpy.pxd":220
+ *                 raise ValueError(u"ndarray is not Fortran contiguous")
+ * 
+ *             info.buf = PyArray_DATA(self)             # <<<<<<<<<<<<<<
+ *             info.ndim = ndim
+ *             if copy_shape:
+ */
+  __pyx_v_info->buf = PyArray_DATA(__pyx_v_self);
+
+  /* "numpy.pxd":221
+ * 
+ *             info.buf = PyArray_DATA(self)
+ *             info.ndim = ndim             # <<<<<<<<<<<<<<
+ *             if copy_shape:
+ *                 # Allocate new buffer for strides and shape info.
+ */
+  __pyx_v_info->ndim = __pyx_v_ndim;
+
+  /* "numpy.pxd":222
+ *             info.buf = PyArray_DATA(self)
+ *             info.ndim = ndim
+ *             if copy_shape:             # <<<<<<<<<<<<<<
+ *                 # Allocate new buffer for strides and shape info.
+ *                 # This is allocated as one block, strides first.
+ */
+  if (__pyx_v_copy_shape) {
+
+    /* "numpy.pxd":225
+ *                 # Allocate new buffer for strides and shape info.
+ *                 # This is allocated as one block, strides first.
+ *                 info.strides = <Py_ssize_t*>stdlib.malloc(sizeof(Py_ssize_t) * <size_t>ndim * 2)             # <<<<<<<<<<<<<<
+ *                 info.shape = info.strides + ndim
+ *                 for i in range(ndim):
+ */
+    __pyx_owned_strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2)));
+    /* malloc(0) may return NULL for a 0-d array without being out of memory,
+     * so only a NULL result for ndim > 0 is reported as MemoryError. */
+    if (unlikely(!__pyx_owned_strides) && (__pyx_v_ndim > 0)) {PyErr_NoMemory(); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+    __pyx_v_info->strides = __pyx_owned_strides;
+
+    /* "numpy.pxd":226
+ *                 # This is allocated as one block, strides first.
+ *                 info.strides = <Py_ssize_t*>stdlib.malloc(sizeof(Py_ssize_t) * <size_t>ndim * 2)
+ *                 info.shape = info.strides + ndim             # <<<<<<<<<<<<<<
+ *                 for i in range(ndim):
+ *                     info.strides[i] = PyArray_STRIDES(self)[i]
+ */
+    __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim);
+
+    /* "numpy.pxd":227
+ *                 info.strides = <Py_ssize_t*>stdlib.malloc(sizeof(Py_ssize_t) * <size_t>ndim * 2)
+ *                 info.shape = info.strides + ndim
+ *                 for i in range(ndim):             # <<<<<<<<<<<<<<
+ *                     info.strides[i] = PyArray_STRIDES(self)[i]
+ *                     info.shape[i] = PyArray_DIMS(self)[i]
+ */
+    __pyx_t_5 = __pyx_v_ndim;
+    for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
+      __pyx_v_i = __pyx_t_6;
+
+      /* "numpy.pxd":228
+ *                 info.shape = info.strides + ndim
+ *                 for i in range(ndim):
+ *                     info.strides[i] = PyArray_STRIDES(self)[i]             # <<<<<<<<<<<<<<
+ *                     info.shape[i] = PyArray_DIMS(self)[i]
+ *             else:
+ */
+      (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]);
+
+      /* "numpy.pxd":229
+ *                 for i in range(ndim):
+ *                     info.strides[i] = PyArray_STRIDES(self)[i]
+ *                     info.shape[i] = PyArray_DIMS(self)[i]             # <<<<<<<<<<<<<<
+ *             else:
+ *                 info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ */
+      (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]);
+    }
+    goto __pyx_L7;
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":231
+ *                     info.shape[i] = PyArray_DIMS(self)[i]
+ *             else:
+ *                 info.strides = <Py_ssize_t*>PyArray_STRIDES(self)             # <<<<<<<<<<<<<<
+ *                 info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL
+ */
+    __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self));
+
+    /* "numpy.pxd":232
+ *             else:
+ *                 info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ *                 info.shape = <Py_ssize_t*>PyArray_DIMS(self)             # <<<<<<<<<<<<<<
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ */
+    __pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(__pyx_v_self));
+  }
+  __pyx_L7:;
+
+  /* "numpy.pxd":233
+ *                 info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ *                 info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL             # <<<<<<<<<<<<<<
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ *             info.readonly = not PyArray_ISWRITEABLE(self)
+ */
+  __pyx_v_info->suboffsets = NULL;
+
+  /* "numpy.pxd":234
+ *                 info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)             # <<<<<<<<<<<<<<
+ *             info.readonly = not PyArray_ISWRITEABLE(self)
+ * 
+ */
+  __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self);
+
+  /* "numpy.pxd":235
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ *             info.readonly = not PyArray_ISWRITEABLE(self)             # <<<<<<<<<<<<<<
+ * 
+ *             cdef int t
+ */
+  __pyx_v_info->readonly = (!PyArray_ISWRITEABLE(__pyx_v_self));
+
+  /* "numpy.pxd":238
+ * 
+ *             cdef int t
+ *             cdef char* f = NULL             # <<<<<<<<<<<<<<
+ *             cdef dtype descr = self.descr
+ *             cdef list stack
+ */
+  __pyx_v_f = NULL;
+
+  /* "numpy.pxd":239
+ *             cdef int t
+ *             cdef char* f = NULL
+ *             cdef dtype descr = self.descr             # <<<<<<<<<<<<<<
+ *             cdef list stack
+ *             cdef int offset
+ */
+  __Pyx_INCREF(((PyObject *)__pyx_v_self->descr));
+  __pyx_v_descr = __pyx_v_self->descr;
+
+  /* "numpy.pxd":243
+ *             cdef int offset
+ * 
+ *             cdef bint hasfields = PyDataType_HASFIELDS(descr)             # <<<<<<<<<<<<<<
+ * 
+ *             if not hasfields and not copy_shape:
+ */
+  __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr);
+
+  /* "numpy.pxd":245
+ *             cdef bint hasfields = PyDataType_HASFIELDS(descr)
+ * 
+ *             if not hasfields and not copy_shape:             # <<<<<<<<<<<<<<
+ *                 # do not call releasebuffer
+ *                 info.obj = None
+ */
+  __pyx_t_2 = (!__pyx_v_hasfields);
+  if (__pyx_t_2) {
+    __pyx_t_3 = (!__pyx_v_copy_shape);
+    __pyx_t_1 = __pyx_t_3;
+  } else {
+    __pyx_t_1 = __pyx_t_2;
+  }
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":247
+ *             if not hasfields and not copy_shape:
+ *                 # do not call releasebuffer
+ *                 info.obj = None             # <<<<<<<<<<<<<<
+ *             else:
+ *                 # need to call releasebuffer
+ */
+    __Pyx_INCREF(Py_None);
+    __Pyx_GIVEREF(Py_None);
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj);
+    __pyx_v_info->obj = Py_None;
+    goto __pyx_L10;
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":250
+ *             else:
+ *                 # need to call releasebuffer
+ *                 info.obj = self             # <<<<<<<<<<<<<<
+ * 
+ *             if not hasfields:
+ */
+    __Pyx_INCREF(((PyObject *)__pyx_v_self));
+    __Pyx_GIVEREF(((PyObject *)__pyx_v_self));
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj);
+    __pyx_v_info->obj = ((PyObject *)__pyx_v_self);
+  }
+  __pyx_L10:;
+
+  /* "numpy.pxd":252
+ *                 info.obj = self
+ * 
+ *             if not hasfields:             # <<<<<<<<<<<<<<
+ *                 t = descr.type_num
+ *                 if ((descr.byteorder == '>' and little_endian) or
+ */
+  __pyx_t_1 = (!__pyx_v_hasfields);
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":253
+ * 
+ *             if not hasfields:
+ *                 t = descr.type_num             # <<<<<<<<<<<<<<
+ *                 if ((descr.byteorder == '>' and little_endian) or
+ *                     (descr.byteorder == '<' and not little_endian)):
+ */
+    __pyx_v_t = __pyx_v_descr->type_num;
+
+    /* "numpy.pxd":254
+ *             if not hasfields:
+ *                 t = descr.type_num
+ *                 if ((descr.byteorder == '>' and little_endian) or             # <<<<<<<<<<<<<<
+ *                     (descr.byteorder == '<' and not little_endian)):
+ *                     raise ValueError(u"Non-native byte order not supported")
+ */
+    __pyx_t_1 = (__pyx_v_descr->byteorder == '>');
+    if (__pyx_t_1) {
+      __pyx_t_2 = __pyx_v_little_endian;
+    } else {
+      __pyx_t_2 = __pyx_t_1;
+    }
+    if (!__pyx_t_2) {
+
+      /* "numpy.pxd":255
+ *                 t = descr.type_num
+ *                 if ((descr.byteorder == '>' and little_endian) or
+ *                     (descr.byteorder == '<' and not little_endian)):             # <<<<<<<<<<<<<<
+ *                     raise ValueError(u"Non-native byte order not supported")
+ *                 if   t == NPY_BYTE:        f = "b"
+ */
+      __pyx_t_1 = (__pyx_v_descr->byteorder == '<');
+      if (__pyx_t_1) {
+        __pyx_t_3 = (!__pyx_v_little_endian);
+        __pyx_t_7 = __pyx_t_3;
+      } else {
+        __pyx_t_7 = __pyx_t_1;
+      }
+      __pyx_t_1 = __pyx_t_7;
+    } else {
+      __pyx_t_1 = __pyx_t_2;
+    }
+    if (__pyx_t_1) {
+
+      /* "numpy.pxd":256
+ *                 if ((descr.byteorder == '>' and little_endian) or
+ *                     (descr.byteorder == '<' and not little_endian)):
+ *                     raise ValueError(u"Non-native byte order not supported")             # <<<<<<<<<<<<<<
+ *                 if   t == NPY_BYTE:        f = "b"
+ *                 elif t == NPY_UBYTE:       f = "B"
+ */
+      __pyx_t_4 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_6), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      {__pyx_filename = __pyx_f[1]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      goto __pyx_L12;
+    }
+    __pyx_L12:;
+
+    /* "numpy.pxd":257
+ *                     (descr.byteorder == '<' and not little_endian)):
+ *                     raise ValueError(u"Non-native byte order not supported")
+ *                 if   t == NPY_BYTE:        f = "b"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_UBYTE:       f = "B"
+ *                 elif t == NPY_SHORT:       f = "h"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_BYTE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__b;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":258
+ *                     raise ValueError(u"Non-native byte order not supported")
+ *                 if   t == NPY_BYTE:        f = "b"
+ *                 elif t == NPY_UBYTE:       f = "B"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_SHORT:       f = "h"
+ *                 elif t == NPY_USHORT:      f = "H"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_UBYTE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__B;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":259
+ *                 if   t == NPY_BYTE:        f = "b"
+ *                 elif t == NPY_UBYTE:       f = "B"
+ *                 elif t == NPY_SHORT:       f = "h"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_USHORT:      f = "H"
+ *                 elif t == NPY_INT:         f = "i"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_SHORT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__h;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":260
+ *                 elif t == NPY_UBYTE:       f = "B"
+ *                 elif t == NPY_SHORT:       f = "h"
+ *                 elif t == NPY_USHORT:      f = "H"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_INT:         f = "i"
+ *                 elif t == NPY_UINT:        f = "I"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_USHORT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__H;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":261
+ *                 elif t == NPY_SHORT:       f = "h"
+ *                 elif t == NPY_USHORT:      f = "H"
+ *                 elif t == NPY_INT:         f = "i"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_UINT:        f = "I"
+ *                 elif t == NPY_LONG:        f = "l"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_INT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__i;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":262
+ *                 elif t == NPY_USHORT:      f = "H"
+ *                 elif t == NPY_INT:         f = "i"
+ *                 elif t == NPY_UINT:        f = "I"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_LONG:        f = "l"
+ *                 elif t == NPY_ULONG:       f = "L"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_UINT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__I;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":263
+ *                 elif t == NPY_INT:         f = "i"
+ *                 elif t == NPY_UINT:        f = "I"
+ *                 elif t == NPY_LONG:        f = "l"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_ULONG:       f = "L"
+ *                 elif t == NPY_LONGLONG:    f = "q"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_LONG);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__l;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":264
+ *                 elif t == NPY_UINT:        f = "I"
+ *                 elif t == NPY_LONG:        f = "l"
+ *                 elif t == NPY_ULONG:       f = "L"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_LONGLONG:    f = "q"
+ *                 elif t == NPY_ULONGLONG:   f = "Q"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_ULONG);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__L;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":265
+ *                 elif t == NPY_LONG:        f = "l"
+ *                 elif t == NPY_ULONG:       f = "L"
+ *                 elif t == NPY_LONGLONG:    f = "q"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_ULONGLONG:   f = "Q"
+ *                 elif t == NPY_FLOAT:       f = "f"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_LONGLONG);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__q;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":266
+ *                 elif t == NPY_ULONG:       f = "L"
+ *                 elif t == NPY_LONGLONG:    f = "q"
+ *                 elif t == NPY_ULONGLONG:   f = "Q"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_FLOAT:       f = "f"
+ *                 elif t == NPY_DOUBLE:      f = "d"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_ULONGLONG);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__Q;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":267
+ *                 elif t == NPY_LONGLONG:    f = "q"
+ *                 elif t == NPY_ULONGLONG:   f = "Q"
+ *                 elif t == NPY_FLOAT:       f = "f"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_DOUBLE:      f = "d"
+ *                 elif t == NPY_LONGDOUBLE:  f = "g"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_FLOAT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__f;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":268
+ *                 elif t == NPY_ULONGLONG:   f = "Q"
+ *                 elif t == NPY_FLOAT:       f = "f"
+ *                 elif t == NPY_DOUBLE:      f = "d"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_LONGDOUBLE:  f = "g"
+ *                 elif t == NPY_CFLOAT:      f = "Zf"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_DOUBLE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__d;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":269
+ *                 elif t == NPY_FLOAT:       f = "f"
+ *                 elif t == NPY_DOUBLE:      f = "d"
+ *                 elif t == NPY_LONGDOUBLE:  f = "g"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_CFLOAT:      f = "Zf"
+ *                 elif t == NPY_CDOUBLE:     f = "Zd"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_LONGDOUBLE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__g;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":270
+ *                 elif t == NPY_DOUBLE:      f = "d"
+ *                 elif t == NPY_LONGDOUBLE:  f = "g"
+ *                 elif t == NPY_CFLOAT:      f = "Zf"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_CDOUBLE:     f = "Zd"
+ *                 elif t == NPY_CLONGDOUBLE: f = "Zg"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_CFLOAT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__Zf;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":271
+ *                 elif t == NPY_LONGDOUBLE:  f = "g"
+ *                 elif t == NPY_CFLOAT:      f = "Zf"
+ *                 elif t == NPY_CDOUBLE:     f = "Zd"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_CLONGDOUBLE: f = "Zg"
+ *                 elif t == NPY_OBJECT:      f = "O"
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_CDOUBLE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__Zd;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":272
+ *                 elif t == NPY_CFLOAT:      f = "Zf"
+ *                 elif t == NPY_CDOUBLE:     f = "Zd"
+ *                 elif t == NPY_CLONGDOUBLE: f = "Zg"             # <<<<<<<<<<<<<<
+ *                 elif t == NPY_OBJECT:      f = "O"
+ *                 else:
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_CLONGDOUBLE);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__Zg;
+      goto __pyx_L13;
+    }
+
+    /* "numpy.pxd":273
+ *                 elif t == NPY_CDOUBLE:     f = "Zd"
+ *                 elif t == NPY_CLONGDOUBLE: f = "Zg"
+ *                 elif t == NPY_OBJECT:      f = "O"             # <<<<<<<<<<<<<<
+ *                 else:
+ *                     raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ */
+    __pyx_t_1 = (__pyx_v_t == NPY_OBJECT);
+    if (__pyx_t_1) {
+      __pyx_v_f = __pyx_k__O;
+      goto __pyx_L13;
+    }
+    /*else*/ {
+
+      /* "numpy.pxd":275
+ *                 elif t == NPY_OBJECT:      f = "O"
+ *                 else:
+ *                     raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)             # <<<<<<<<<<<<<<
+ *                 info.format = f
+ *                 return
+ */
+      __pyx_t_4 = PyInt_FromLong(__pyx_v_t); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      __pyx_t_8 = PyNumber_Remainder(((PyObject *)__pyx_kp_u_7), __pyx_t_4); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+      __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_4);
+      PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_8));
+      __Pyx_GIVEREF(((PyObject *)__pyx_t_8));
+      __pyx_t_8 = 0;
+      __pyx_t_8 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_8);
+      __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
+      __Pyx_Raise(__pyx_t_8, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+      {__pyx_filename = __pyx_f[1]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+    __pyx_L13:;
+
+    /* "numpy.pxd":276
+ *                 else:
+ *                     raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ *                 info.format = f             # <<<<<<<<<<<<<<
+ *                 return
+ *             else:
+ */
+    __pyx_v_info->format = __pyx_v_f;
+
+    /* "numpy.pxd":277
+ *                     raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ *                 info.format = f
+ *                 return             # <<<<<<<<<<<<<<
+ *             else:
+ *                 info.format = <char*>stdlib.malloc(_buffer_format_string_len)
+ */
+    __pyx_r = 0;
+    goto __pyx_L0;
+    goto __pyx_L11;
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":279
+ *                 return
+ *             else:
+ *                 info.format = <char*>stdlib.malloc(_buffer_format_string_len)             # <<<<<<<<<<<<<<
+ *                 info.format[0] = '^' # Native data types, manual alignment
+ *                 offset = 0
+ */
+    __pyx_owned_format = ((char *)malloc(255));
+    /* The buffer is written to on the very next statement, so a failed
+     * allocation has to be reported before it is dereferenced. */
+    if (unlikely(!__pyx_owned_format)) {PyErr_NoMemory(); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;}}
+    __pyx_v_info->format = __pyx_owned_format;
+
+    /* "numpy.pxd":280
+ *             else:
+ *                 info.format = <char*>stdlib.malloc(_buffer_format_string_len)
+ *                 info.format[0] = '^' # Native data types, manual alignment             # <<<<<<<<<<<<<<
+ *                 offset = 0
+ *                 f = _util_dtypestring(descr, info.format + 1,
+ */
+    (__pyx_v_info->format[0]) = '^';
+
+    /* "numpy.pxd":281
+ *                 info.format = <char*>stdlib.malloc(_buffer_format_string_len)
+ *                 info.format[0] = '^' # Native data types, manual alignment
+ *                 offset = 0             # <<<<<<<<<<<<<<
+ *                 f = _util_dtypestring(descr, info.format + 1,
+ *                                       info.format + _buffer_format_string_len,
+ */
+    __pyx_v_offset = 0;
+
+    /* "numpy.pxd":284
+ *                 f = _util_dtypestring(descr, info.format + 1,
+ *                                       info.format + _buffer_format_string_len,
+ *                                       &offset)             # <<<<<<<<<<<<<<
+ *                 f[0] = 0 # Terminate format string
+ * 
+ */
+    __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 255), (&__pyx_v_offset)); if (unlikely(__pyx_t_9 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_v_f = __pyx_t_9;
+
+    /* "numpy.pxd":285
+ *                                       info.format + _buffer_format_string_len,
+ *                                       &offset)
+ *                 f[0] = 0 # Terminate format string             # <<<<<<<<<<<<<<
+ * 
+ *         def __releasebuffer__(ndarray self, Py_buffer* info):
+ */
+    (__pyx_v_f[0]) = 0;
+  }
+  __pyx_L11:;
+
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_8);
+  __Pyx_AddTraceback("numpy.ndarray.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  /* info->obj is cleared just below, which leaves the caller without an owner
+   * to release the view through; free what this call allocated so it does not
+   * leak.  Both pointers are only ever set after the info == NULL early
+   * return, so info is valid whenever one of them is non-NULL. */
+  if (__pyx_owned_format != NULL) {
+    free(__pyx_owned_format);
+    __pyx_v_info->format = NULL;
+  }
+  if (__pyx_owned_strides != NULL) {
+    free(__pyx_owned_strides);
+    __pyx_v_info->strides = NULL;
+    __pyx_v_info->shape = NULL;
+  }
+  if (__pyx_v_info != NULL && __pyx_v_info->obj != NULL) {
+    __Pyx_GOTREF(__pyx_v_info->obj);
+    __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = NULL;
+  }
+  goto __pyx_L2;
+  __pyx_L0:;
+  /* Success exit: a view whose owner is still None is handed back with obj == NULL. */
+  if (__pyx_v_info != NULL && __pyx_v_info->obj == Py_None) {
+    __Pyx_GOTREF(Py_None);
+    __Pyx_DECREF(Py_None); __pyx_v_info->obj = NULL;
+  }
+  __pyx_L2:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_descr);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper for numpy.ndarray.__releasebuffer__.
+ *
+ * Reinterprets `self` as a PyArrayObject and forwards it, together with the
+ * Py_buffer, to __pyx_pf_5numpy_7ndarray_2__releasebuffer__.  Returns nothing.
+ */
+static void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/
+static void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0);
+  /* info already has the callee's parameter type; only self needs a cast. */
+  __pyx_pf_5numpy_7ndarray_2__releasebuffer__((PyArrayObject *)__pyx_v_self, __pyx_v_info);
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "numpy.pxd":287-291 -- ndarray.__releasebuffer__(self, info)
+ *
+ *         def __releasebuffer__(ndarray self, Py_buffer* info):
+ *             if PyArray_HASFIELDS(self):
+ *                 stdlib.free(info.format)
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ *                 stdlib.free(info.strides)
+ *                 # info.shape was stored after info.strides in the same block
+ *
+ * Frees the heap blocks referenced by the Py_buffer:
+ *   - info->format, when PyArray_HASFIELDS(self) is true;
+ *   - info->strides, when npy_intp and Py_ssize_t differ in size (per the
+ *     source comment quoted above, info->shape lives in that same block and
+ *     is therefore not freed separately).
+ * Returns nothing and has no error path.
+ */
+
+static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
+  __Pyx_RefNannyDeclarations
+  int __pyx_has_fields;
+  __Pyx_RefNannySetupContext("__releasebuffer__", 0);
+
+  /* numpy.pxd:288-289 -- the format string is freed only for arrays with fields. */
+  __pyx_has_fields = PyArray_HASFIELDS(__pyx_v_self);
+  if (__pyx_has_fields) {
+    free(__pyx_v_info->format);
+  }
+
+  /* numpy.pxd:290-291 -- the size comparison is a compile-time constant. */
+  if ((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) {
+    free(__pyx_v_info->strides);
+  }
+
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "numpy.pxd":767
+ * ctypedef npy_cdouble     complex_t
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { /* Calls PyArray_MultiIterNew(1, a) and hands the resulting object to the caller; on a NULL result it adds a "numpy.PyArray_MultiIterNew1" traceback entry and returns 0 (NULL). */
+  PyObject *__pyx_r = NULL; /* return slot */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* temporary holding the call result until ownership moves to __pyx_r */
+  int __pyx_lineno = 0; /* .pxd line number reported in the traceback */
+  const char *__pyx_filename = NULL; /* file name reported in the traceback */
+  int __pyx_clineno = 0; /* line number within this generated C file reported in the traceback */
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
+
+  /* "numpy.pxd":768
+ * 
+ * cdef inline object PyArray_MultiIterNew1(a):
+ *     return PyArray_MultiIterNew(1, <void*>a)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL here; assumes the macro tolerates NULL like Py_XDECREF -- definition is outside this chunk */
+  __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 768; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* __LINE__ records this physical line, so the reported C line depends on the file layout */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1; /* move the result into the return slot */
+  __pyx_t_1 = 0; /* cleared so the temporary no longer refers to the returned object */
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1); /* the only jump to this label is taken when __pyx_t_1 is NULL */
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return value on the error path */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":770
+ *     return PyArray_MultiIterNew(1, <void*>a)
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { /* Calls PyArray_MultiIterNew(2, a, b) and hands the resulting object to the caller; on a NULL result it adds a "numpy.PyArray_MultiIterNew2" traceback entry and returns 0 (NULL). */
+  PyObject *__pyx_r = NULL; /* return slot */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* temporary holding the call result until ownership moves to __pyx_r */
+  int __pyx_lineno = 0; /* .pxd line number reported in the traceback */
+  const char *__pyx_filename = NULL; /* file name reported in the traceback */
+  int __pyx_clineno = 0; /* line number within this generated C file reported in the traceback */
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
+
+  /* "numpy.pxd":771
+ * 
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL here; assumes the macro tolerates NULL like Py_XDECREF -- definition is outside this chunk */
+  __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 771; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* __LINE__ records this physical line, so the reported C line depends on the file layout */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1; /* move the result into the return slot */
+  __pyx_t_1 = 0; /* cleared so the temporary no longer refers to the returned object */
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1); /* the only jump to this label is taken when __pyx_t_1 is NULL */
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return value on the error path */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":773
+ *     return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { /* Calls PyArray_MultiIterNew(3, a, b, c) and hands the resulting object to the caller; on a NULL result it adds a "numpy.PyArray_MultiIterNew3" traceback entry and returns 0 (NULL). */
+  PyObject *__pyx_r = NULL; /* return slot */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* temporary holding the call result until ownership moves to __pyx_r */
+  int __pyx_lineno = 0; /* .pxd line number reported in the traceback */
+  const char *__pyx_filename = NULL; /* file name reported in the traceback */
+  int __pyx_clineno = 0; /* line number within this generated C file reported in the traceback */
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
+
+  /* "numpy.pxd":774
+ * 
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL here; assumes the macro tolerates NULL like Py_XDECREF -- definition is outside this chunk */
+  __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* __LINE__ records this physical line, so the reported C line depends on the file layout */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1; /* move the result into the return slot */
+  __pyx_t_1 = 0; /* cleared so the temporary no longer refers to the returned object */
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1); /* the only jump to this label is taken when __pyx_t_1 is NULL */
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return value on the error path */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":776
+ *     return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { /* Calls PyArray_MultiIterNew(4, a, b, c, d) and hands the resulting object to the caller; on a NULL result it adds a "numpy.PyArray_MultiIterNew4" traceback entry and returns 0 (NULL). */
+  PyObject *__pyx_r = NULL; /* return slot */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* temporary holding the call result until ownership moves to __pyx_r */
+  int __pyx_lineno = 0; /* .pxd line number reported in the traceback */
+  const char *__pyx_filename = NULL; /* file name reported in the traceback */
+  int __pyx_clineno = 0; /* line number within this generated C file reported in the traceback */
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
+
+  /* "numpy.pxd":777
+ * 
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL here; assumes the macro tolerates NULL like Py_XDECREF -- definition is outside this chunk */
+  __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* __LINE__ records this physical line, so the reported C line depends on the file layout */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1; /* move the result into the return slot */
+  __pyx_t_1 = 0; /* cleared so the temporary no longer refers to the returned object */
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1); /* the only jump to this label is taken when __pyx_t_1 is NULL */
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return value on the error path */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":779
+ *     return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):             # <<<<<<<<<<<<<<
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { /* Calls PyArray_MultiIterNew(5, a, b, c, d, e) and hands the resulting object to the caller; on a NULL result it adds a "numpy.PyArray_MultiIterNew5" traceback entry and returns 0 (NULL). */
+  PyObject *__pyx_r = NULL; /* return slot */
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL; /* temporary holding the call result until ownership moves to __pyx_r */
+  int __pyx_lineno = 0; /* .pxd line number reported in the traceback */
+  const char *__pyx_filename = NULL; /* file name reported in the traceback */
+  int __pyx_clineno = 0; /* line number within this generated C file reported in the traceback */
+  __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
+
+  /* "numpy.pxd":780
+ * 
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
+ */
+  __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL here; assumes the macro tolerates NULL like Py_XDECREF -- definition is outside this chunk */
+  __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* __LINE__ records this physical line, so the reported C line depends on the file layout */
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1; /* move the result into the return slot */
+  __pyx_t_1 = 0; /* cleared so the temporary no longer refers to the returned object */
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* unreachable: follows an unconditional goto with no label in between */
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1); /* the only jump to this label is taken when __pyx_t_1 is NULL */
+  __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0; /* NULL return value on the error path */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":782
+ *     return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+ * 
+ * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:             # <<<<<<<<<<<<<<
+ *     # Recursive utility function used in __getbuffer__ to get format
+ *     # string. The new location in the format string is returned.
+ */
+
+static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx_v_descr, char *__pyx_v_f, char *__pyx_v_end, int *__pyx_v_offset) {
+  PyArray_Descr *__pyx_v_child = 0;
+  int __pyx_v_endian_detector;
+  int __pyx_v_little_endian;
+  PyObject *__pyx_v_fields = 0;
+  PyObject *__pyx_v_childname = NULL;
+  PyObject *__pyx_v_new_offset = NULL;
+  PyObject *__pyx_v_t = NULL;
+  char *__pyx_r;
+  __Pyx_RefNannyDeclarations
+  PyObject *__pyx_t_1 = NULL;
+  Py_ssize_t __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  PyObject *__pyx_t_5 = NULL;
+  int __pyx_t_6;
+  int __pyx_t_7;
+  int __pyx_t_8;
+  int __pyx_t_9;
+  long __pyx_t_10;
+  char *__pyx_t_11;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("_util_dtypestring", 0);
+
+  /* "numpy.pxd":789
+ *     cdef int delta_offset
+ *     cdef tuple i
+ *     cdef int endian_detector = 1             # <<<<<<<<<<<<<<
+ *     cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+ *     cdef tuple fields
+ */
+  __pyx_v_endian_detector = 1;
+
+  /* "numpy.pxd":790
+ *     cdef tuple i
+ *     cdef int endian_detector = 1
+ *     cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)             # <<<<<<<<<<<<<<
+ *     cdef tuple fields
+ * 
+ */
+  __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0);
+
+  /* "numpy.pxd":793
+ *     cdef tuple fields
+ * 
+ *     for childname in descr.names:             # <<<<<<<<<<<<<<
+ *         fields = descr.fields[childname]
+ *         child, new_offset = fields
+ */
+  if (unlikely(((PyObject *)__pyx_v_descr->names) == Py_None)) {
+    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 793; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
+  }
+  __pyx_t_1 = ((PyObject *)__pyx_v_descr->names); __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0;
+  for (;;) {
+    if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
+    __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++;
+    __Pyx_XDECREF(__pyx_v_childname);
+    __pyx_v_childname = __pyx_t_3;
+    __pyx_t_3 = 0;
+
+    /* "numpy.pxd":794
+ * 
+ *     for childname in descr.names:
+ *         fields = descr.fields[childname]             # <<<<<<<<<<<<<<
+ *         child, new_offset = fields
+ * 
+ */
+    __pyx_t_3 = PyObject_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (!__pyx_t_3) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected tuple, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_XDECREF(((PyObject *)__pyx_v_fields));
+    __pyx_v_fields = ((PyObject*)__pyx_t_3);
+    __pyx_t_3 = 0;
+
+    /* "numpy.pxd":795
+ *     for childname in descr.names:
+ *         fields = descr.fields[childname]
+ *         child, new_offset = fields             # <<<<<<<<<<<<<<
+ * 
+ *         if (end - f) - (new_offset - offset[0]) < 15:
+ */
+    if (likely(PyTuple_CheckExact(((PyObject *)__pyx_v_fields)))) {
+      PyObject* sequence = ((PyObject *)__pyx_v_fields);
+      if (unlikely(PyTuple_GET_SIZE(sequence) != 2)) {
+        if (PyTuple_GET_SIZE(sequence) > 2) __Pyx_RaiseTooManyValuesError(2);
+        else __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(sequence));
+        {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      }
+      __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); 
+      __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); 
+      __Pyx_INCREF(__pyx_t_3);
+      __Pyx_INCREF(__pyx_t_4);
+    } else {
+      __Pyx_UnpackTupleError(((PyObject *)__pyx_v_fields), 2);
+      {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+    if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_XDECREF(((PyObject *)__pyx_v_child));
+    __pyx_v_child = ((PyArray_Descr *)__pyx_t_3);
+    __pyx_t_3 = 0;
+    __Pyx_XDECREF(__pyx_v_new_offset);
+    __pyx_v_new_offset = __pyx_t_4;
+    __pyx_t_4 = 0;
+
+    /* "numpy.pxd":797
+ *         child, new_offset = fields
+ * 
+ *         if (end - f) - (new_offset - offset[0]) < 15:             # <<<<<<<<<<<<<<
+ *             raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
+ * 
+ */
+    __pyx_t_4 = PyInt_FromLong((__pyx_v_end - __pyx_v_f)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __pyx_t_3 = PyInt_FromLong((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    __pyx_t_5 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __pyx_t_3 = PyNumber_Subtract(__pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    __pyx_t_5 = PyObject_RichCompare(__pyx_t_3, __pyx_int_15, Py_LT); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_5);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+    if (__pyx_t_6) {
+
+      /* "numpy.pxd":798
+ * 
+ *         if (end - f) - (new_offset - offset[0]) < 15:
+ *             raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")             # <<<<<<<<<<<<<<
+ * 
+ *         if ((child.byteorder == '>' and little_endian) or
+ */
+      __pyx_t_5 = PyObject_Call(__pyx_builtin_RuntimeError, ((PyObject *)__pyx_k_tuple_9), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_Raise(__pyx_t_5, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      goto __pyx_L5;
+    }
+    __pyx_L5:;
+
+    /* "numpy.pxd":800
+ *             raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
+ * 
+ *         if ((child.byteorder == '>' and little_endian) or             # <<<<<<<<<<<<<<
+ *             (child.byteorder == '<' and not little_endian)):
+ *             raise ValueError(u"Non-native byte order not supported")
+ */
+    __pyx_t_6 = (__pyx_v_child->byteorder == '>');
+    if (__pyx_t_6) {
+      __pyx_t_7 = __pyx_v_little_endian;
+    } else {
+      __pyx_t_7 = __pyx_t_6;
+    }
+    if (!__pyx_t_7) {
+
+      /* "numpy.pxd":801
+ * 
+ *         if ((child.byteorder == '>' and little_endian) or
+ *             (child.byteorder == '<' and not little_endian)):             # <<<<<<<<<<<<<<
+ *             raise ValueError(u"Non-native byte order not supported")
+ *             # One could encode it in the format string and have Cython
+ */
+      __pyx_t_6 = (__pyx_v_child->byteorder == '<');
+      if (__pyx_t_6) {
+        __pyx_t_8 = (!__pyx_v_little_endian);
+        __pyx_t_9 = __pyx_t_8;
+      } else {
+        __pyx_t_9 = __pyx_t_6;
+      }
+      __pyx_t_6 = __pyx_t_9;
+    } else {
+      __pyx_t_6 = __pyx_t_7;
+    }
+    if (__pyx_t_6) {
+
+      /* "numpy.pxd":802
+ *         if ((child.byteorder == '>' and little_endian) or
+ *             (child.byteorder == '<' and not little_endian)):
+ *             raise ValueError(u"Non-native byte order not supported")             # <<<<<<<<<<<<<<
+ *             # One could encode it in the format string and have Cython
+ *             # complain instead, BUT: < and > in format strings also imply
+ */
+      __pyx_t_5 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_k_tuple_10), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_Raise(__pyx_t_5, 0, 0, 0);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      goto __pyx_L6;
+    }
+    __pyx_L6:;
+
+    /* "numpy.pxd":812
+ * 
+ *         # Output padding bytes
+ *         while offset[0] < new_offset:             # <<<<<<<<<<<<<<
+ *             f[0] = 120 # "x"; pad byte
+ *             f += 1
+ */
+    while (1) {
+      __pyx_t_5 = PyInt_FromLong((__pyx_v_offset[0])); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_t_5, __pyx_v_new_offset, Py_LT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (!__pyx_t_6) break;
+
+      /* "numpy.pxd":813
+ *         # Output padding bytes
+ *         while offset[0] < new_offset:
+ *             f[0] = 120 # "x"; pad byte             # <<<<<<<<<<<<<<
+ *             f += 1
+ *             offset[0] += 1
+ */
+      (__pyx_v_f[0]) = 120;
+
+      /* "numpy.pxd":814
+ *         while offset[0] < new_offset:
+ *             f[0] = 120 # "x"; pad byte
+ *             f += 1             # <<<<<<<<<<<<<<
+ *             offset[0] += 1
+ * 
+ */
+      __pyx_v_f = (__pyx_v_f + 1);
+
+      /* "numpy.pxd":815
+ *             f[0] = 120 # "x"; pad byte
+ *             f += 1
+ *             offset[0] += 1             # <<<<<<<<<<<<<<
+ * 
+ *         offset[0] += child.itemsize
+ */
+      __pyx_t_10 = 0;
+      (__pyx_v_offset[__pyx_t_10]) = ((__pyx_v_offset[__pyx_t_10]) + 1);
+    }
+
+    /* "numpy.pxd":817
+ *             offset[0] += 1
+ * 
+ *         offset[0] += child.itemsize             # <<<<<<<<<<<<<<
+ * 
+ *         if not PyDataType_HASFIELDS(child):
+ */
+    __pyx_t_10 = 0;
+    (__pyx_v_offset[__pyx_t_10]) = ((__pyx_v_offset[__pyx_t_10]) + __pyx_v_child->elsize);
+
+    /* "numpy.pxd":819
+ *         offset[0] += child.itemsize
+ * 
+ *         if not PyDataType_HASFIELDS(child):             # <<<<<<<<<<<<<<
+ *             t = child.type_num
+ *             if end - f < 5:
+ */
+    __pyx_t_6 = (!PyDataType_HASFIELDS(__pyx_v_child));
+    if (__pyx_t_6) {
+
+      /* "numpy.pxd":820
+ * 
+ *         if not PyDataType_HASFIELDS(child):
+ *             t = child.type_num             # <<<<<<<<<<<<<<
+ *             if end - f < 5:
+ *                 raise RuntimeError(u"Format string allocated too short.")
+ */
+      __pyx_t_3 = PyInt_FromLong(__pyx_v_child->type_num); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 820; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_XDECREF(__pyx_v_t);
+      __pyx_v_t = __pyx_t_3;
+      __pyx_t_3 = 0;
+
+      /* "numpy.pxd":821
+ *         if not PyDataType_HASFIELDS(child):
+ *             t = child.type_num
+ *             if end - f < 5:             # <<<<<<<<<<<<<<
+ *                 raise RuntimeError(u"Format string allocated too short.")
+ * 
+ */
+      __pyx_t_6 = ((__pyx_v_end - __pyx_v_f) < 5);
+      if (__pyx_t_6) {
+
+        /* "numpy.pxd":822
+ *             t = child.type_num
+ *             if end - f < 5:
+ *                 raise RuntimeError(u"Format string allocated too short.")             # <<<<<<<<<<<<<<
+ * 
+ *             # Until ticket #99 is fixed, use integers to avoid warnings
+ */
+        __pyx_t_3 = PyObject_Call(__pyx_builtin_RuntimeError, ((PyObject *)__pyx_k_tuple_12), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 822; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_3);
+        __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+        __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+        {__pyx_filename = __pyx_f[1]; __pyx_lineno = 822; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        goto __pyx_L10;
+      }
+      __pyx_L10:;
+
+      /* "numpy.pxd":825
+ * 
+ *             # Until ticket #99 is fixed, use integers to avoid warnings
+ *             if   t == NPY_BYTE:        f[0] =  98 #"b"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_UBYTE:       f[0] =  66 #"B"
+ *             elif t == NPY_SHORT:       f[0] = 104 #"h"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_BYTE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 825; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 825; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 825; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 98;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":826
+ *             # Until ticket #99 is fixed, use integers to avoid warnings
+ *             if   t == NPY_BYTE:        f[0] =  98 #"b"
+ *             elif t == NPY_UBYTE:       f[0] =  66 #"B"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_SHORT:       f[0] = 104 #"h"
+ *             elif t == NPY_USHORT:      f[0] =  72 #"H"
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_UBYTE); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 66;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":827
+ *             if   t == NPY_BYTE:        f[0] =  98 #"b"
+ *             elif t == NPY_UBYTE:       f[0] =  66 #"B"
+ *             elif t == NPY_SHORT:       f[0] = 104 #"h"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_USHORT:      f[0] =  72 #"H"
+ *             elif t == NPY_INT:         f[0] = 105 #"i"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_SHORT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 104;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":828
+ *             elif t == NPY_UBYTE:       f[0] =  66 #"B"
+ *             elif t == NPY_SHORT:       f[0] = 104 #"h"
+ *             elif t == NPY_USHORT:      f[0] =  72 #"H"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_INT:         f[0] = 105 #"i"
+ *             elif t == NPY_UINT:        f[0] =  73 #"I"
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_USHORT); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 72;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":829
+ *             elif t == NPY_SHORT:       f[0] = 104 #"h"
+ *             elif t == NPY_USHORT:      f[0] =  72 #"H"
+ *             elif t == NPY_INT:         f[0] = 105 #"i"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_UINT:        f[0] =  73 #"I"
+ *             elif t == NPY_LONG:        f[0] = 108 #"l"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_INT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 105;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":830
+ *             elif t == NPY_USHORT:      f[0] =  72 #"H"
+ *             elif t == NPY_INT:         f[0] = 105 #"i"
+ *             elif t == NPY_UINT:        f[0] =  73 #"I"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONG:        f[0] = 108 #"l"
+ *             elif t == NPY_ULONG:       f[0] = 76  #"L"
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_UINT); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 73;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":831
+ *             elif t == NPY_INT:         f[0] = 105 #"i"
+ *             elif t == NPY_UINT:        f[0] =  73 #"I"
+ *             elif t == NPY_LONG:        f[0] = 108 #"l"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_ULONG:       f[0] = 76  #"L"
+ *             elif t == NPY_LONGLONG:    f[0] = 113 #"q"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_LONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 108;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":832
+ *             elif t == NPY_UINT:        f[0] =  73 #"I"
+ *             elif t == NPY_LONG:        f[0] = 108 #"l"
+ *             elif t == NPY_ULONG:       f[0] = 76  #"L"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONGLONG:    f[0] = 113 #"q"
+ *             elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_ULONG); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 76;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":833
+ *             elif t == NPY_LONG:        f[0] = 108 #"l"
+ *             elif t == NPY_ULONG:       f[0] = 76  #"L"
+ *             elif t == NPY_LONGLONG:    f[0] = 113 #"q"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"
+ *             elif t == NPY_FLOAT:       f[0] = 102 #"f"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_LONGLONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 113;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":834
+ *             elif t == NPY_ULONG:       f[0] = 76  #"L"
+ *             elif t == NPY_LONGLONG:    f[0] = 113 #"q"
+ *             elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_FLOAT:       f[0] = 102 #"f"
+ *             elif t == NPY_DOUBLE:      f[0] = 100 #"d"
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_ULONGLONG); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 81;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":835
+ *             elif t == NPY_LONGLONG:    f[0] = 113 #"q"
+ *             elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"
+ *             elif t == NPY_FLOAT:       f[0] = 102 #"f"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_DOUBLE:      f[0] = 100 #"d"
+ *             elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_FLOAT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 102;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":836
+ *             elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"
+ *             elif t == NPY_FLOAT:       f[0] = 102 #"f"
+ *             elif t == NPY_DOUBLE:      f[0] = 100 #"d"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"
+ *             elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_DOUBLE); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 100;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":837
+ *             elif t == NPY_FLOAT:       f[0] = 102 #"f"
+ *             elif t == NPY_DOUBLE:      f[0] = 100 #"d"
+ *             elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf
+ *             elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 103;
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":838
+ *             elif t == NPY_DOUBLE:      f[0] = 100 #"d"
+ *             elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"
+ *             elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf             # <<<<<<<<<<<<<<
+ *             elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd
+ *             elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_CFLOAT); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 90;
+        (__pyx_v_f[1]) = 102;
+        __pyx_v_f = (__pyx_v_f + 1);
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":839
+ *             elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"
+ *             elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf
+ *             elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd             # <<<<<<<<<<<<<<
+ *             elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+ *             elif t == NPY_OBJECT:      f[0] = 79 #"O"
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_CDOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 90;
+        (__pyx_v_f[1]) = 100;
+        __pyx_v_f = (__pyx_v_f + 1);
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":840
+ *             elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf
+ *             elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd
+ *             elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg             # <<<<<<<<<<<<<<
+ *             elif t == NPY_OBJECT:      f[0] = 79 #"O"
+ *             else:
+ */
+      __pyx_t_5 = PyInt_FromLong(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_5, Py_EQ); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 90;
+        (__pyx_v_f[1]) = 103;
+        __pyx_v_f = (__pyx_v_f + 1);
+        goto __pyx_L11;
+      }
+
+      /* "numpy.pxd":841
+ *             elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd
+ *             elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+ *             elif t == NPY_OBJECT:      f[0] = 79 #"O"             # <<<<<<<<<<<<<<
+ *             else:
+ *                 raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ */
+      __pyx_t_3 = PyInt_FromLong(NPY_OBJECT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_3);
+      __pyx_t_5 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_GOTREF(__pyx_t_5);
+      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+      __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+      if (__pyx_t_6) {
+        (__pyx_v_f[0]) = 79;
+        goto __pyx_L11;
+      }
+      /*else*/ {
+
+        /* "numpy.pxd":843
+ *             elif t == NPY_OBJECT:      f[0] = 79 #"O"
+ *             else:
+ *                 raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)             # <<<<<<<<<<<<<<
+ *             f += 1
+ *         else:
+ */
+        __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_u_7), __pyx_v_t); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(((PyObject *)__pyx_t_5));
+        __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_3);
+        PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_5));
+        __Pyx_GIVEREF(((PyObject *)__pyx_t_5));
+        __pyx_t_5 = 0;
+        __pyx_t_5 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+        __Pyx_GOTREF(__pyx_t_5);
+        __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+        __Pyx_Raise(__pyx_t_5, 0, 0, 0);
+        __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+        {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      }
+      __pyx_L11:;
+
+      /* "numpy.pxd":844
+ *             else:
+ *                 raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+ *             f += 1             # <<<<<<<<<<<<<<
+ *         else:
+ *             # Cython ignores struct boundary information ("T{...}"),
+ */
+      __pyx_v_f = (__pyx_v_f + 1);
+      goto __pyx_L9;
+    }
+    /*else*/ {
+
+      /* "numpy.pxd":848
+ *             # Cython ignores struct boundary information ("T{...}"),
+ *             # so don't output it
+ *             f = _util_dtypestring(child, f, end, offset)             # <<<<<<<<<<<<<<
+ *     return f
+ * 
+ */
+      __pyx_t_11 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_11 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+      __pyx_v_f = __pyx_t_11;
+    }
+    __pyx_L9:;
+  }
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "numpy.pxd":849
+ *             # so don't output it
+ *             f = _util_dtypestring(child, f, end, offset)
+ *     return f             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+  __pyx_r = __pyx_v_f;
+  goto __pyx_L0;
+
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_AddTraceback("numpy._util_dtypestring", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __Pyx_XDECREF((PyObject *)__pyx_v_child);
+  __Pyx_XDECREF(__pyx_v_fields);
+  __Pyx_XDECREF(__pyx_v_childname);
+  __Pyx_XDECREF(__pyx_v_new_offset);
+  __Pyx_XDECREF(__pyx_v_t);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "numpy.pxd":964
+ * 
+ * 
+ * cdef inline void set_array_base(ndarray arr, object base):             # <<<<<<<<<<<<<<
+ *      cdef PyObject* baseptr
+ *      if base is None:
+ *
+ * Purpose: install `base` as the new arr->base of the array.
+ *   - __pyx_v_arr:  array whose base pointer is overwritten.
+ *   - __pyx_v_base: object to store; Py_None is translated into a NULL
+ *                   base pointer instead of a reference to None.
+ * Returns nothing and has no error exit. A new reference is taken on a
+ * non-None `base`, and the reference previously held in arr->base is
+ * dropped.
+ */
+
+static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
+  PyObject *__pyx_v_baseptr;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("set_array_base", 0);
+
+  /* "numpy.pxd":966
+ * cdef inline void set_array_base(ndarray arr, object base):
+ *      cdef PyObject* baseptr
+ *      if base is None:             # <<<<<<<<<<<<<<
+ *          baseptr = NULL
+ *      else:
+ *
+ * Identity comparison against the None singleton (pointer equality).
+ */
+  __pyx_t_1 = (__pyx_v_base == Py_None);
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":967
+ *      cdef PyObject* baseptr
+ *      if base is None:
+ *          baseptr = NULL             # <<<<<<<<<<<<<<
+ *      else:
+ *          Py_INCREF(base) # important to do this before decref below!
+ *
+ * None means "no base": store NULL and take no reference.
+ */
+    __pyx_v_baseptr = NULL;
+    goto __pyx_L3;
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":969
+ *          baseptr = NULL
+ *      else:
+ *          Py_INCREF(base) # important to do this before decref below!             # <<<<<<<<<<<<<<
+ *          baseptr = <PyObject*>base
+ *      Py_XDECREF(arr.base)
+ *
+ * The new reference is acquired before the old arr->base is released.
+ * NOTE(review): presumably this ordering protects the case where `base`
+ * is the object already stored in arr->base, so it cannot be freed
+ * between the two steps; the quoted source only says it is important.
+ */
+    Py_INCREF(__pyx_v_base);
+
+    /* "numpy.pxd":970
+ *      else:
+ *          Py_INCREF(base) # important to do this before decref below!
+ *          baseptr = <PyObject*>base             # <<<<<<<<<<<<<<
+ *      Py_XDECREF(arr.base)
+ *      arr.base = baseptr
+ */
+    __pyx_v_baseptr = ((PyObject *)__pyx_v_base);
+  }
+  __pyx_L3:;
+
+  /* "numpy.pxd":971
+ *          Py_INCREF(base) # important to do this before decref below!
+ *          baseptr = <PyObject*>base
+ *      Py_XDECREF(arr.base)             # <<<<<<<<<<<<<<
+ *      arr.base = baseptr
+ * 
+ * Both branches join here. The X variant of the decref is used because
+ * arr->base may be NULL (this function itself stores NULL for None).
+ */
+  Py_XDECREF(__pyx_v_arr->base);
+
+  /* "numpy.pxd":972
+ *          baseptr = <PyObject*>base
+ *      Py_XDECREF(arr.base)
+ *      arr.base = baseptr             # <<<<<<<<<<<<<<
+ * 
+ * cdef inline object get_array_base(ndarray arr):
+ *
+ * The array now owns the reference taken above (or holds NULL).
+ */
+  __pyx_v_arr->base = __pyx_v_baseptr;
+
+  __Pyx_RefNannyFinishContext();
+}
+
+/* "numpy.pxd":974
+ *      arr.base = baseptr
+ * 
+ * cdef inline object get_array_base(ndarray arr):             # <<<<<<<<<<<<<<
+ *     if arr.base is NULL:
+ *         return None
+ *
+ * Purpose: return the object stored in arr->base as a Python object.
+ *   - __pyx_v_arr: array whose base pointer is read (it is not modified).
+ * Returns a new reference: Py_None when arr->base is NULL, otherwise
+ * arr->base itself. There is no error exit, so the result is never NULL.
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  __Pyx_RefNannySetupContext("get_array_base", 0);
+
+  /* "numpy.pxd":975
+ * 
+ * cdef inline object get_array_base(ndarray arr):
+ *     if arr.base is NULL:             # <<<<<<<<<<<<<<
+ *         return None
+ *     else:
+ */
+  __pyx_t_1 = (__pyx_v_arr->base == NULL);
+  if (__pyx_t_1) {
+
+    /* "numpy.pxd":976
+ * cdef inline object get_array_base(ndarray arr):
+ *     if arr.base is NULL:
+ *         return None             # <<<<<<<<<<<<<<
+ *     else:
+ *         return <object>arr.base
+ *
+ * __pyx_r is still NULL at this point (see its initializer), so the
+ * X-decref below has nothing to release; it is part of the generic
+ * "assign return value" sequence emitted by the generator.
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_INCREF(Py_None);
+    __pyx_r = Py_None;
+    goto __pyx_L0;
+    goto __pyx_L3; /* unreachable: directly follows an unconditional goto */
+  }
+  /*else*/ {
+
+    /* "numpy.pxd":978
+ *         return None
+ *     else:
+ *         return <object>arr.base             # <<<<<<<<<<<<<<
+ *
+ * The caller receives its own reference; arr->base keeps the one it had.
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __Pyx_INCREF(((PyObject *)__pyx_v_arr->base));
+    __pyx_r = ((PyObject *)__pyx_v_arr->base);
+    goto __pyx_L0;
+  }
+  __pyx_L3:; /* only targeted by the unreachable goto above */
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None); /* dead fallback: both branches already jumped to __pyx_L0 */
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+static PyMethodDef __pyx_methods[] = { /* Method table referenced by __pyx_moduledef as m_methods.
+ * It lists no functions: the single entry is the all-zero terminator.
+ */
+  {0, 0, 0, 0}
+};
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef __pyx_moduledef = { /* Module definition for the "cythonutils" extension.
+ * Compiled only for Python 3 (see the enclosing #if). The method table
+ * is the sentinel-only __pyx_methods array, and the four optional hook
+ * slots at the end are all left NULL.
+ */
+    PyModuleDef_HEAD_INIT,
+    __Pyx_NAMESTR("cythonutils"),
+    0, /* m_doc: no module docstring is supplied here */
+    -1, /* m_size: per the CPython PyModuleDef docs, -1 means state is kept in globals and sub-interpreters are not supported */
+    __pyx_methods /* m_methods */,
+    NULL, /* m_reload */
+    NULL, /* m_traverse */
+    NULL, /* m_clear */
+    NULL /* m_free */
+};
+#endif
+
+static __Pyx_StringTabEntry __pyx_string_tab[] = { /* Table of the module's string constants.
+ * Each row pairs the address of a PyObject* slot (__pyx_kp_u_*,
+ * __pyx_kp_s_*, __pyx_n_s_*) with the C data __pyx_k_* it is built from
+ * and that data's sizeof, followed by four small flags. The table ends
+ * with an all-zero terminator row.
+ *
+ * Flag patterns visible in this table:
+ *   __pyx_kp_u_* rows use 0, 1, 0, 0
+ *   __pyx_kp_s_* rows use 0, 0, 1, 0
+ *   __pyx_n_s_*  rows use 0, 0, 1, 1
+ * NOTE(review): the __Pyx_StringTabEntry typedef is outside this chunk;
+ * by Cython convention the four trailing fields are presumably
+ * encoding, is_unicode, is_str and intern - confirm against the typedef
+ * earlier in the file.
+ */
+  {&__pyx_kp_u_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 1, 0, 0},
+  {&__pyx_kp_u_11, __pyx_k_11, sizeof(__pyx_k_11), 0, 1, 0, 0},
+  {&__pyx_n_s_15, __pyx_k_15, sizeof(__pyx_k_15), 0, 0, 1, 1},
+  {&__pyx_kp_s_16, __pyx_k_16, sizeof(__pyx_k_16), 0, 0, 1, 0},
+  {&__pyx_n_s_17, __pyx_k_17, sizeof(__pyx_k_17), 0, 0, 1, 1},
+  {&__pyx_n_s_20, __pyx_k_20, sizeof(__pyx_k_20), 0, 0, 1, 1},
+  {&__pyx_n_s_23, __pyx_k_23, sizeof(__pyx_k_23), 0, 0, 1, 1},
+  {&__pyx_n_s_26, __pyx_k_26, sizeof(__pyx_k_26), 0, 0, 1, 1},
+  {&__pyx_n_s_29, __pyx_k_29, sizeof(__pyx_k_29), 0, 0, 1, 1},
+  {&__pyx_kp_u_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 1, 0, 0},
+  {&__pyx_kp_u_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 1, 0, 0},
+  {&__pyx_kp_u_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 1, 0, 0},
+  {&__pyx_kp_u_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 1, 0, 0},
+  {&__pyx_n_s__RuntimeError, __pyx_k__RuntimeError, sizeof(__pyx_k__RuntimeError), 0, 0, 1, 1},
+  {&__pyx_n_s__ValueError, __pyx_k__ValueError, sizeof(__pyx_k__ValueError), 0, 0, 1, 1},
+  {&__pyx_n_s____all__, __pyx_k____all__, sizeof(__pyx_k____all__), 0, 0, 1, 1},
+  {&__pyx_n_s____main__, __pyx_k____main__, sizeof(__pyx_k____main__), 0, 0, 1, 1},
+  {&__pyx_n_s____test__, __pyx_k____test__, sizeof(__pyx_k____test__), 0, 0, 1, 1},
+  {&__pyx_n_s__i, __pyx_k__i, sizeof(__pyx_k__i), 0, 0, 1, 1},
+  {&__pyx_n_s__n, __pyx_k__n, sizeof(__pyx_k__n), 0, 0, 1, 1},
+  {&__pyx_n_s__nmones, __pyx_k__nmones, sizeof(__pyx_k__nmones), 0, 0, 1, 1},
+  {&__pyx_n_s__nones, __pyx_k__nones, sizeof(__pyx_k__nones), 0, 0, 1, 1},
+  {&__pyx_n_s__np, __pyx_k__np, sizeof(__pyx_k__np), 0, 0, 1, 1},
+  {&__pyx_n_s__numpy, __pyx_k__numpy, sizeof(__pyx_k__numpy), 0, 0, 1, 1},
+  {&__pyx_n_s__nzeros, __pyx_k__nzeros, sizeof(__pyx_k__nzeros), 0, 0, 1, 1},
+  {&__pyx_n_s__other, __pyx_k__other, sizeof(__pyx_k__other), 0, 0, 1, 1},
+  {&__pyx_n_s__range, __pyx_k__range, sizeof(__pyx_k__range), 0, 0, 1, 1},
+  {&__pyx_n_s__same, __pyx_k__same, sizeof(__pyx_k__same), 0, 0, 1, 1},
+  {&__pyx_n_s__size, __pyx_k__size, sizeof(__pyx_k__size), 0, 0, 1, 1},
+  {&__pyx_n_s__v, __pyx_k__v, sizeof(__pyx_k__v), 0, 0, 1, 1},
+  {&__pyx_n_s__value, __pyx_k__value, sizeof(__pyx_k__value), 0, 0, 1, 1},
+  {&__pyx_n_s__value0, __pyx_k__value0, sizeof(__pyx_k__value0), 0, 0, 1, 1},
+  {0, 0, 0, 0, 0, 0, 0}
+};
+static int __Pyx_InitCachedBuiltins(void) { /* Look up `range`, `ValueError` and `RuntimeError` once and cache them.
+ * Each name is fetched from __pyx_b with __Pyx_GetName and stored in the
+ * matching __pyx_builtin_* global (NOTE(review): __pyx_b is presumably
+ * the builtins module object; it is defined outside this chunk).
+ *
+ * Returns 0 when all three lookups succeed. On the first failed lookup
+ * it records a source position in __pyx_filename / __pyx_lineno /
+ * __pyx_clineno and returns -1; no traceback is added here.
+ * The recorded positions point at the Cython source rather than at this
+ * function, e.g. 214 in __pyx_f[1] matches the "numpy.pxd":214
+ * ValueError site used by __Pyx_InitCachedConstants.
+ */
+  __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_RuntimeError = __Pyx_GetName(__pyx_b, __pyx_n_s__RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  return 0;
+  __pyx_L1_error:; /* shared failure exit for all three lookups */
+  return -1;
+}
+
+static int __Pyx_InitCachedConstants(void) {
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
+
+  /* "numpy.pxd":214
+ *             if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not C contiguous")             # <<<<<<<<<<<<<<
+ * 
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ */
+  __pyx_k_tuple_2 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_2);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_1));
+  PyTuple_SET_ITEM(__pyx_k_tuple_2, 0, ((PyObject *)__pyx_kp_u_1));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_1));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_2));
+
+  /* "numpy.pxd":218
+ *             if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
+ *                 and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
+ *                 raise ValueError(u"ndarray is not Fortran contiguous")             # <<<<<<<<<<<<<<
+ * 
+ *             info.buf = PyArray_DATA(self)
+ */
+  __pyx_k_tuple_4 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_4);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_3));
+  PyTuple_SET_ITEM(__pyx_k_tuple_4, 0, ((PyObject *)__pyx_kp_u_3));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_3));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_4));
+
+  /* "numpy.pxd":256
+ *                 if ((descr.byteorder == '>' and little_endian) or
+ *                     (descr.byteorder == '<' and not little_endian)):
+ *                     raise ValueError(u"Non-native byte order not supported")             # <<<<<<<<<<<<<<
+ *                 if   t == NPY_BYTE:        f = "b"
+ *                 elif t == NPY_UBYTE:       f = "B"
+ */
+  __pyx_k_tuple_6 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_6);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_5));
+  PyTuple_SET_ITEM(__pyx_k_tuple_6, 0, ((PyObject *)__pyx_kp_u_5));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_5));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_6));
+
+  /* "numpy.pxd":798
+ * 
+ *         if (end - f) - (new_offset - offset[0]) < 15:
+ *             raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")             # <<<<<<<<<<<<<<
+ * 
+ *         if ((child.byteorder == '>' and little_endian) or
+ */
+  __pyx_k_tuple_9 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_9)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_9);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_8));
+  PyTuple_SET_ITEM(__pyx_k_tuple_9, 0, ((PyObject *)__pyx_kp_u_8));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_8));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_9));
+
+  /* "numpy.pxd":802
+ *         if ((child.byteorder == '>' and little_endian) or
+ *             (child.byteorder == '<' and not little_endian)):
+ *             raise ValueError(u"Non-native byte order not supported")             # <<<<<<<<<<<<<<
+ *             # One could encode it in the format string and have Cython
+ *             # complain instead, BUT: < and > in format strings also imply
+ */
+  __pyx_k_tuple_10 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_10)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_10);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_5));
+  PyTuple_SET_ITEM(__pyx_k_tuple_10, 0, ((PyObject *)__pyx_kp_u_5));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_5));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_10));
+
+  /* "numpy.pxd":822
+ *             t = child.type_num
+ *             if end - f < 5:
+ *                 raise RuntimeError(u"Format string allocated too short.")             # <<<<<<<<<<<<<<
+ * 
+ *             # Until ticket #99 is fixed, use integers to avoid warnings
+ */
+  __pyx_k_tuple_12 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_12)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 822; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_12);
+  __Pyx_INCREF(((PyObject *)__pyx_kp_u_11));
+  PyTuple_SET_ITEM(__pyx_k_tuple_12, 0, ((PyObject *)__pyx_kp_u_11));
+  __Pyx_GIVEREF(((PyObject *)__pyx_kp_u_11));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_12));
+
+  /* "pyoperators/utils/cythonutils.pyx":10
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_bool8(np.ndarray[np.uint8_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_k_tuple_13 = PyTuple_New(4); if (unlikely(!__pyx_k_tuple_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_13);
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__v));
+  PyTuple_SET_ITEM(__pyx_k_tuple_13, 0, ((PyObject *)__pyx_n_s__v));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__v));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nzeros));
+  PyTuple_SET_ITEM(__pyx_k_tuple_13, 1, ((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__n));
+  PyTuple_SET_ITEM(__pyx_k_tuple_13, 2, ((PyObject *)__pyx_n_s__n));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__n));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
+  PyTuple_SET_ITEM(__pyx_k_tuple_13, 3, ((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_13));
+  __pyx_k_codeobj_14 = (PyObject*)__Pyx_PyCode_New(1, 0, 4, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_16, __pyx_n_s_15, 10, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+  /* "pyoperators/utils/cythonutils.pyx":21
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_uint64(np.ndarray[np.uint64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_k_tuple_18 = PyTuple_New(9); if (unlikely(!__pyx_k_tuple_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_18);
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__v));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 0, ((PyObject *)__pyx_n_s__v));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__v));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 1, ((PyObject *)__pyx_n_s__nones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nzeros));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 2, ((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__n));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 3, ((PyObject *)__pyx_n_s__n));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__n));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 4, ((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 5, ((PyObject *)__pyx_n_s__value));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value0));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 6, ((PyObject *)__pyx_n_s__value0));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value0));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__same));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 7, ((PyObject *)__pyx_n_s__same));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__same));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__other));
+  PyTuple_SET_ITEM(__pyx_k_tuple_18, 8, ((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_18));
+  __pyx_k_codeobj_19 = (PyObject*)__Pyx_PyCode_New(1, 0, 9, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_16, __pyx_n_s_20, 21, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+  /* "pyoperators/utils/cythonutils.pyx":47
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_int64(np.ndarray[np.int64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_k_tuple_21 = PyTuple_New(10); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_21);
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__v));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 0, ((PyObject *)__pyx_n_s__v));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__v));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 1, ((PyObject *)__pyx_n_s__nones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nzeros));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 2, ((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nmones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 3, ((PyObject *)__pyx_n_s__nmones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nmones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__n));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 4, ((PyObject *)__pyx_n_s__n));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__n));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 5, ((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 6, ((PyObject *)__pyx_n_s__value));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value0));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 7, ((PyObject *)__pyx_n_s__value0));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value0));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__same));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 8, ((PyObject *)__pyx_n_s__same));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__same));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__other));
+  PyTuple_SET_ITEM(__pyx_k_tuple_21, 9, ((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_21));
+  __pyx_k_codeobj_22 = (PyObject*)__Pyx_PyCode_New(1, 0, 10, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_21, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_16, __pyx_n_s_23, 47, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_22)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+  /* "pyoperators/utils/cythonutils.pyx":76
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_float64(np.ndarray[np.float64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_k_tuple_24 = PyTuple_New(10); if (unlikely(!__pyx_k_tuple_24)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_24);
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__v));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 0, ((PyObject *)__pyx_n_s__v));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__v));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 1, ((PyObject *)__pyx_n_s__nones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nzeros));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 2, ((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nmones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 3, ((PyObject *)__pyx_n_s__nmones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nmones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__n));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 4, ((PyObject *)__pyx_n_s__n));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__n));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 5, ((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 6, ((PyObject *)__pyx_n_s__value));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value0));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 7, ((PyObject *)__pyx_n_s__value0));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value0));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__same));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 8, ((PyObject *)__pyx_n_s__same));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__same));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__other));
+  PyTuple_SET_ITEM(__pyx_k_tuple_24, 9, ((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_24));
+  __pyx_k_codeobj_25 = (PyObject*)__Pyx_PyCode_New(1, 0, 10, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_24, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_16, __pyx_n_s_26, 76, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+  /* "pyoperators/utils/cythonutils.pyx":105
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_complex128(np.ndarray[np.complex128_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_k_tuple_27 = PyTuple_New(10); if (unlikely(!__pyx_k_tuple_27)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_k_tuple_27);
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__v));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 0, ((PyObject *)__pyx_n_s__v));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__v));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 1, ((PyObject *)__pyx_n_s__nones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nzeros));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 2, ((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nzeros));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__nmones));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 3, ((PyObject *)__pyx_n_s__nmones));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__nmones));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__n));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 4, ((PyObject *)__pyx_n_s__n));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__n));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__i));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 5, ((PyObject *)__pyx_n_s__i));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__i));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 6, ((PyObject *)__pyx_n_s__value));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__value0));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 7, ((PyObject *)__pyx_n_s__value0));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__value0));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__same));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 8, ((PyObject *)__pyx_n_s__same));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__same));
+  __Pyx_INCREF(((PyObject *)__pyx_n_s__other));
+  PyTuple_SET_ITEM(__pyx_k_tuple_27, 9, ((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_n_s__other));
+  __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_27));
+  __pyx_k_codeobj_28 = (PyObject*)__Pyx_PyCode_New(1, 0, 10, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_27, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_16, __pyx_n_s_29, 105, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_28)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_RefNannyFinishContext();
+  return 0;
+  __pyx_L1_error:;
+  __Pyx_RefNannyFinishContext();
+  return -1;
+}
+
+/* Initialize module-level globals: the entries of __pyx_string_tab (via
+ * __Pyx_InitStrings) and the cached Python integer objects for 0 and 15.
+ * Returns 0 on success.  On the first failure the error position is stored in
+ * __pyx_filename / __pyx_lineno / __pyx_clineno and -1 is returned. */
+static int __Pyx_InitGlobals(void) {
+  if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  __pyx_int_15 = PyInt_FromLong(15); if (unlikely(!__pyx_int_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  return 0;
+  /* Shared failure exit for every step above. */
+  __pyx_L1_error:;
+  return -1;
+}
+
+/* Module initialization entry point for the "cythonutils" extension module.
+ * Python 2 builds define initcythonutils() (returns nothing); Python 3 builds
+ * define PyInit_cythonutils(), which returns the module object __pyx_m, or 0
+ * after a failure because the error path below clears __pyx_m.
+ * The body creates the module, initializes globals, cached builtins and cached
+ * constants, imports the numpy extension types, and then runs the module-level
+ * statements of cythonutils.pyx (import numpy, __all__, the
+ * inspect_special_values_* functions and __test__).  Every step that fails
+ * records its position and jumps to __pyx_L1_error. */
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initcythonutils(void); /*proto*/
+PyMODINIT_FUNC initcythonutils(void)
+#else
+PyMODINIT_FUNC PyInit_cythonutils(void); /*proto*/
+PyMODINIT_FUNC PyInit_cythonutils(void)
+#endif
+{
+  PyObject *__pyx_t_1 = NULL;
+  __Pyx_RefNannyDeclarations
+  /* Optional reference-count debugging support: try "refnanny" first, then
+   * "Cython.Runtime.refnanny"; abort the interpreter if neither imports. */
+  #if CYTHON_REFNANNY
+  __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
+  if (!__Pyx_RefNanny) {
+      PyErr_Clear();
+      __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
+      if (!__Pyx_RefNanny)
+          Py_FatalError("failed to import 'refnanny' module");
+  }
+  #endif
+  __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_cythonutils(void)", 0);
+  if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  /* Shared empty tuple / empty bytes objects used by the generated code. */
+  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  #ifdef __Pyx_CyFunction_USED
+  if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  #endif
+  #ifdef __Pyx_FusedFunction_USED
+  if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  #endif
+  #ifdef __Pyx_Generator_USED
+  if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  #endif
+  /*--- Library function declarations ---*/
+  /*--- Threads initialization code ---*/
+  #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
+  #ifdef WITH_THREAD /* Python build with threading support? */
+  PyEval_InitThreads();
+  #endif
+  #endif
+  /*--- Module creation code ---*/
+  #if PY_MAJOR_VERSION < 3
+  __pyx_m = Py_InitModule4(__Pyx_NAMESTR("cythonutils"), __pyx_methods, 0, 0, PYTHON_API_VERSION);
+  #else
+  __pyx_m = PyModule_Create(&__pyx_moduledef);
+  #endif
+  if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  /* NOTE(review): presumably needed because Py_InitModule4 returns a borrowed
+   * reference while the error path below does Py_DECREF(__pyx_m) -- confirm. */
+  #if PY_MAJOR_VERSION < 3
+  Py_INCREF(__pyx_m);
+  #endif
+  /* Expose the builtins module on the new module as __builtins__. */
+  __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME));
+  if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  /*--- Initialize various global constants etc. ---*/
+  if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  if (__pyx_module_is_main_pyoperators__utils__cythonutils) {
+    if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s____main__) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  }
+  /*--- Builtin init code ---*/
+  if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  /*--- Constants init code ---*/
+  if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  /*--- Global init code ---*/
+  /*--- Variable export code ---*/
+  /*--- Function export code ---*/
+  /*--- Type init code ---*/
+  /*--- Type import code ---*/
+  /* Look up the numpy types by name, passing the struct size this file was
+   * compiled against to __Pyx_ImportType. */
+  __pyx_ptype_5numpy_dtype = __Pyx_ImportType("numpy", "dtype", sizeof(PyArray_Descr), 0); if (unlikely(!__pyx_ptype_5numpy_dtype)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_5numpy_flatiter = __Pyx_ImportType("numpy", "flatiter", sizeof(PyArrayIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_flatiter)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_5numpy_broadcast = __Pyx_ImportType("numpy", "broadcast", sizeof(PyArrayMultiIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_broadcast)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_5numpy_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject), 0); if (unlikely(!__pyx_ptype_5numpy_ndarray)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_ptype_5numpy_ufunc = __Pyx_ImportType("numpy", "ufunc", sizeof(PyUFuncObject), 0); if (unlikely(!__pyx_ptype_5numpy_ufunc)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  /*--- Variable import code ---*/
+  /*--- Function import code ---*/
+  /*--- Execution code ---*/
+
+  /* "pyoperators/utils/cythonutils.pyx":3
+ * from __future__ import division
+ * 
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * cimport numpy as np
+ * cimport cython
+ */
+  __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__numpy), 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s__np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":7
+ * cimport cython
+ * 
+ * __all__ = []             # <<<<<<<<<<<<<<
+ * 
+ * @cython.boundscheck(False)
+ */
+  __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s____all__, ((PyObject *)__pyx_t_1)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+
+  /* Each def function below follows the same pattern: wrap the method
+   * definition with PyCFunction_NewEx, store the result as a module
+   * attribute, then release the temporary reference. */
+
+  /* "pyoperators/utils/cythonutils.pyx":10
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_bool8(np.ndarray[np.uint8_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nzeros = 0
+ *     cdef unsigned int n = v.size
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11pyoperators_5utils_11cythonutils_1inspect_special_values_bool8, NULL, __pyx_n_s_17); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s_15, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":21
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_uint64(np.ndarray[np.uint64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11pyoperators_5utils_11cythonutils_3inspect_special_values_uint64, NULL, __pyx_n_s_17); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s_20, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":47
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_int64(np.ndarray[np.int64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11pyoperators_5utils_11cythonutils_5inspect_special_values_int64, NULL, __pyx_n_s_17); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s_23, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":76
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_float64(np.ndarray[np.float64_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11pyoperators_5utils_11cythonutils_7inspect_special_values_float64, NULL, __pyx_n_s_17); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s_26, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":105
+ * 
+ * @cython.boundscheck(False)
+ * def inspect_special_values_complex128(np.ndarray[np.complex128_t, ndim=1] v):             # <<<<<<<<<<<<<<
+ *     cdef int nones = 0
+ *     cdef int nzeros = 0
+ */
+  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11pyoperators_5utils_11cythonutils_9inspect_special_values_complex128, NULL, __pyx_n_s_17); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s_29, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+  /* "pyoperators/utils/cythonutils.pyx":1
+ * from __future__ import division             # <<<<<<<<<<<<<<
+ * 
+ * import numpy as np
+ */
+  /* Install an empty dict as the module's __test__ attribute. */
+  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+  if (PyObject_SetAttr(__pyx_m, __pyx_n_s____test__, ((PyObject *)__pyx_t_1)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+
+  /* "numpy.pxd":974
+ *      arr.base = baseptr
+ * 
+ * cdef inline object get_array_base(ndarray arr):             # <<<<<<<<<<<<<<
+ *     if arr.base is NULL:
+ *         return None
+ */
+  /* Success: skip the error handling below. */
+  goto __pyx_L0;
+  /* Failure: release the pending temporary.  If the module object exists,
+   * add a traceback entry and drop the module (so Python 3 returns 0);
+   * otherwise make sure some exception is set. */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  if (__pyx_m) {
+    __Pyx_AddTraceback("init pyoperators.utils.cythonutils", __pyx_clineno, __pyx_lineno, __pyx_filename);
+    Py_DECREF(__pyx_m); __pyx_m = 0;
+  } else if (!PyErr_Occurred()) {
+    PyErr_SetString(PyExc_ImportError, "init pyoperators.utils.cythonutils");
+  }
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  #if PY_MAJOR_VERSION < 3
+  return;
+  #else
+  return __pyx_m;
+  #endif
+}
+
+/* Runtime support code */
+#if CYTHON_REFNANNY
+/* Import module `modname` and convert its "RefNannyAPI" attribute to a raw
+ * pointer with PyLong_AsVoidPtr.  Returns NULL when the import or the
+ * attribute lookup fails (the Python error is left set for the caller). */
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
+    void *api_ptr = NULL;
+    PyObject *module = PyImport_ImportModule((char *)modname);
+    if (module) {
+        PyObject *api_attr = PyObject_GetAttrString(module, (char *)"RefNannyAPI");
+        if (api_attr) {
+            api_ptr = PyLong_AsVoidPtr(api_attr);
+            Py_DECREF(api_attr);
+        }
+        Py_DECREF(module);
+    }
+    return (__Pyx_RefNannyAPIStruct *)api_ptr;
+}
+#endif /* CYTHON_REFNANNY */
+
+/* Look up attribute `name` on `dict`, falling back to the builtins module
+ * (__pyx_b) when `dict` is a different object and the first lookup fails.
+ * Returns the result of PyObject_GetAttr, or NULL with NameError(name) set
+ * when the name is found in neither place. */
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
+    PyObject *value = PyObject_GetAttr(dict, name);
+    if (value)
+        return value;
+    if (dict != __pyx_b) {
+        /* Discard the first lookup's error before retrying on builtins. */
+        PyErr_Clear();
+        value = PyObject_GetAttr(__pyx_b, name);
+        if (value)
+            return value;
+    }
+    PyErr_SetObject(PyExc_NameError, name);
+    return NULL;
+}
+
+/* Check that argument `obj` has the expected `type`.
+ *   none_allowed: when non-zero, Py_None is accepted regardless of `type`.
+ *   name:         argument name, used only in the error message.
+ *   exact:        when non-zero, require Py_TYPE(obj) == type; otherwise
+ *                 PyObject_TypeCheck (subtypes accepted) is used.
+ * Returns 1 when the argument is acceptable.  Returns 0 with SystemError set
+ * when `type` is NULL, or with TypeError set on a mismatch. */
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+    const char *name, int exact)
+{
+    int matches;
+    if (!type) {
+        PyErr_Format(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (none_allowed && obj == Py_None)
+        return 1;
+    matches = exact ? (Py_TYPE(obj) == type) : PyObject_TypeCheck(obj, type);
+    if (matches)
+        return 1;
+    PyErr_Format(PyExc_TypeError,
+        "Argument '%s' has incorrect type (expected %s, got %s)",
+        name, type->tp_name, Py_TYPE(obj)->tp_name);
+    return 0;
+}
+
+/* Return non-zero when the lowest-addressed byte of an unsigned int holding
+ * the value 1 is non-zero, i.e. when the host stores integers little-endian. */
+static CYTHON_INLINE int __Pyx_IsLittleEndian(void) {
+  unsigned int probe = 1;
+  unsigned char *first_byte = (unsigned char*)(&probe);
+  return first_byte[0] != 0;
+}
+/* Reset the format-checking context `ctx` so that it expects `type`.
+ * `stack` is caller-provided storage for the field stack; its first element
+ * is bound to the synthetic root field named "buffer dtype".  While the
+ * expected type is a struct (typegroup 'S'), one stack level is pushed per
+ * nesting level so that ctx->head ends up on the first innermost field. */
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+                              __Pyx_BufFmt_StackElem* stack,
+                              __Pyx_TypeInfo* type) {
+  /* Synthetic root field describing the whole buffer dtype. */
+  ctx->root.type = type;
+  ctx->root.name = "buffer dtype";
+  ctx->root.offset = 0;
+
+  /* Bottom of the field stack refers to the root field. */
+  ctx->head = stack;
+  stack[0].field = &ctx->root;
+  stack[0].parent_offset = 0;
+
+  /* Parser state: native packing, nothing pending. */
+  ctx->fmt_offset = 0;
+  ctx->new_packmode = '@';
+  ctx->enc_packmode = '@';
+  ctx->new_count = 1;
+  ctx->enc_count = 0;
+  ctx->enc_type = 0;
+  ctx->is_complex = 0;
+  ctx->is_valid_array = 0;
+  ctx->struct_alignment = 0;
+
+  /* Descend through leading nested structs. */
+  for (; type->typegroup == 'S'; type = type->fields->type) {
+    ++ctx->head;
+    ctx->head->field = type->fields;
+    ctx->head->parent_offset = 0;
+  }
+}
+/* Parse an unsigned decimal number at *ts.
+ * Returns the parsed value and advances *ts past all of its digits, or
+ * returns -1 and leaves *ts untouched when *ts does not start with a digit.
+ * Fix: the continuation test used `*t < '9'`, so a '9' after the first digit
+ * ended the number early (e.g. "19" was parsed as 1). */
+static int __Pyx_BufFmt_ParseNumber(const char** ts) {
+    int count;
+    const char* t = *ts;
+    if (*t < '0' || *t > '9') {
+        return -1;
+    }
+    count = *t++ - '0';
+    while (*t >= '0' && *t <= '9') {
+        count = count * 10 + (*t++ - '0');
+    }
+    *ts = t;
+    return count;
+}
+/* Like __Pyx_BufFmt_ParseNumber, but sets a ValueError naming the offending
+ * character when *ts does not start with a digit.  Returns the parsed number,
+ * or -1 with the exception set. */
+static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
+    const int parsed = __Pyx_BufFmt_ParseNumber(ts);
+    if (parsed != -1)
+        return parsed;
+    /* First char was not a digit */
+    PyErr_Format(PyExc_ValueError,
+                 "Does not understand character buffer dtype format string ('%c')", **ts);
+    return parsed;
+}
+/* Set a ValueError reporting that `ch` is not a valid format character. */
+static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
+  static const char message[] = "Unexpected format string character: '%c'";
+  PyErr_Format(PyExc_ValueError, message, ch);
+}
+/* Return a human-readable description of struct-format type code `ch`, for
+ * use in "Buffer dtype mismatch" error messages.  `is_complex` selects the
+ * complex variant for the floating point codes.  The returned string is a
+ * static literal and must not be freed.
+ * Fix: 'b' is the signed char code and was mislabelled as 'char', while the
+ * 'c' (char) and '?' (bool) codes accepted by the size tables were reported
+ * as "unparseable format string". */
+static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) {
+  switch (ch) {
+    case '?': return "'bool'";
+    case 'c': return "'char'";
+    case 'b': return "'signed char'";
+    case 'B': return "'unsigned char'";
+    case 'h': return "'short'";
+    case 'H': return "'unsigned short'";
+    case 'i': return "'int'";
+    case 'I': return "'unsigned int'";
+    case 'l': return "'long'";
+    case 'L': return "'unsigned long'";
+    case 'q': return "'long long'";
+    case 'Q': return "'unsigned long long'";
+    case 'f': return (is_complex ? "'complex float'" : "'float'");
+    case 'd': return (is_complex ? "'complex double'" : "'double'");
+    case 'g': return (is_complex ? "'complex long double'" : "'long double'");
+    case 'T': return "a struct";
+    case 'O': return "Python object";
+    case 'P': return "a pointer";
+    case 's': case 'p': return "a string";
+    case 0: return "end";
+    default: return "unparseable format string";
+  }
+}
+/* Return the size in bytes of type code `ch` under the "standard" size
+ * table below (fixed sizes, except 'O'/'P' which use sizeof(void*)).
+ * Complex floats/doubles occupy twice the size of their real counterpart.
+ * Returns 0 with a ValueError set for 'g' (long double) and for any
+ * character that is not a known type code. */
+static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) {
+  const size_t parts = is_complex ? 2 : 1;
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p':
+      return 1;
+    case 'h': case 'H':
+      return 2;
+    case 'i': case 'I': case 'l': case 'L':
+      return 4;
+    case 'q': case 'Q':
+      return 8;
+    case 'f':
+      return 4 * parts;
+    case 'd':
+      return 8 * parts;
+    case 'O': case 'P':
+      return sizeof(void*);
+    case 'g':
+      PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g')..");
+      return 0;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      return 0;
+  }
+}
+/* Return the size in bytes of type code `ch` using the sizes of the
+ * corresponding C types on this platform.  Complex floating point values
+ * count as two consecutive reals.
+ * Returns 0 with a ValueError set for an unknown character (including
+ * 'q'/'Q' when HAVE_LONG_LONG is not defined).
+ * Fix: '?' is now accepted with size 1, matching the standard-size,
+ * alignment and padding tables, which all handle '?' alongside the other
+ * one-byte codes. */
+static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) {
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1;
+    case 'h': case 'H': return sizeof(short);
+    case 'i': case 'I': return sizeof(int);
+    case 'l': case 'L': return sizeof(long);
+    #ifdef HAVE_LONG_LONG
+    case 'q': case 'Q': return sizeof(PY_LONG_LONG);
+    #endif
+    case 'f': return sizeof(float) * (is_complex ? 2 : 1);
+    case 'd': return sizeof(double) * (is_complex ? 2 : 1);
+    case 'g': return sizeof(long double) * (is_complex ? 2 : 1);
+    case 'O': case 'P': return sizeof(void*);
+    default: {
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      return 0;
+    }
+  }
+}
+/* Probe structs: a leading char followed by the type of interest.  The
+ * difference between the struct size and the member size is used below as
+ * the alignment the compiler applies to that member. */
+typedef struct { char c; short x; } __Pyx_st_short;
+typedef struct { char c; int x; } __Pyx_st_int;
+typedef struct { char c; long x; } __Pyx_st_long;
+typedef struct { char c; float x; } __Pyx_st_float;
+typedef struct { char c; double x; } __Pyx_st_double;
+typedef struct { char c; long double x; } __Pyx_st_longdouble;
+typedef struct { char c; void *x; } __Pyx_st_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong;
+#endif
+/* Return the native alignment of type code `ch`, derived from the probe
+ * structs above.  `is_complex` is not used.  Returns 0 with a ValueError
+ * set for an unknown character. */
+static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) {
+  size_t alignment = 0;
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p':
+      alignment = 1;
+      break;
+    case 'h': case 'H':
+      alignment = sizeof(__Pyx_st_short) - sizeof(short);
+      break;
+    case 'i': case 'I':
+      alignment = sizeof(__Pyx_st_int) - sizeof(int);
+      break;
+    case 'l': case 'L':
+      alignment = sizeof(__Pyx_st_long) - sizeof(long);
+      break;
+#ifdef HAVE_LONG_LONG
+    case 'q': case 'Q':
+      alignment = sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG);
+      break;
+#endif
+    case 'f':
+      alignment = sizeof(__Pyx_st_float) - sizeof(float);
+      break;
+    case 'd':
+      alignment = sizeof(__Pyx_st_double) - sizeof(double);
+      break;
+    case 'g':
+      alignment = sizeof(__Pyx_st_longdouble) - sizeof(long double);
+      break;
+    case 'P': case 'O':
+      alignment = sizeof(__Pyx_st_void_p) - sizeof(void*);
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return alignment;
+}
+/* These are for computing the padding at the end of the struct to align
+   on the first member of the struct. This will probably be the same as above,
+   but we don't have any guarantees.
+ */
+/* Probe structs: the type of interest followed by a trailing char, so that
+ * sizeof(struct) - sizeof(member) covers the char plus the tail padding. */
+typedef struct { short x; char c; } __Pyx_pad_short;
+typedef struct { int x; char c; } __Pyx_pad_int;
+typedef struct { long x; char c; } __Pyx_pad_long;
+typedef struct { float x; char c; } __Pyx_pad_float;
+typedef struct { double x; char c; } __Pyx_pad_double;
+typedef struct { long double x; char c; } __Pyx_pad_longdouble;
+typedef struct { void *x; char c; } __Pyx_pad_void_p;
+#ifdef HAVE_LONG_LONG
+typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
+#endif
+/* Return the struct tail-alignment value for type code `ch`, derived from
+ * the probe structs above.  `is_complex` is not used.  Returns 0 with a
+ * ValueError set for an unknown character. */
+static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) {
+  size_t padding = 0;
+  switch (ch) {
+    case '?': case 'c': case 'b': case 'B': case 's': case 'p':
+      padding = 1;
+      break;
+    case 'h': case 'H':
+      padding = sizeof(__Pyx_pad_short) - sizeof(short);
+      break;
+    case 'i': case 'I':
+      padding = sizeof(__Pyx_pad_int) - sizeof(int);
+      break;
+    case 'l': case 'L':
+      padding = sizeof(__Pyx_pad_long) - sizeof(long);
+      break;
+#ifdef HAVE_LONG_LONG
+    case 'q': case 'Q':
+      padding = sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG);
+      break;
+#endif
+    case 'f':
+      padding = sizeof(__Pyx_pad_float) - sizeof(float);
+      break;
+    case 'd':
+      padding = sizeof(__Pyx_pad_double) - sizeof(double);
+      break;
+    case 'g':
+      padding = sizeof(__Pyx_pad_longdouble) - sizeof(long double);
+      break;
+    case 'P': case 'O':
+      padding = sizeof(__Pyx_pad_void_p) - sizeof(void*);
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return padding;
+}
+/* Map type code `ch` to the one-letter type group that is compared against
+ * __Pyx_TypeInfo.typegroup:
+ *   'I' for c, b, h, i, l, q, s, p      'U' for B, H, I, L, Q
+ *   'R' / 'C' for f, d, g (real / complex, chosen by is_complex)
+ *   'O' for O                           'P' for P
+ * Returns 0 with a ValueError set for any other character. */
+static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
+  char group = 0;
+  switch (ch) {
+    case 'c': case 'b': case 'h': case 'i':
+    case 'l': case 'q': case 's': case 'p':
+      group = 'I';
+      break;
+    case 'B': case 'H': case 'I': case 'L': case 'Q':
+      group = 'U';
+      break;
+    case 'f': case 'd': case 'g':
+      group = is_complex ? 'C' : 'R';
+      break;
+    case 'O':
+      group = 'O';
+      break;
+    case 'P':
+      group = 'P';
+      break;
+    default:
+      __Pyx_BufFmt_RaiseUnexpectedChar(ch);
+      break;
+  }
+  return group;
+}
+/* Set a "Buffer dtype mismatch" ValueError describing what was expected at
+ * the current position versus the type code actually found (ctx->enc_type).
+ * Three shapes of message are produced:
+ *   - ctx->head is NULL:            expected end
+ *   - head is on the root field:    expected '<root type name>'
+ *   - head is on a nested field:    expected '<type>' ... in '<parent>.<field>' */
+static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
+  const char* got = __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex);
+  if (ctx->head != NULL && ctx->head->field != &ctx->root) {
+    /* Nested field: name it through its parent struct type. */
+    __Pyx_StructField* expected_field = ctx->head->field;
+    __Pyx_StructField* enclosing = (ctx->head - 1)->field;
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'",
+                 expected_field->type->name, got,
+                 enclosing->type->name, expected_field->name);
+    return;
+  }
+  {
+    const char* expected = "end";
+    const char* quote = "";
+    if (ctx->head != NULL) {
+      expected = ctx->head->field->type->name;
+      quote = "'";
+    }
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer dtype mismatch, expected %s%s%s but got %s",
+                 quote, expected, quote, got);
+  }
+}
+/* Check the pending format element (type code ctx->enc_type, repeated
+ * ctx->enc_count times, packed according to ctx->enc_packmode) against the
+ * expected type description, starting at the field ctx->head points to.
+ * On success ctx->fmt_offset and ctx->head are advanced and the pending
+ * element is cleared (enc_type and is_complex reset to 0).
+ * Returns 0 on success, including when nothing is pending, and -1 with an
+ * exception set when the element does not match. */
+static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
+  char group;
+  size_t size, offset, arraysize = 1;
+  /* Nothing pending. */
+  if (ctx->enc_type == 0) return 0;
+  /* The expected field is an array (its first extent is non-zero). */
+  if (ctx->head->field->type->arraysize[0]) {
+    int i, ndim = 0;
+    /* String codes are accepted as a one-dimensional array whose length is
+     * the repeat count. */
+    if (ctx->enc_type == 's' || ctx->enc_type == 'p') {
+        ctx->is_valid_array = ctx->head->field->type->ndim == 1;
+        ndim = 1;
+        if (ctx->enc_count != ctx->head->field->type->arraysize[0]) {
+            PyErr_Format(PyExc_ValueError,
+                         "Expected a dimension of size %zu, got %zu",
+                         ctx->head->field->type->arraysize[0], ctx->enc_count);
+            return -1;
+        }
+    }
+    /* is_valid_array is otherwise set by __pyx_buffmt_parse_array after a
+     * matching shape has been parsed. */
+    if (!ctx->is_valid_array) {
+      PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d",
+                   ctx->head->field->type->ndim, ndim);
+      return -1;
+    }
+    /* Total number of elements in the array. */
+    for (i = 0; i < ctx->head->field->type->ndim; i++) {
+      arraysize *= ctx->head->field->type->arraysize[i];
+    }
+    ctx->is_valid_array = 0;
+    ctx->enc_count = 1;
+  }
+  /* NOTE(review): a 0 result here (unknown character, exception already set)
+   * is not checked; it is only caught by the typegroup comparison below. */
+  group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex);
+  /* One iteration per repeat of the pending element. */
+  do {
+    __Pyx_StructField* field = ctx->head->field;
+    __Pyx_TypeInfo* type = field->type;
+    /* '@' and '^' use native sizes; every other pack mode uses the standard
+     * size table. */
+    if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') {
+      size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex);
+    } else {
+      size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex);
+    }
+    /* Only '@' applies alignment: round fmt_offset up to the element's
+     * alignment and remember the first element's padding value as the
+     * struct alignment. */
+    if (ctx->enc_packmode == '@') {
+      size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex);
+      size_t align_mod_offset;
+      if (align_at == 0) return -1;
+      align_mod_offset = ctx->fmt_offset % align_at;
+      if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset;
+      if (ctx->struct_alignment == 0)
+          ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type,
+                                                                 ctx->is_complex);
+    }
+    if (type->size != size || type->typegroup != group) {
+      /* An expected complex type that has a field list may be described
+       * field by field: push its fields and retry.  `continue` in a
+       * do-while jumps to the loop condition (ctx->enc_count). */
+      if (type->typegroup == 'C' && type->fields != NULL) {
+        size_t parent_offset = ctx->head->parent_offset + field->offset;
+        ++ctx->head;
+        ctx->head->field = type->fields;
+        ctx->head->parent_offset = parent_offset;
+        continue;
+      }
+      __Pyx_BufFmt_RaiseExpected(ctx);
+      return -1;
+    }
+    /* The element must start exactly where the expected field is located. */
+    offset = ctx->head->parent_offset + field->offset;
+    if (ctx->fmt_offset != offset) {
+      PyErr_Format(PyExc_ValueError,
+                   "Buffer dtype mismatch; next field is at offset %"PY_FORMAT_SIZE_T"d but %"PY_FORMAT_SIZE_T"d expected",
+                   (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset);
+      return -1;
+    }
+    /* Advance past the element: size * arraysize bytes in total. */
+    ctx->fmt_offset += size;
+    if (arraysize)
+      ctx->fmt_offset += (arraysize - 1) * size;
+    --ctx->enc_count; /* Consume from buffer string */
+    /* Move ctx->head to the next expected field. */
+    while (1) {
+      /* The root field was just consumed: nothing further is expected. */
+      if (field == &ctx->root) {
+        ctx->head = NULL;
+        if (ctx->enc_count != 0) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return -1;
+        }
+        break; /* breaks both loops as ctx->enc_count == 0 */
+      }
+      ctx->head->field = ++field;
+      if (field->type == NULL) {
+        /* End of this struct's field list (NULL type): pop to the parent
+         * level and advance there. */
+        --ctx->head;
+        field = ctx->head->field;
+        continue;
+      } else if (field->type->typegroup == 'S') {
+        /* Next field is a struct: descend to its first field. */
+        size_t parent_offset = ctx->head->parent_offset + field->offset;
+        if (field->type->fields->type == NULL) continue; /* empty struct */
+        field = field->type->fields;
+        ++ctx->head;
+        ctx->head->field = field;
+        ctx->head->parent_offset = parent_offset;
+        break;
+      } else {
+        break;
+      }
+    }
+  } while (ctx->enc_count);
+  ctx->enc_type = 0;
+  ctx->is_complex = 0;
+  return 0;
+}
+/* Parse an array shape of the form "d0,d1,...)" that follows the character
+ * at *tsp, and check each extent against the expected field's arraysize.
+ * Any pending format element is processed first.
+ * On success sets ctx->is_valid_array, stores the position just past ')'
+ * in *tsp and returns Py_None (used only as a non-NULL marker; no new
+ * reference is taken).  Returns NULL with a ValueError set on any error.
+ *
+ * Fixes:
+ *   - whitespace inside the shape used to `continue` without advancing
+ *     `ts`, which looped forever; the character is now skipped;
+ *   - isspace() is given an unsigned char value, as <ctype.h> requires;
+ *   - a stray empty statement after the declarations was removed. */
+static CYTHON_INLINE PyObject *
+__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
+{
+    const char *ts = *tsp;
+    int i = 0, number;
+    /* NOTE(review): ndim is read before the pending element is processed,
+     * while the extent checks and the final message below read
+     * ctx->head->field after __Pyx_BufFmt_ProcessTypeChunk may have advanced
+     * ctx->head -- confirm which field this is meant to describe. */
+    int ndim = ctx->head->field->type->ndim;
+    /* Skip the character at *tsp (presumably the opening '('; the caller is
+     * not visible here). */
+    ++ts;
+    if (ctx->new_count != 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Cannot handle repeated arrays in format string");
+        return NULL;
+    }
+    if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+    while (*ts && *ts != ')') {
+        if (isspace((unsigned char)*ts)) {
+            ++ts;
+            continue;
+        }
+        number = __Pyx_BufFmt_ExpectNumber(&ts);
+        if (number == -1) return NULL;
+        if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i])
+            return PyErr_Format(PyExc_ValueError,
+                        "Expected a dimension of size %zu, got %d",
+                        ctx->head->field->type->arraysize[i], number);
+        if (*ts != ',' && *ts != ')')
+            return PyErr_Format(PyExc_ValueError,
+                                "Expected a comma in format string, got '%c'", *ts);
+        if (*ts == ',') ts++;
+        i++;
+    }
+    if (i != ndim)
+        return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d",
+                            ctx->head->field->type->ndim, i);
+    if (!*ts) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Unexpected end of format string, expected ')'");
+        return NULL;
+    }
+    ctx->is_valid_array = 1;
+    ctx->new_count = 1;
+    *tsp = ++ts;
+    return Py_None;
+}
+/* Check a buffer format string against the type description held in `ctx`.
+ *
+ * The string is consumed one token at a time.  Consecutive identical type
+ * codes are accumulated in ctx->enc_count / ctx->enc_type and handed to
+ * __Pyx_BufFmt_ProcessTypeChunk() whenever the run ends.  'T{' recurses into
+ * this function for the sub-struct body; the recursion returns when it meets
+ * the matching '}'.
+ *
+ * Returns the position at which parsing stopped (the terminating NUL, or the
+ * character just after a closing '}'), or NULL on error.
+ */
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
+  int got_Z = 0;  /* set while a 'Z' (complex) prefix is pending */
+  while (1) {
+    switch(*ts) {
+      case 0:
+        /* End of string: flush the pending chunk and make sure nothing
+           more was expected. */
+        if (ctx->enc_type != 0 && ctx->head == NULL) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return NULL;
+        }
+        if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+        if (ctx->head != NULL) {
+          __Pyx_BufFmt_RaiseExpected(ctx);
+          return NULL;
+        }
+        return ts;
+      case ' ':
+      case 10:
+      case 13:
+        /* Whitespace (space, LF, CR) is ignored. */
+        ++ts;
+        break;
+      case '<':
+        if (!__Pyx_IsLittleEndian()) {
+          PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
+          return NULL;
+        }
+        ctx->new_packmode = '=';
+        ++ts;
+        break;
+      case '>':
+      case '!':
+        if (__Pyx_IsLittleEndian()) {
+          PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
+          return NULL;
+        }
+        ctx->new_packmode = '=';
+        ++ts;
+        break;
+      case '=':
+      case '@':
+      case '^':
+        ctx->new_packmode = *ts++;
+        break;
+      case 'T': /* substruct */
+        {
+          const char* ts_after_sub;
+          size_t i, struct_count = ctx->new_count;
+          size_t struct_alignment = ctx->struct_alignment;
+          ctx->new_count = 1;
+          ++ts;
+          if (*ts != '{') {
+            PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'");
+            return NULL;
+          }
+          if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+          ctx->enc_type = 0; /* Erase processed last struct element */
+          ctx->enc_count = 0;
+          ctx->struct_alignment = 0;
+          ++ts;
+          ts_after_sub = ts;
+          /* A repeat count before 'T' re-parses the same body that many
+             times, always starting from the same position. */
+          for (i = 0; i != struct_count; ++i) {
+            ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts);
+            if (!ts_after_sub) return NULL;
+          }
+          ts = ts_after_sub;
+          if (struct_alignment) ctx->struct_alignment = struct_alignment;
+        }
+        break;
+      case '}': /* end of substruct; either repeat or move on */
+        {
+          size_t alignment = ctx->struct_alignment;
+          ++ts;
+          if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+          ctx->enc_type = 0; /* Erase processed last struct element */
+          /* Round the running offset up to the struct alignment. */
+          if (alignment && ctx->fmt_offset % alignment) {
+            ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment);
+          }
+        }
+        return ts;
+      case 'x':
+        /* Padding: advance the offset by the repeat count. */
+        if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+        ctx->fmt_offset += ctx->new_count;
+        ctx->new_count = 1;
+        ctx->enc_count = 0;
+        ctx->enc_type = 0;
+        ctx->enc_packmode = ctx->new_packmode;
+        ++ts;
+        break;
+      case 'Z':
+        got_Z = 1;
+        ++ts;
+        if (*ts != 'f' && *ts != 'd' && *ts != 'g') {
+          __Pyx_BufFmt_RaiseUnexpectedChar('Z');
+          return NULL;
+        }        /* fall through */
+      case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I':
+      case 'l': case 'L': case 'q': case 'Q':
+      case 'f': case 'd': case 'g':
+      case 'O': case 's': case 'p':
+        if (ctx->enc_type == *ts && got_Z == ctx->is_complex &&
+            ctx->enc_packmode == ctx->new_packmode) {
+          /* Same type as the pending run: just extend it. */
+          ctx->enc_count += ctx->new_count;
+        } else {
+          /* Different type: flush the pending run and start a new one. */
+          if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
+          ctx->enc_count = ctx->new_count;
+          ctx->enc_packmode = ctx->new_packmode;
+          ctx->enc_type = *ts;
+          ctx->is_complex = got_Z;
+        }
+        ++ts;
+        ctx->new_count = 1;
+        got_Z = 0;
+        break;
+      case ':':
+        /* Skip a ':name:' field label.  Stop at the terminating NUL so an
+           unterminated label is reported instead of reading past the end
+           of the string. */
+        ++ts;
+        while (*ts && *ts != ':') ++ts;
+        if (!*ts) {
+          PyErr_SetString(PyExc_ValueError,
+                          "Unexpected end of format string, expected ':'");
+          return NULL;
+        }
+        ++ts;
+        break;
+      case '(':
+        if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL;
+        break;
+      default:
+        {
+          /* Anything else must be a repeat count for the next token. */
+          int number = __Pyx_BufFmt_ExpectNumber(&ts);
+          if (number == -1) return NULL;
+          ctx->new_count = (size_t)number;
+        }
+    }
+  }
+}
+/* Put `buf` into the "no buffer held" state: NULL data pointer and owner,
+   with shape/strides pointing at __Pyx_zeros and suboffsets at
+   __Pyx_minusones. */
+static CYTHON_INLINE void __Pyx_ZeroBuffer(Py_buffer* buf) {
+  buf->obj = NULL;
+  buf->buf = NULL;
+  buf->shape = __Pyx_zeros;
+  buf->strides = __Pyx_zeros;
+  buf->suboffsets = __Pyx_minusones;
+}
+/* Acquire a buffer view of `obj` into `buf` and validate it.
+ *
+ *   dtype  expected element type (size and name are used for the size check)
+ *   flags  passed through to __Pyx_GetBuffer()
+ *   nd     required number of dimensions
+ *   cast   when non-zero, the format string check is skipped
+ *   stack  scratch space handed to __Pyx_BufFmt_Init()
+ *
+ * A NULL or None `obj` yields a zeroed buffer and success.
+ * Returns 0 on success, -1 on failure.  On failure `buf` is always left
+ * zeroed, and a view that had already been acquired is released first so
+ * that neither the view nor its reference to `obj` is leaked.
+ */
+static CYTHON_INLINE int __Pyx_GetBufferAndValidate(
+        Py_buffer* buf, PyObject* obj,  __Pyx_TypeInfo* dtype, int flags,
+        int nd, int cast, __Pyx_BufFmt_StackElem* stack)
+{
+  if (obj == Py_None || obj == NULL) {
+    __Pyx_ZeroBuffer(buf);
+    return 0;
+  }
+  buf->buf = NULL;
+  if (__Pyx_GetBuffer(obj, buf, flags) == -1) {
+    /* Nothing was acquired, so there is nothing to release. */
+    __Pyx_ZeroBuffer(buf);
+    return -1;
+  }
+  if (buf->ndim != nd) {
+    PyErr_Format(PyExc_ValueError,
+                 "Buffer has wrong number of dimensions (expected %d, got %d)",
+                 nd, buf->ndim);
+    goto fail;
+  }
+  if (!cast) {
+    __Pyx_BufFmt_Context ctx;
+    __Pyx_BufFmt_Init(&ctx, stack, dtype);
+    if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail;
+  }
+  if ((unsigned)buf->itemsize != dtype->size) {
+    PyErr_Format(PyExc_ValueError,
+      "Item size of buffer (%"PY_FORMAT_SIZE_T"d byte%s) does not match size of '%s' (%"PY_FORMAT_SIZE_T"d byte%s)",
+      buf->itemsize, (buf->itemsize > 1) ? "s" : "",
+      dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : "");
+    goto fail;
+  }
+  if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones;
+  return 0;
+fail:;
+  {
+    /* The view was acquired but failed validation.  Release it while
+       keeping the pending exception intact (the release path may touch the
+       error indicator).  suboffsets has not been replaced by
+       __Pyx_minusones on any path that reaches this label. */
+    PyObject *exc_type, *exc_value, *exc_tb;
+    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+    __Pyx_ReleaseBuffer(buf);
+    PyErr_Restore(exc_type, exc_value, exc_tb);
+  }
+  __Pyx_ZeroBuffer(buf);
+  return -1;
+}
+/* Release `info` if it holds a buffer; a view whose data pointer is NULL is
+   left untouched.  The __Pyx_minusones placeholder is swapped back to NULL
+   before the view is handed to __Pyx_ReleaseBuffer(). */
+static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) {
+  if (info->buf != NULL) {
+    if (info->suboffsets == __Pyx_minusones) {
+      info->suboffsets = NULL;
+    }
+    __Pyx_ReleaseBuffer(info);
+  }
+}
+
+/* Make (type, value, tb) the current exception of this thread.  The three
+   pointers are stored as given (no INCREF); references held by the
+   previously set exception are dropped afterwards. */
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+    PyThreadState *ts = PyThreadState_GET();
+    PyObject *old_type = ts->curexc_type;
+    PyObject *old_value = ts->curexc_value;
+    PyObject *old_tb = ts->curexc_traceback;
+    /* Install the new triple before releasing the old one. */
+    ts->curexc_type = type;
+    ts->curexc_value = value;
+    ts->curexc_traceback = tb;
+    Py_XDECREF(old_type);
+    Py_XDECREF(old_value);
+    Py_XDECREF(old_tb);
+#else
+    PyErr_Restore(type, value, tb);
+#endif
+}
+/* Move the current exception of this thread into *type, *value and *tb and
+   clear the thread's error indicator. */
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+    PyThreadState *ts = PyThreadState_GET();
+    /* Hand each field over to the caller, then blank it. */
+    *type = ts->curexc_type;
+    ts->curexc_type = NULL;
+    *value = ts->curexc_value;
+    ts->curexc_value = NULL;
+    *tb = ts->curexc_traceback;
+    ts->curexc_traceback = NULL;
+#else
+    PyErr_Fetch(type, value, tb);
+#endif
+}
+
+#if PY_MAJOR_VERSION < 3
+/* Python 2 implementation of the `raise type, value, tb` statement.
+ *
+ * Takes its own references to the three arguments, validates them and
+ * installs them as the current exception through __Pyx_ErrRestore().
+ * `cause` is ignored in this variant.  When validation fails, a TypeError is
+ * set instead and the references taken here are dropped again.
+ */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
+                        CYTHON_UNUSED PyObject *cause) {
+    /* Own a reference to everything; __Pyx_ErrRestore() stores the pointers
+       as given. */
+    Py_XINCREF(type);
+    Py_XINCREF(value);
+    Py_XINCREF(tb);
+    /* A traceback of None means "no traceback". */
+    if (tb == Py_None) {
+        Py_DECREF(tb);
+        tb = 0;
+    }
+    else if (tb != NULL && !PyTraceBack_Check(tb)) {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: arg 3 must be a traceback or None");
+        goto raise_error;
+    }
+    /* A missing value is represented by None. */
+    if (value == NULL) {
+        value = Py_None;
+        Py_INCREF(value);
+    }
+    #if PY_VERSION_HEX < 0x02050000
+    if (!PyClass_Check(type))
+    #else
+    if (!PyType_Check(type))
+    #endif
+    {
+        /* `type` is an instance rather than a class: it becomes the value
+           and its class becomes the type.  No separate value is allowed. */
+        if (value != Py_None) {
+            PyErr_SetString(PyExc_TypeError,
+                "instance exception may not have a separate value");
+            goto raise_error;
+        }
+        Py_DECREF(value);
+        value = type;
+        #if PY_VERSION_HEX < 0x02050000
+            if (PyInstance_Check(type)) {
+                type = (PyObject*) ((PyInstanceObject*)type)->in_class;
+                Py_INCREF(type);
+            }
+            else {
+                /* `value` now owns the reference that `type` held, so
+                   `type` is cleared before cleanup. */
+                type = 0;
+                PyErr_SetString(PyExc_TypeError,
+                    "raise: exception must be an old-style class or instance");
+                goto raise_error;
+            }
+        #else
+            type = (PyObject*) Py_TYPE(type);
+            Py_INCREF(type);
+            if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+                PyErr_SetString(PyExc_TypeError,
+                    "raise: exception class must be a subclass of BaseException");
+                goto raise_error;
+            }
+        #endif
+    }
+    /* Success: the references are handed over to the error indicator. */
+    __Pyx_ErrRestore(type, value, tb);
+    return;
+raise_error:
+    /* Failure: release the references taken at the top. */
+    Py_XDECREF(value);
+    Py_XDECREF(type);
+    Py_XDECREF(tb);
+    return;
+}
+#else /* Python 3+ */
+/* Python 3 implementation of `raise type(value) from cause` with an optional
+ * explicit traceback.
+ *
+ *   type   exception class, or an exception instance (then `value` must be
+ *          absent and the instance's class is used as the type)
+ *   value  exception value, NULL or None for "no value"
+ *   tb     traceback object, NULL or None for "no traceback"
+ *   cause  NULL, an exception class (instantiated without arguments) or an
+ *          exception instance
+ *
+ * All arguments are borrowed.  On invalid arguments a TypeError is set
+ * instead.  If an exception instance has to be created to carry the cause
+ * and that fails, the error raised by the constructor is left set.
+ */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+    /* Instance created here to carry the cause; released before returning. */
+    PyObject *owned_value = NULL;
+    if (tb == Py_None) {
+        tb = 0;
+    } else if (tb && !PyTraceBack_Check(tb)) {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: arg 3 must be a traceback or None");
+        goto bad;
+    }
+    if (value == Py_None)
+        value = 0;
+    if (PyExceptionInstance_Check(type)) {
+        if (value) {
+            PyErr_SetString(PyExc_TypeError,
+                "instance exception may not have a separate value");
+            goto bad;
+        }
+        value = type;
+        type = (PyObject*) Py_TYPE(value);
+    } else if (!PyExceptionClass_Check(type)) {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: exception class must be a subclass of BaseException");
+        goto bad;
+    }
+    if (cause) {
+        PyObject *fixed_cause;
+        if (PyExceptionClass_Check(cause)) {
+            fixed_cause = PyObject_CallObject(cause, NULL);
+            if (fixed_cause == NULL)
+                goto bad;
+        }
+        else if (PyExceptionInstance_Check(cause)) {
+            fixed_cause = cause;
+            Py_INCREF(fixed_cause);
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                            "exception causes must derive from "
+                            "BaseException");
+            goto bad;
+        }
+        if (!value) {
+            /* The cause is attached to an instance, so create one now. */
+            owned_value = PyObject_CallObject(type, NULL);
+            if (owned_value == NULL) {
+                /* Constructor failed: keep its error, drop our cause. */
+                Py_DECREF(fixed_cause);
+                goto bad;
+            }
+            value = owned_value;
+        }
+        /* PyException_SetCause() takes over our reference to fixed_cause. */
+        PyException_SetCause(value, fixed_cause);
+    }
+    /* PyErr_SetObject() takes its own references to type and value. */
+    PyErr_SetObject(type, value);
+    if (tb) {
+        /* Replace the traceback of the freshly set exception. */
+        PyThreadState *tstate = PyThreadState_GET();
+        PyObject* tmp_tb = tstate->curexc_traceback;
+        if (tb != tmp_tb) {
+            Py_INCREF(tb);
+            tstate->curexc_traceback = tb;
+            Py_XDECREF(tmp_tb);
+        }
+    }
+bad:
+    Py_XDECREF(owned_value);
+    return;
+}
+#endif
+
+/* Set ValueError "need more than N value(s) to unpack" for N == index. */
+static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
+    /* Singular only for exactly one value. */
+    const char *plural_suffix = (index == 1) ? "" : "s";
+    PyErr_Format(PyExc_ValueError,
+                 "need more than %"PY_FORMAT_SIZE_T"d value%s to unpack",
+                 index, plural_suffix);
+}
+
+/* Set ValueError "too many values to unpack (expected N)". */
+static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
+    PyErr_Format(
+        PyExc_ValueError,
+        "too many values to unpack (expected %"PY_FORMAT_SIZE_T"d)",
+        expected);
+}
+
+/* Set the TypeError reported when None is used where an iterable is needed. */
+static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "'NoneType' object is not iterable");
+}
+
+static void __Pyx_UnpackTupleError(PyObject *t, Py_ssize_t index) {
+    if (t == Py_None) {
+      __Pyx_RaiseNoneNotIterableError();
+    } else if (PyTuple_GET_SIZE(t) < index) {
+      __Pyx_RaiseNeedMoreValuesError(PyTuple_GET_SIZE(t));
+    } else {
+      __Pyx_RaiseTooManyValuesError(index);
+    }
+}
+
+/* Return 1 if `obj` is an instance of `type` (per PyObject_TypeCheck),
+   otherwise set an exception and return 0.  A NULL `type` is reported as a
+   SystemError, a mismatch as a TypeError. */
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) {
+    if (unlikely(type == NULL)) {
+        PyErr_Format(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (unlikely(!PyObject_TypeCheck(obj, type))) {
+        PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s",
+                     Py_TYPE(obj)->tp_name, type->tp_name);
+        return 0;
+    }
+    return 1;
+}
+
+#if PY_MAJOR_VERSION < 3
+/* Python 2 buffer acquisition shim.
+ *
+ * Tries, in order:
+ *   1. the native buffer protocol (Python >= 2.6),
+ *   2. the ndarray __getbuffer__ wrapper for numpy arrays,
+ *   3. on Python < 2.6, a "__pyx_getbuffer" entry in the type's dict that
+ *      wraps a getbufferproc.
+ * Returns the result of the selected implementation, or -1 with a TypeError
+ * set when the object offers none of them.
+ */
+static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
+    PyObject *getbuffer_cobj;
+
+  #if PY_VERSION_HEX >= 0x02060000
+    if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags);
+  #endif
+
+        if (PyObject_TypeCheck(obj, __pyx_ptype_5numpy_ndarray)) return __pyx_pw_5numpy_7ndarray_1__getbuffer__(obj, view, flags);
+
+  #if PY_VERSION_HEX < 0x02060000
+    if (obj->ob_type->tp_dict &&
+        (getbuffer_cobj = PyMapping_GetItemString(obj->ob_type->tp_dict,
+                                             "__pyx_getbuffer"))) {
+        getbufferproc func;
+
+      /* NOTE: this version test cannot be true inside the enclosing
+         "< 0x02060000" block, so the PyCObject branch is the one used. */
+      #if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 0)
+        func = (getbufferproc) PyCapsule_GetPointer(getbuffer_cobj, "getbuffer(obj, view, flags)");
+      #else
+        func = (getbufferproc) PyCObject_AsVoidPtr(getbuffer_cobj);
+      #endif
+        Py_DECREF(getbuffer_cobj);
+        if (!func)
+            goto fail;
+
+        return func(obj, view, flags);
+    } else {
+        /* The lookup failing is not an error by itself. */
+        PyErr_Clear();
+    }
+  #endif
+
+    /* "%.100s" truncates long type names; the former "%100s" only set a
+       minimum field width and padded short names with blanks. */
+    PyErr_Format(PyExc_TypeError, "'%.100s' does not have the buffer interface", Py_TYPE(obj)->tp_name);
+
+#if PY_VERSION_HEX < 0x02060000
+fail:
+#endif
+
+    return -1;
+}
+
+/* Python 2 buffer release shim, the counterpart of __Pyx_GetBuffer().
+ *
+ * Does nothing when the view has no owner.  Otherwise tries, in order, the
+ * native buffer protocol (Python >= 2.6), the ndarray __releasebuffer__
+ * wrapper, and on Python < 2.6 a "__pyx_releasebuffer" entry in the type's
+ * dict.  When none of these handles the view, the owner reference is dropped
+ * and view->obj is cleared here.
+ */
+static void __Pyx_ReleaseBuffer(Py_buffer *view) {
+    PyObject *obj = view->obj;
+    PyObject *releasebuffer_cobj;
+
+    if (!obj) return;
+
+  #if PY_VERSION_HEX >= 0x02060000
+    if (PyObject_CheckBuffer(obj)) {
+        PyBuffer_Release(view);
+        return;
+    }
+  #endif
+
+        if (PyObject_TypeCheck(obj, __pyx_ptype_5numpy_ndarray)) { __pyx_pw_5numpy_7ndarray_3__releasebuffer__(obj, view); return; }
+
+  #if PY_VERSION_HEX < 0x02060000
+    if (obj->ob_type->tp_dict &&
+        (releasebuffer_cobj = PyMapping_GetItemString(obj->ob_type->tp_dict,
+                                                      "__pyx_releasebuffer"))) {
+        releasebufferproc func;
+
+      /* NOTE: this version test cannot be true inside the enclosing
+         "< 0x02060000" block, so the PyCObject branch is the one used. */
+      #if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 0)
+        func = (releasebufferproc) PyCapsule_GetPointer(releasebuffer_cobj, "releasebuffer(obj, view)");
+      #else
+        func = (releasebufferproc) PyCObject_AsVoidPtr(releasebuffer_cobj);
+      #endif
+
+        Py_DECREF(releasebuffer_cobj);
+
+        if (!func)
+            goto fail;
+
+        /* Returns without touching view->obj; presumably func disposes of
+           the view itself -- TODO confirm. */
+        func(obj, view);
+        return;
+    } else {
+        /* The lookup failing is not an error by itself. */
+        PyErr_Clear();
+    }
+  #endif
+
+    goto nofail;
+
+#if PY_VERSION_HEX < 0x02060000
+fail:
+#endif
+    /* There is no caller to propagate to: report the error and carry on. */
+    PyErr_WriteUnraisable(obj);
+
+nofail:
+    Py_DECREF(obj);
+    view->obj = NULL;
+}
+
+#endif /*  PY_MAJOR_VERSION < 3 */
+
+  /* Import module `name` by calling the builtin __import__ looked up on
+   * __pyx_b, with this module's dict as globals and an empty dict as locals.
+   *
+   *   from_list  names to import, or NULL for an empty list
+   *   level      import level; on Python 3 a level of -1 first tries a
+   *              package-relative import (level 1, only if the module name
+   *              contains a dot) and then falls back to an absolute one
+   *
+   * Returns a new reference to the module, or NULL on failure.
+   */
+  static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
+    PyObject *module = 0;
+    PyObject *py_import = 0;
+    PyObject *global_dict = 0;
+    PyObject *empty_dict = 0;
+    PyObject *empty_list = 0;
+    PyObject *fromlist_arg;
+    py_import = __Pyx_GetAttrString(__pyx_b, "__import__");
+    if (!py_import)
+        goto bad;
+    /* __import__ always gets a list; substitute an empty one if needed. */
+    fromlist_arg = from_list;
+    if (!fromlist_arg) {
+        empty_list = PyList_New(0);
+        if (!empty_list)
+            goto bad;
+        fromlist_arg = empty_list;
+    }
+    global_dict = PyModule_GetDict(__pyx_m);
+    if (!global_dict)
+        goto bad;
+    empty_dict = PyDict_New();
+    if (!empty_dict)
+        goto bad;
+    #if PY_VERSION_HEX >= 0x02050000
+    {
+        #if PY_MAJOR_VERSION >= 3
+        if (level == -1) {
+            if (strchr(__Pyx_MODULE_NAME, '.')) {
+                /* try package relative import first */
+                PyObject *relative_level = PyInt_FromLong(1);
+                if (!relative_level)
+                    goto bad;
+                module = PyObject_CallFunctionObjArgs(py_import,
+                    name, global_dict, empty_dict, fromlist_arg,
+                    relative_level, NULL);
+                Py_DECREF(relative_level);
+                if (!module) {
+                    /* Only an ImportError allows the absolute retry. */
+                    if (!PyErr_ExceptionMatches(PyExc_ImportError))
+                        goto bad;
+                    PyErr_Clear();
+                }
+            }
+            level = 0; /* try absolute import on failure */
+        }
+        #endif
+        if (!module) {
+            PyObject *level_obj = PyInt_FromLong(level);
+            if (!level_obj)
+                goto bad;
+            module = PyObject_CallFunctionObjArgs(py_import,
+                name, global_dict, empty_dict, fromlist_arg,
+                level_obj, NULL);
+            Py_DECREF(level_obj);
+        }
+    }
+    #else
+    if (level>0) {
+        PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
+        goto bad;
+    }
+    module = PyObject_CallFunctionObjArgs(py_import,
+        name, global_dict, empty_dict, fromlist_arg, NULL);
+    #endif
+bad:
+    /* Shared exit: `module` is still NULL on every failure path. */
+    Py_XDECREF(empty_dict);
+    Py_XDECREF(empty_list);
+    Py_XDECREF(py_import);
+    return module;
+}
+
+/* Build a double-precision complex value from its real part `x` and
+   imaginary part `y`, using std::complex, C99 _Complex or the struct
+   fallback depending on the build configuration. */
+static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) {
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    return ::std::complex< double >(x, y);
+  #else
+    return x + y*(__pyx_t_double_complex)_Complex_I;
+  #endif
+#else
+    __pyx_t_double_complex parts;
+    parts.real = x;
+    parts.imag = y;
+    return parts;
+#endif
+}
+
+#if CYTHON_CCOMPLEX
+#else
+    /* Return 1 if both the real and the imaginary parts compare equal. */
+    static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+       if (!(a.real == b.real))
+           return 0;
+       return a.imag == b.imag;
+    }
+    /* Component-wise sum a + b. */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+        __pyx_t_double_complex total = a;
+        total.real = a.real + b.real;
+        total.imag = a.imag + b.imag;
+        return total;
+    }
+    /* Component-wise difference a - b. */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+        __pyx_t_double_complex delta = a;
+        delta.real = a.real - b.real;
+        delta.imag = a.imag - b.imag;
+        return delta;
+    }
+    /* Complex product: (ar + i*ai) * (br + i*bi). */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+        __pyx_t_double_complex product;
+        product.imag = a.real * b.imag + a.imag * b.real;
+        product.real = a.real * b.real - a.imag * b.imag;
+        return product;
+    }
+    /* Complex quotient a / b, computed as a * conj(b) / |b|^2.  No special
+       handling is done for b == 0. */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+        __pyx_t_double_complex quotient;
+        double norm_sq = b.real * b.real + b.imag * b.imag;
+        quotient.real = (a.real * b.real + a.imag * b.imag) / norm_sq;
+        quotient.imag = (a.imag * b.real - a.real * b.imag) / norm_sq;
+        return quotient;
+    }
+    /* Negation: both components change sign. */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex a) {
+        __pyx_t_double_complex negated = a;
+        negated.real = -a.real;
+        negated.imag = -a.imag;
+        return negated;
+    }
+    /* Return 1 only if both components compare equal to zero. */
+    static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex a) {
+       if (!(a.real == 0))
+           return 0;
+       return a.imag == 0;
+    }
+    /* Complex conjugate: same real part, imaginary part negated. */
+    static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex a) {
+        __pyx_t_double_complex conjugate = a;
+        conjugate.imag = -a.imag;
+        conjugate.real =  a.real;
+        return conjugate;
+    }
+    #if 1
+        /* Modulus |z|.  hypot() is used when HAVE_HYPOT is defined and the
+           compiler is not MSVC; otherwise the plain sqrt formula is used. */
+        static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex z) {
+          #if defined(HAVE_HYPOT) && !defined(_MSC_VER)
+            return hypot(z.real, z.imag);
+          #else
+            return sqrt(z.real*z.real + z.imag*z.imag);
+          #endif
+        }
+        /* Complex power a ** b.
+         *
+         * Small integral real exponents (|b| <= 4) are evaluated by repeated
+         * multiplication; a negative exponent first replaces `a` by 1/a.
+         * Everything else uses the polar form
+         *     a ** b = exp(b * (ln r + i*theta)),  a = r * exp(i*theta).
+         * A zero base outside the integer fast path is returned unchanged.
+         */
+        static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex a, __pyx_t_double_complex b) {
+            __pyx_t_double_complex z;
+            double r, lnr, theta, z_r, z_theta;
+            if (b.imag == 0 && b.real == (int)b.real) {
+                if (b.real < 0) {
+                    /* a ** -n == (1/a) ** n, with 1/a = conj(a) / |a|^2 */
+                    double denom = a.real * a.real + a.imag * a.imag;
+                    a.real = a.real / denom;
+                    a.imag = -a.imag / denom;
+                    b.real = -b.real;
+                }
+                switch ((int)b.real) {
+                    case 0:
+                        z.real = 1;
+                        z.imag = 0;
+                        return z;
+                    case 1:
+                        return a;
+                    case 2:
+                        return __Pyx_c_prod(a, a);
+                    case 3:
+                        z = __Pyx_c_prod(a, a);
+                        return __Pyx_c_prod(z, a);
+                    case 4:
+                        z = __Pyx_c_prod(a, a);
+                        return __Pyx_c_prod(z, z);
+                }
+                /* Larger exponents fall through to the polar form. */
+            }
+            if (a.imag == 0 && a.real == 0) {
+                return a;
+            }
+            if (a.imag == 0 && a.real > 0) {
+                /* Positive real base: modulus is the value, argument is 0. */
+                r = a.real;
+                theta = 0;
+            } else {
+                /* General case.  This includes negative real bases, whose
+                   argument is +/-pi; using a.real as the modulus there would
+                   take the logarithm of a negative number and yield NaN. */
+                r = __Pyx_c_abs(a);
+                theta = atan2(a.imag, a.real);
+            }
+            lnr = log(r);
+            z_r = exp(lnr * b.real - theta * b.imag);
+            z_theta = theta * b.real + lnr * b.imag;
+            z.real = z_r * cos(z_theta);
+            z.imag = z_r * sin(z_theta);
+            return z;
+        }
+    #endif
+#endif
+
+/* Build a single-precision complex value from its real part `x` and
+   imaginary part `y`, using std::complex, C99 _Complex or the struct
+   fallback depending on the build configuration. */
+static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) {
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    return ::std::complex< float >(x, y);
+  #else
+    return x + y*(__pyx_t_float_complex)_Complex_I;
+  #endif
+#else
+    __pyx_t_float_complex parts;
+    parts.real = x;
+    parts.imag = y;
+    return parts;
+#endif
+}
+
+#if CYTHON_CCOMPLEX
+#else
+    /* Return 1 if both the real and the imaginary parts compare equal. */
+    static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+       if (!(a.real == b.real))
+           return 0;
+       return a.imag == b.imag;
+    }
+    /* Component-wise sum a + b. */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+        __pyx_t_float_complex total = a;
+        total.real = a.real + b.real;
+        total.imag = a.imag + b.imag;
+        return total;
+    }
+    /* Component-wise difference a - b. */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+        __pyx_t_float_complex delta = a;
+        delta.real = a.real - b.real;
+        delta.imag = a.imag - b.imag;
+        return delta;
+    }
+    /* Complex product: (ar + i*ai) * (br + i*bi). */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+        __pyx_t_float_complex product;
+        product.imag = a.real * b.imag + a.imag * b.real;
+        product.real = a.real * b.real - a.imag * b.imag;
+        return product;
+    }
+    /* Complex quotient a / b, computed as a * conj(b) / |b|^2.  No special
+       handling is done for b == 0. */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+        __pyx_t_float_complex quotient;
+        float norm_sq = b.real * b.real + b.imag * b.imag;
+        quotient.real = (a.real * b.real + a.imag * b.imag) / norm_sq;
+        quotient.imag = (a.imag * b.real - a.real * b.imag) / norm_sq;
+        return quotient;
+    }
+    /* Negation: both components change sign. */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex a) {
+        __pyx_t_float_complex negated = a;
+        negated.real = -a.real;
+        negated.imag = -a.imag;
+        return negated;
+    }
+    /* Return 1 only if both components compare equal to zero. */
+    static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex a) {
+       if (!(a.real == 0))
+           return 0;
+       return a.imag == 0;
+    }
+    /* Complex conjugate: same real part, imaginary part negated. */
+    static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex a) {
+        __pyx_t_float_complex conjugate = a;
+        conjugate.imag = -a.imag;
+        conjugate.real =  a.real;
+        return conjugate;
+    }
+    #if 1
+        /* Modulus |z|.  hypotf() is used when HAVE_HYPOT is defined and the
+           compiler is not MSVC; otherwise the plain sqrtf formula is used. */
+        static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex z) {
+          #if defined(HAVE_HYPOT) && !defined(_MSC_VER)
+            return hypotf(z.real, z.imag);
+          #else
+            return sqrtf(z.real*z.real + z.imag*z.imag);
+          #endif
+        }
+        /* Complex power a ** b (single precision).
+         *
+         * Small integral real exponents (|b| <= 4) are evaluated by repeated
+         * multiplication; a negative exponent first replaces `a` by 1/a.
+         * Everything else uses the polar form
+         *     a ** b = exp(b * (ln r + i*theta)),  a = r * exp(i*theta).
+         * A zero base outside the integer fast path is returned unchanged.
+         */
+        static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex a, __pyx_t_float_complex b) {
+            __pyx_t_float_complex z;
+            float r, lnr, theta, z_r, z_theta;
+            if (b.imag == 0 && b.real == (int)b.real) {
+                if (b.real < 0) {
+                    /* a ** -n == (1/a) ** n, with 1/a = conj(a) / |a|^2 */
+                    float denom = a.real * a.real + a.imag * a.imag;
+                    a.real = a.real / denom;
+                    a.imag = -a.imag / denom;
+                    b.real = -b.real;
+                }
+                switch ((int)b.real) {
+                    case 0:
+                        z.real = 1;
+                        z.imag = 0;
+                        return z;
+                    case 1:
+                        return a;
+                    case 2:
+                        return __Pyx_c_prodf(a, a);
+                    case 3:
+                        z = __Pyx_c_prodf(a, a);
+                        return __Pyx_c_prodf(z, a);
+                    case 4:
+                        z = __Pyx_c_prodf(a, a);
+                        return __Pyx_c_prodf(z, z);
+                }
+                /* Larger exponents fall through to the polar form. */
+            }
+            if (a.imag == 0 && a.real == 0) {
+                return a;
+            }
+            if (a.imag == 0 && a.real > 0) {
+                /* Positive real base: modulus is the value, argument is 0. */
+                r = a.real;
+                theta = 0;
+            } else {
+                /* General case.  This includes negative real bases, whose
+                   argument is +/-pi; using a.real as the modulus there would
+                   take the logarithm of a negative number and yield NaN. */
+                r = __Pyx_c_absf(a);
+                theta = atan2f(a.imag, a.real);
+            }
+            lnr = logf(r);
+            z_r = expf(lnr * b.real - theta * b.imag);
+            z_theta = theta * b.real + lnr * b.imag;
+            z.real = z_r * cosf(z_theta);
+            z.imag = z_r * sinf(z_theta);
+            return z;
+        }
+    #endif
+#endif
+
+/* Convert a Python integer to C unsigned char.  Returns (unsigned char)-1
+   with an exception set on failure; an OverflowError is raised here when the
+   value does not fit. */
+static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject* x) {
+    const unsigned char minus_one = (unsigned char)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(unsigned char) < sizeof(long)))
+        return (unsigned char)__Pyx_PyInt_AsUnsignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(unsigned char)as_long))
+        return (unsigned char)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to unsigned char" :
+            "value too large to convert to unsigned char";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (unsigned char)-1;
+}
+
+/* Convert a Python integer to C unsigned short.  Returns (unsigned short)-1
+   with an exception set on failure; an OverflowError is raised here when the
+   value does not fit. */
+static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject* x) {
+    const unsigned short minus_one = (unsigned short)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(unsigned short) < sizeof(long)))
+        return (unsigned short)__Pyx_PyInt_AsUnsignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(unsigned short)as_long))
+        return (unsigned short)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to unsigned short" :
+            "value too large to convert to unsigned short";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (unsigned short)-1;
+}
+
+/* Convert a Python integer to C unsigned int.  Returns (unsigned int)-1 with
+   an exception set on failure; an OverflowError is raised here when the
+   value does not fit. */
+static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject* x) {
+    const unsigned int minus_one = (unsigned int)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(unsigned int) < sizeof(long)))
+        return (unsigned int)__Pyx_PyInt_AsUnsignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(unsigned int)as_long))
+        return (unsigned int)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to unsigned int" :
+            "value too large to convert to unsigned int";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (unsigned int)-1;
+}
+
+/* Convert a Python integer to C char.  Returns (char)-1 with an exception
+   set on failure; an OverflowError is raised here when the value does not
+   fit. */
+static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject* x) {
+    const char minus_one = (char)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(char) < sizeof(long)))
+        return (char)__Pyx_PyInt_AsLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(char)as_long))
+        return (char)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to char" :
+            "value too large to convert to char";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (char)-1;
+}
+
+/* Convert a Python integer to C short.  Returns (short)-1 with an exception
+   set on failure; an OverflowError is raised here when the value does not
+   fit. */
+static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject* x) {
+    const short minus_one = (short)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(short) < sizeof(long)))
+        return (short)__Pyx_PyInt_AsLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(short)as_long))
+        return (short)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to short" :
+            "value too large to convert to short";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (short)-1;
+}
+
+/* Convert a Python integer to C int.  Returns -1 with an exception set on
+   failure; an OverflowError is raised here when the value does not fit. */
+static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject* x) {
+    const int minus_one = (int)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(int) < sizeof(long)))
+        return (int)__Pyx_PyInt_AsLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(int)as_long))
+        return (int)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to int" :
+            "value too large to convert to int";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (int)-1;
+}
+
+/* Convert a Python integer to C signed char.  Returns (signed char)-1 with
+   an exception set on failure; an OverflowError is raised here when the
+   value does not fit. */
+static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject* x) {
+    const signed char minus_one = (signed char)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(signed char) < sizeof(long)))
+        return (signed char)__Pyx_PyInt_AsSignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(signed char)as_long))
+        return (signed char)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to signed char" :
+            "value too large to convert to signed char";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (signed char)-1;
+}
+
+/* Convert a Python integer to C signed short.  Returns (signed short)-1 with
+   an exception set on failure; an OverflowError is raised here when the
+   value does not fit. */
+static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject* x) {
+    const signed short minus_one = (signed short)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(signed short) < sizeof(long)))
+        return (signed short)__Pyx_PyInt_AsSignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(signed short)as_long))
+        return (signed short)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to signed short" :
+            "value too large to convert to signed short";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (signed short)-1;
+}
+
+/* Convert a Python integer to C signed int.  Returns -1 with an exception
+   set on failure; an OverflowError is raised here when the value does not
+   fit. */
+static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject* x) {
+    const signed int minus_one = (signed int)-1, zero = 0;
+    const int is_unsigned = minus_one > zero;
+    long as_long;
+    if (!(sizeof(signed int) < sizeof(long)))
+        return (signed int)__Pyx_PyInt_AsSignedLong(x);
+    as_long = __Pyx_PyInt_AsLong(x);
+    if (likely(as_long == (long)(signed int)as_long))
+        return (signed int)as_long;
+    /* Keep an error already raised by the long conversion. */
+    if (!(as_long == -1 && PyErr_Occurred())) {
+        const char *message = (is_unsigned && as_long < 0) ?
+            "can't convert negative value to signed int" :
+            "value too large to convert to signed int";
+        PyErr_SetString(PyExc_OverflowError, message);
+    }
+    return (signed int)-1;
+}
+
+/* Convert a Python object to a C int.
+ * NOTE(review): despite the "LongDouble" in its name, this converter is
+ * declared to return int and range-checks against int.
+ * When int is narrower than long the value from __Pyx_PyInt_AsLong is
+ * checked to round-trip through int; otherwise the long is returned cast
+ * to int. Returns (int)-1 after setting OverflowError when the value does
+ * not fit. */
+static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject* x) {
+    const int neg_one = (int)-1, const_zero = 0;
+    const int is_unsigned = neg_one > const_zero;
+    long val;
+    if (!(sizeof(int) < sizeof(long))) {
+        return (int)__Pyx_PyInt_AsLong(x);
+    }
+    val = __Pyx_PyInt_AsLong(x);
+    if (!unlikely(val != (long)(int)val)) {
+        return (int)val;
+    }
+    /* The value does not round-trip: report it unless an error is
+     * already pending together with val == -1. */
+    if (!unlikely(val == -1 && PyErr_Occurred())) {
+        const char *msg;
+        if (is_unsigned && unlikely(val < 0)) {
+            msg = "can't convert negative value to int";
+        } else {
+            msg = "value too large to convert to int";
+        }
+        PyErr_SetString(PyExc_OverflowError, msg);
+    }
+    return (int)-1;
+}
+
+/* Convert a Python object to a C unsigned long.
+ * Negative values are rejected with OverflowError. Objects that are not
+ * already Python integers are first coerced with __Pyx_PyNumber_Int.
+ * Returns (unsigned long)-1 when a negative value is rejected or the
+ * coercion fails. */
+static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject* x) {
+    const unsigned long neg_one = (unsigned long)-1, const_zero = 0;
+    /* true only when the target type is unsigned: (T)-1 wraps to a large value */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to unsigned long");
+            return (unsigned long)-1;
+        }
+        return (unsigned long)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            /* a negative Py_SIZE marks a negative Python long */
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to unsigned long");
+                return (unsigned long)-1;
+            }
+            return (unsigned long)PyLong_AsUnsignedLong(x);
+        } else {
+            return (unsigned long)PyLong_AsLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        unsigned long val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (unsigned long)-1;
+        val = __Pyx_PyInt_AsUnsignedLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Convert a Python object to a C unsigned PY_LONG_LONG.
+ * Negative values are rejected with OverflowError. Objects that are not
+ * already Python integers are first coerced with __Pyx_PyNumber_Int.
+ * Returns (unsigned PY_LONG_LONG)-1 when a negative value is rejected or
+ * the coercion fails. */
+static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject* x) {
+    const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG)-1, const_zero = 0;
+    /* true only when the target type is unsigned: (T)-1 wraps to a large value */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to unsigned PY_LONG_LONG");
+            return (unsigned PY_LONG_LONG)-1;
+        }
+        return (unsigned PY_LONG_LONG)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            /* a negative Py_SIZE marks a negative Python long */
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to unsigned PY_LONG_LONG");
+                return (unsigned PY_LONG_LONG)-1;
+            }
+            return (unsigned PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+        } else {
+            return (unsigned PY_LONG_LONG)PyLong_AsLongLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        unsigned PY_LONG_LONG val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (unsigned PY_LONG_LONG)-1;
+        val = __Pyx_PyInt_AsUnsignedLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Convert a Python object to a C long.
+ * Objects that are not already Python integers are first coerced with
+ * __Pyx_PyNumber_Int. Returns (long)-1 when that coercion fails. */
+static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject* x) {
+    const long neg_one = (long)-1, const_zero = 0;
+    /* evaluates to 0 for this signed type, so the negative-value
+     * checks below are inactive */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to long");
+            return (long)-1;
+        }
+        return (long)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to long");
+                return (long)-1;
+            }
+            return (long)PyLong_AsUnsignedLong(x);
+        } else {
+            return (long)PyLong_AsLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        long val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (long)-1;
+        val = __Pyx_PyInt_AsLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Convert a Python object to a C PY_LONG_LONG.
+ * Objects that are not already Python integers are first coerced with
+ * __Pyx_PyNumber_Int. Returns (PY_LONG_LONG)-1 when that coercion fails. */
+static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject* x) {
+    const PY_LONG_LONG neg_one = (PY_LONG_LONG)-1, const_zero = 0;
+    /* evaluates to 0 for this signed type, so the negative-value
+     * checks below are inactive */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to PY_LONG_LONG");
+            return (PY_LONG_LONG)-1;
+        }
+        return (PY_LONG_LONG)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to PY_LONG_LONG");
+                return (PY_LONG_LONG)-1;
+            }
+            return (PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+        } else {
+            return (PY_LONG_LONG)PyLong_AsLongLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        PY_LONG_LONG val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (PY_LONG_LONG)-1;
+        val = __Pyx_PyInt_AsLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Convert a Python object to a C signed long.
+ * Objects that are not already Python integers are first coerced with
+ * __Pyx_PyNumber_Int. Returns (signed long)-1 when that coercion fails. */
+static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject* x) {
+    const signed long neg_one = (signed long)-1, const_zero = 0;
+    /* evaluates to 0 for this signed type, so the negative-value
+     * checks below are inactive */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to signed long");
+            return (signed long)-1;
+        }
+        return (signed long)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to signed long");
+                return (signed long)-1;
+            }
+            return (signed long)PyLong_AsUnsignedLong(x);
+        } else {
+            return (signed long)PyLong_AsLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        signed long val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (signed long)-1;
+        val = __Pyx_PyInt_AsSignedLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Convert a Python object to a C signed PY_LONG_LONG.
+ * Objects that are not already Python integers are first coerced with
+ * __Pyx_PyNumber_Int. Returns (signed PY_LONG_LONG)-1 when that coercion
+ * fails. */
+static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject* x) {
+    const signed PY_LONG_LONG neg_one = (signed PY_LONG_LONG)-1, const_zero = 0;
+    /* evaluates to 0 for this signed type, so the negative-value
+     * checks below are inactive */
+    const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+    /* Python 2 only: plain int objects are read directly */
+    if (likely(PyInt_Check(x))) {
+        long val = PyInt_AS_LONG(x);
+        if (is_unsigned && unlikely(val < 0)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "can't convert negative value to signed PY_LONG_LONG");
+            return (signed PY_LONG_LONG)-1;
+        }
+        return (signed PY_LONG_LONG)val;
+    } else
+#endif
+    if (likely(PyLong_Check(x))) {
+        if (is_unsigned) {
+            if (unlikely(Py_SIZE(x) < 0)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "can't convert negative value to signed PY_LONG_LONG");
+                return (signed PY_LONG_LONG)-1;
+            }
+            return (signed PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+        } else {
+            return (signed PY_LONG_LONG)PyLong_AsLongLong(x);
+        }
+    } else {
+        /* not an integer object: coerce it, then convert the result */
+        signed PY_LONG_LONG val;
+        PyObject *tmp = __Pyx_PyNumber_Int(x);
+        if (!tmp) return (signed PY_LONG_LONG)-1;
+        val = __Pyx_PyInt_AsSignedLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+/* Warn when the Python "major.minor" version this module was compiled
+ * against differs from the version of the running interpreter.
+ *
+ * Returns 0 when the versions match; otherwise returns the result of
+ * PyErr_WarnEx (PyErr_Warn before Python 2.5).
+ *
+ * The complete "major.minor" prefixes are compared. Comparing single
+ * characters of strings truncated to three characters cannot tell a
+ * one-digit minor version from a two-digit one (e.g. 3.1 and 3.10). */
+static int __Pyx_check_binary_version(void) {
+    char ctversion[16], rtversion[16];
+    const char *rt = Py_GetVersion();
+    int i, dots = 0, same = 1;
+    PyOS_snprintf(ctversion, sizeof(ctversion), "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
+    /* Copy the leading "major.minor" part of the runtime version string:
+     * digits and the first dot only. */
+    for (i = 0; i < (int)sizeof(rtversion) - 1; i++) {
+        char c = rt[i];
+        if (c == '.') {
+            if (++dots == 2) break;
+        } else if (c < '0' || c > '9') {
+            break;
+        }
+        rtversion[i] = c;
+    }
+    rtversion[i] = '\0';
+    /* Both strings are NUL-terminated; stop at the first difference. */
+    for (i = 0; ctversion[i] || rtversion[i]; i++) {
+        if (ctversion[i] != rtversion[i]) {
+            same = 0;
+            break;
+        }
+    }
+    if (!same) {
+        char message[200];
+        PyOS_snprintf(message, sizeof(message),
+                      "compiletime version %s of module '%.100s' "
+                      "does not match runtime version %s",
+                      ctversion, __Pyx_MODULE_NAME, rtversion);
+        #if PY_VERSION_HEX < 0x02050000
+        return PyErr_Warn(NULL, message);
+        #else
+        return PyErr_WarnEx(NULL, message, 1);
+        #endif
+    }
+    return 0;
+}
+
+#ifndef __PYX_HAVE_RT_ImportType
+#define __PYX_HAVE_RT_ImportType
+/* Import module_name, fetch its attribute class_name and check that it
+ * is a type object whose tp_basicsize is compatible with `size`.
+ *
+ * With strict == 0, a type larger than `size` only triggers a warning;
+ * any other size mismatch raises ValueError. With strict != 0 every
+ * mismatch raises ValueError.
+ *
+ * Returns a new reference to the type, or NULL with an exception set. */
+static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name,
+    size_t size, int strict)
+{
+    PyObject *py_module = 0;
+    PyObject *result = 0;
+    PyObject *py_name = 0;
+    char warning[200];
+    py_module = __Pyx_ImportModule(module_name);
+    if (!py_module)
+        goto bad;
+    py_name = __Pyx_PyIdentifier_FromString(class_name);
+    if (!py_name)
+        goto bad;
+    result = PyObject_GetAttr(py_module, py_name);
+    /* the name and the module are no longer needed once the attribute
+     * lookup is done; clear the pointers so `bad` does not release twice */
+    Py_DECREF(py_name);
+    py_name = 0;
+    Py_DECREF(py_module);
+    py_module = 0;
+    if (!result)
+        goto bad;
+    if (!PyType_Check(result)) {
+        PyErr_Format(PyExc_TypeError,
+            "%s.%s is not a type object",
+            module_name, class_name);
+        goto bad;
+    }
+    /* non-strict mode tolerates a type that grew, with a warning */
+    if (!strict && (size_t)((PyTypeObject *)result)->tp_basicsize > size) {
+        PyOS_snprintf(warning, sizeof(warning),
+            "%s.%s size changed, may indicate binary incompatibility",
+            module_name, class_name);
+        #if PY_VERSION_HEX < 0x02050000
+        if (PyErr_Warn(NULL, warning) < 0) goto bad;
+        #else
+        if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad;
+        #endif
+    }
+    else if ((size_t)((PyTypeObject *)result)->tp_basicsize != size) {
+        PyErr_Format(PyExc_ValueError,
+            "%s.%s has the wrong size, try recompiling",
+            module_name, class_name);
+        goto bad;
+    }
+    return (PyTypeObject *)result;
+bad:
+    /* release whatever is still held */
+    Py_XDECREF(py_module);
+    Py_XDECREF(result);
+    return NULL;
+}
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportModule
+#define __PYX_HAVE_RT_ImportModule
+/* Import the module called `name`.
+ * Returns the result of PyImport_Import, or 0 when the name object
+ * could not be created. */
+static PyObject *__Pyx_ImportModule(const char *name) {
+    PyObject *module;
+    PyObject *name_obj = __Pyx_PyIdentifier_FromString(name);
+    if (!name_obj) {
+        return 0;
+    }
+    module = PyImport_Import(name_obj);
+    Py_DECREF(name_obj);
+    return module;
+}
+#endif
+
+/* Binary search of `entries` (first `count` elements) for `code_line`.
+ * Returns the index of the matching entry when there is one, otherwise
+ * the index at which such an entry would be placed; `count` is returned
+ * when code_line is greater than the last entry's line. */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+    int lo = 0, hi = count - 1, probe = 0;
+    if (hi >= 0 && entries[hi].code_line < code_line) {
+        return count;
+    }
+    while (lo < hi) {
+        probe = (lo + hi) / 2;
+        if (entries[probe].code_line == code_line) {
+            return probe;
+        }
+        if (code_line < entries[probe].code_line) {
+            hi = probe;
+        } else {
+            lo = probe + 1;
+        }
+    }
+    return (code_line <= entries[probe].code_line) ? probe : probe + 1;
+}
+/* Look up the cached code object registered for `code_line`.
+ * Returns a new reference, or NULL when code_line is 0, the cache has
+ * no entries array, or no entry matches. */
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+    PyCodeObject* hit;
+    int idx;
+    if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+        return NULL;
+    }
+    idx = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+    if (unlikely(idx >= __pyx_code_cache.count)) {
+        return NULL;
+    }
+    if (unlikely(__pyx_code_cache.entries[idx].code_line != code_line)) {
+        return NULL;
+    }
+    hit = __pyx_code_cache.entries[idx].code_object;
+    Py_INCREF(hit);
+    return hit;
+}
+/* Register `code_object` in the code-object cache under `code_line`.
+ *
+ * The cache keeps its own reference to every stored code object. The
+ * entries array is created on first use with room for 64 entries and
+ * grown by 64 entries whenever it is full. Allocation failures are
+ * silently ignored: the object is simply not cached. Nothing is done
+ * when code_line is 0. */
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
+    int pos, i;
+    __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+    if (unlikely(!code_line)) {
+        return;
+    }
+    if (unlikely(!entries)) {
+        /* first insertion: allocate the initial array */
+        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+        if (likely(entries)) {
+            __pyx_code_cache.entries = entries;
+            __pyx_code_cache.max_count = 64;
+            __pyx_code_cache.count = 1;
+            entries[0].code_line = code_line;
+            entries[0].code_object = code_object;
+            Py_INCREF(code_object);
+        }
+        return;
+    }
+    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+    if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
+        /* An entry already exists for this line: swap in the new object.
+         * The cache must own a reference to what it stores, so take one
+         * before releasing the old object (this also keeps the object
+         * alive when old and new are the same). */
+        PyCodeObject* tmp = entries[pos].code_object;
+        Py_INCREF(code_object);
+        entries[pos].code_object = code_object;
+        Py_DECREF(tmp);
+        return;
+    }
+    if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
+        int new_max = __pyx_code_cache.max_count + 64;
+        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+            __pyx_code_cache.entries, new_max*sizeof(__Pyx_CodeObjectCacheEntry));
+        if (unlikely(!entries)) {
+            return;
+        }
+        __pyx_code_cache.entries = entries;
+        __pyx_code_cache.max_count = new_max;
+    }
+    /* shift the tail up by one slot to open a gap at `pos` */
+    for (i=__pyx_code_cache.count; i>pos; i--) {
+        entries[i] = entries[i-1];
+    }
+    entries[pos].code_line = code_line;
+    entries[pos].code_object = code_object;
+    __pyx_code_cache.count++;
+    Py_INCREF(code_object);
+}
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+/* Build an empty code object carrying only a file name, a function name
+ * and a first line number, for use in a traceback entry.
+ *
+ * When c_line is non-zero the function name is formatted as
+ * "funcname (cfile:c_line)"; otherwise funcname is used unchanged.
+ * Returns the result of __Pyx_PyCode_New, or NULL when one of the name
+ * objects could not be created. */
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+            const char *funcname, int c_line,
+            int py_line, const char *filename) {
+    PyCodeObject *py_code = 0;
+    PyObject *py_srcfile = 0;
+    PyObject *py_funcname = 0;
+    /* file and function names are byte strings on Python 2, unicode on 3 */
+    #if PY_MAJOR_VERSION < 3
+    py_srcfile = PyString_FromString(filename);
+    #else
+    py_srcfile = PyUnicode_FromString(filename);
+    #endif
+    if (!py_srcfile) goto bad;
+    if (c_line) {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+        #else
+        py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+        #endif
+    }
+    else {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromString(funcname);
+        #else
+        py_funcname = PyUnicode_FromString(funcname);
+        #endif
+    }
+    if (!py_funcname) goto bad;
+    py_code = __Pyx_PyCode_New(
+        0,            /*int argcount,*/
+        0,            /*int kwonlyargcount,*/
+        0,            /*int nlocals,*/
+        0,            /*int stacksize,*/
+        0,            /*int flags,*/
+        __pyx_empty_bytes, /*PyObject *code,*/
+        __pyx_empty_tuple, /*PyObject *consts,*/
+        __pyx_empty_tuple, /*PyObject *names,*/
+        __pyx_empty_tuple, /*PyObject *varnames,*/
+        __pyx_empty_tuple, /*PyObject *freevars,*/
+        __pyx_empty_tuple, /*PyObject *cellvars,*/
+        py_srcfile,   /*PyObject *filename,*/
+        py_funcname,  /*PyObject *name,*/
+        py_line,      /*int firstlineno,*/
+        __pyx_empty_bytes  /*PyObject *lnotab*/
+    );
+    /* the local references are dropped whether or not creation succeeded */
+    Py_DECREF(py_srcfile);
+    Py_DECREF(py_funcname);
+    return py_code;
+bad:
+    Py_XDECREF(py_srcfile);
+    Py_XDECREF(py_funcname);
+    return NULL;
+}
+/* Add a traceback entry for (funcname, filename, py_line).
+ * The code object is taken from the code-object cache, keyed by c_line
+ * when it is non-zero and by py_line otherwise; on a miss it is created
+ * and inserted into the cache. Failures at any step leave the traceback
+ * untouched. */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+                               int py_line, const char *filename) {
+    const int cache_key = c_line ? c_line : py_line;
+    PyObject *globals_dict;
+    PyFrameObject *frame = 0;
+    PyCodeObject *code = __pyx_find_code_object(cache_key);
+    if (!code) {
+        code = __Pyx_CreateCodeObjectForTraceback(
+            funcname, c_line, py_line, filename);
+        if (!code) {
+            return;
+        }
+        __pyx_insert_code_object(cache_key, code);
+    }
+    globals_dict = PyModule_GetDict(__pyx_m);
+    if (globals_dict) {
+        frame = PyFrame_New(
+            PyThreadState_GET(), /*PyThreadState *tstate,*/
+            code,                /*PyCodeObject *code,*/
+            globals_dict,        /*PyObject *globals,*/
+            0                    /*PyObject *locals*/
+        );
+        if (frame) {
+            frame->f_lineno = py_line;
+            PyTraceBack_Here(frame);
+        }
+    }
+    Py_XDECREF(code);
+    Py_XDECREF(frame);
+}
+
+/* Create the Python string objects described by the table `t`.
+ * The table is walked until an entry with a NULL `p` is reached; each
+ * created object is stored through the entry's `p` pointer.
+ * Returns 0 on success, -1 as soon as one object cannot be created. */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+    while (t->p) {
+        /* NOTE(review): t->n - 1 presumably drops a trailing NUL counted
+         * in n -- confirm against the table generator */
+        #if PY_MAJOR_VERSION < 3
+        if (t->is_unicode) {
+            *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+        } else if (t->intern) {
+            *t->p = PyString_InternFromString(t->s);
+        } else {
+            *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+        }
+        #else  /* Python 3+ has unicode identifiers */
+        if (t->is_unicode | t->is_str) {
+            if (t->intern) {
+                *t->p = PyUnicode_InternFromString(t->s);
+            } else if (t->encoding) {
+                *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
+            } else {
+                *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+            }
+        } else {
+            *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
+        }
+        #endif
+        if (!*t->p)
+            return -1;
+        ++t;
+    }
+    return 0;
+}
+
+
+/* Type Conversion Functions */
+
+/* Truth value of x, answering directly for the True, False and None
+ * singletons and deferring to PyObject_IsTrue for everything else. */
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+   if (x == Py_True) return 1;
+   if ((x == Py_False) | (x == Py_None)) return 0;
+   return PyObject_IsTrue(x);
+}
+
+/* Coerce x to a Python integer object.
+ * Integer objects are returned as a new reference to x itself. Other
+ * objects are converted through their number protocol (nb_int, and on
+ * Python 2 also nb_long). Returns NULL with TypeError set when no
+ * conversion is available or the conversion returns a non-integer. */
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
+  PyNumberMethods *m;
+  const char *name = NULL;
+  PyObject *res = NULL;
+#if PY_VERSION_HEX < 0x03000000
+  if (PyInt_Check(x) || PyLong_Check(x))
+#else
+  if (PyLong_Check(x))
+#endif
+    return Py_INCREF(x), x;
+  m = Py_TYPE(x)->tp_as_number;
+  /* `name` records which special method was used, for the error message */
+#if PY_VERSION_HEX < 0x03000000
+  if (m && m->nb_int) {
+    name = "int";
+    res = PyNumber_Int(x);
+  }
+  else if (m && m->nb_long) {
+    name = "long";
+    res = PyNumber_Long(x);
+  }
+#else
+  if (m && m->nb_int) {
+    name = "int";
+    res = PyNumber_Long(x);
+  }
+#endif
+  if (res) {
+    /* reject conversions that did not produce an integer object */
+#if PY_VERSION_HEX < 0x03000000
+    if (!PyInt_Check(res) && !PyLong_Check(res)) {
+#else
+    if (!PyLong_Check(res)) {
+#endif
+      PyErr_Format(PyExc_TypeError,
+                   "__%s__ returned non-%s (type %.200s)",
+                   name, name, Py_TYPE(res)->tp_name);
+      Py_DECREF(res);
+      return NULL;
+    }
+  }
+  else if (!PyErr_Occurred()) {
+    /* no conversion slot and no pending error: report a generic one */
+    PyErr_SetString(PyExc_TypeError,
+                    "an integer is required");
+  }
+  return res;
+}
+
+/* Convert b to a Py_ssize_t through PyNumber_Index.
+ * Returns -1 when PyNumber_Index fails. */
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+  Py_ssize_t result = -1;
+  PyObject* index = PyNumber_Index(b);
+  if (index) {
+    result = PyInt_AsSsize_t(index);
+    Py_DECREF(index);
+  }
+  return result;
+}
+
+/* Create a Python integer object from a size_t value.
+ * Python 2.5 and later use PyInt_FromSize_t; older versions build the
+ * object by hand. */
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
+#if PY_VERSION_HEX < 0x02050000
+   if (ival <= LONG_MAX)
+       return PyInt_FromLong((long)ival);
+   else {
+       /* too large for a long: build a Python long from the raw bytes,
+        * detecting the byte order of this machine at run time */
+       unsigned char *bytes = (unsigned char *) &ival;
+       int one = 1; int little = (int)*(unsigned char*)&one;
+       return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);
+   }
+#else
+   return PyInt_FromSize_t(ival);
+#endif
+}
+
+/* Convert a Python object to a C size_t.
+ * The value is read as an unsigned PY_LONG_LONG and must round-trip
+ * through size_t. Returns (size_t)-1 when the underlying conversion
+ * failed, or after setting OverflowError when the value does not fit. */
+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {
+   const unsigned PY_LONG_LONG wide = __Pyx_PyInt_AsUnsignedLongLong(x);
+   if (unlikely(wide == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {
+       return (size_t)-1;
+   }
+   if (!unlikely(wide != (unsigned PY_LONG_LONG)(size_t)wide)) {
+       return (size_t)wide;
+   }
+   PyErr_SetString(PyExc_OverflowError,
+                   "value too large to convert to size_t");
+   return (size_t)-1;
+}
+
+
+#endif /* Py_PYTHON_H */
diff --git a/pyoperators/utils/fake_MPI.py b/pyoperators/utils/fake_MPI.py
new file mode 100644
index 0000000..4023a4a
--- /dev/null
+++ b/pyoperators/utils/fake_MPI.py
@@ -0,0 +1,154 @@
+"""
+MPI-wrapper module for non-MPI enabled platforms.
+"""
+import __builtin__
+
+# Define every name listed in _constants as a module-level integer whose
+# value is its position in the tuple.
+_g = globals()
+# NOTE: 'WCHAR' is listed twice; the later occurrence overwrites the
+# earlier one, so WCHAR ends up with the index of its second position.
+_constants = ('SUM', 'MIN', 'MAX', 'PROD', 'BAND', 'BOR', 'BXOR',
+              'LAND', 'LOR', 'LXOR', 'MAXLOC', 'MINLOC',
+              'BOOL', 'BYTE',
+              'C_BOOL', 'C_COMPLEX', 'C_DOUBLE_COMPLEX', 'C_FLOAT_COMPLEX',
+              'C_LONG_DOUBLE_COMPLEX',
+              'CHAR', 'CHARACTER', 'WCHAR',
+              'COMPLEX', 'COMPLEX4', 'COMPLEX8', 'COMPLEX16', 'COMPLEX32',
+              'DOUBLE', 'DOUBLE_COMPLEX', 'DOUBLE_INT', 'DOUBLE_PRECISION',
+              'F_BOOL', 'F_COMPLEX', 'F_DOUBLE', 'F_DOUBLE_COMPLEX', 'F_FLOAT',
+              'F_FLOAT_COMPLEX', 'F_INT',
+              'FLOAT', 'FLOAT_INT',
+              'INT', 'INT8_T', 'INT16_T', 'INT32_T', 'INT64_T', 'INT_INT',
+              'INTEGER', 'INTEGER1', 'INTEGER2', 'INTEGER4', 'INTEGER8',
+              'INTEGER16',
+              'LOGICAL', 'LOGICAL1', 'LOGICAL2', 'LOGICAL4', 'LOGICAL8',
+              'LONG', 'LONG_DOUBLE', 'LONG_DOUBLE_INT', 'LONG_INT', 'LONG_LONG',
+              'PACKED',
+              'REAL', 'REAL2', 'REAL4', 'REAL8', 'REAL16',
+              'SHORT', 'SHORT_INT',
+              'SIGNED_CHAR', 'SIGNED_INT', 'SIGNED_LONG', 'SIGNED_LONG_LONG',
+              'SIGNED_SHORT',
+              'SINT8_T', 'SINT16_T', 'SINT32_T', 'SINT64_T',
+              'TWOINT',
+              'UINT8_T', 'UINT16_T', 'UINT32_T', 'UINT64_T',
+              'UNSIGNED', 'UNSIGNED_CHAR', 'UNSIGNED_INT', 'UNSIGNED_LONG',
+              'UNSIGNED_LONG_LONG', 'UNSIGNED_SHORT',
+              'WCHAR',
+              'IN_PLACE', 'KEYVAL_INVALID')
+for i, c in enumerate(_constants):
+    _g[c] = i
+
+
+class Comm(object):
+    """
+    Stand-in communicator for platforms without MPI.
+
+    Collective operations hand the caller's own data back, and the
+    topology methods return new communicators with the same rank and
+    size as this one.
+
+    """
+    # registry of (copy_fn, delete_fn) pairs indexed by keyval; it is a
+    # class attribute, hence shared by all communicators
+    _keyvals = {}
+
+    def __init__(self, rank, size):
+        self._attr = {}
+        self.rank, self.size = rank, size
+
+    # -- rank and size -----------------------------------------------
+
+    def Get_rank(self):
+        return self.rank
+
+    def Get_size(self):
+        return self.size
+
+    # -- collectives on Python objects -------------------------------
+
+    def allgather(self, sendobj=None, recvobj=None):
+        return [sendobj]
+
+    def allreduce(self, sendobj=None, recvobj=None, op=SUM):
+        return sendobj
+
+    def bcast(self, obj=None, root=0):
+        return obj
+
+    def gather(self, sendobj=None, recvobj=None, root=0):
+        return [sendobj]
+
+    # -- collectives on buffers --------------------------------------
+
+    def Allgatherv(self, i, o, op=None):
+        in_place = isinstance(i, int) and i == IN_PLACE
+        if not in_place:
+            # a (buffer, ...) specification: only the buffer is used
+            if isinstance(i, (list, tuple)):
+                i = i[0]
+            o[0][...] = i
+
+    def Allreduce(self, i, o, op=None):
+        in_place = isinstance(i, int) and i == IN_PLACE
+        if not in_place:
+            # a (buffer, ...) specification: only the buffer is used
+            if isinstance(i, (list, tuple)):
+                i = i[0]
+            o[0][...] = i
+
+    def Barrier(self):
+        pass
+
+    # -- communicator management -------------------------------------
+
+    def Dup(self):
+        return Comm(self.rank, self.size)
+    Clone = Dup
+
+    def Free(self):
+        return
+
+    def Split(self, color=0, key=0):
+        return Comm(self.rank, self.size)
+
+    def Create_cart(self, dims, periods=None, reorder=False):
+        return Cartcomm(self.rank, self.size)
+
+    # -- attribute caching -------------------------------------------
+
+    @classmethod
+    def Create_keyval(cls, copy_fn=None, delete_fn=None):
+        registry = cls._keyvals
+        # keyvals start at 1 and are allocated above the largest one in use
+        keyval = max(registry.keys()) + 1 if registry else 1
+        registry[keyval] = (copy_fn, delete_fn)
+        return keyval
+
+    @classmethod
+    def Free_keyval(cls, keyval):
+        if keyval not in cls._keyvals:
+            raise ValueError('Invalid keyval.')
+        del cls._keyvals[keyval]
+
+    def Delete_attr(self, keyval):
+        if keyval not in self._attr:
+            raise ValueError('Invalid keyval.')
+        del self._attr[keyval]
+
+    def Get_attr(self, keyval):
+        if keyval not in self._keyvals:
+            raise ValueError('Invalid keyval.')
+        # None when the attribute has not been set on this communicator
+        return self._attr.get(keyval)
+
+    def Set_attr(self, keyval, attrval):
+        if keyval not in self._keyvals:
+            raise ValueError('Invalid keyval.')
+        self._attr[keyval] = attrval
+
+    # -- Fortran interoperability ------------------------------------
+
+    @staticmethod
+    def f2py(fcomm):
+        return COMM_SELF
+
+    def py2f(self):
+        return 0
+
+
+class Cartcomm(Comm):
+    """ Stand-in for a communicator created by Comm.Create_cart. """
+    def Sub(self, remain_dims):
+        # remain_dims is ignored: a plain communicator with the same rank
+        # and size is returned
+        rank, size = self.rank, self.size
+        return Comm(rank, size)
+
+
+def Get_processor_name():
+    """ Return the network name of this machine, as given by platform. """
+    from platform import node
+    return node()
+
+# Predefined communicators: COMM_NULL has size 0, COMM_SELF and COMM_WORLD
+# both describe a single process of rank 0.
+COMM_NULL = Comm(0, 0)
+COMM_SELF = Comm(0, 1)
+COMM_WORLD = Comm(0, 1)
+
+
+# This class shadows the builtin Exception inside this module; the builtin
+# one is reached through __builtin__.
+class Exception(__builtin__.Exception):
+    """ Exception.__init__(self, int ierr=0) """
+    def __init__(self, ierr=0):
+        # ierr is accepted but not stored
+        pass
+
+# remove the helper names from the module namespace
+del _g, _constants, __builtin__
diff --git a/pyoperators/utils/misc.py b/pyoperators/utils/misc.py
new file mode 100644
index 0000000..8a6d920
--- /dev/null
+++ b/pyoperators/utils/misc.py
@@ -0,0 +1,1082 @@
+from __future__ import absolute_import, division, print_function
+
+import collections
+import functools
+import itertools
+import multiprocessing
+import multiprocessing.dummy
+import numpy as np
+import operator
+import os
+import signal
+import timeit
+import types
+
+from contextlib import contextmanager
+from itertools import izip
+from . import cythonutils as cu
+from ..warnings import warn, PyOperatorsDeprecationWarning
+
+__all__ = ['all_eq',
+           'broadcast_shapes',
+           'cast',
+           'complex_dtype',
+           'deprecated',
+           'first',
+           'first_is_not',
+           'float_dtype',
+           'groupbykey',
+           'ifirst',
+           'ifirst_is_not',
+           'ilast',
+           'ilast_is_not',
+           'inspect_special_values',
+           'interruptible',
+           'interruptible_if',
+           'isalias',
+           'isclassattr',
+           'isscalar',
+           'isscalarlike',
+           'izip_broadcast',
+           'last',
+           'last_is_not',
+           'least_greater_multiple',
+           'merge_none',
+           'ndarraywrap',
+           'one',
+           'omp_num_threads',
+           'operation_assignment',
+           'operation_symbol',
+           'pi',
+           'pool_threading',
+           'product',
+           'renumerate',
+           'reshape_broadcast',
+           'setting',
+           'settingerr',
+           'split',
+           'strelapsed',
+           'strenum',
+           'strinfo',
+           'strnbytes',
+           'strplural',
+           'strshape',
+           'Timer',
+           'tointtuple',
+           'uninterruptible',
+           'uninterruptible_if',
+           'zero']
+
+
+# decorators
+# ==========
+
+
+def deprecated(msg):
+    """
+    Return a decorator that marks a function as deprecated.
+
+    Each call to the decorated function first issues a
+    PyOperatorsDeprecationWarning that names the function and includes
+    `msg`, then calls the original function.
+
+    Parameters
+    ----------
+    msg : str
+        The explanation appended to the warning message.
+
+    """
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **keywords):
+            text = '{!r} is deprecated: {}'.format(func.__name__, msg)
+            warn(text, PyOperatorsDeprecationWarning)
+            return func(*args, **keywords)
+        return wrapper
+    return decorator
+
+
+# other stuff
+# ===========
+
+
+def all_eq(a, b):
+    """
+    Recursively compare a and b and return True if they are equal.
+
+    Mappings, strings, containers, methods and functions are only equal
+    to objects of exactly the same type. Floats, numpy arrays and numpy
+    scalars are compared with numpy.allclose.
+
+    """
+    if a is b:
+        return True
+    same_type = type(a) is type(b)
+    numeric = (float, np.ndarray, np.number)
+
+    # mappings: same keys, and equal values for each key
+    if isinstance(a, collections.Mapping):
+        if not same_type or set(a.keys()) != set(b.keys()):
+            return False
+        return all(all_eq(a[k], b[k]) for k in a)
+
+    # strings are containers too, so they are handled before them
+    if isinstance(a, (str, unicode)):
+        return same_type and a == b
+
+    if isinstance(a, numeric) or isinstance(b, numeric):
+        return np.allclose(a, b)
+
+    # other containers: same length, and equal items pairwise
+    if isinstance(a, collections.Container):
+        if not same_type or len(a) != len(b):
+            return False
+        return all(all_eq(a_, b_) for a_, b_ in izip(a, b))
+
+    if isinstance(a, types.MethodType):
+        return same_type and \
+            a.im_class is b.im_class and a.im_func is b.im_func
+
+    if isinstance(a, types.LambdaType):
+        return same_type and a.func_code is b.func_code
+
+    return a == b
+
+
+def broadcast_shapes(*shapes):
+    """
+    Broadcast any number of shapes against each other.
+
+    Parameters
+    ----------
+    *shapes : tuples
+        The shapes to broadcast
+
+    Returns
+    -------
+    shape : tuple
+        The broadcast shape. It is the empty tuple if no shape is given.
+
+    Raises
+    ------
+    TypeError
+        If one of the shapes is not a tuple.
+    ValueError
+        If the shapes cannot be broadcast together.
+
+    Example
+    -------
+    >>> broadcast_shapes((1,5), (3, 2, 1))
+    (3, 2, 5)
+    >>> broadcast_shapes()
+    ()
+
+    """
+    if any(not isinstance(s, tuple) for s in shapes):
+        raise TypeError('The input shapes are not tuples.')
+    if not shapes:
+        # nothing to broadcast: max() below would fail on an empty sequence
+        return ()
+    ndim = max(len(s) for s in shapes)
+    # left-pad every shape with ones up to the common number of dimensions
+    shapes_ = [(ndim-len(s)) * [1] + list(s) for s in shapes]
+    outshape = []
+    for dims in zip(*shapes_):
+        # dimensions of size one broadcast against anything
+        dims = [dim for dim in dims if dim != 1]
+        if len(dims) == 0:
+            d = 1
+        elif any(dim != dims[0] for dim in dims):
+            raise ValueError(
+                'The shapes could not be broadcast together {}'.format(
+                    ' '.join(str(s) for s in shapes)))
+        else:
+            d = dims[0]
+        outshape.append(d)
+    return tuple(outshape)
+
+
+def cast(arrays, dtype=None, order='c'):
+    """
+    Cast a list of arrays into a same data type.
+
+    Parameters
+    ----------
+    arrays : sequence of array-like or None
+        The list of arrays to be cast. None entries are left as None.
+    dtype : numpy.dtype
+        If specified, all arrays will be cast to this data type. Otherwise,
+        the data types is inferred from the arrays.
+    order : str
+        The memory layout requested for the cast arrays; it is passed on
+        to numpy.
+
+    Returns
+    -------
+    result : tuple
+        One entry per input, in the same order. If no data type is given
+        and there is no array to infer it from, the inputs are returned
+        unchanged.
+
+    Example
+    -------
+    >>> cast([[1., 2.], None, np.array(2j)])
+    (array([ 1.+0.j,  2.+0.j]), None, array(2j))
+
+    """
+    arrays = tuple(arrays)
+    if dtype is None:
+        arrays_ = [np.asarray(a) for a in arrays if a is not None]
+        if not arrays_:
+            # np.result_type requires at least one argument
+            return arrays
+        dtype = np.result_type(*arrays_)
+    # asarray only copies when the data type or the layout has to change
+    result = (np.asarray(a, dtype=dtype, order=order)
+              if a is not None else None for a in arrays)
+    return tuple(result)
+
+
+def complex_dtype(dtype):
+    """
+    Return the complex dtype associated to a numeric dtype.
+
+    The input is first converted with float_dtype. A complex dtype is
+    returned unchanged; a floating dtype is mapped to the complex dtype
+    with twice as many bits.
+
+    Parameter
+    ---------
+    dtype : dtype
+        The input dtype.
+
+    Example
+    -------
+    >>> complex_dtype(int)
+    dtype('complex128')
+    >>> complex_dtype(np.float32)
+    dtype('complex64')
+    >>> complex_dtype(np.float64)
+    dtype('complex128')
+
+    """
+    dtype = float_dtype(dtype)
+    if dtype.kind == 'c':
+        return dtype
+    # half precision has no complex counterpart unless numpy has complex32
+    if dtype == np.float16 and not hasattr(np, 'complex32'):
+        return np.dtype(complex)
+    # the dtype name is 'float' followed by the number of bits
+    nbits = 2 * int(dtype.name[5:])
+    return np.dtype('complex{}'.format(nbits))
+
+
+def float_dtype(dtype):
+    """
+    Return the floating dtype associated to a numeric dtype.
+
+    Boolean and integer dtypes give the default float dtype; floating and
+    complex dtypes are returned unchanged.
+
+    Parameter
+    ---------
+    dtype : dtype
+        The input dtype.
+
+    Raises
+    ------
+    TypeError
+        If the dtype is not numerical.
+
+    Example
+    -------
+    >>> float_dtype(int)
+    dtype('float64')
+    >>> float_dtype(np.float32)
+    dtype('float32')
+    >>> float_dtype(np.complex256)
+    dtype('complex256')
+
+    """
+    dtype = np.dtype(dtype)
+    kind = dtype.kind
+    if kind in 'iub':
+        return np.dtype(float)
+    if kind in 'fc':
+        return dtype
+    raise TypeError('Non numerical data type.')
+
+
+def first(l, f):
+    """
+    Return the first item of an iterable for which a predicate holds.
+
+    Parameters
+    ----------
+    l : list
+        The elements to be searched, in order.
+    f : function
+        Predicate called on each element; a true result selects it.
+
+    Raises
+    ------
+    ValueError
+        If no element satisfies the predicate.
+
+    Example
+    -------
+    >>> first([1.,2.,3.], lambda x: x > 1.5)
+    2.0
+
+    """
+    for item in l:
+        if f(item):
+            return item
+    raise ValueError('There is no matching item in the list.')
+
+
+def first_is_not(l, v):
+    """
+    Return the first item which is not (identity test) a given value.
+
+    If there is no such item, the value itself is returned.
+
+    Parameters
+    ----------
+    l : sequence
+        The elements to be inspected, in order.
+    v : object
+        The value not to be matched.
+
+    Example
+    -------
+    >>> first_is_not(['a', 'b', 'c'], 'a')
+    'b'
+
+    """
+    for item in l:
+        if item is not v:
+            return item
+    return v
+
+
+def groupbykey(iterable, key):
+    """
+    Group consecutive items of an iterable that share the same key.
+
+    Parameters
+    ----------
+    iterable : iterable
+        The items to be grouped.
+    key : iterable
+        The key associated to each item, iterated in lockstep with
+        `iterable`.
+
+    Returns
+    -------
+    A generator yielding (key value, list of items) pairs, one per run of
+    consecutive equal keys. Nothing is yielded for an empty input.
+
+    Example
+    -------
+    >>> list(groupbykey('abcd', [1, 1, 2, 1]))
+    [(1, ['a', 'b']), (2, ['c']), (1, ['d'])]
+
+    """
+    pairs = izip(iterable, key)
+    try:
+        item, current = next(pairs)
+    except StopIteration:
+        # Empty input. The exception must not escape the generator body:
+        # since PEP 479 it would be turned into a RuntimeError.
+        return
+    group = [item]
+    for item, k in pairs:
+        if k == current:
+            group.append(item)
+            continue
+        yield current, group
+        current = k
+        group = [item]
+    # The last run is never empty: it holds at least one item.
+    yield current, group
+
+
+def ifirst(l, match):
+    """
+    Return the index of the first item that satisfies a predicate or that
+    is equal to a given value.
+
+    Parameters
+    ----------
+    l : iterator
+        The elements to be searched, in order.
+    match : callable or object
+        If callable, predicate selecting an element when it evaluates to
+        True. Otherwise, the value the element is compared to.
+
+    Raises
+    ------
+    ValueError
+        If no element matches.
+
+    Example
+    -------
+    >>> ifirst([1.,2.,3.], lambda x: x > 1.5)
+    1
+    >>> ifirst([1., 2., 3.], 2)
+    1
+
+    """
+    if callable(match):
+        predicate = match
+    else:
+        def predicate(item):
+            return item == match
+    for index, item in enumerate(l):
+        if predicate(item):
+            return index
+    raise ValueError('There is no matching item in the list.')
+
+
+def ifirst_is_not(l, v):
+    """
+    Return the index of the first item which is not (identity test) a given
+    value.
+
+    Parameters
+    ----------
+    l : sequence
+        The elements to be inspected, in order.
+    v : object
+        The value not to be matched.
+
+    Raises
+    ------
+    ValueError
+        If the input is empty or if all items are the specified value.
+
+    Example
+    -------
+    >>> ifirst_is_not(['a', 'b', 'c'], 'a')
+    1
+
+    """
+    for index, item in enumerate(l):
+        if item is not v:
+            return index
+    raise ValueError('There is no matching item in the list.')
+
+
+def ilast(l, match):
+    """
+    Return the index of the last item that satisfies a predicate or that is
+    equal to a given value.
+
+    The search is delegated to ifirst on the reversed elements, so a
+    ValueError is raised if no element matches.
+
+    Parameters
+    ----------
+    l : iterator
+        The elements to be searched.
+    match : callable or object
+        If callable, predicate selecting an element when it evaluates to
+        True. Otherwise, the value the element is compared to.
+
+    Example
+    -------
+    >>> ilast([1.,2.,3.], lambda x: x > 1.5)
+    2
+    >>> ilast([3.,2.,0., 0.], 0)
+    3
+
+    """
+    items = tuple(l)
+    # Position of the match counted from the end.
+    offset = ifirst(items[::-1], match)
+    return len(items) - 1 - offset
+
+
+def ilast_is_not(l, v):
+    """
+    Return the index of the last item which is not (identity test) a given
+    value.
+
+    The search is delegated to ifirst_is_not on the reversed elements, so a
+    ValueError is raised if the input is empty or if all items are the
+    specified value.
+
+    Parameters
+    ----------
+    l : sequence
+        The elements to be inspected.
+    v : object
+        The value not to be matched.
+
+    Example
+    -------
+    >>> ilast_is_not(['a', 'b', 'c'], 'a')
+    2
+
+    """
+    items = tuple(l)
+    # Position of the match counted from the end.
+    offset = ifirst_is_not(items[::-1], v)
+    return len(items) - 1 - offset
+
+
+def inspect_special_values(x):
+    """
+    Count the occurrences of -1, 0 and 1 in an array.
+
+    The result is a 5-element tuple: the number of -1, the number of 0, the
+    number of 1, a flag telling whether other values are present and a flag
+    telling whether all values are equal. When other values are present,
+    the three counts are zero.
+
+    An empty input gives (0, 0, 0, 0, False) and an input whose dtype is
+    not boolean, integer, float or complex gives (0, 0, 0, True, False).
+
+    Parameter
+    ---------
+    x : numerical ndarray
+        The array to be inspected.
+
+    Examples
+    --------
+    >>> inspect_special_values([0,-1,-1])
+    2, 1, 0, False, False
+    >>> inspect_special_values([0,-1,-1,1.2])
+    0, 0, 0, True, False
+
+    """
+    x = np.asarray(x)
+    if x.size == 0:
+        return 0, 0, 0, 0, False
+    flat = x.ravel()
+    kind = flat.dtype.kind
+    if kind not in 'bfiuc':
+        return 0, 0, 0, True, False
+    if kind == 'b':
+        # Booleans are reinterpreted as bytes, not converted.
+        return cu.inspect_special_values_bool8(flat.view(np.uint8))
+    # Every other kind is converted to its 64-bit (128-bit for complex)
+    # representative before being handed to the compiled routine.
+    inspector, widest = {
+        'f': (cu.inspect_special_values_float64, np.float64),
+        'i': (cu.inspect_special_values_int64, np.int64),
+        'u': (cu.inspect_special_values_uint64, np.uint64),
+        'c': (cu.inspect_special_values_complex128, np.complex128),
+    }[kind]
+    return inspector(flat.astype(widest))
+
+
+@contextmanager
+def interruptible():
+    """
+    Make a block of code interruptible with CTRL-C.
+
+    Python's default SIGINT handler is installed for the duration of the
+    block. The handler that was previously in place is restored on exit,
+    including when the block is left through an exception, which is the
+    expected outcome when CTRL-C is actually pressed.
+
+    """
+    signal_old = signal.getsignal(signal.SIGINT)
+    signal.signal(signal.SIGINT, signal.default_int_handler)
+    try:
+        yield
+    finally:
+        signal.signal(signal.SIGINT, signal_old)
+
+
+@contextmanager
+def interruptible_if(condition):
+    """
+    Conditionally make a block of code interruptible with CTRL-C.
+
+    If the condition is false, the block is run without touching the signal
+    handling; otherwise it is run inside the interruptible() context.
+
+    """
+    if not condition:
+        yield
+    else:
+        with interruptible():
+            yield
+
+
+def isalias(array1, array2):
+    """
+    Return True if the data buffers of the two input arrays start at the
+    same memory address, as reported by their __array_interface__.
+
+    """
+    def address(array):
+        return array.__array_interface__['data'][0]
+
+    return address(array1) == address(array2)
+
+
+def isclassattr(a, cls):
+    """
+    Test if an attribute name is defined in the namespace of a class or of
+    one of the classes of its method resolution order.
+
+    """
+    return any(a in c.__dict__ for c in cls.__mro__)
+
+
+@deprecated("use 'isscalarlike' instead.")
+def isscalar(x):
+    """Deprecated alias of isscalarlike."""
+    return isscalarlike(x)
+
+
+def isscalarlike(x):
+    """Return True for scalars (as per np.isscalar) and 0-ranked arrays."""
+    if np.isscalar(x):
+        return True
+    return isinstance(x, np.ndarray) and x.ndim == 0
+
+
+def izip_broadcast(*args):
+    """
+    Like izip, except that arguments which are containers of length 1 are
+    repeated as many times as required by the other arguments.
+
+    When every argument is a container of length 1, nothing is repeated
+    and a single tuple is produced.
+
+    """
+    def is_singleton(a):
+        return hasattr(a, '__len__') and len(a) == 1
+
+    singletons = [is_singleton(a) for a in args]
+    if not all(singletons):
+        # At least one argument drives the iteration: the length-1
+        # containers can safely be repeated indefinitely.
+        args = [itertools.repeat(a[0]) if single else a
+                for a, single in zip(args, singletons)]
+    return izip(*args)
+
+
+def last(l, f):
+    """
+    Return the last item of an iterable for which a predicate holds.
+
+    The search is delegated to first on the reversed elements, so a
+    ValueError is raised if no element satisfies the predicate.
+
+    Parameters
+    ----------
+    l : list
+        The elements to be searched.
+    f : function
+        Predicate called on each element; a true result selects it.
+
+    Example
+    -------
+    >>> last([1.,2.,3.], lambda x: x > 1.5)
+    3.0
+
+    """
+    items = tuple(l)
+    return first(items[::-1], f)
+
+
+def last_is_not(l, v):
+    """
+    Return the last item which is not (identity test) a given value.
+
+    The search is delegated to first_is_not on the reversed elements: if
+    there is no such item, the value itself is returned.
+
+    Parameters
+    ----------
+    l : sequence
+        The elements to be inspected.
+    v : object
+        The value not to be matched.
+
+    Example
+    -------
+    >>> last_is_not(['a', 'b', 'c'], 'b')
+    'c'
+
+    """
+    items = tuple(l)
+    return first_is_not(items[::-1], v)
+
+
+def least_greater_multiple(a, l, out=None):
+    """
+    Return the least product of powers of the values in a list that is
+    greater than or equal to a given number.
+
+    Parameters
+    ----------
+    a : array-like
+        The number(s) to be bracketed from above.
+    l : sequence of numbers
+        The values whose powers are multiplied together.
+    out : ndarray, optional
+        Array in which the result is stored. If not provided, it is
+        allocated.
+
+    Returns
+    -------
+    A scalar if the result is 0-ranked, an array otherwise.
+
+    Raises
+    ------
+    ValueError
+        If a value in `l` is not strictly positive.
+
+    Example
+    -------
+    >>> least_greater_multiple(2253, [2,3])
+    2304
+
+    """
+    if any(v <= 0 for v in l):
+        raise ValueError('The list of multiple is not positive;')
+    it = np.nditer([a, out],
+                   op_flags=[['readonly'],
+                             ['writeonly', 'allocate', 'no_broadcast']])
+    # Largest exponent worth trying for each value: v**max_power >= max(a).
+    max_power = [int(np.ceil(np.log(np.max(a))/np.log(v))) for v in l]
+    slices = tuple(slice(0, m+1) for m in max_power)
+    powers = np.ogrid[slices]
+    # Table of every candidate product, one axis per value of l.
+    values = 1
+    for v, p in izip(l, powers):
+        values = values * v**p
+    for v, o in it:
+        # Boolean indexing behaves the same on every NumPy version. The
+        # former code compared version strings lexicographically and
+        # called np.amin(where=...) without the mandatory 'initial'.
+        o[...] = values[values >= v].min()
+    out = it.operands[1]
+    if out.ndim == 0:
+        return out.flat[0]
+    return out
+
+
+def merge_none(a, b):
+    """
+    Merge two sequences elementwise, a None entry being replaced by the
+    entry of the other sequence.
+
+    Parameters
+    ----------
+    a, b : sequences
+        The sequences to be merged. If both are None, None is returned.
+
+    Returns
+    -------
+    The merged entries, as a tuple.
+
+    Raises
+    ------
+    ValueError
+        If the two sequences do not have the same length or if they have
+        different non-None elements at the same position.
+
+    Example
+    -------
+    >>> merge_none([1,None,3],[None,2,3])
+    (1, 2, 3)
+    """
+    if a is None and b is None:
+        return None
+    if len(a) != len(b):
+        raise ValueError('The input sequences do not have the same length.')
+    merged = []
+    for p, q in izip(a, b):
+        if None not in (p, q) and p != q:
+            raise ValueError('The input sequences have incompatible values.')
+        merged.append(q if p is None else p)
+    return tuple(merged)
+
+
+# Bare subclass of numpy.ndarray that adds no behaviour of its own.
+# NOTE(review): presumably used to obtain array instances that accept extra
+# attributes -- confirm against the callers.
+class ndarraywrap(np.ndarray):
+    pass
+
+
+def one(dtype):
+    """ Return the scalar 1 of a given dtype. """
+    # Indexing a 0-ranked array with () extracts its scalar.
+    unit = np.ones(shape=(), dtype=dtype)
+    return unit[()]
+
+
+def omp_num_threads():
+    """
+    Return the value of the environment variable OMP_NUM_THREADS as an
+    integer or, if it is not set, the number of CPUs of the machine.
+
+    """
+    value = os.environ.get('OMP_NUM_THREADS')
+    if value is None:
+        return multiprocessing.cpu_count()
+    return int(value)
+
+
+def operation_assignment(a, b):
+    """
+    operation_assignment(a, b) -- Same as a[...] = b.
+
+    The content of `a` is overwritten in place; nothing is returned.
+    """
+    a[Ellipsis] = b
+
+
+# Printable symbol of each supported in-place arithmetic operator.
+operation_symbol = {
+    operator.iadd: '+',
+    operator.isub: '-',
+    operator.imul: '*',
+    operator.itruediv: '/',
+}
+# operator.idiv only exists on Python 2: referencing it unconditionally
+# makes the module fail at import time on Python 3.
+if hasattr(operator, 'idiv'):
+    operation_symbol[operator.idiv] = '/'
+
+
+def pi(dtype):
+    """ Return pi, computed as 4 * arctan(1) from the 1 of a given dtype. """
+    unit = one(dtype)
+    return 4 * np.arctan(unit)
+
+
+@contextmanager
+def pool_threading(nthreads=None):
+    """
+    Context manager returning a pool of threads.
+
+    While the context is active, the MKL library (if the 'mkl' module can
+    be imported) is restricted to one thread and the environment variable
+    OMP_NUM_THREADS is set to '1'. The pool is closed and joined and both
+    settings are restored on exit, even if the block raises.
+
+    Parameters
+    ----------
+    nthreads : int, optional
+        The number of threads of the pool. By default, the value returned
+        by omp_num_threads() is used.
+
+    """
+    if nthreads is None:
+        nthreads = omp_num_threads()
+    try:
+        import mkl
+    except ImportError:
+        mkl = None
+    if mkl is not None:
+        old_mkl_num_threads = mkl.get_max_threads()
+        mkl.set_num_threads(1)
+    old_omp_num_threads = os.getenv('OMP_NUM_THREADS')
+    os.environ['OMP_NUM_THREADS'] = '1'
+
+    try:
+        pool = multiprocessing.dummy.Pool(nthreads)
+        try:
+            yield pool
+        finally:
+            pool.close()
+            pool.join()
+    finally:
+        if mkl is not None:
+            mkl.set_num_threads(old_mkl_num_threads)
+        if old_omp_num_threads is not None:
+            os.environ['OMP_NUM_THREADS'] = old_omp_num_threads
+        else:
+            # pop() rather than del: the block may have removed it already.
+            os.environ.pop('OMP_NUM_THREADS', None)
+
+
+def product(a):
+    """
+    Return the product of an arbitrary input, including generators.
+
+    Lists, tuples and generators are multiplied item by item in pure Python
+    (the product of an empty input is 1). Any other input is converted to
+    an array and reduced with numpy, keeping the dtype of the array.
+
+    """
+    if isinstance(a, (list, tuple, types.GeneratorType)):
+        # a for loop is a bit faster than reduce(operator.imul, a)
+        r = 1
+        for x in a:
+            r *= x
+        return r
+
+    a = np.asarray(a)
+    # np.prod is the canonical name: the np.product alias is deprecated and
+    # no longer exists in NumPy 2.
+    return np.prod(a, dtype=a.dtype)
+
+
+def renumerate(l):
+    """
+    Reversed enumerate: iterate through the (index, item) pairs of a
+    sequence, starting from the last item.
+
+    """
+    indices = reversed(xrange(len(l)))
+    return izip(indices, reversed(l))
+
+
+def reshape_broadcast(x, shape):
+    """
+    Return a view of an array with a larger shape, obtained by setting the
+    strides of the broadcast dimensions to zero.
+
+    Parameters
+    ----------
+    x : array-like
+        The array to be reshaped.
+    shape : tuple of int
+        New shape of the array. Its trailing dimensions must match those of
+        x, except along the axes of x of length 1, and it can have
+        additional leading dimensions.
+
+    Raises
+    ------
+    ValueError
+        If the requested shape is not compatible with that of the array.
+
+    Example
+    -------
+    >>> a = np.arange(3).reshape((3, 1))
+    >>> b = reshape_broadcast(a, (2, 3, 2))
+    >>> print(b)
+    [[[0 0]
+      [1 1]
+      [2 2]]
+
+     [[0 0]
+      [1 1]
+      [2 2]]]
+    >>> b.shape
+    (2, 3, 2)
+    >>> b.strides
+    (0, 8, 0)
+
+    """
+    x = np.asanyarray(x)
+    ndim = x.ndim
+    mismatch = len(shape) < ndim or any(
+        old != 1 and old != new
+        for old, new in zip(x.shape, shape[-ndim:]))
+    if mismatch:
+        raise ValueError("The requested shape '{0}' is incompatible with that "
+                         "of the array '{1}'.".format(shape, x.shape))
+    # Zero stride along the new leading axes and the axes of length 1.
+    leading = (len(shape) - ndim) * (0,)
+    trailing = tuple(0 if length == 1 else stride
+                     for length, stride in zip(x.shape, x.strides))
+    return np.lib.stride_tricks.as_strided(x, shape, leading + trailing)
+
+
+@contextmanager
+def setting(obj, attr, value):
+    """
+    Contextually set an attribute to an object.
+
+    On exit, the attribute recovers its previous value, or is deleted if
+    it did not exist, even if the block raises.
+
+    """
+    if hasattr(obj, attr):
+        old_value = getattr(obj, attr)
+        do_delete = False
+    else:
+        do_delete = True
+    setattr(obj, attr, value)
+    try:
+        yield
+    finally:
+        if do_delete:
+            delattr(obj, attr)
+        else:
+            setattr(obj, attr, old_value)
+
+
+@contextmanager
+def settingerr(*args, **keywords):
+    """
+    Contextually set an error handling.
+
+    The arguments are those of numpy.seterr. The previous error handling
+    is restored on exit, even if the block raises.
+
+    """
+    old = np.seterr(*args, **keywords)
+    try:
+        yield
+    finally:
+        np.seterr(**old)
+
+
+def split(n, m, rank=None):
+    """
+    Return the slices that partition a list of n elements in m almost
+    same-size groups, as a tuple. The first n % m groups have one more
+    element than the others. If a rank is provided, only the slice for the
+    rank is returned.
+
+    Example
+    -------
+    >>> split(1000, 2)
+    (slice(0, 500, None), slice(500, 1000, None))
+    >>> split(1000, 2, 1)
+    slice(500, 1000, None)
+
+    """
+    def size_of(group):
+        # Number of elements of a group.
+        return n // m + ((n % m) > group)
+
+    if rank is not None:
+        begin = n // m * rank + min(rank, n % m)
+        return slice(begin, begin + size_of(rank))
+
+    slices = []
+    begin = 0
+    group = 0
+    while group < m:
+        end = begin + size_of(group)
+        slices.append(slice(begin, end))
+        begin = end
+        group += 1
+    return tuple(slices)
+
+
+def strelapsed(t0, msg='Elapsed time'):
+    """
+    Return an information message including the time elapsed since a
+    given time stamp. The message is formatted by strinfo, without its
+    last character.
+
+    Parameters
+    ----------
+    t0 : float
+        The starting time stamp, obtained with time.time()
+    msg : string, optional
+        Informative message
+
+    Example
+    -------
+    >>> import time
+    >>> t0 = time.time()
+    >>> pass
+    >>> print(strelapsed(t0, 'Did nothing in'))
+    Info computernode: Did nothing in... 0.00s
+
+    """
+    import time
+    elapsed = time.time() - t0
+    info = strinfo(msg + '... {0:.2f}s'.format(elapsed))
+    return info[:-1]
+
+
+def strenum(choices, last='or'):
+    """
+    Enumerate the quoted elements of a list in a sentence-like way.
+
+    Parameters
+    ----------
+    choices : list of string
+        The elements to be enumerated.
+    last : string
+        The separator between the last two elements.
+
+    Raises
+    ------
+    ValueError
+        If there is no element to enumerate.
+
+    Examples
+    --------
+    >>> strenum(['blue', 'red', 'yellow'])
+    "'blue', 'red' or 'yellow'"
+
+    """
+    quoted = ["'{0}'".format(choice) for choice in choices]
+    if not quoted:
+        raise ValueError('There is no valid choice.')
+    if len(quoted) == 1:
+        return quoted[0]
+    head, tail = quoted[:-1], quoted[-1]
+    return ' '.join([', '.join(head), last, tail])
+
+
+def strinfo(msg):
+    """
+    Return an information message prefixed by the processor's node name.
+    When there is more than one MPI process, the zero-padded rank of the
+    process is appended to the node name.
+
+    Parameter
+    ---------
+    msg : string
+        The information message.
+
+    Example
+    -------
+    >>> print(strinfo('My information message'))
+    Info computernode: My information message.
+
+    """
+    from .mpi import MPI
+    world = MPI.COMM_WORLD
+    rank = world.rank
+    size = world.size
+    if size > 1:
+        # Number of digits required to write the largest rank.
+        ndigits = str(int(np.log10(size - 1)) + 1)
+        suffix = ('/{0:0' + ndigits + '}').format(rank)
+    else:
+        suffix = ''
+    node = MPI.Get_processor_name()
+    return 'Info {0}{1}: {2}.'.format(node, suffix, msg)
+
+
+def strnbytes(nbytes):
+    """
+    Return a number of bytes in a human readable unit: bytes, KiB, MiB or
+    GiB.
+
+    Parameter
+    ---------
+    nbytes: int
+        Number of bytes, to be displayed in a human readable way.
+
+    Example
+    -------
+    >>> a = np.empty((100,100))
+    >>> print(strnbytes(a.nbytes))
+    78.125 KiB
+
+    """
+    if nbytes < 1024:
+        return str(nbytes) + ' bytes'
+    # A unit applies as long as the value is below 1024 of that unit.
+    for exponent, unit in ((10, ' KiB'), (20, ' MiB')):
+        if nbytes < 2**(exponent + 10):
+            return str(nbytes / 2**exponent) + unit
+    return str(nbytes / 2**30) + ' GiB'
+
+
+def strplural(n, name, nonumber=False, s=''):
+    """
+    Return the plural or singular of a string, preceded by its count.
+
+    Parameters
+    ----------
+    n : integer
+        The plural or singular is based on this number.
+    name : string
+        String for which a plural is requested.
+    nonumber : boolean
+        If true, don't prepend the number.
+    s : string
+        String to be appended if n is not zero.
+
+    Examples
+    --------
+    >>> strplural(0, 'cat')
+    'no cat'
+    >>> strplural(1, 'cat')
+    '1 cat'
+    >>> strplural(2, 'cat')
+    '2 cats'
+    >>> strplural(2, 'cat', nonumber=True)
+    'cats'
+    >>> animals = ['cat', 'dog']
+    >>> strplural(len(animals), 'animal', s=': ') + ', '.join(animals)
+    '2 animals: cat, dog'
+    >>> strplural(0, 'animal', s=':')
+    'no animal'
+
+    """
+    if n == 0:
+        count, noun = 'no ', name
+    elif n == 1:
+        count, noun = '1 ', name + s
+    else:
+        count, noun = str(n) + ' ', name + 's' + s
+    return noun if nonumber else count + noun
+
+
+def strshape(shape, broadcast=None):
+    """
+    Helper function to convert a shape into a string.
+
+    Parameters
+    ----------
+    shape : tuple or None
+        The shape to be converted.
+    broadcast : string, optional
+        If 'leftward' (or 'rightward'), an ellipsis is added to the left
+        (or to the right) of the dimensions.
+
+    Raises
+    ------
+    TypeError
+        If the shape is neither None nor a tuple.
+
+    Examples
+    --------
+    >>> strshape((2, 3))
+    '(2,3)'
+    >>> strshape((3,))
+    '3'
+    >>> strshape((2,), broadcast='leftward')
+    '(...,2)'
+
+    """
+    if shape is None:
+        return str(shape)
+    if not isinstance(shape, tuple):
+        raise TypeError('Invalid shape.')
+    ellipsis = ('...',)
+    if not shape and broadcast in ('leftward', 'rightward'):
+        return '(...)'
+    if broadcast == 'leftward':
+        shape = ellipsis + shape
+    elif broadcast == 'rightward':
+        shape = shape + ellipsis
+    if len(shape) == 1:
+        return str(shape[0])
+    # Drop the blanks and the quotes surrounding the ellipsis.
+    text = str(shape)
+    return text.replace(' ', '').replace("'", '')
+
+
+class Timer(object):
+    """
+    Context manager measuring the time spent inside its block.
+
+    The context can be entered again while it is already active: only the
+    outermost entry and exit start and stop the clock.
+
+    Examples
+    --------
+    >>> import time
+    >>> with Timer('Elapsed time: '):
+    ...     time.sleep(0.1)
+    Elapsed time: 0.100191831589s
+
+    >>> with Timer() as t:
+    ...     time.sleep(0.1)
+    ...     print(t.elapsed)
+    ...     time.sleep(0.1)
+    >>> print(t.elapsed)
+    0.100234985352
+    0.200633049011
+
+    >>> t = Timer(cumulative=True)
+    >>> with t:
+    ...     time.sleep(0.1)
+    >>> print(t.elapsed)
+    0.100238084793
+    >>> with t:
+    ...     time.sleep(0.1)
+    >>> print(t.elapsed)
+    0.200490236282
+
+    """
+    def __init__(self, msg=None, cumulative=False, timer=timeit.default_timer):
+        """
+        Parameters
+        ----------
+        msg : string
+            If not None, the message followed by the elapsed time is
+            printed upon exiting the context.
+        cumulative : boolean
+            If True, elapsed times are accumulated from one use of the
+            context to the next. Otherwise, each use starts from zero.
+        timer : callable
+            Function called without argument to get the current time.
+
+        """
+        self.msg = msg
+        self.cumulative = cumulative
+        self.timer = timer
+        self._elapsed = 0.
+        # Nesting depth of the active 'with' blocks.
+        self._level = 0
+
+    def __enter__(self):
+        self._level += 1
+        if self._level != 1:
+            # Nested entry: the clock is already running.
+            return self
+        if not self.cumulative:
+            self._elapsed = 0.
+        self._start = self.timer()
+        return self
+
+    def __exit__(self, *args):
+        self._level -= 1
+        if self._level <= 0:
+            # Outermost exit: stop the clock.
+            self._elapsed += self.timer() - self._start
+            if self.msg is not None:
+                print('{}{}s'.format(self.msg, self.elapsed))
+
+    @property
+    def elapsed(self):
+        """ Elapsed time, including the running block if any. """
+        total = self._elapsed
+        if self._level != 0:
+            total = total + self.timer() - self._start
+        return total
+
+    def reset(self):
+        """ Set the accumulated elapsed time to zero. """
+        self._elapsed = 0
+
+
+def tointtuple(data):
+    """
+    Return input as a tuple of int.
+
+    None is returned as is and the None items of an iterable input are
+    preserved. A non-iterable input gives a tuple of one element.
+
+    """
+    if data is None:
+        return None
+    try:
+        items = tuple(d if d is None else int(d) for d in data)
+    except TypeError:
+        # Not iterable: handle the input as a single value.
+        items = (int(data),)
+    return items
+
+
+@contextmanager
+def uninterruptible():
+    """
+    Make a block of code uninterruptible with CTRL-C.
+
+    While the block is executed, a SIGINT is only recorded. On exit, the
+    previous SIGINT handler is restored and, if CTRL-C has been pressed, a
+    KeyboardInterrupt is raised.
+
+    """
+    signal_old = signal.getsignal(signal.SIGINT)
+    #XXX the nonlocal Python3 would be handy here
+    ctrlc_is_pressed = []
+
+    # The first parameter is not called 'signal', so that the module of the
+    # same name is not shadowed inside the handler.
+    def signal_handler(signum, frame):
+        ctrlc_is_pressed.append(True)
+    signal.signal(signal.SIGINT, signal_handler)
+    try:
+        yield
+    finally:
+        signal.signal(signal.SIGINT, signal_old)
+        if ctrlc_is_pressed:
+            raise KeyboardInterrupt()
+
+
+@contextmanager
+def uninterruptible_if(condition):
+    """
+    Conditionally make a block of code uninterruptible with CTRL-C.
+
+    If the condition is false, the block is run without touching the signal
+    handling; otherwise it is run inside the uninterruptible() context.
+
+    """
+    if not condition:
+        yield
+    else:
+        with uninterruptible():
+            yield
+
+
+def zero(dtype):
+    """ Return the scalar 0 of a given dtype. """
+    # Indexing a 0-ranked array with () extracts its scalar.
+    null = np.zeros(shape=(), dtype=dtype)
+    return null[()]
diff --git a/pyoperators/utils/mpi.py b/pyoperators/utils/mpi.py
new file mode 100644
index 0000000..132e2ed
--- /dev/null
+++ b/pyoperators/utils/mpi.py
@@ -0,0 +1,214 @@
+from __future__ import absolute_import, division, print_function
+import contextlib
+import operator
+import os
+from functools import reduce
+
+import numpy as np
+
+from .. import config
+
+try:
+    if config.PYOPERATORS_NO_MPI:
+        raise ImportError()
+    from mpi4py import MPI
+except ImportError:
+    from . import fake_MPI as MPI
+from .misc import deprecated, isscalarlike, Timer, tointtuple
+
+__all__ = ['MPI',
+           'as_mpi',
+           'combine',
+           'distribute',
+           'combine_shape',
+           'distribute_shape',
+           'distribute_slice',
+           'filter_comm',
+           'mprint']
+
+# Correspondence between the numpy dtypes and the MPI datatypes, used by
+# as_mpi.
+DTYPE_MAP = {
+    np.dtype(np.int8): MPI.SIGNED_CHAR,
+    np.dtype(np.int16): MPI.SHORT,
+    np.dtype(np.int32): MPI.INT,
+    np.dtype(np.int64): MPI.LONG,
+    np.dtype(np.uint8): MPI.UNSIGNED_CHAR,
+    np.dtype(np.uint16): MPI.UNSIGNED_SHORT,
+    np.dtype(np.uint32): MPI.UNSIGNED_INT,
+    np.dtype(np.uint64): MPI.UNSIGNED_LONG,
+    np.dtype(np.float32): MPI.FLOAT,
+    np.dtype(np.float64): MPI.DOUBLE,
+    np.dtype(np.complex64): MPI.COMPLEX,
+    np.dtype(np.complex128): MPI.DOUBLE_COMPLEX,
+}
+
+# In-place binary reductions, by operation name: each callable accumulates
+# its second argument into the first one.
+IOP_PY_MAP = {'sum': operator.iadd,
+              'prod': operator.imul,
+              'min': lambda x, y: np.minimum(x, y, x),
+              'max': lambda x, y: np.maximum(x, y, x)}
+# Reductions of a sequence of operands, by operation name. 'reduce' is the
+# one imported from functools at the top of the module (it is not a builtin
+# on Python 3).
+OP_PY_MAP = {'sum': sum,
+             'prod': lambda x: reduce(np.multiply, x),
+             'min': lambda x: reduce(np.minimum, x),
+             'max': lambda x: reduce(np.maximum, x)}
+# MPI reduction operations, by operation name.
+OP_MPI_MAP = {'sum': MPI.SUM,
+              'prod': MPI.PROD,
+              'min': MPI.MIN,
+              'max': MPI.MAX}
+
+# Accumulates the time spent in the MPI calls of this module.
+timer_mpi = Timer(cumulative=True)
+
+
+def as_mpi(x):
+    """
+    Return the input array together with the MPI datatype associated to
+    its dtype in DTYPE_MAP.
+
+    Raises
+    ------
+    KeyError
+        If the dtype of the array has no MPI counterpart in DTYPE_MAP.
+
+    """
+    mpi_type = DTYPE_MAP.get(x.dtype)
+    if mpi_type is None:
+        raise KeyError("The dtype '{0}' is not handled in MPI.".format(
+                       x.dtype.name))
+    return x, mpi_type
+
+
+def combine(n, comm=MPI.COMM_WORLD):
+    """
+    Return total number of work items, i.e. the sum of the local numbers
+    of all the processes of the communicator, as an int.
+    """
+    total = np.array(n)
+    # The local value is replaced in place by the sum over the processes.
+    with timer_mpi:
+        comm.Allreduce(MPI.IN_PLACE, total, op=MPI.SUM)
+    return int(total)
+
+
+@deprecated("use 'split' instead.")
+def distribute(n, comm=MPI.COMM_WORLD):
+    """
+    Distribute work across processors.
+
+    If n is scalar-like, return the number of work items of the local
+    process. Otherwise, return the part of the array n, split along its
+    first dimension, that is attributed to the local process.
+    """
+    if isscalarlike(n):
+        return n // comm.size + ((n % comm.size) > comm.rank)
+    n = np.asanyarray(n)
+    s = distribute_slice(n.shape[0], comm=comm)
+    return n[s]
+
+
+def combine_shape(shape, comm=None):
+    """
+    Return the shape of the global array resulting from stacking local arrays
+    along the first dimension.
+
+    The local shapes must have the same number of dimensions and only
+    differ by their first one, otherwise a ValueError is raised. By
+    default, the communicator is MPI.COMM_WORLD.
+
+    """
+    local = tointtuple(shape)
+    comm = comm or MPI.COMM_WORLD
+    with timer_mpi:
+        shapes = comm.allgather(local)
+    reference = shapes[0]
+    for s in shapes:
+        if len(s) != len(reference) or s[1:] != reference[1:]:
+            raise ValueError(
+                "The shapes are incompatible: '{0}'.".format(shapes))
+    nglobal = sum(s[0] for s in shapes)
+    return (nglobal,) + reference[1:]
+
+
+def distribute_shape(shape, rank=None, size=None, comm=None):
+    """
+    Return the shape of a local array given the shape of a global array,
+    according to the rank of the MPI job. The load is distributed along
+    the first dimension.
+
+    The rank and the size that are not specified are taken from the
+    communicator, which is MPI.COMM_WORLD by default. A ValueError is
+    raised if a scalar shape has to be split across several processes.
+    """
+    # The communicator is only needed for what has not been specified.
+    if None in (rank, size):
+        comm = comm or MPI.COMM_WORLD
+    if size is None:
+        size = comm.size
+    if rank is None:
+        rank = comm.rank
+
+    shape = tointtuple(shape)
+    if len(shape) == 0:
+        if size > 1:
+            raise ValueError(
+                'It is ambiguous to split a scalar across processes.')
+        return ()
+    nglobal = shape[0]
+    # The first nglobal % size processes get one more item.
+    nlocal = nglobal // size + ((nglobal % size) > rank)
+    return (nlocal,) + tuple(shape[1:])
+
+
+def distribute_shapes(shape, comm=None):
+    """
+    Return the tuple of the local array shapes given the shape of a global
+    array, for all MPI processes. The load is distributed along the first
+    dimension: the first processes get one more item when the first
+    dimension is not a multiple of the number of processes.
+
+    """
+    if comm is None:
+        comm = MPI.COMM_WORLD
+    size = comm.size
+    nlocal, nlarger = divmod(shape[0], size)
+    trailing = shape[1:]
+    larger = (nlocal + 1,) + trailing
+    smaller = (nlocal,) + trailing
+    return nlarger * (larger,) + (size - nlarger) * (smaller,)
+
+
+@deprecated("use 'split' instead.")
+def distribute_slice(nglobal, rank=None, size=None, comm=None):
+    """
+    Given a number of ordered global work items, return the slice that brackets
+    the items distributed to a local MPI job.
+
+    The rank and the size that are not specified are taken from the
+    communicator, which is MPI.COMM_WORLD by default.
+
+    """
+    if rank is None or size is None:
+        comm = comm or MPI.COMM_WORLD
+    if size is None:
+        size = comm.size
+    if rank is None:
+        rank = comm.rank
+    # The first nglobal % size processes get one more item.
+    nlocal = nglobal // size + ((nglobal % size) > rank)
+    start = nglobal // size * rank + min(rank, nglobal % size)
+    stop = start + nlocal
+    return slice(start, stop)
+
+
+@contextlib.contextmanager
+def filter_comm(condition, comm):
+    """
+    Return a context manager whose return value is a communicator that only
+    includes the processes for which the specified condition is met, or
+    None for the other processes.
+
+    The new communicator is freed on exit, even if the block raises.
+
+    Parameters
+    ----------
+    condition : boolean
+        Condition to be met to include the process in the new communicator.
+    comm : mpi4py.MPI.Comm
+        The communicator of the processes that reach the execution of
+        this function. These processes will be included in the new communicator
+        if condition is True.
+
+    Example
+    -------
+    The following snippet prints the list of the rank of the 3 first processes,
+    for any number of MPI processes greater than 3:
+    comm = MPI.COMM_WORLD
+    with filter_comm(comm.rank < 3, comm) as newcomm:
+        if newcomm is not None:
+            print(newcomm.allgather(newcomm.rank))
+
+    """
+    with timer_mpi:
+        newcomm = comm.Split(color=int(condition), key=comm.rank)
+    try:
+        yield newcomm if condition else None
+    finally:
+        # Every process gets a communicator from Split, whatever its
+        # condition, and has to release it.
+        with timer_mpi:
+            newcomm.Free()
+
+
+def mprint(msg='', comm=MPI.COMM_WORLD):
+    """
+    Print message on stdout. If the message is the same for all nodes,
+    only print one message. Otherwise, add rank information.
+
+    All messages are gathered and printed by rank 0 process, to make sure that
+    messages are printed in rank order.
+
+    """
+    msgs = comm.gather(msg)
+    if comm.rank != 0:
+        return
+    if all(m == msgs[0] for m in msgs):
+        print(msg)
+        return
+    lines = ['Rank {}: {}'.format(i, m) for i, m in enumerate(msgs)]
+    print('\n'.join(lines))
diff --git a/pyoperators/utils/testing.py b/pyoperators/utils/testing.py
new file mode 100644
index 0000000..ef295eb
--- /dev/null
+++ b/pyoperators/utils/testing.py
@@ -0,0 +1,263 @@
+import collections
+import functools
+import numpy as np
+from collections import Container, Mapping
+from itertools import izip
+from nose.plugins.skip import SkipTest
+from numpy.testing import assert_equal, assert_allclose
+
+from .misc import settingerr, strenum
+
+__all__ = ['assert_eq',
+           'assert_in',
+           'assert_not_in',
+           'assert_is',
+           'assert_is_not',
+           'assert_is_instance',
+           'assert_is_not_instance',
+           'assert_is_none',
+           'assert_is_not_none',
+           'assert_is_type',
+           'assert_raises',
+           'skiptest',
+           'skiptest_if',
+           'skiptest_unless_module']
+
+
+def assert_same(actual, desired, atol=0, rtol=5, broadcasting=False):
+    """
+    Compare arrays of floats. The relative error depends on the data type.
+
+    Parameters
+    ----------
+    atol : float
+        Absolute tolerance to account for numerical error propagation, in
+        unit of eps.
+    rtol : float
+        Relative tolerance to account for numerical error propagation, in
+        unit of eps.
+    broadcasting : bool, optional
+        If true, allow broadcasting between the actual and desired arrays.
+
+    """
+    actual = np.asarray(actual)
+    desired = np.asarray(desired)
+    if actual.dtype.kind not in ('b', 'i', 'u', 'f', 'c') or \
+       desired.dtype.kind not in ('b', 'i', 'u', 'f', 'c'):
+        raise TypeError('Non numeric type.')
+    if not broadcasting and actual.shape != desired.shape:
+        raise AssertionError(
+            "The actual array shape '{0}' is different from the desired one '{"
+            "1}'.".format(actual.shape, desired.shape))
+    if actual.dtype.kind in ('b', 'i', 'u') and \
+       desired.dtype.kind in ('b', 'i', 'u'):
+        if not broadcasting:
+            assert_equal(actual, desired)
+        else:
+            assert np.all(actual == desired)
+        return
+    if actual.dtype.kind in ('b', 'i', 'u'):
+        dtype = desired.dtype
+    elif desired.dtype.kind in ('b', 'i', 'u'):
+        dtype = actual.dtype
+    else:
+        dtype = sorted(_.dtype for _ in (actual, desired))[0]
+
+    eps1 = np.finfo(dtype).eps * rtol
+    eps2 = np.finfo(dtype).eps * atol
+
+    with settingerr('ignore'):
+        same_ = abs(actual - desired) <= \
+                eps1 * np.minimum(abs(actual), abs(desired)) + eps2
+        same = (same_ | np.isnan(actual) & np.isnan(desired) |
+                (actual == desired))
+        if np.all(same):
+            return
+
+        msg = 'Arrays are not equal (mismatch {0:.1%}'.format(1-np.mean(same))
+        if np.any(~same_ & np.isfinite(actual) & np.isfinite(desired)):
+            rtolmin = np.nanmax(abs(actual - desired) /
+                                np.minimum(abs(actual), abs(desired)))
+            atolmin = np.nanmax(abs(actual - desired))
+            msg += ', min rtol: {0}, min atol: {1}'.format(
+                rtolmin / np.finfo(dtype).eps,
+                atolmin / np.finfo(dtype).eps)
+        check_nan = (np.isnan(actual) & ~np.isnan(desired) |
+                     np.isnan(desired) & ~np.isnan(actual))
+        if np.any(check_nan):
+            msg += ', check nan'
+        if np.any(~check_nan & (np.isinf(actual) | np.isinf(desired)) &
+                  (actual != desired)):
+            msg += ', check infinite'
+
+        def trepr(x):
+            r = repr(x).split('\n')
+            if len(r) > 3:
+                r = [r[0], r[1], r[2] + ' ...']
+            return '\n'.join(r)
+        raise AssertionError(msg + ")\n x: {1}\n y: {2}".format(
+            1 - np.mean(same), trepr(actual), trepr(desired)))
+
+
+def assert_eq(a, b, msg=''):
+    """ Assert that the two arguments are equal. """
+    if a is b:
+        return
+
+    if not msg:
+        msg = 'Items are not equal:\n ACTUAL: {0}\n DESIRED: {1}'.format(a, b)
+
+    # a or b is an ndarray sub-class
+    if isinstance(a, np.ndarray) and type(a) not in (np.matrix, np.ndarray) or\
+       isinstance(b, np.ndarray) and type(b) not in (np.matrix, np.ndarray):
+        assert_is(type(a), type(b))
+        assert_allclose(a.view(np.ndarray), b.view(np.ndarray), err_msg=msg)
+        assert_eq(a.__dict__, b.__dict__, msg)
+        return
+
+    # a and b are ndarray or one of them is an ndarray and the other is a seq.
+    num_types = (bool, int, float, complex, np.ndarray, np.number)
+    if isinstance(a, num_types) and isinstance(b, num_types) or \
+       isinstance(a, np.ndarray) and isinstance(b, (list, tuple)) or \
+       isinstance(b, np.ndarray) and isinstance(a, (list, tuple)):
+        assert_allclose(a, b, err_msg=msg)
+        return
+
+    if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
+        raise AssertionError(msg)
+
+    if isinstance(a, Mapping) and isinstance(b, Mapping):
+        assert_equal(set(a.keys()), set(b.keys()), err_msg=msg)
+        for k in a:
+            assert_eq(a[k], b[k], msg)
+        return
+
+    if isinstance(a, Container) and not isinstance(a, (set, str)) and \
+       isinstance(b, Container) and not isinstance(b, (set, str)):
+        assert_equal(len(a), len(b), msg)
+        for a_, b_ in izip(a, b):
+            assert_eq(a_, b_, msg)
+        return
+
+    try:
+        equal = a == b
+    except:
+        equal = False
+
+    assert equal, msg
+
+
+def assert_in(a, b, msg=None):
+    """ Assert that the first argument is in the second one. """
+    if a in b:
+        return
+    assert False, str(a) + ' is not in ' + str(b) + _get_msg(msg)
+
+
+def assert_not_in(a, b, msg=None):
+    """ Assert that the first argument is not in second one. """
+    if a not in b:
+        return
+    assert False, str(a) + ' is in ' + str(b) + _get_msg(msg)
+
+
+def assert_is(a, b, msg=None):
+    """ Assert arguments are equal as determined by the 'is' operator. """
+    if a is b:
+        return
+    assert False, str(a) + ' is not ' + str(b) + _get_msg(msg)
+
+
+def assert_is_not(a, b, msg=None):
+    """ Assert arguments are not equal as determined by the 'is' operator. """
+    if a is not b:
+        return
+    assert False, str(a) + ' is ' + str(b) + _get_msg(msg)
+
+
+def assert_is_instance(a, cls, msg=None):
+    """ Assert that the first argument is an instance of the second one. """
+    if isinstance(a, cls):
+        return
+    assert False, str(a) + " is not a '" + cls.__name__ + "' instance" + \
+        _get_msg(msg)
+
+
+def assert_is_not_instance(a, cls, msg=None):
+    """
+    Assert that the first argument is not an instance of the second one.
+
+    """
+    if not isinstance(a, cls):
+        return
+    assert False, str(a) + " is a '" + cls.__name__ + "' instance" + \
+        _get_msg(msg)
+
+
+def assert_is_none(a, msg=None):
+    """ Assert argument is None. """
+    if a is None:
+        return
+    assert False, str(a) + ' is not None' + _get_msg(msg)
+
+
+def assert_is_not_none(a, msg=None):
+    """ Assert argument is not None. """
+    if a is not None:
+        return
+    assert False, str(a) + ' is None' + _get_msg(msg)
+
+
+def assert_is_type(a, cls, msg=None):
+    """ Assert argument is of a specified type. """
+    if type(cls) is type:
+        cls = (cls,)
+    else:
+        cls = tuple(cls)
+    if any(type(a) is t for t in cls):
+        return
+    raise AssertionError(
+        "{0} is of type '{1}' instead of {2}{3}".format(
+        a, type(a).__name__, strenum(c.__name__ for c in cls), _get_msg(msg)))
+
+
+def assert_raises(*args, **kwargs):
+    np.testing.assert_raises(*args, **kwargs)
+assert_raises.__doc__ = np.testing.assert_raises.__doc__
+
+
+def skiptest(func):
+    @functools.wraps(func)
+    def _():
+        raise SkipTest()
+    return _
+
+
+def skiptest_if(condition):
+    def decorator(func):
+        @functools.wraps(func)
+        def _():
+            if condition:
+                raise SkipTest()
+            func()
+        return _
+    return decorator
+
+
+def skiptest_unless_module(module):
+    def decorator(func):
+        @functools.wraps(func)
+        def _():
+            try:
+                __import__(module)
+            except ImportError:
+                raise SkipTest()
+            func()
+        return _
+    return decorator
+
+
+def _get_msg(msg):
+    if not msg:
+        return '.'
+    return ': ' + str(msg) + '.'
diff --git a/pyoperators/utils/ufuncs.c.src b/pyoperators/utils/ufuncs.c.src
new file mode 100644
index 0000000..312eb07
--- /dev/null
+++ b/pyoperators/utils/ufuncs.c.src
@@ -0,0 +1,387 @@
+/*-*-c-*-*/
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+
+#include <Python.h>
+#include <math.h>
+#include "numpy/npy_math.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/ufuncobject.h"
+
+#define UNARY_LOOP\
+    char *ip = args[0], *op = args[1];\
+    npy_intp is = steps[0], os = steps[1];\
+    npy_intp n = dimensions[0];\
+    npy_intp i;\
+    for(i = 0; i < n; i++, ip += is, op += os)
+
+#define BINARY_LOOP\
+    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
+    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
+    npy_intp n = dimensions[0];\
+    npy_intp i;\
+    for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1)
+
+static char complex1_float1_types[12] = {NPY_CFLOAT, NPY_FLOAT,
+                                         NPY_CDOUBLE, NPY_DOUBLE,
+                                         NPY_CLONGDOUBLE, NPY_LONGDOUBLE,
+                                         NPY_FLOAT, NPY_FLOAT,
+                                         NPY_DOUBLE, NPY_DOUBLE,
+                                         NPY_LONGDOUBLE, NPY_LONGDOUBLE};
+static char float2_types[9] = {NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+                               NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+                               NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_LONGDOUBLE};
+static char complex2_types[18] =
+    {NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
+     NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE,
+     NPY_CLONGDOUBLE, NPY_CLONGDOUBLE, NPY_CLONGDOUBLE,
+     NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT,
+     NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE,
+     NPY_LONGDOUBLE, NPY_CLONGDOUBLE, NPY_CLONGDOUBLE};
+
+static void *null_data3[3] = {NULL, NULL, NULL};
+static void *null_data6[6] = {NULL, NULL, NULL, NULL, NULL, NULL};
+static void *null_data17[17] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                                NULL};
+
+
+/*********************
+ * Complex abs(x)**2 *
+ *********************/
+
+/**begin repeat
+ * complex types
+ * #ftype = npy_float, npy_double, npy_longdouble#
+ * #c = f, , l#
+ */
+NPY_NO_EXPORT void
+abs2@c@(char **args, npy_intp *dimensions, npy_intp *steps, void *data)
+{
+    UNARY_LOOP {  /* op = re(ip)**2 + im(ip)**2 */
+        const @ftype@ inr = *(@ftype@ *)ip;
+        const @ftype@ ini = ((@ftype@ *)ip)[1];
+        *((@ftype@ *)op) = inr*inr + ini*ini;
+    }
+}
+
+NPY_NO_EXPORT void
+abs2@c@_real(char **args, npy_intp *dimensions, npy_intp *steps, void *data)
+{
+    UNARY_LOOP {  /* real input: op = ip**2 */
+        const @ftype@ in = *(@ftype@ *)ip;
+        *((@ftype@ *)op) = in * in;
+    }
+}
+/**end repeat**/
+
+static PyUFuncGenericFunction abs2_funcs[6] = 
+    {&abs2f, &abs2, &abs2l,
+     &abs2f_real, &abs2_real, &abs2l_real};
+
+
+/*********************
+ * Hard thresholding *
+ *********************/
+
+/**begin repeat
+ * #type = npy_float, npy_double, npy_longdouble#
+ * #c = f,,l#
+ */
+NPY_NO_EXPORT void
+hard_thresholding@c@(char **args, npy_intp *dimensions, npy_intp* steps,
+                     void* data)
+{
+    npy_intp i;
+    npy_intp n = dimensions[0];
+    char *in = args[0], *threshold = args[1], *out = args[2];
+    npy_intp in_step = steps[0], threshold_step = steps[1], out_step = steps[2];
+    /* out = in where |in| > threshold, 0 elsewhere */
+    @type@ tmp;
+
+    for (i = 0; i < n; i++) {
+        tmp = *(@type@ *)in;
+        tmp = (fabs@c@(tmp) > *(@type@ *)threshold) ? tmp : 0;
+        *((@type@ *)out) = tmp;
+
+        in += in_step;
+        threshold += threshold_step;
+        out += out_step;
+    }
+}
+/**end repeat**/
+
+static PyUFuncGenericFunction hard_thresholding_funcs[3] =
+           {&hard_thresholdingf,
+            &hard_thresholding,
+            &hard_thresholdingl};
+
+
+/***********
+ * Masking *
+ ***********/
+
+/**begin repeat
+ * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *         LONG, ULONG, LONGLONG, ULONGLONG,
+ *         HALF, FLOAT, DOUBLE, LONGDOUBLE#
+ * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
+ *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
+ *         npy_half, npy_float, npy_double, npy_longdouble#
+ */
+NPY_NO_EXPORT void
+@TYPE@_masking(char **args, npy_intp *dimensions, npy_intp* steps, void* data)
+{
+    npy_intp i;
+    npy_intp n = dimensions[0];
+    char *in = args[0], *mask = args[1], *out = args[2];
+    npy_intp in_step = steps[0], mask_step = steps[1], out_step = steps[2];
+    /* In-place shortcut only when input and output walk the same elements. */
+    if (in == out && in_step == out_step) {
+        for (i = 0; i < n; i++) {
+            if (*mask)
+                *((@type@ *)out) = 0;
+            mask += mask_step;
+            out += out_step;
+        }
+    } else {
+        for (i = 0; i < n; i++) {
+            if (*mask)
+                *((@type@ *)out) = 0;
+            else
+                *((@type@ *)out) = *(@type@ *)in;
+            in += in_step;
+            mask += mask_step;
+            out += out_step;
+        }
+    }
+}
+/**end repeat**/
+
+/**begin repeat
+ * #TYPE = CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
+ * #ftype = npy_float, npy_double, npy_longdouble#
+ */
+NPY_NO_EXPORT void
+@TYPE@_masking(char **args, npy_intp *dimensions, npy_intp* steps, void* data)
+{
+    npy_intp i;
+    npy_intp n = dimensions[0];
+    char *in = args[0], *mask = args[1], *out = args[2];
+    npy_intp in_step = steps[0], mask_step = steps[1], out_step = steps[2];
+    /* In-place shortcut only when input and output walk the same elements. */
+    if (in == out && in_step == out_step) {
+        for (i = 0; i < n; i++) {
+            if (*mask) {
+                ((@ftype@ *)out)[0] = 0.;
+                ((@ftype@ *)out)[1] = 0.;
+            }
+            mask += mask_step;
+            out += out_step;
+        }
+    } else {
+        for (i = 0; i < n; i++) {
+            if (*mask) {
+                ((@ftype@ *)out)[0] = 0.;
+                ((@ftype@ *)out)[1] = 0.;
+            } else
+                *((@type@ *)out) = *(@type@ *)in;
+            in += in_step;
+            mask += mask_step;
+            out += out_step;
+        }
+    }
+}
+/**end repeat**/
+
+static PyUFuncGenericFunction masking_funcs[17] = 
+           {&BYTE_masking, &UBYTE_masking,
+            &SHORT_masking, &USHORT_masking,
+            &INT_masking, &UINT_masking,
+            &LONG_masking, &ULONG_masking,
+            &LONGLONG_masking, &ULONGLONG_masking,
+            &HALF_masking, &FLOAT_masking,
+            &DOUBLE_masking, &LONGDOUBLE_masking,
+            &CFLOAT_masking, &CDOUBLE_masking,
+            &CLONGDOUBLE_masking};
+
+static char masking_types[17*3] = {NPY_BYTE, NPY_BOOL, NPY_BYTE,
+                                   NPY_UBYTE, NPY_BOOL, NPY_UBYTE,
+                                   NPY_SHORT, NPY_BOOL, NPY_SHORT,
+                                   NPY_USHORT, NPY_BOOL, NPY_USHORT,
+                                   NPY_INT, NPY_BOOL, NPY_INT,
+                                   NPY_UINT, NPY_BOOL, NPY_UINT,
+                                   NPY_LONG, NPY_BOOL, NPY_LONG,
+                                   NPY_ULONG, NPY_BOOL, NPY_ULONG,
+                                   NPY_LONGLONG, NPY_BOOL, NPY_LONGLONG,
+                                   NPY_ULONGLONG, NPY_BOOL, NPY_ULONGLONG,
+                                   NPY_HALF, NPY_BOOL, NPY_HALF,
+                                   NPY_FLOAT, NPY_BOOL, NPY_FLOAT,
+                                   NPY_DOUBLE, NPY_BOOL, NPY_DOUBLE,
+                                   NPY_LONGDOUBLE, NPY_BOOL, NPY_LONGDOUBLE,
+                                   NPY_CFLOAT, NPY_BOOL, NPY_CFLOAT,
+                                   NPY_CDOUBLE, NPY_BOOL, NPY_CDOUBLE,
+                                   NPY_CLONGDOUBLE, NPY_BOOL, NPY_CLONGDOUBLE};
+
+
+/****************************
+ * Conjugate multiplication *
+ ****************************/
+
+/**begin repeat
+ * complex types
+ * #ftype = npy_float, npy_double, npy_longdouble#
+ * #c = f, , l#
+ */
+NPY_NO_EXPORT void
+multiply_conjugate@c@(char **args, npy_intp *dimensions, npy_intp *steps,
+                      void *data)
+{
+    BINARY_LOOP {  /* op1 = ip1 * conjugate(ip2), both complex */
+        const @ftype@ in1r = *(@ftype@ *)ip1;
+        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
+        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
+        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
+        ((@ftype@ *)op1)[0] =  in1r*in2r + in1i*in2i;
+        ((@ftype@ *)op1)[1] = -in1r*in2i + in1i*in2r;
+    }
+}
+
+NPY_NO_EXPORT void
+multiply_real_conjugate@c@(char **args, npy_intp *dimensions, npy_intp *steps,
+                           void *data)
+{
+    BINARY_LOOP {  /* op1 = ip1 * conjugate(ip2), ip1 real, ip2 complex */
+        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
+        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
+        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
+        ((@ftype@ *)op1)[0] =  in1r*in2r;
+        ((@ftype@ *)op1)[1] = -in1r*in2i;
+    }
+}
+/**end repeat**/
+
+static PyUFuncGenericFunction multiply_conjugate_funcs[6] = 
+           {&multiply_conjugatef,
+            &multiply_conjugate,
+            &multiply_conjugatel,
+            &multiply_real_conjugatef,
+            &multiply_real_conjugate,
+            &multiply_real_conjugatel};
+
+
+/*********************
+ * Soft thresholding *
+ *********************/
+
+/**begin repeat
+ * #type = npy_float, npy_double, npy_longdouble#
+ * #c = f,,l#
+ */
+NPY_NO_EXPORT void
+soft_thresholding@c@(char **args, npy_intp *dimensions, npy_intp* steps,
+                     void* data)
+{
+    npy_intp i;
+    npy_intp n = dimensions[0];
+    char *in = args[0], *threshold = args[1], *out = args[2];
+    npy_intp in_step = steps[0], threshold_step = steps[1], out_step = steps[2];
+    /* out = sign(in) * max(|in| - threshold, 0) */
+    @type@ tmp;
+
+    for (i = 0; i < n; i++) {
+        tmp = fabs@c@(*(@type@ *)in) - *(@type@ *)threshold;
+        tmp = (tmp > 0) ? tmp : 0;
+        *((@type@ *)out) = copysign@c@(tmp, *(@type@ *)in);
+
+        in += in_step;
+        threshold += threshold_step;
+        out += out_step;
+    }
+}
+/**end repeat**/
+
+static PyUFuncGenericFunction soft_thresholding_funcs[3] = 
+           {&soft_thresholdingf,
+            &soft_thresholding,
+            &soft_thresholdingl};
+
+
+/* Module definition */
+
+static PyMethodDef module_methods[] = {
+    { NULL, NULL, 0, NULL }
+};
+
+#if defined(NPY_PY3K)
+static PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "ufuncs",
+    NULL,
+    -1,
+    module_methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+
+PyMODINIT_FUNC
+PyInit_ufuncs()
+#else
+PyMODINIT_FUNC
+initufuncs()
+#endif
+{
+    PyObject *m, *f, *d;
+#if defined(NPY_PY3K)
+    m = PyModule_Create(&moduledef);
+    if (!m) {
+        return NULL;
+    }
+#else
+    m  = Py_InitModule("ufuncs", module_methods);
+    if (m == NULL) {
+        return;
+    }
+#endif
+
+    import_array();
+    import_umath();
+
+    d = PyModule_GetDict(m);
+
+    f = PyUFunc_FromFuncAndData(abs2_funcs, null_data6,
+            complex1_float1_types, 6, 1, 1, PyUFunc_None, "abs2",
+            "Compute y = x.real**2 + x.imag**2", 0);
+    PyDict_SetItemString(d, "abs2", f);
+    Py_DECREF(f);
+
+    f = PyUFunc_FromFuncAndData(hard_thresholding_funcs, null_data3,
+            float2_types, 3, 2, 1, PyUFunc_None, "hard_thresholding",
+            "Compute y = x1 if |x1| > x2,\n            0  otherwise." , 0);
+    PyDict_SetItemString(d , "hard_thresholding", f);
+    Py_DECREF(f);
+
+    f = PyUFunc_FromFuncAndData(masking_funcs, null_data17,
+            masking_types, 17, 2, 1, PyUFunc_None, "masking",
+            "Set y = 0 where x2,\n        x1  otherwise." , 0);
+    PyDict_SetItemString(d , "masking", f);
+    Py_DECREF(f);
+
+    f = PyUFunc_FromFuncAndData(multiply_conjugate_funcs, null_data3,
+            complex2_types, 3, 2, 1, PyUFunc_None, "multiply_conjugate",
+            "Compute y = x1 * conjugate(x2)", 0);
+    PyDict_SetItemString(d, "multiply_conjugate", f);
+    Py_DECREF(f);
+
+    f = PyUFunc_FromFuncAndData(soft_thresholding_funcs, null_data3,
+            float2_types, 3, 2, 1, PyUFunc_None, "soft_thresholding",
+            "Compute y = sign(x1) * [|x1| - x2]+" , 0);
+    PyDict_SetItemString(d , "soft_thresholding", f);
+    Py_DECREF(f);
+
+#if defined(NPY_PY3K)
+    return m;
+#endif
+}
diff --git a/pyoperators/warnings.py b/pyoperators/warnings.py
new file mode 100644
index 0000000..477abad
--- /dev/null
+++ b/pyoperators/warnings.py
@@ -0,0 +1,15 @@
+from __future__ import absolute_import, division, print_function
+import warnings
+from warnings import warn
+
+
+class PyOperatorsWarning(UserWarning):
+    pass
+
+
+class PyOperatorsDeprecationWarning(DeprecationWarning):
+    pass
+
+warnings.simplefilter('always', category=PyOperatorsWarning)
+warnings.simplefilter('module', category=PyOperatorsDeprecationWarning)
+del warnings
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..4ed72ff
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+import numpy as np
+from distutils.extension import Extension
+from numpy.distutils.core import setup
+from hooks import get_cmdclass, get_version
+
+VERSION = '0.12'
+
+name = 'pyoperators'
+long_description = open('README.rst').read()
+keywords = 'scientific computing'
+platforms = 'MacOS X,Linux,Solaris,Unix,Windows'
+
+ext_modules = [Extension("pyoperators.utils.cythonutils",
+                         sources=["pyoperators/utils/cythonutils.c"],
+                         include_dirs=['.', np.get_include()]),
+               Extension("pyoperators.utils.ufuncs",
+                         sources=["pyoperators/utils/ufuncs.c.src"])]
+
+setup(name=name,
+      version=get_version(name, VERSION),
+      description='Operators and solvers for high-performance computing.',
+      long_description=long_description,
+      url='http://pchanial.github.com/pyoperators',
+      author='Pierre Chanial',
+      author_email='pierre.chanial at gmail.com',
+      maintainer='Pierre Chanial',
+      maintainer_email='pierre.chanial at gmail.com',
+      requires=['numpy(>=1.6)',
+                'scipy(>=0.9)',
+                'pyfftw'],
+      install_requires=['numexpr>2'],
+      packages=['pyoperators', 'pyoperators.iterative', 'pyoperators.utils'],
+      platforms=platforms.split(','),
+      keywords=keywords.split(','),
+      cmdclass=get_cmdclass(),
+      ext_modules=ext_modules,
+      license='CeCILL-B',
+      classifiers=[
+          'Programming Language :: Python',
+          'Programming Language :: Python :: 2 :: Only',
+          'Programming Language :: C',
+          'Programming Language :: Cython',
+          'Development Status :: 4 - Beta',
+          'Intended Audience :: Science/Research',
+          'Operating System :: OS Independent',
+          'Topic :: Scientific/Engineering'])
diff --git a/test/test_algorithms.py b/test/test_algorithms.py
new file mode 100644
index 0000000..0220683
--- /dev/null
+++ b/test/test_algorithms.py
@@ -0,0 +1,127 @@
+from __future__ import division
+
+import numpy as np
+
+from pyoperators.iterative.core import IterativeAlgorithm
+from pyoperators.iterative.stopconditions import MaxIterationStopCondition
+from pyoperators.utils.testing import assert_eq, assert_raises, assert_same
+
+
+def test_suffix():
+    suffix = ['_new', '', '_old', '_old2', '_old3', '_old4']
+    expecteds = [['']] + [suffix[:n] for n in range(2, len(suffix)+1)]
+    keywords = [{'x': 0, 'inplace_recursion': True},
+                {'x': 0},
+                {'x': 0, 'x_old': 1},
+                {'x': 0, 'x_old': 1, 'x_old2': 2},
+                {'x': 0, 'x_old': 1, 'x_old2': 2, 'x_old3': 3},
+                {'x': 0, 'x_old': 1, 'x_old2': 2, 'x_old3': 3, 'x_old4': 4}]
+
+    def func(keywords, expected):
+        a = IterativeAlgorithm(**keywords)
+        assert_eq(a._get_suffix(), expected)
+    for keyword, expected in zip(keywords, expecteds):
+        yield func, keyword, expected
+
+
+def test_fibonacci():
+    class Fibonacci(IterativeAlgorithm):
+        def __init__(self, **keywords):
+            IterativeAlgorithm.__init__(self, x_old=0, x=1, **keywords)
+
+        def iteration(self):
+            np.add(self.x_old, self.x, self.x_new)
+
+        def finalize(self):
+            return int(self.x)
+
+    fib = Fibonacci(normal_stop_condition=MaxIterationStopCondition(10))
+    assert_eq(fib.run(), 55)
+    fib.initialize()
+    assert_eq(list(fib), [1, 2, 3, 5, 8, 13, 21, 34, 55])
+    assert_eq(fib.restart(), 55)
+
+
+def test_recursion():
+    class GaussLegendre1(IterativeAlgorithm):
+        def __init__(self, **keywords):
+            IterativeAlgorithm.__init__(
+                self, a=1, b=1/np.sqrt(2), t=1/4, p=1, p_dtype=int,
+                normal_stop_condition=MaxIterationStopCondition(10),
+                **keywords)
+
+        def iteration(self):
+            self.a_new[...] = (self.a + self.b) / 2
+            self.b_new[...] = np.sqrt(self.a * self.b)
+            self.t_new[...] = self.t - self.p*(self.a - self.a_new)**2
+            self.p_new[...] = 2 * self.p
+
+        def finalize(self):
+            return (self.a + self.b)**2/(4*self.t)
+
+    class GaussLegendre2(IterativeAlgorithm):
+        def __init__(self, **keywords):
+            IterativeAlgorithm.__init__(
+                self, a=1, b=1/np.sqrt(2), t=1/4, p=1, p_dtype=int,
+                allocate_new_state=False, normal_stop_condition=
+                MaxIterationStopCondition(10), **keywords)
+
+        def iteration(self):
+            self.a_new = (self.a + self.b) / 2
+            self.b_new = np.sqrt(self.a * self.b)
+            self.t_new = self.t - self.p*(self.a - self.a_new)**2
+            self.p_new = 2 * self.p
+
+        def finalize(self):
+            return (self.a + self.b)**2/(4*self.t)
+
+    class GaussLegendre3(IterativeAlgorithm):
+        def __init__(self, **keywords):
+            IterativeAlgorithm.__init__(
+                self, a=1, b=1/np.sqrt(2), t=1/4, p=1, p_dtype=int,
+                inplace_recursion=True, normal_stop_condition=
+                MaxIterationStopCondition(10), **keywords)
+
+        def iteration(self):
+            a_tmp = (self.a + self.b) / 2
+            self.b[...] = np.sqrt(self.a * self.b)
+            self.t -= self.p*(self.a - a_tmp)**2
+            self.p *= 2
+            self.a[...] = a_tmp
+
+        def finalize(self):
+            return (self.a + self.b)**2/(4*self.t)
+
+    class GaussLegendre4(IterativeAlgorithm):
+        def __init__(self, **keywords):
+            IterativeAlgorithm.__init__(
+                self, a=1, b=1/np.sqrt(2), t=1/4, p=1, p_dtype=int,
+                inplace_recursion=True, normal_stop_condition=
+                MaxIterationStopCondition(10), **keywords)
+
+        def iteration(self):
+            a_tmp = (self.a + self.b) / 2
+            self.b = np.sqrt(self.a * self.b)
+            self.t = self.t - self.p*(self.a - a_tmp)**2
+            self.p = 2 * self.p
+            self.a = a_tmp
+
+        def finalize(self):
+            return (self.a + self.b)**2/(4*self.t)
+
+    algos = [GaussLegendre1, GaussLegendre2, GaussLegendre3, GaussLegendre4]
+
+    def func(algo, reuse_initial_state):
+        g = algo(reuse_initial_state=reuse_initial_state)
+        pi = g.run()
+        assert g.niterations == 10
+        assert_same(pi, np.pi)
+        if g.reuse_initial_state:
+            assert_raises(RuntimeError, g.restart)
+            return
+        g.restart()
+        assert g.niterations == 10
+        assert_same(pi, np.pi)
+    for algo in algos:
+        for reuse_initial_state in (False, True):
+            yield func, algo, reuse_initial_state
diff --git a/test/test_broadcastingoperators.py b/test/test_broadcastingoperators.py
new file mode 100644
index 0000000..35eab1a
--- /dev/null
+++ b/test/test_broadcastingoperators.py
@@ -0,0 +1,495 @@
+from __future__ import division
+
+import itertools
+import numpy as np
+import operator
+from numpy.testing import assert_equal, assert_raises
+from pyoperators import (
+    AdditionOperator, BlockDiagonalOperator, CompositionOperator,
+    ConstantOperator, DiagonalOperator, DiagonalNumexprOperator,
+    HomothetyOperator, IdentityOperator, MaskOperator, MultiplicationOperator,
+    Operator, PackOperator, UnpackOperator, ZeroOperator, I, O)
+from pyoperators.core import BroadcastingBase
+from pyoperators.flags import linear, square
+from pyoperators.rules import rule_manager
+from pyoperators.utils import float_dtype, product
+from pyoperators.utils.testing import (
+    assert_eq, assert_is, assert_is_instance, assert_is_none, assert_is_not,
+    assert_is_type, assert_not_in, assert_same)
+from .common import HomothetyOutplaceOperator
+
+clss = (ConstantOperator, DiagonalOperator, DiagonalNumexprOperator,
+        HomothetyOperator, IdentityOperator, MaskOperator, ZeroOperator)
+sameshapes = ((False, True), (True, True), (True, True), (True, True))
+types = (bool, np.int8, np.int16, np.int32, np.int64, np.float16, np.float32,
+         np.float64, np.float128, np.complex128, np.complex256)
+
+
+def get_operator(cls, data, **keywords):
+    if cls is DiagonalNumexprOperator:
+        args = (data, '3*data')
+    elif cls is HomothetyOperator:
+        args = (data.flat[0],)
+    elif cls in (IdentityOperator, ZeroOperator):
+        args = ()
+    else:
+        args = (data,)
+    return cls(*args, **keywords)
+
+
+def test_diagonal1():
+    data = (0., 1., [0, 0], [1, 1], 2, [2, 2], [0, 1], [-1, -1], [-1, 1],
+            [2, 1])
+    expected = (ZeroOperator, IdentityOperator, ZeroOperator, IdentityOperator,
+                HomothetyOperator, HomothetyOperator, MaskOperator,
+                HomothetyOperator, DiagonalOperator, DiagonalOperator)
+
+    def func(d, e):
+        op = DiagonalOperator(d)
+        if all(_ in (-1, 1) for _ in op.data.flat):
+            assert op.flags.involutary
+        assert_is_type(op, e)
+    for d, e in zip(data, expected):
+        yield func, d, e
+
+
+def test_diagonal2():
+    ops = (DiagonalOperator([1., 2], broadcast='rightward'),
+           DiagonalOperator([[2., 3, 4], [5, 6, 7]], broadcast='rightward'),
+           DiagonalOperator([1., 2, 3, 4, 5], broadcast='leftward'),
+           DiagonalOperator(np.arange(20).reshape(4, 5), broadcast='leftward'),
+           DiagonalOperator(np.arange(120.).reshape(2, 3, 4, 5)),
+           HomothetyOperator(7.),
+           IdentityOperator())
+
+    x = np.arange(120.).reshape(2, 3, 4, 5) / 2
+
+    def func(cls, d1, d2):  # check that cls([d1, d2]) simplifies correctly
+        op = {AdditionOperator: operator.add,  # elementwise counterpart
+              CompositionOperator: operator.mul,
+              MultiplicationOperator: operator.mul}[cls]
+        d = cls([d1, d2])
+        if type(d1) is DiagonalOperator:
+            assert_is_type(d, DiagonalOperator)
+        elif type(d1) is HomothetyOperator:
+            assert_is_type(d, HomothetyOperator)
+        elif cls is CompositionOperator:  # `op` is a function: test `cls`
+            assert_is_type(d, IdentityOperator)
+        else:
+            assert_is_type(d, HomothetyOperator)
+
+        data = op(d1.data.T, d2.data.T).T \
+               if 'rightward' in (d1.broadcast, d2.broadcast) \
+               else op(d1.data, d2.data)
+        assert_same(d.data, data)  # data of the simplified operator
+        if cls is CompositionOperator:
+            assert_same(d(x), d1(d2(x)))
+        else:
+            assert_same(d(x), op(d1(x), d2(x)))
+    for op in (AdditionOperator, CompositionOperator):#, MultiplicationOperator):
+        for d1, d2 in itertools.combinations(ops, 2):
+            if set((d1.broadcast, d2.broadcast)) == \
+               set(('leftward', 'rightward')):
+                continue
+            yield func, op, d1, d2
+
+
+def test_masking():
+    mask = MaskOperator(0)
+    assert isinstance(mask, IdentityOperator)
+    mask = MaskOperator(0, shapein=(32, 32), dtype=np.float32)
+    assert isinstance(mask, IdentityOperator)
+    assert mask.shapein == (32, 32)
+    assert mask.dtype == np.float32
+
+    mask = MaskOperator(1)
+    assert isinstance(mask, ZeroOperator)
+    mask = MaskOperator(1, shapein=(32, 32), dtype=np.float32)
+    assert isinstance(mask, ZeroOperator)
+    assert mask.shapein == (32, 32)
+    assert mask.dtype == np.float32
+
+    b = np.array([3., 4., 1., 0., 3., 2.])
+    c = np.array([3., 4., 0., 0., 3., 0.])
+    mask = MaskOperator(np.array([0, 0., 1., 1., 0., 1], dtype=np.int8))
+    assert np.all(mask(b) == c)
+    mask = DiagonalOperator(np.array([1, 1., 0., 0., 1., 0]))
+    assert np.all(mask(b) == c)
+    mask = MaskOperator(np.array([False, False, True, True, False, True]))
+    assert np.all(mask(b) == c)
+
+    b = np.array([[3., 4.], [1., 0.], [3., 2.]])
+    c = np.array([[3., 4.], [0., 0.], [3., 0.]])
+    mask = MaskOperator(np.array([[0, 0.], [1., 1.], [0., 1.]], dtype='int8'))
+    assert np.all(mask(b) == c)
+    mask = DiagonalOperator(np.array([[1, 1.], [0., 0.], [1., 0.]]))
+    assert np.all(mask(b) == c)
+    mask = MaskOperator(np.array([[False, False],
+                                  [True, True],
+                                  [False, True]]))
+    assert np.all(mask(b) == c)
+
+    b = np.array([[[3, 4.], [1., 0.]], [[3., 2], [-1, 9]]])
+    c = np.array([[[3, 4.], [0., 0.]], [[3., 0], [0, 0]]])
+    mask = MaskOperator(np.array([[[0, 0.], [1., 1.]],
+                                  [[0., 1], [1, 1]]], int))
+    assert np.all(mask(b) == c)
+
+    mask = DiagonalOperator(np.array([[[1, 1], [0., 0]], [[1, 0], [0, 0]]]))
+    assert np.all(mask(b) == c)
+    mask = MaskOperator(np.array([[[False, False], [True, True]],
+                                  [[False, True], [True, True]]]))
+    assert np.all(mask(b) == c)
+
+    c = mask(b, b)
+    assert id(b) == id(c)
+
+
+def test_masking2():
+    m = MaskOperator([True, False, True])
+    assert_eq(m * m,  m)
+
+
+def test_homothety_operator():
+    s = HomothetyOperator(1)
+    assert s.C is s.T is s.H is s.I is s
+
+    s = HomothetyOperator(-1)
+    assert s.C is s.T is s.H is s.I is s
+
+    s = HomothetyOperator(2.)
+    assert s.C is s.T is s.H is s
+    assert_is_not(s.I, s)
+
+    def func(o):
+        assert_is_instance(o, HomothetyOperator)
+    for o in (s.I, s.I.C, s.I.T, s.I.H, s.I.I):
+        yield func, o
+
+    s = HomothetyOperator(complex(1, 1))
+    assert_is(s.T, s)
+    assert_is(s.H, s.C)
+    assert_not_in(s.I, (s, s.C))
+    assert_not_in(s.I.C, (s, s.C))
+    assert_is_instance(s.C, HomothetyOperator)
+    for o in (s.I, s.I.C, s.I.T, s.I.H, s.I.I):
+        yield func, o
+
+
+def test_homothety_rules1():
+    models = 1. * I + I, -I, (-2) * I, -(2 * I), 1. * I - I, 1. * I - 2 * I
+    results = [6, -3, -6, -6, 0, -3]
+
+    def func(model, result, i):
+        o = model(i)
+        assert_eq(o, result, str((model, i)))
+        assert_eq(o.dtype, int, str((model, i)))
+    for model, result in zip(models, results):
+        for i in (np.array(3), [3], (3,), np.int(3), 3):
+            yield func, model, result, i
+
+
+def test_homothety_rules2():
+    model = -I
+    iops = (operator.iadd, operator.isub, operator.imul, operator.iadd,
+            operator.imul)
+    imodels = 2*I, 2*I, 2*I, O, O
+    results = [3, -3, -6, -6, 0]
+
+    def func(imodel, result, i):
+        assert_eq(model(i), result)
+    for iop, imodel, result in zip(iops, imodels, results):
+        model = iop(model, imodel)
+        for i in (np.array(3), [3], (3,), np.int(3), 3):
+            yield func, imodel, result, i
+
+
+def test_homothety_rules3():
+    @linear
+    class Op(Operator):
+        pass
+
+    def func(opout, opin, idin):
+        if opin is not None and idin is not None and opin != idin:
+            return
+        p = Op(shapeout=opout, shapein=opin) * IdentityOperator(shapein=idin)
+
+        if idin is None:
+            idin = opin
+        assert_is_instance(p, Op)
+        assert_eq(p.shapein, idin)
+        assert_eq(p.shapeout, opout)
+    for opout in (None, (100,)):
+        for opin in (None, (100,)):
+            for idin in (None, (100,)):
+                yield func, opout, opin, idin
+
+
+def test_constant_rules1():
+    c = 1, np.array([1, 2]), np.array([2, 3, 4])
+    t = 'scalar', 'rightward', 'leftward'
+
+    def func(c1, t1, c2, t2):
+        op2 = ConstantOperator(c2, broadcast=t2)
+        op = op1 + op2
+        if set((op1.broadcast, op2.broadcast)) != \
+           set(('rightward', 'leftward')):
+            assert_is_instance(op, ConstantOperator)
+        v = np.zeros((2, 3))
+        op(np.nan, v)
+        z = np.zeros((2, 3))
+        if t1 == 'rightward':
+            z.T[...] += c1.T
+        else:
+            z[...] += c1
+        if t2 == 'rightward':
+            z.T[...] += c2.T
+        else:
+            z[...] += c2
+        assert_eq(v, z)
+    for c1, t1 in zip(c, t):
+        op1 = ConstantOperator(c1, broadcast=t1)
+        for c2, t2 in zip(c, t):
+            yield func, c1, t1, c2, t2
+
+
+def test_constant_rules2():
+    H = HomothetyOperator
+    C = CompositionOperator
+    D = DiagonalOperator
+    cs = (ConstantOperator(3),
+          ConstantOperator([1, 2, 3], broadcast='leftward'),
+          ConstantOperator(np.ones((2, 3))))
+    os = (I, H(2, shapein=(2, 3)) * Operator(direct=np.square, shapein=(2, 3),
+                                             flags='linear,square'), H(5))
+    results = (((H, 3), (C, (H, 6)), (H, 15)),
+               ((D, [1, 2, 3]), (C, (D, [2, 4, 6])), (D, [5, 10, 15])),
+               ((IdentityOperator, 1), (C, (H, 2)), (H, 5)))
+    v = np.arange(6).reshape((2, 3))
+
+    def func(c, o, r):  # r = (expected type, expected data or nested pair)
+        op = MultiplicationOperator([c, o])
+        assert_eq(op(v), c.data*o(v))
+        assert_is_type(op, r[0])
+        if type(op) is CompositionOperator:
+            op = op.operands[0]  # inspect the leading operand
+            r = r[1]  # nested (type, data) pair for that operand
+            assert_is_type(op, r[0])
+        assert_eq(op.data, r[1])  # call it: a bare tuple checks nothing
+    for c, rs in zip(cs, results):
+        for o, r in zip(os, rs):
+            yield func, c, o, r
+
+
+def _test_constant_rules3():
+    @square
+    class Op(Operator):
+        def direct(self, input, output):
+            output[...] = input + np.arange(input.size).reshape(input.shape)
+
+    os = (Op(shapein=()), Op(shapein=4), Op(shapein=(2, 3, 4)))
+    cs = (ConstantOperator(2), ConstantOperator([2], broadcast='leftward'),
+          ConstantOperator(2*np.arange(8).reshape((2, 1, 4)),
+                           broadcast='leftward'))
+    v = 10000000
+
+    def func(o, c):
+        op = o * c
+        y_tmp = np.empty(o.shapein, int)
+        c(v, y_tmp)
+        assert_eq(op(v), o(y_tmp))
+    for o, c in zip(os, cs):
+        yield func, o, c
+
+
+def test_packing():
+    valids = np.array([[False, True, True], [False, True, True]])
+    valids = valids.ravel(), valids
+    xs = np.array([[1, 2, 3], [4, 5, 6]])
+    xs = xs.ravel(), xs
+    shapes = (), (4,), (4, 5)
+    broadcasts = 'disabled', 'leftward', 'rightward'
+    expected = np.array([2, 3, 5, 6])
+
+    def func(valid, x, shape, broadcast):
+        p = PackOperator(valid, broadcast=broadcast)
+        masking = MaskOperator(~valid, broadcast=broadcast)
+        if broadcast == 'leftward':
+            x_ = np.empty(shape + x.shape)
+            x_[...] = x
+            expected_ = np.empty(shape + (expected.size,))
+            expected_[...] = expected
+        else:
+            x_ = np.empty(x.shape + shape)
+            x_.reshape((x.size, -1))[...] = x.ravel()[..., None]
+            expected_ = np.empty((expected.size,) + shape)
+            expected_.reshape((expected.size, -1))[...] = expected[..., None]
+
+        if broadcast == 'disabled' and shape != ():
+            assert_raises(ValueError, p, x_)
+            return
+        assert_equal(p(x_), expected_)
+
+        assert_is_type(p.T, UnpackOperator)
+        assert_equal(p.T.broadcast, p.broadcast)
+        assert_equal(p.T(expected_), masking(x_))
+
+        u = UnpackOperator(valid, broadcast=broadcast)
+        assert_is_type(u.T, PackOperator)
+        assert_equal(u.T.broadcast, u.broadcast)
+        assert_equal(u(expected_), masking(x_))
+        assert_equal(u.T(x_), expected_)
+
+    for valid, x in zip(valids, xs):
+        for shape in shapes:
+            for broadcast in broadcasts:
+                yield func, valid, x, shape, broadcast
+
+
+def test_dtype():
+    x = np.array([3, 0, 2])
+
+    def func(c, t):
+        op = get_operator(c, x.astype(t))
+        c_ = type(op)
+        if c_ in (IdentityOperator, ZeroOperator):
+            expected_dtype = int
+        elif c_ is MaskOperator:
+            expected_dtype = bool
+        else:
+            expected_dtype = t
+        assert_equal(op.data.dtype, expected_dtype)
+
+        if c_ in (IdentityOperator, MaskOperator, ZeroOperator):
+            assert_equal(op.dtype, None)
+        elif c_ is DiagonalNumexprOperator:
+            assert_equal(op.dtype, float_dtype(t))
+        else:
+            assert_equal(op.dtype, t)
+
+    for c in clss:
+        for t in types:
+            yield func, c, t
+
+
+def test_shape():
+    shapes = (), (3,), (3, 2)
+    broadcasts = None, 'leftward', 'rightward'
+
+    def func(c, s, b, sameshapein, sameshapeout):
+        x = np.arange(product(s)).reshape(s)
+        op = get_operator(c, x, broadcast=b)
+        if len(s) == 0 or c in (HomothetyOperator, IdentityOperator,
+                                ZeroOperator):
+            assert_equal(op.broadcast, 'scalar')
+            assert_is_none(op.shapein)
+            assert_is_none(op.shapeout)
+        elif b in ('leftward', 'rightward'):
+            assert_equal(op.broadcast, b)
+            assert_is_none(op.shapein)
+            assert_is_none(op.shapeout)
+        else:
+            assert_equal(op.broadcast, 'disabled')
+            if sameshapein:
+                assert_equal(op.shapein, s)
+            if sameshapeout:
+                assert_equal(op.shapeout, s)
+    for c, (sameshapein, sameshapeout) in zip(clss, sameshapes):
+        for s in shapes:
+            for b in broadcasts:
+                yield func, c, s, b, sameshapein, sameshapeout
+
+
+def test_partition():
+    clss = (ConstantOperator, DiagonalOperator, DiagonalNumexprOperator,
+            HomothetyOperator, IdentityOperator, MaskOperator, PackOperator,
+            UnpackOperator)
+    valids = ((True, False, False), (True, True, True), (True, True, True),
+              (True, True, True), (True, True, True), (True, True, True),
+              (True, False, True), (True, True, False))
+
+    def func(a, b, operation, apply_rule):
+        p = operation([a, b])
+        if not apply_rule:
+            if isinstance(a, IdentityOperator) or \
+               isinstance(b, IdentityOperator):
+                return
+            assert not isinstance(p, BlockDiagonalOperator)
+            return
+        assert_is_instance(p, BlockDiagonalOperator)
+        with rule_manager(none=True):
+            q = operation([a, b])
+        assert_equal(p.todense(), q.todense())
+
+    for cls, (commutative, left, right) in zip(clss, valids):
+        for ndims in range(3):
+            shape = tuple(range(2, 2 + ndims))
+
+            def sfunc1(ndim):
+                s = range(2, ndim + 2)
+                data = np.arange(product(s)).reshape(s) + 2
+                if cls is MaskOperator:
+                    data = (data % 2).astype(bool)
+                return data
+
+            def sfunc2(ndim):
+                s = range(2 + ndims - ndim, 2 + ndims)
+                data = np.arange(product(s)).reshape(s) + 2
+                if cls is MaskOperator:
+                    data = (data % 2).astype(bool)
+                return data
+
+            if cls in (HomothetyOperator, IdentityOperator):
+                ops = [get_operator(cls, np.array(2))]
+            else:
+                ops = [get_operator(cls, sfunc1(ndim))
+                       for ndim in range(ndims+1)] + \
+                      [get_operator(cls, sfunc2(ndim), broadcast='leftward')
+                       for ndim in range(1, ndims+1)] + \
+                      [get_operator(cls, sfunc1(ndim), broadcast='rightward')
+                       for ndim in range(1, ndims+1)]
+
+            def toone(index):
+                list_ = list(shape)
+                list_[index] = 1
+                return list_
+
+            def remove(index):
+                list_ = list(shape)
+                list_.pop(index)
+                return list_
+            block = \
+                [BlockDiagonalOperator([HomothetyOutplaceOperator(
+                    v, shapein=toone(axis)) for v in range(2, 2+shape[axis])],
+                    axisin=axis, partitionin=shape[axis]*[1])
+                 for axis in range(-ndims, ndims)] + \
+                [BlockDiagonalOperator([HomothetyOutplaceOperator(
+                    v, shapein=remove(axis)) for v in range(2, 2+shape[axis])],
+                    new_axisin=axis, partitionin=shape[axis]*[1])
+                 for axis in range(-ndims, ndims)]
+
+            for o, b in itertools.product(ops, block):
+                if (o.shapein is None or o.shapein == b.shapein) and \
+                   (o.shapeout is None or o.shapeout == b.shapeout):
+                    yield func, o, b, AdditionOperator, commutative
+                    yield func, o, b, MultiplicationOperator, commutative
+                if o.shapein is None or o.shapein == b.shapeout:
+                    yield func, o, b, CompositionOperator, right
+                if o.shapeout is None or b.shapein == o.shapeout:
+                    yield func, b, o, CompositionOperator, left
+
+
+def test_as_strided():
+    shapes = {'leftward': (2, 4, 3, 4, 2, 2),
+              'rightward': (3, 2, 2, 3, 1, 2)}
+
+    def func(b):
+        o = BroadcastingBase(np.arange(6).reshape((3, 1, 2, 1)), b)
+        s = shapes[b]
+        if b == 'leftward':
+            v = o.data*np.ones(s)
+        else:
+            v = (o.data.T * np.ones(s, int).T).T
+        assert_equal(o._as_strided(s), v)
+    for b in ('rightward', 'leftward'):
+        yield func, b
diff --git a/test/test_core.py b/test/test_core.py
new file mode 100644
index 0000000..637076e
--- /dev/null
+++ b/test/test_core.py
@@ -0,0 +1,2292 @@
+from __future__ import division, print_function
+import itertools
+import numpy as np
+import operator
+import scipy
+import sys
+
+from nose import with_setup
+from nose.plugins.skip import SkipTest
+from numpy.testing import assert_equal
+from pyoperators import memory, flags
+from pyoperators import (
+    Operator, AdditionOperator, BlockColumnOperator, BlockDiagonalOperator,
+    BlockRowOperator, BlockSliceOperator, CompositionOperator,  GroupOperator,
+    ConstantOperator, DenseOperator, DiagonalOperator, HomothetyOperator,
+    IdentityOperator, MultiplicationOperator, PowerOperator,
+    ReciprocalOperator, ReductionOperator, SparseOperator, SquareOperator,
+    asoperator, I, X)
+from pyoperators.core import CopyOperator, _pool as pool
+from pyoperators.memory import zeros
+from pyoperators.rules import rule_manager
+from pyoperators.utils import (
+    ndarraywrap, first_is_not, isalias, isscalarlike, operation_assignment,
+    product, split)
+from pyoperators.utils.mpi import MPI
+from pyoperators.utils.testing import (
+    assert_eq, assert_is, assert_is_none, assert_is_instance, assert_raises,
+    assert_is_type, assert_same, skiptest)
+from scipy.sparse import csc_matrix
+from .common import OPS, ALL_OPS, DTYPES, HomothetyOutplaceOperator
+
+PYTHON_26 = sys.version_info < (2, 7)
+np.seterr(all='raise')
+
+old_memory_verbose = None
+old_memory_tolerance = None
+
+
+def assert_flags(operator, flags, msg=''):
+    if isinstance(flags, str):
+        flags = [f.replace(' ', '') for f in flags.split(',')]
+    for f in flags:
+        assert getattr(operator.flags, f), 'Operator {0} is not {1}.'.format(
+            operator, f) + (' ' + msg if msg else '')
+
+
+def assert_flags_false(operator, flags, msg=''):
+    if isinstance(flags, str):
+        flags = [f.replace(' ', '') for f in flags.split(',')]
+    for f in flags:
+        assert not getattr(operator.flags, f), 'Operator {0} is {1}.'.format(
+            operator, f) + (' ' + msg if msg else '')
+
+
+def assert_is_inttuple(shape, msg=''):
+    msg = '{0} is not an int tuple.'.format(shape) + (' ' + msg if msg else '')
+    assert type(shape) is tuple, msg
+    assert all([isinstance(s, int) for s in shape]), msg
+
+
+def assert_square(op, msg=''):
+    assert_flags(op, 'square', msg)
+    assert_eq(op.shapein, op.shapeout)
+    assert_eq(op.reshapein, op.reshapeout)
+    assert_eq(op.validatein, op.validateout)
+    if op.shapein is None:
+        assert_eq(op.toshapein, op.toshapeout)
+
+SHAPES = (None, (), (1,), (3,), (2, 3))
+
+
+class ndarray2(np.ndarray):
+    pass
+
+
+class ndarray3(np.ndarray):
+    pass
+
+
+class ndarray4(np.ndarray):
+    pass
+
+
+@flags.linear
+@flags.square
+class Op2(Operator):
+    attrout = {'newattr': True}
+
+    def direct(self, input, output):
+        pass
+
+    def transpose(self, input, output):
+        pass
+
+
+@flags.linear
+@flags.square
+class Op3(Operator):
+    classout = ndarray3
+    classin = ndarray4
+
+    def direct(self, input, output):
+        pass
+
+    def transpose(self, input, output):
+        pass
+
+
+@flags.update_output
+class OperatorIR(Operator):
+    def direct(self, input, output, operation=operation_assignment):
+        operation(output, input)
+
+
+#===========
+# Test flags
+#===========
+
+def test_flags():
+    def func(op):  # op is an operator class; check flag/property coherence
+        try:
+            o = op()  # first try a no-argument construction
+        except Exception:  # not bare: let KeyboardInterrupt/SystemExit pass
+            try:
+                v = np.arange(10.)
+                o = op(v)  # fall back to a single array argument
+            except Exception:
+                print('Cannot test: ' + op.__name__)
+                return
+        if type(o) is not op:  # construction returned another class
+            print('Cannot test: ' + op.__name__)
+            return
+        if o.flags.idempotent:
+            assert_is(o, o(o))
+        if o.flags.real:
+            assert_is(o, o.C)
+        if o.flags.symmetric:
+            assert_is(o, o.T)
+        if o.flags.hermitian:
+            assert_is(o, o.H)
+        if o.flags.involutary:
+            assert_is(o, o.I)
+        if o.flags.orthogonal:
+            assert_is(o.T, o.I)
+        if o.flags.unitary:
+            assert_is(o.H, o.I)
+    for op in ALL_OPS:
+        yield func, op
+
+
+def test_symmetric():
+    mat = np.matrix([[2, 1], [1, 2]])
+
+    @flags.symmetric
+    class Op(Operator):
+        def __init__(self):
+            Operator.__init__(self, shapein=2, dtype=mat.dtype)
+
+        def direct(self, input, output):
+            output[...] = np.dot(mat, input)
+
+    op = Op()
+    assert_flags(op, 'linear,square,real,symmetric')
+    assert_eq(op.shape, (2, 2))
+    assert_eq(op.shapeout, (2,))
+    assert op is op.C
+    assert op is op.T
+    assert op is op.H
+    assert_eq(op([1, 1]), np.array(mat * [[1], [1]]).ravel())
+
+
+def test_shape_input_and_output():
+    ops = tuple(cls() for cls in OPS)
+    kind = {'Expl': 'explicit', 'Impl': 'implicit', 'Unco': 'unconstrained'}
+
+    def func(flags, name):
+        assert_eq(flags.shape_output, kind[name[:4]])
+        assert_eq(flags.shape_input, kind[name[4:]])
+
+    for op in ops:
+        yield func, op.flags, type(op).__name__
+
+
+def test_update_output1():
+    class OperatorNIR1(Operator):
+        def direct(self, input, output):
+            output[...] = input
+
+    class OperatorNIR2(Operator):
+        def direct(self, input, output, operation=operation_assignment):
+            operation(output, input)
+
+    def func(cls):
+        assert not cls().flags.update_output
+        out = np.zeros(3, dtype=int)
+        assert_raises(ValueError, cls(), [1, 0, 0], out,
+                      operation=operator.iadd)
+
+    for cls in (OperatorNIR1, OperatorNIR2):
+        yield func, cls
+
+
+def test_update_output2():
+    assert OperatorIR().flags.update_output
+    assert_raises(ValueError, OperatorIR(), [1, 0, 0], operation=operator.iadd)
+
+    op = OperatorIR()
+    inputs = [1, 1, 0], [0, 2, 1], [0, 1, 1]
+    expecteds = [0, 1, 1], [2, 5, 3], [0, 2, 0]
+
+    def func(o, e):
+        output = np.ones(3, dtype=int)
+        for i in inputs:
+            op(i, output, operation=o)
+        assert_same(output, e)
+    for o, e in zip((operation_assignment, operator.iadd, operator.imul),
+                    expecteds):
+        yield func, o, e
+
+
+def test_autoflags():
+    def func(f):
+        assert_raises(ValueError, Operator, flags=f)
+    for f in ['shape_input', 'shape_output']:
+        yield func, f
+
+
+#=============
+# Test direct
+#=============
+
+def test_ufuncs():
+    assert_raises(TypeError, Operator, np.maximum)
+
+    def func(ufunc, dtype):  # dtype: expected dtype of the wrapping Operator
+        o = Operator(ufunc)  # wrap the ufunc under test, not always np.cos
+        assert_flags(o, 'real,inplace,outplace,square,separable')
+        assert o.dtype == dtype
+
+    ufuncs = np.cos, np.invert, np.negative
+    dtypes = np.float64, None, None
+    for ufunc, dtype in zip(ufuncs, dtypes):
+        yield func, ufunc, dtype
+
+
+#==================
+# Test conjugation
+#==================
+
+def test_conjugation():
+    @flags.square
+    class OpBase(Operator):
+        def __init__(self, data_=None):
+            Operator.__init__(self, shapein=2, dtype=complex)
+            if data_ is None:
+                data_ = data
+            self.data = data_
+            self.dataI = np.linalg.inv(data_)
+
+        def direct(self, input, output):
+            np.dot(self.data, input, output)
+
+        def inverse(self, input, output):
+            np.dot(self.dataI, input, output)
+
+    class Op1T(OpBase):
+        def transpose(self, input, output):
+            np.dot(self.data.T, input, output)
+
+    class Op1H(OpBase):
+        def adjoint(self, input, output):
+            np.dot(self.data.T.conj(), input, output)
+
+    class Op1IT(OpBase):
+        def inverse_transpose(self, input, output):
+            np.dot(self.dataI.T, input, output)
+
+    class Op1IH(OpBase):
+        def inverse_adjoint(self, input, output):
+            np.dot(self.dataI.T.conj(), input, output)
+
+    class Op2T(OpBase):
+        def __init__(self):
+            OpBase.__init__(self)
+            self.set_rule('T', lambda s: OpBase(s.data.T))
+
+    class Op2H(OpBase):
+        def __init__(self):
+            OpBase.__init__(self)
+            self.set_rule('H', lambda s: OpBase(s.data.T.conj()))
+
+    class Op2IT(OpBase):
+        def __init__(self):
+            OpBase.__init__(self)
+            self.set_rule('IT', lambda s: OpBase(s.dataI.T))
+
+    class Op2IH(OpBase):
+        def __init__(self):
+            OpBase.__init__(self)
+            self.set_rule('IH', lambda s: OpBase(s.dataI.T.conj()))
+
+    data = np.array([[1, 1j], [0, 2]])
+    dense = OpBase().todense()
+    denseI = np.linalg.inv(dense)
+
+    def func(opT, opH, opIT, opIH):
+        assert_eq(opT.C.todense(), dense.conj())
+        assert_eq(opT.T.todense(), dense.T)
+        assert_eq(opT.H.todense(), dense.T.conj())
+        assert_eq(opH.C.todense(), dense.conj())
+        assert_eq(opH.T.todense(), dense.T)
+        assert_eq(opH.H.todense(), dense.T.conj())
+        assert_eq(opIT.I.C.todense(), denseI.conj())
+        assert_eq(opIT.I.T.todense(), denseI.T)
+        assert_eq(opIT.I.H.todense(), denseI.T.conj())
+        assert_eq(opIH.I.C.todense(), denseI.conj())
+        assert_eq(opIH.I.T.todense(), denseI.T)
+        assert_eq(opIH.I.H.todense(), denseI.T.conj())
+    for opT, opH, opIT, opIH in [(Op1T(), Op1H(), Op1IT(), Op1IH()),
+                                 (Op2T(), Op2H(), Op2IT(), Op2IH())]:
+        yield func, opT, opH, opIT, opIH
+
+
+#==================
+# Test *, / and **
+#==================
+
+def test_times_mul_or_comp():
+    mat = [[1, 1, 1],
+           [0, 1, 1],
+           [0, 0, 1]]
+    ops = (2, [1, 2, 3], np.array(3), np.ones(3), np.negative, np.sqrt,
+           np.matrix(mat), csc_matrix(mat), DenseOperator(mat),
+           HomothetyOperator(3), SquareOperator(), X, X.T)
+
+    def islinear(_):
+        if isinstance(_, (np.matrix, csc_matrix)):
+            return True
+        if _ is np.sqrt:
+            return False
+        if _ is np.negative:
+            return True
+        if isscalarlike(_):
+            return True
+        return _.flags.linear
+
+    def func(x, y):
+        if np.__version__ < '1.9' and isinstance(x, np.ndarray):
+            if isinstance(x, np.matrix):
+                x = DenseOperator(x)
+            elif x.ndim > 0:
+                x = DiagonalOperator(x)
+        if scipy.__version__ < '0.14' and isinstance(x, csc_matrix):
+            x = SparseOperator(x)
+        if x is X.T and (y is np.sqrt or isinstance(y, SquareOperator)) or \
+           y is X.T and not isscalarlike(x) and \
+           not isinstance(x, HomothetyOperator):
+            assert_raises(TypeError, eval, 'x * y', {'x': x, 'y': y})
+            return
+
+        with rule_manager(none=True):
+            z = x * y
+
+        if x is X and y is X:
+            assert_is_type(z, MultiplicationOperator)
+        elif x is X.T and y is X or x is X and y is X.T:
+            assert_is_type(z, CompositionOperator)
+        elif x is X:
+            if np.isscalar(y) or \
+               isinstance(y, (list, np.ndarray, HomothetyOperator)) and \
+               not isinstance(y, np.matrix):
+                assert_is_type(z, CompositionOperator)
+            else:
+                assert_is_type(z, MultiplicationOperator)
+        elif type(x) is list or type(x) is np.ndarray and x.ndim > 0:
+            if y is X:
+                assert_is_type(z, CompositionOperator)
+            elif islinear(y):
+                assert_equal(z, asoperator(y).T(x))
+            else:
+                assert_is_type(z, MultiplicationOperator)
+        elif type(y) is list or type(y) is np.ndarray and y.ndim > 0:
+            if x is X.T:
+                assert_is_type(z, CompositionOperator)
+            elif islinear(x):
+                assert_equal(z, asoperator(x)(y))
+            else:
+                assert_is_type(z, MultiplicationOperator)
+        elif islinear(x) and islinear(y):
+            assert_is_type(z, CompositionOperator)
+        else:
+            assert_is_type(z, MultiplicationOperator)
+
+    for x in ops:
+        for y in ops:
+            if not isinstance(x, Operator) and not isinstance(y, Operator):
+                continue
+            yield func, x, y
+
+
+def test_div():
+    def func(flag):
+        op = 1 / Operator(flags={'linear': flag})
+        assert_is_type(op, CompositionOperator)
+        assert_is_type(op.operands[0], ReciprocalOperator)
+        assert_is_type(op.operands[1], Operator)
+    for flag in False, True:
+        yield func, flag
+
+
+def test_div_fail():
+    raise SkipTest
+    assert_is_type(1 / SquareOperator(), PowerOperator)
+
+
+def test_pow():
+    data = [[1, 1], [0, 1]]
+    op_lin = DenseOperator(data)
+    assert_equal((op_lin**3).data, np.dot(np.dot(data, data), data))
+    op_nl = ConstantOperator(data)
+    assert_equal((op_nl**3).data, data)
+
+
+def test_pow2():
+
+    @flags.linear
+    @flags.square
+    class SquareOp(Operator):
+        pass
+
+    # Integer powers of a square linear operator: a zero exponent gives an
+    # identity, exponents +1 and -1 give the operator or its inverse itself,
+    # and the other exponents give a composition whose operands are all the
+    # operator (positive exponent) or its inverse (negative exponent).
+    def func(op, n):
+        p = op ** n
+        if n == 0:
+            assert_is_instance(p, IdentityOperator)
+            return
+        base = op.I if n < 0 else op
+        if abs(n) == 1:
+            assert_is(p, base)
+            return
+        assert_is_instance(p, CompositionOperator)
+        for operand in p.operands:
+            assert_is(operand, base)
+    exponents = range(-3, 4)
+    for op in [SquareOp(), SquareOp(shapein=3)]:
+        for n in exponents:
+            yield func, op, n
+
+
+def test_pow3():
+    # A power of a diagonal operator must have the same dense form as the
+    # diagonal operator built from the diagonal raised to that power.
+    diag = np.array([1., 2, 3])
+    d = DiagonalOperator(diag)
+
+    def func(n):
+        actual = (d**n).todense()
+        expected = DiagonalOperator(diag**n).todense()
+        assert_eq(actual, expected)
+    exponents = (-1.2, -1, -0.5, 0, 0.5, 1, 2.4)
+    for n in exponents:
+        yield func, n
+
+
+#========================
+# Test input/output shape
+#========================
+
+def test_shape_is_inttuple():
+    # However a shape is specified (scalar, list, tuple or array, holding
+    # integers or floats), both operator shapes must pass assert_is_inttuple.
+    def func(o):
+        for shape in (o.shapein, o.shapeout):
+            assert_is_inttuple(shape)
+    shapes = (
+        3, [3], np.array(3), np.array([3]), (3,),
+        3., [3.], np.array(3.), np.array([3.]), (3.,),
+        [3, 2], np.array([3, 2]), (3, 2),
+        [3., 2], np.array([3., 2]), (3., 2))
+    for shapein in shapes:
+        yield func, Operator(shapein=shapein, shapeout=shapein)
+
+
+def test_shape_explicit():
+    # Explicit shapes must propagate through compositions and additions,
+    # and operands with incompatible shapes must raise a ValueError.
+    o1 = Operator(shapeout=(13, 2), shapein=(2, 2), flags='linear')
+    o2 = Operator(shapeout=(2, 2), shapein=(1, 3), flags='linear')
+    o3 = Operator(shapeout=(1, 3), shapein=4, flags='linear')
+
+    def func(o, eout, ein):
+        assert_eq(o.shapeout, eout)
+        assert_eq(o.shapein, ein)
+
+    # (composition, expected shapeout, expected shapein)
+    compositions = [(o1*o2, (13, 2), (1, 3)),
+                    (o2*o3, (2, 2), (4,)),
+                    (o1*o2*o3, (13, 2), (4,))]
+    for o, eout, ein in compositions:
+        yield func, o, eout, ein
+    for operands in ([o2, o1], [o3, o2], [o3, I, o1]):
+        assert_raises(ValueError, CompositionOperator, operands)
+
+    o4 = Operator(shapeout=o1.shapeout)
+    o5 = Operator(flags='square')
+
+    # from here on, o1 is a square operator (o4 keeps the former shapeout)
+    o1 = Operator(shapein=(13, 2), flags='square')
+    additions = [o1+I, I+o1, o1+o4, o1+I+o5+o4, I+o5+o1]
+    for o in additions:
+        yield func, o, o1.shapeout, o1.shapein
+    for operands in ([o2, o1], [o3, o2], [I, o3, o1], [o3, I, o1]):
+        assert_raises(ValueError, AdditionOperator, operands)
+
+
+def test_shape_implicit():
+    # Operators without fixed shapes, which only relate their input and
+    # output shapes through reshapein and reshapeout: compositions must
+    # chain these methods.
+    @flags.linear
+    class Op(Operator):
+        def __init__(self, factor):
+            self.factor = factor
+            Operator.__init__(self)
+
+        def reshapein(self, shape):
+            return shape[0]*self.factor
+
+        def reshapeout(self, shape):
+            return shape[0]/self.factor
+
+        def __str__(self):
+            return '{0}x{1}'.format(super(Op, self).__str__(), self.factor)
+    o1, o2, o3 = [Op(factor) for factor in (2, 3, 4)]
+    assert o1.shapein is o2.shapein is o3.shapein is None
+    shapein, shapeout = (1,), (24,)
+
+    def func(o, eout, ein):
+        assert_eq(o.reshapein(shapein), eout)
+        assert_eq(o.reshapeout(shapeout), ein)
+
+    # (composition, expected reshapein((1,)), expected reshapeout((24,)))
+    cases = [(o1*o2, (6,), (4,)),
+             (o2*o3, (12,), (2,)),
+             (o1*o2*o3, (24,), (1,))]
+    for o, eout, ein in cases:
+        yield func, o, eout, ein
+
+
+def test_shapeout_unconstrained1():
+    # Setting only the input shape of a plain Operator must leave its
+    # output shape undefined.
+    for shapein in SHAPES:
+        assert_is_none(Operator(shapein=shapein).shapeout)
+
+
+def test_shapeout_unconstrained2():
+    @flags.linear
+    class Op(Operator):
+        def direct(self, input, output):
+            output[...] = 4
+
+    # The output shape of the composition is expected to be the one of the
+    # identity operand, whatever the input shape given to Op.
+    def func(s1, s2):
+        op = IdentityOperator(shapein=s1) * Op(shapein=s2)
+        if s1 is None:
+            assert op.shapeout is None
+        else:
+            assert op.shapeout == s1
+    for s1, s2 in itertools.product(SHAPES, SHAPES):
+        yield func, s1, s2
+
+
+def test_shapeout_implicit():
+    # The output shape is derived from the input shape through reshapein,
+    # which appends a dimension of size 2.
+    class Op(Operator):
+        def reshapein(self, shape):
+            return shape + (2,)
+
+    def func(op, shapein):
+        assert_flags_false(op, 'square')
+        assert op.shapein == shapein
+        if shapein is not None:
+            assert op.shapeout == shapein + (2,)
+        else:
+            assert op.shapeout is None
+    for shapein in SHAPES:
+        yield func, Op(shapein=shapein), shapein
+    # shapes that contradict reshapein are rejected
+    assert_raises(ValueError, Op, shapein=3, shapeout=11)
+
+
+def test_shapein_unconstrained1():
+    # Setting only the output shape of a plain Operator must leave its
+    # input shape undefined.
+    def func(shape):
+        assert_is_none(Operator(shapeout=shape).shapein)
+    for shapeout in SHAPES[1:]:
+        yield func, shapeout
+
+
+def test_shapein_unconstrained2():
+    # The input shape is derived from the output shape through reshapeout,
+    # which appends a dimension of size 2.
+    class Op(Operator):
+        def reshapeout(self, shape):
+            return shape + (2,)
+
+    def func(op, shapeout):
+        assert_flags_false(op, 'square')
+        assert op.shapeout == shapeout
+        assert op.shapein == shapeout + (2,)
+    for shapeout in SHAPES[1:]:
+        yield func, Op(shapeout=shapeout), shapeout
+    # shapes that contradict reshapeout are rejected
+    assert_raises(ValueError, Op, shapein=3, shapeout=11)
+
+
+def test_shapein_unconstrained3():
+    # A square operator given its output shape must report the same input
+    # shape, whichever of the reshape*/toshape* methods its class overrides
+    # (none for Op1, the input ones for Op2, the output ones for Op3, all
+    # of them for Op4).
+    @flags.square
+    class Op1(Operator):
+        pass
+
+    @flags.square
+    class Op2(Operator):
+        def reshapein(self, shape):
+            return shape
+
+        def toshapein(self, v):
+            return v
+
+    @flags.square
+    class Op3(Operator):
+        def reshapeout(self, shape):
+            return shape
+
+        def toshapeout(self, v):
+            return v
+
+    @flags.square
+    class Op4(Operator):
+        def reshapein(self, shape):
+            return shape
+
+        def reshapeout(self, shape):
+            return shape
+
+        def toshapein(self, v):
+            return v
+
+        def toshapeout(self, v):
+            return v
+
+    def func(op, shape):
+        assert_square(op)
+        assert_eq(op.shapein, shape)
+    classes = (Op1, Op2, Op3, Op4)
+    for shape, cls in itertools.product(SHAPES[1:], classes):
+        yield func, cls(shapeout=shape), shape
+
+
+#================
+# Test validation
+#================
+
+def test_validation():
+    class ValidationError(ValueError):
+        pass
+
+    # input validator: rejects shapes whose first dimension is even
+    def vin(shape):
+        if shape[0] % 2 == 0:
+            raise ValidationError()
+
+    # output validator: rejects shapes whose first dimension is odd
+    def vout(shape):
+        if shape[0] % 2 == 1:
+            raise ValidationError()
+    x_ok, y_ok = np.empty(3), np.empty(4)
+    x_err, y_err = np.empty(6), np.empty(7)
+
+    def func(cls):
+        op = cls(validatein=vin, validateout=vout)
+        op(x_ok, y_ok)
+
+        # with an explicit shape, a plain ValueError is expected rather
+        # than the validator's own exception
+        if op.flags.shape_input == 'explicit':
+            error_in = ValueError
+        else:
+            error_in = ValidationError
+        assert_raises(error_in, op, x_err, y_ok)
+        if op.flags.shape_output == 'explicit':
+            error_out = ValueError
+        else:
+            error_out = ValidationError
+        assert_raises(error_out, op, x_ok, y_err)
+
+        # with an implicit shape, the validator must already be triggered
+        # when the operator is instantiated
+        if op.flags.shape_output == 'implicit':
+            assert_raises(ValidationError, cls, validateout=vout,
+                          shapein=x_err.shape)
+        if op.flags.shape_input == 'implicit':
+            assert_raises(ValidationError, cls, validatein=vin,
+                          shapeout=y_err.shape)
+    for cls in OPS:
+        yield func, cls
+
+
+#====================
+# Test operator dtype
+#====================
+
+def test_dtype1():
+    # The output of an operator of a given dtype must have the same dtype
+    # and value as the numpy product of the input by a scalar of that dtype.
+    value = 2.5
+
+    @flags.square
+    class Op(Operator):
+        def __init__(self, dtype):
+            Operator.__init__(self, dtype=dtype)
+
+        def direct(self, input, output):
+            np.multiply(input, np.array(value, self.dtype), output)
+    input = complex(1, 1)
+
+    def func(dop, di):
+        try:
+            i = np.array(input, di)
+        except TypeError:
+            # fall back to the real part when the conversion is refused
+            i = np.array(input.real, di)
+        o = Op(dop)(i)
+        expected = i * np.array(value, dop)
+        msg = str((dop, di))
+        assert_eq(o.dtype, expected.dtype, msg)
+        assert_eq(o, expected, msg)
+
+    for dop, di in itertools.product(DTYPES, DTYPES):
+        yield func, dop, di
+
+
+def test_dtype2():
+    # For an operator without a dtype of its own, the output must have the
+    # same dtype and value as the numpy product of the input by itself.
+    @flags.linear
+    @flags.square
+    class Op(Operator):
+        def direct(self, input, output):
+            np.multiply(input, input, output)
+    op = Op()
+    input = complex(1, 1)
+
+    def func(di):
+        try:
+            i = np.array(input, di)
+        except TypeError:
+            # fall back to the real part when the conversion is refused
+            i = np.array(input.real, di)
+        o = op(i)
+        expected = i * i
+        msg = str(di)
+        assert_eq(o.dtype, expected.dtype, msg)
+        assert_eq(o, expected, msg)
+    for di in DTYPES:
+        yield func, di
+
+
+#===================
+# Test operator name
+#===================
+
+def test_name():
+    class sqrt(Operator):
+        pass
+
+    class MyOp(Operator):
+        __name__ = 'sqrt'
+
+    def func(op, name):
+        assert op.__name__ == name
+
+    # the expected name is, in turn: the class name of a plain Operator,
+    # the name of the wrapped function, the __name__ class attribute, the
+    # 'name' keyword and the name of the subclass
+    cases = [(Operator(), 'Operator'),
+             (Operator(direct=np.sqrt), 'sqrt'),
+             (MyOp(), 'sqrt'),
+             (Operator(name='sqrt'), 'sqrt'),
+             (sqrt(), 'sqrt')]
+    for op, name in cases:
+        yield func, op, name
+
+
+def test_merge_name():
+    # An operator that absorbs homotheties through a composition rule must
+    # keep its name once merged, on either side of the composition.
+    @flags.linear
+    class AbsorbOperator(Operator):
+
+        def __init__(self, **keywords):
+            Operator.__init__(self, **keywords)
+            self.set_rule(('.', HomothetyOperator), lambda s, o: s.copy(),
+                          CompositionOperator)
+
+    class sqrt(AbsorbOperator):
+        pass
+
+    class MyOp(AbsorbOperator):
+        __name__ = 'sqrt'
+
+    def func(op, name):
+        assert op.__name__ == name
+    ops = [AbsorbOperator(name='sqrt'), MyOp(), sqrt()]
+    homotheties = (I, HomothetyOperator(2))
+    for op in ops:
+        for h in homotheties:
+            yield func, op(h), 'sqrt'
+            yield func, h(op), 'sqrt'
+        yield func, h(op), name
+
+
+#=========================
+# Test operator comparison
+#=========================
+
+def test_eq():
+    # Two instances of the same class built without arguments must compare
+    # equal.
+    def func(op1, op2):
+        assert_eq(op1, op2)
+    for cls in OPS:
+        first = cls()
+        second = cls()
+        yield func, first, second
+
+
+#================
+# Test iadd, imul
+#================
+
+def test_iadd_imul():
+
+    # The augmented assignments += and *= must give the same operator as
+    # the corresponding binary operation.
+    def func(op1, op2, operation):
+        if operation is not operator.iadd:
+            op = op1 * op2.T
+            op1 *= op2.T
+        else:
+            op = op1 + op2
+            op1 += op2
+        assert_eq(op1, op)
+    operations = (operator.iadd, operator.imul)
+    for operation, cls2, cls1 in itertools.product(operations, OPS, OPS):
+        yield func, cls1(), cls2(), operation
+
+
+#===========================
+# Test attribute propagation
+#===========================
+
+def test_propagation_attribute1():
+    # The operators below declare the attributes expected on their result
+    # through 'attrout' (direct application) and 'attrin' (application of
+    # the transpose). AddAttribute and AddAttribute2 use the same attribute
+    # names with opposite values; AddAttribute3 uses other names.
+    @flags.linear
+    @flags.square
+    class AddAttribute(Operator):
+        attrout = {'newattr_direct': True}
+        attrin = {'newattr_transpose': True}
+
+        def direct(self, input, output):
+            pass
+
+        def transpose(self, input, output):
+            pass
+
+    @flags.linear
+    @flags.square
+    class AddAttribute2(Operator):
+        attrout = {'newattr_direct': False}
+        attrin = {'newattr_transpose': False}
+
+        def direct(self, input, output):
+            pass
+
+        def transpose(self, input, output):
+            pass
+
+    @flags.linear
+    @flags.square
+    class AddAttribute3(Operator):
+        attrout = {'newattr3_direct': True}
+        attrin = {'newattr3_transpose': True}
+
+        def direct(self, input, output):
+            pass
+
+        def transpose(self, input, output):
+            pass
+
+    # a plain ndarray and a view of the module-level ndarray2 subclass
+    inputs = [np.ones(5), np.ones(5).view(ndarray2)]
+
+    # out-of-place application
+    def func1(i):
+        op = AddAttribute()
+        assert op(i).newattr_direct
+        assert op.T(i).newattr_transpose
+
+        # conflicting names: the expected attrout is the one of the left
+        # operand and the expected attrin the one of the right operand
+        op = AddAttribute2() * AddAttribute()
+        assert not op(i).newattr_direct
+        assert_eq(op.attrout, {'newattr_direct': False})
+        assert_eq(op.attrin, {'newattr_transpose': True})
+        assert op.T(i).newattr_transpose
+
+        # distinct names: the attributes of both operands are expected
+        op = AddAttribute3() * AddAttribute()
+        assert op(i).newattr_direct
+        assert op(i).newattr3_direct
+        assert_eq(op.attrout, {'newattr_direct': True,
+                               'newattr3_direct': True})
+        assert_eq(op.attrin, {'newattr_transpose': True,
+                              'newattr3_transpose': True})
+        assert op.T(i).newattr_transpose
+        assert op.T(i).newattr3_transpose
+    for i in inputs:
+        yield func1, i
+
+    # same checks with the same array passed as input and output; a fresh
+    # copy of the input is used for each call and the memory pool is
+    # cleared between the cases
+    def func2(i_):
+        print()
+        print('op')
+        print('==')
+        op = AddAttribute()
+        i = i_.copy()
+        assert op(i, i).newattr_direct
+        i = i_.copy()
+        assert op.T(i, i).newattr_transpose
+
+        pool.clear()
+        print()
+        print('op2 * op')
+        print('=======')
+        op = AddAttribute2() * AddAttribute()
+        i = i_.copy()
+        assert not op(i, i).newattr_direct
+        i = i_.copy()
+        assert op.T(i, i).newattr_transpose
+
+        pool.clear()
+        print()
+        print('op3 * op')
+        print('=======')
+        op = AddAttribute3() * AddAttribute()
+        i = i_.copy()
+        o = op(i, i)
+        assert o.newattr_direct
+        assert o.newattr3_direct
+        i = i_.copy()
+        o = op.T(i, i)
+        assert o.newattr_transpose
+        assert o.newattr3_transpose
+    for i_ in inputs:
+        yield func2, i_
+
+
+def test_propagation_attribute2():
+    # The instance dictionary of the result must be exactly the attributes
+    # declared by the operator (attrout for the direct application, attrin
+    # for the transpose), although the input class also defines
+    # 'attr_class' and its instances also carry 'attr_instance'.
+    @flags.square
+    class Op(Operator):
+        attrin = {'attr_class': 1, 'attr_instance': 2, 'attr_other': 3}
+        attrout = {'attr_class': 4, 'attr_instance': 5, 'attr_other': 6}
+
+        def direct(self, input, output):
+            pass
+
+        def transpose(self, input, output):
+            pass
+
+    # local subclass, which shadows the module-level ndarray2 in this test
+    class ndarray2(np.ndarray):
+        attr_class = 10
+
+        def __new__(cls, data):
+            # NOTE(review): np.ndarray(data) takes 'data' as a shape, so
+            # ndarray2(1) is an uninitialised array of shape (1,); np.array
+            # may have been intended. The assertions below only inspect
+            # the attributes, not the values -- confirm before changing.
+            result = np.ndarray(data).view(cls)
+            result.attr_instance = 11
+            return result
+
+    op = Op()
+    output = op(ndarray2(1))
+    assert output.__dict__ == op.attrout
+    output = op.T(ndarray2(1))
+    assert output.__dict__ == op.attrin
+
+
+def test_propagation_attribute3():
+    # Check the instance attributes of the result when the operator changes
+    # the class of its input (classin for the transpose, classout for the
+    # direct application) and the classes set attributes on finalisation.
+
+    # common base class: every new array or view has attr_class and
+    # attr_instance set by __array_finalize__
+    class ndarraybase(np.ndarray):
+        attr_class = None
+
+        def __new__(cls, data):
+            result = np.array(data).view(cls)
+            return result
+
+        def __array_finalize__(self, array):
+            self.attr_class = 0
+            self.attr_instance = 10
+
+    # subclass which additionally sets attr_class1 and attr_instance1
+    class ndarray1(ndarraybase):
+        attr_class1 = None
+
+        def __new__(cls, data):
+            result = ndarraybase(data).view(cls)
+            return result
+
+        def __array_finalize__(self, array):
+            ndarraybase.__array_finalize__(self, array)
+            self.attr_class1 = 1
+            self.attr_instance1 = 11
+
+    # subclass which additionally sets attr_class2 and attr_instance2
+    # (it shadows the module-level ndarray2 in this test)
+    class ndarray2(ndarraybase):
+        attr_class2 = None
+
+        def __new__(cls, data):
+            result = ndarraybase(data).view(cls)
+            return result
+
+        def __array_finalize__(self, array):
+            ndarraybase.__array_finalize__(self, array)
+            self.attr_class2 = 2
+            self.attr_instance2 = 12
+
+    @flags.linear
+    @flags.square
+    class Op(Operator):
+        classin = ndarray1
+        classout = ndarray2
+
+        def direct(self, input, output):
+            pass
+
+        def transpose(self, input, output):
+            pass
+
+    # direct application: an ndarray1 input, then an ndarray2 input
+    op = Op()
+    input = ndarray1(1)
+    input.attr_class = 30
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 10, 'attr_instance1': 11,
+                               'attr_instance2': 12, 'attr_class': 30,
+                               'attr_class2': 2}
+    input = ndarray2(1)
+    input.attr_class = 30
+    input.attr_class2 = 32
+    input.attr_instance = 40
+    input.attr_instance2 = 42
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance2': 42,
+                               'attr_class': 30, 'attr_class2': 32}
+
+    # transpose application: an ndarray1 input, then an ndarray2 input
+    op = Op().T
+    input = ndarray1(1)
+    input.attr_class = 30
+    input.attr_class1 = 31
+    input.attr_instance = 40
+    input.attr_instance1 = 41
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance1': 41,
+                               'attr_class': 30, 'attr_class1': 31}
+    input = ndarray2(1)
+    input.attr_class = 30
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 10, 'attr_instance2': 12,
+                               'attr_instance1': 11, 'attr_class': 30,
+                               'attr_class1': 1}
+
+    op = Op().T * Op()  # -> ndarray2 -> ndarray1
+    input = ndarray1(1)
+    input.attr_class = 30
+    input.attr_class1 = 31
+    input.attr_instance = 40
+    input.attr_instance1 = 41
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance1': 41,
+                               'attr_class': 30, 'attr_class1': 1}
+    input = ndarray2(1)
+    input.attr_class = 30
+    input.attr_class2 = 32
+    input.attr_instance = 40
+    input.attr_instance2 = 42
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance1': 11,
+                               'attr_instance2': 42, 'attr_class': 30,
+                               'attr_class1': 1}
+
+    op = Op() * Op().T  # -> ndarray1 -> ndarray2
+    input = ndarray1(1)
+    input.attr_class = 30
+    input.attr_class1 = 31
+    input.attr_instance = 40
+    input.attr_instance1 = 41
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance2': 12,
+                               'attr_instance1': 41, 'attr_class': 30,
+                               'attr_class2': 2}
+    input = ndarray2(1)
+    input.attr_class = 30
+    input.attr_class2 = 32
+    input.attr_instance = 40
+    input.attr_instance2 = 42
+    output = op(input)
+    assert output.__dict__ == {'attr_instance': 40, 'attr_instance2': 42,
+                               'attr_class': 30, 'attr_class2': 2}
+
+
+#=======================
+# Test class propagation
+#=======================
+
+def check_propagation_class(op, i, c):
+    # The result of applying op to i must be exactly of class c.
+    assert_is(type(op(i)), c)
+
+
+def check_propagation_class_inplace(op, i, c):
+    # Apply op with a copy of i passed as both input and output: that
+    # array must afterwards be exactly of class c.
+    work = i.copy()
+    op(work, work)
+    assert_is(type(work), c)
+
+
+def test_propagation_class():
+    inputs = [np.ones(2), np.ones(2).view(ndarray2)]
+
+    # (operator, expected output class for each of the inputs above)
+    cases = [(I, [np.ndarray, ndarray2]),
+             (Op2(), [ndarraywrap, ndarray2]),
+             (Op2()*Op3(), [ndarray3, ndarray3]),
+             (Op3()*Op2(), [ndarray3, ndarray3])]
+
+    for op, classes in cases:
+        for i, c in zip(inputs, classes):
+            yield check_propagation_class, op, i, c
+
+
+def test_propagation_class_inplace():
+    inputs = [np.ones(2), np.ones(2).view(ndarray2), np.ones(2).view(ndarray3)]
+
+    # (operator, expected class of the in-place buffer for each input)
+    cases = [(I, [np.ndarray, ndarray2, ndarray3]),
+             (Op2(), [np.ndarray, ndarray2, ndarray3]),
+             (Op2()*Op3(), [np.ndarray, ndarray3, ndarray3]),
+             (Op3()*Op2(), [np.ndarray, ndarray3, ndarray3])]
+
+    for op, classes in cases:
+        for i, c in zip(inputs, classes):
+            yield check_propagation_class_inplace, op, i, c
+
+
+def test_propagation_classT():
+    inputs = [np.ones(2), np.ones(2).view(ndarray2)]
+
+    # (operator, expected output class of its transpose for each input)
+    cases = [(I, [np.ndarray, ndarray2]),
+             (Op2(), [np.ndarray, ndarray2]),
+             (Op2()*Op3(), [ndarray4, ndarray4]),
+             (Op3()*Op2(), [ndarray4, ndarray4])]
+
+    for op, classes in cases:
+        for i, c in zip(inputs, classes):
+            yield check_propagation_class, op.T, i, c
+
+
+def test_propagation_classT_inplace():
+    inputs = [np.ones(2), np.ones(2).view(ndarray2), np.ones(2).view(ndarray4)]
+
+    # (operator, expected class of the in-place buffer of its transpose
+    # for each input)
+    cases = [(I, [np.ndarray, ndarray2, ndarray4]),
+             (Op2(), [np.ndarray, ndarray2, ndarray4]),
+             (Op2()*Op3(), [np.ndarray, ndarray4, ndarray4]),
+             (Op3()*Op2(), [np.ndarray, ndarray4, ndarray4])]
+
+    for op, classes in cases:
+        for i, c in zip(inputs, classes):
+            yield check_propagation_class_inplace, op.T, i, c
+
+
+def test_propagation_class_nested():
+    # The output class set by O1 must be propagated when O1 is nested in
+    # additions that are themselves composed with other operators.
+    @flags.linear
+    @flags.square
+    class O1(Operator):
+        classout = ndarray2
+
+        def direct(self, input, output):
+            output[...] = input
+
+    @flags.linear
+    @flags.square
+    class O2(Operator):
+        def direct(self, input, output):
+            output[...] = input
+
+    def func2(op1, op2, expected):
+        composition = op1 * op2
+        assert_is(composition(1).__class__, expected)
+
+    def func3(op1, op2, op3, expected):
+        composition = op1 * op2 * op3
+        assert_is(composition(1).__class__, expected)
+
+    o1 = O1()
+    o2 = O2()
+    # operators that do not involve o1...
+    ops1 = [I, 2*I, o2, 2*I+o2]
+    # ...and operators that do
+    ops2 = [I+o1, 2*o1, o1+o2, o2+o1, I+2*o1, I+o1+o2, I+o2+o1, o1+I+o2,
+            o1+o2+I, o2+o1+I, o2+I+o1]
+    for op1, op2 in itertools.product(ops1, ops2):
+        yield func2, op1, op2, ndarray2
+        yield func2, op2, op1, ndarray2
+    for op1, op2, op3 in itertools.product(ops1, ops2, ops1):
+        yield func3, op1, op2, op3, ndarray2
+
+
+#========================
+# Test MPI communicators
+#========================
+
+def test_comm_composite():
+    comms_all = (None, MPI.COMM_SELF, MPI.COMM_WORLD)
+
+    # A composite operator must take the first communicator of its
+    # operands that is not None, and refuse operands that mix COMM_SELF
+    # and COMM_WORLD.
+    def func(cls, comms, inout):
+        attr = 'comm' + inout
+        ops = [Operator(**{attr: c}) for c in comms]
+        # the block operators require a partition axis
+        if cls in (BlockDiagonalOperator, BlockRowOperator):
+            keywords = {'axisin': 0}
+        elif cls is BlockColumnOperator:
+            keywords = {'axisout': 0}
+        else:
+            keywords = {}
+        if MPI.COMM_SELF in comms and MPI.COMM_WORLD in comms:
+            assert_raises(ValueError, cls, ops, **keywords)
+            return
+        op = cls(ops, **keywords)
+        assert_is(getattr(op, attr), first_is_not(comms, None))
+    classes = (AdditionOperator, MultiplicationOperator, BlockRowOperator,
+               BlockDiagonalOperator, BlockColumnOperator)
+    for cls in classes:
+        for comms in itertools.combinations_with_replacement(comms_all, 3):
+            for inout in ('in', 'out'):
+                yield func, cls, comms, inout
+if PYTHON_26:
+    test_comm_composite = skiptest(test_comm_composite)
+
+
+def test_comm_composition():
+    comms_all = (None, MPI.COMM_SELF, MPI.COMM_WORLD)
+
+    # In a composition, the input communicator of the left operand and the
+    # output communicator of the right operand must not be two different
+    # communicators; otherwise the composition exposes them unchanged.
+    def func(commin, commout):
+        ops = [Operator(commin=commin), Operator(commout=commout)]
+        mismatch = None not in (commin, commout) and commin is not commout
+        if mismatch:
+            assert_raises(ValueError, CompositionOperator, ops)
+            return
+        op = CompositionOperator(ops)
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commout)
+    for commin in comms_all:
+        for commout in comms_all:
+            yield func, commin, commout
+
+
+def test_comm_propagation():
+    # Check that a communicator set by one operand is propagated to the
+    # other operands of compositions and composites, including nested ones.
+    composite = (AdditionOperator, MultiplicationOperator, BlockRowOperator,
+                 BlockDiagonalOperator, BlockColumnOperator)
+    # the trailing-underscore aliases let the class bodies below refer to
+    # these values while defining attributes of the same name
+    commin = commin_ = MPI.COMM_WORLD.Dup()
+    commout = commout_ = MPI.COMM_WORLD.Dup()
+
+    # operator that reacts to a propagated communicator by returning a new
+    # operator (OpNewComm) using it as input and output communicator
+    class OpGetComm(Operator):
+        def propagate_commin(self, comm):
+            return OpNewComm(commin=comm, commout=comm)
+
+    class OpNewComm(Operator):
+        pass
+
+    # operator with different input and output communicators
+    class OpSetComm1(Operator):
+        commin = commin_
+        commout = commout_
+
+    # operator with the same input and output communicator
+    class OpSetComm2(Operator):
+        commin = commin_
+        commout = commin_
+
+    opgetcomm = OpGetComm()
+    opsetcomm1 = OpSetComm1()
+    opsetcomm2 = OpSetComm2()
+
+    # get+set in composition
+    def func1(i, op):
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commout)
+        opget = op.operands[i]
+        assert_is_instance(opget, OpNewComm)
+        # as left operand, the new operator is expected to get the output
+        # communicator; as right operand, the input communicator
+        if i == 0:
+            assert_is(opget.commin, commout)
+            assert_is(opget.commout, commout)
+        else:
+            assert_is(opget.commin, commin)
+            assert_is(opget.commout, commin)
+
+    # i is the position of opgetcomm among the operands
+    for i, ops in enumerate([(opgetcomm, opsetcomm1),
+                             (opsetcomm1, opgetcomm)]):
+        op = CompositionOperator(ops)
+        yield func1, i, op
+
+    # get+set in composite
+    def func2(i, op):
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commin)
+        opget = op.operands[i]
+        assert_is_instance(opget, OpNewComm)
+        assert_is(opget.commin, commin)
+        assert_is(opget.commout, commin)
+
+    for cls in composite:
+        for i, ops in enumerate([(opgetcomm, opsetcomm2),
+                                 (opsetcomm2, opgetcomm)]):
+            # the block operators require a partition axis
+            keywords = {}
+            if cls in (BlockDiagonalOperator, BlockRowOperator):
+                keywords = {'axisin': 0}
+            elif cls is BlockColumnOperator:
+                keywords = {'axisout': 0}
+            op = cls(ops, **keywords)
+            yield func2, i, op
+
+    # composition(get) + set in composite
+    def func3(i, op):
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commin)
+        compget = op.operands[i]
+        assert_is(compget.commin, commin)
+        assert_is(compget.commout, commin)
+        # in both cases below, opgetcomm has the same position in the inner
+        # composition as the composition has in the composite
+        opget = op.operands[i].operands[i]
+        assert_is_instance(opget, OpNewComm)
+        assert_is(opget.commin, commin)
+        assert_is(opget.commout, commin)
+    for cls in composite:
+        for i, ops in enumerate([(opgetcomm(Operator()), opsetcomm2),
+                                 (opsetcomm2, Operator()(opgetcomm))]):
+            keywords = {}
+            if cls in (BlockDiagonalOperator, BlockRowOperator):
+                keywords = {'axisin': 0}
+            elif cls is BlockColumnOperator:
+                keywords = {'axisout': 0}
+            op = cls(ops, **keywords)
+            yield func3, i, op
+
+    # composite(set) + get in composition
+
+    def func4(i, op):
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commin)
+        opget = op.operands[i]
+        assert_is_instance(opget, OpNewComm)
+        assert_is(opget.commin, commin)
+        assert_is(opget.commout, commin)
+    for cls in composite:
+        keywords = {}
+        if cls in (BlockDiagonalOperator, BlockRowOperator):
+            keywords = {'axisin': 0}
+        elif cls is BlockColumnOperator:
+            keywords = {'axisout': 0}
+        for ops_in in [(opsetcomm2, Operator()), (Operator(), opsetcomm2)]:
+            op_in = cls(ops_in, **keywords)
+            # i is the position of opgetcomm in the outer composition
+            for i, op in enumerate([opgetcomm(op_in), op_in(opgetcomm)]):
+                yield func4, i, op
+
+    # composite(get) + set in composition
+    def func5(i, j, op):
+        assert_is(op.commin, commin)
+        assert_is(op.commout, commin)
+        compget = op.operands[j]
+        assert_is(compget.commin, commin)
+        assert_is(compget.commout, commin)
+        opget = compget.operands[i]
+        assert_is_instance(opget, OpNewComm)
+        assert_is(opget.commin, commin)
+        assert_is(opget.commout, commin)
+    for cls in composite:
+        keywords = {}
+        if cls in (BlockDiagonalOperator, BlockRowOperator):
+            keywords = {'axisin': 0}
+        elif cls is BlockColumnOperator:
+            keywords = {'axisout': 0}
+        # i is the position of opgetcomm in the composite and j the
+        # position of the composite in the outer composition
+        for i, ops_in in enumerate([(opgetcomm, Operator()),
+                                    (Operator(), opgetcomm)]):
+            op_in = cls(ops_in, **keywords)
+            for j, op in enumerate([op_in(opsetcomm2), opsetcomm2(op_in)]):
+                yield func5, i, j, op
+
+
+#===========================
+# Test in-place/out-of-place
+#===========================
+
+def test_inplace1():
+    # The operator below overwrites its output before reading its input,
+    # so it cannot work on aliased buffers. Called with the same array as
+    # input and output, it must still give the right result, and one
+    # buffer is expected in the memory pool afterwards.
+    @flags.square
+    class NotInplace(Operator):
+        def direct(self, input, output):
+            output[...] = 0
+            output[0] = input[0]
+    pool.clear()
+    vector = np.array([2., 0., 1.])
+    NotInplace()(vector, vector)
+    assert_eq(vector, [2, 0, 0])
+    assert_eq(len(pool), 1)
+
+
+def setup_memory():
+    # Save the memory settings in module-level variables (restored by
+    # teardown_memory), then override them for the test.
+    global old_memory_tolerance, old_memory_verbose
+    old_memory_tolerance, old_memory_verbose = (memory.MEMORY_TOLERANCE,
+                                                memory.verbose)
+    # ensure buffers in the pool are always used
+    memory.MEMORY_TOLERANCE = np.inf
+    memory.verbose = True
+
+
+def teardown_memory():
+    # Restore the memory settings saved by setup_memory.
+    memory.MEMORY_TOLERANCE, memory.verbose = (old_memory_tolerance,
+                                               old_memory_verbose)
+
+
+@skiptest
+@with_setup(setup_memory, teardown_memory)
+def test_inplace_can_use_output():
+    # For every composition of n in-place ('I') or out-of-place ('O')
+    # operators, log the buffer each operand writes to (A, B, C or D, '?'
+    # for any other buffer) and compare the log with the expected sequence.
+    A = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    B = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    C = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    D = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    # map the address of each buffer to its label
+    ids = {A.__array_interface__['data'][0]: 'A',
+           B.__array_interface__['data'][0]: 'B',
+           C.__array_interface__['data'][0]: 'C',
+           D.__array_interface__['data'][0]: 'D'}
+
+    # operator whose output has one more element than its input and which
+    # prepends the label of its output buffer to a shared log
+    class Op(Operator):
+        def __init__(self, inplace, log):
+            Operator.__init__(self, flags={'inplace': inplace})
+            self.inplace = inplace
+            self.log = log
+
+        def direct(self, input, output):
+            # an out-of-place operator must never be given aliased buffers
+            if not self.inplace and isalias(input, output):
+                raise RuntimeError()
+            if self.flags.inplace:
+                # save the first element before the shifted write
+                tmp = input[0]
+                output[1:] = 2 * input
+                output[0] = tmp
+            else:
+                output[:] = 0
+                output[0] = input[0]
+                output[1:] = 2 * input
+            try:
+                self.log.insert(0, ids[output.__array_interface__['data'][0]])
+            except KeyError:
+                self.log.insert(0, '?')
+
+        def reshapein(self, shape):
+            return (shape[0]+1,)
+
+    # labels of the buffers currently in the memory pool
+    def show_pool():
+        result = ''
+        for s in pool:
+            try:
+                result += ids[s.__array_interface__['data'][0]]
+            except KeyError:
+                # buffer which is none of A, B, C, D
+                result += '?'
+        return result
+
+    # expected logs when input (A) and output (B) are distinct buffers
+    expecteds_outplace = {
+        2: ['BBA',     # II
+            'BBA',     # IO
+            'BCA',     # OI
+            'BCA'],    # OO
+        3: ['BBBA',    # III
+            'BBBA',    # IIO
+            'BBCA',    # IOI
+            'BBCA',    # IOO
+            'BCCA',    # OII
+            'BCCA',    # OIO
+            'BCBA',    # OOI
+            'BCBA'],   # OOO
+        4: ['BBBBA',   # IIII
+            'BBBBA',   # IIIO
+            'BBBCA',   # IIOI
+            'BBBCA',   # IIOO
+            'BBCCA',   # IOII
+            'BBCCA',   # IOIO
+            'BBCBA',   # IOOI
+            'BBCBA',   # IOOO
+            'BCCCA',   # OIII
+            'BCCCA',   # OIIO
+            'BCCBA',   # OIOI
+            'BCCBA',   # OIOO
+            'BCBBA',   # OOII
+            'BCBBA',   # OOIO
+            'BCBCA',   # OOOI
+            'BCBCA']}  # OOOO
+
+    # expected logs when input and output are both taken from buffer A
+    expecteds_inplace = {
+        2: ['AAA',     # II
+            'ABA',     # IO
+            'ABA',     # OI
+            'ABA'],    # OO
+        3: ['AAAA',    # III
+            'ABBA',    # IIO
+            'ABAA',    # IOI
+            'AABA',    # IOO
+            'ABAA',    # OII
+            'ABBA',    # OIO
+            'ABAA',    # OOI
+            'ACBA'],   # OOO
+        4: ['AAAAA',   # IIII
+            'ABBBA',   # IIIO
+            'ABBAA',   # IIOI
+            'AAABA',   # IIOO
+            'ABAAA',   # IOII
+            'AABBA',   # IOIO
+            'AABAA',   # IOOI
+            'ABABA',   # IOOO
+            'ABAAA',   # OIII
+            'ABBBA',   # OIIO
+            'ABBAA',   # OIOI
+            'ABABA',   # OIOO
+            'ABAAA',   # OOII
+            'ABABA',   # OOIO
+            'ABABA',   # OOOI
+            'ABABA']}  # OOOO
+
+    def func_outplace(n, i, expected, strops):
+        pool._buffers = [C, D]
+        log = []
+        ops = [Op(s == '1', log) for s in strops]
+        op = CompositionOperator(ops)
+        op.show_pool = show_pool  # debug
+        v = A[:8].view(float)
+        v[0] = 1
+        w = B[:(n+1)*8].view(float)
+        op(v, w)
+        log = ''.join(log) + 'A'
+        assert_equal(log, expected)
+        # the pool must be left as it was
+        assert_equal(show_pool(), 'CD')
+        # reference result: apply the operands one by one, out-of-place
+        w2 = v
+        for op in reversed(ops):
+            w2 = op(w2)
+        assert_equal(w, w2)
+
+    def func_inplace(n, i, expected, strops):
+        pool._buffers = [B, C]
+        log = []
+        ops = [Op(s == '1', log) for s in strops]
+        op = CompositionOperator(ops)
+        v = A[:8].view(float)
+        v[0] = 1
+        w = A[:(n+1)*8].view(float)
+        op(v, w)
+        log = ''.join(log) + 'A'
+        assert_equal(log, expected)
+        # the pool must be left as it was
+        assert_equal(show_pool(), 'BC')
+        # reference result: apply the operands one by one, out-of-place
+        w2 = v
+        for op in reversed(ops):
+            w2 = op(w2)
+        assert_equal(w, w2)
+
+    # strops is the n-digit binary form of i, where '1' stands for an
+    # in-place operand; i decreases so that the all-in-place case is first
+    for n in [2, 3, 4]:
+        for i, expected in zip(reversed(range(2**n)), expecteds_outplace[n]):
+            strops = bin(i)[2:].zfill(n)
+            yield func_outplace, n, i, expected, strops
+
+    for n in [2, 3, 4]:
+        for i, expected in zip(reversed(range(2**n)), expecteds_inplace[n]):
+            strops = bin(i)[2:].zfill(n)
+            yield func_inplace, n, i, expected, strops
+
+
+@skiptest
+@with_setup(setup_memory, teardown_memory)
+def test_inplace_cannot_use_output():
+    A = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    B = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    C = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    D = np.zeros(10*8, dtype=np.int8).view(ndarraywrap)
+    ids = {A.__array_interface__['data'][0]: 'A',  # buffer address -> label
+           B.__array_interface__['data'][0]: 'B',
+           C.__array_interface__['data'][0]: 'C',
+           D.__array_interface__['data'][0]: 'D'}
+
+    class Op(Operator):
+        def __init__(self, inplace, log):
+            Operator.__init__(self, flags={'inplace': inplace})
+            self.inplace = inplace
+            self.log = log
+
+        def direct(self, input, output):
+            if not self.inplace and isalias(input, output):
+                raise RuntimeError()
+            if not self.inplace:
+                output[:] = 0
+            output[:] = input[1:]
+            try:  # prepend the label of the buffer that was just written
+                self.log.insert(0, ids[output.__array_interface__['data'][0]])
+            except KeyError:
+                self.log.insert(0, '?')
+
+        def reshapein(self, shape):
+            return (shape[0]-1,)  # the output has one element less
+
+    def show_stack():
+        return ''.join([ids[s.__array_interface__['data'][0]] for s in pool])
+
+    expecteds_outplace = {  # keyed by the number of composed operators
+        2: ['BCA',     # II
+            'BCA',     # IO
+            'BCA',     # OI
+            'BCA'],    # OO
+        3: ['BCCA',    # III
+            'BCCA',    # IIO
+            'BDCA',    # IOI
+            'BDCA',    # IOO
+            'BCCA',    # OII
+            'BCCA',    # OIO
+            'BDCA',    # OOI
+            'BDCA'],   # OOO
+        4: ['BCCCA',   # IIII
+            'BCCCA',   # IIIO
+            'BDDCA',   # IIOI
+            'BDDCA',   # IIOO
+            'BDCCA',   # IOII
+            'BDCCA',   # IOIO
+            'BCDCA',   # IOOI
+            'BCDCA',   # IOOO
+            'BCCCA',   # OIII
+            'BCCCA',   # OIIO
+            'BDDCA',   # OIOI
+            'BDDCA',   # OIOO
+            'BDCCA',   # OOII
+            'BDCCA',   # OOIO
+            'BCDCA',   # OOOI
+            'BCDCA']}  # OOOO
+
+    expecteds_inplace = {  # keyed by the number of composed operators
+        2: ['ABA',     # II
+            'ABA',     # IO
+            'ABA',     # OI
+            'ABA'],    # OO
+        3: ['ABBA',    # III
+            'ABBA',    # IIO
+            'ACBA',    # IOI
+            'ACBA',    # IOO
+            'ABBA',    # OII
+            'ABBA',    # OIO
+            'ACBA',    # OOI
+            'ACBA'],   # OOO
+        4: ['ABBBA',   # IIII
+            'ABBBA',   # IIIO
+            'ACCBA',   # IIOI
+            'ACCBA',   # IIOO
+            'ACBBA',   # IOII
+            'ACBBA',   # IOIO
+            'ABCBA',   # IOOI
+            'ABCBA',   # IOOO
+            'ABBBA',   # OIII
+            'ABBBA',   # OIIO
+            'ACCBA',   # OIOI
+            'ACCBA',   # OIOO
+            'ACBBA',   # OOII
+            'ACBBA',   # OOIO
+            'ABCBA',   # OOOI
+            'ABCBA']}  # OOOO
+
+    def func_outplace(n, i, expected, strops):
+        pool._buffers = [C, D]  # the pool starts with the two spare buffers
+        log = []
+        ops = [Op(s == '1', log) for s in strops]  # '1' means in-place
+        op = CompositionOperator(ops)
+        op.show_stack = show_stack
+        v = A[:(n+1)*8].view(float)
+        v[:] = range(n+1)
+        w = B[:8].view(float)  # output buffer distinct from the input
+        op(v, w)
+        delattr(op, 'show_stack')
+        log = ''.join(log) + 'A'
+        assert_equal(log, expected)
+        assert_equal(show_stack(), 'CD')
+        w2 = v
+        for op in reversed(ops):
+            w2 = op(w2)
+        assert_equal(w, w2)
+
+    def func_inplace(n, i, expected, strops):
+        pool._buffers = [B, C]  # the pool starts with the two spare buffers
+        log = []
+        ops = [Op(s == '1', log) for s in strops]  # '1' means in-place
+        op = CompositionOperator(ops)
+        op.show_stack = show_stack
+        v = A[:(n+1)*8].view(float)
+        v[:] = range(n+1)
+        w = A[:8].view(float)  # output shares the start of the input buffer
+        op(v, w)
+        delattr(op, 'show_stack')
+        log = ''.join(log) + 'A'
+        assert_equal(log, expected)
+        assert_equal(show_stack(), 'BC')
+        w2 = v
+        for op in reversed(ops):
+            w2 = op(w2)
+        assert_equal(w, w2)
+
+    for n in [2, 3, 4]:
+        for i, expected in zip(reversed(range(2**n)), expecteds_outplace[n]):
+            strops = bin(i)[2:]
+            while len(strops) != n:  # left-pad with zeros to n digits
+                strops = '0' + strops
+            yield func_outplace, n, i, expected, strops
+
+    for n in [2, 3, 4]:
+        for i, expected in zip(reversed(range(2**n)), expecteds_inplace[n]):
+            strops = bin(i)[2:]
+            while len(strops) != n:  # left-pad with zeros to n digits
+                strops = '0' + strops
+            yield func_inplace, n, i, expected, strops
+
+
+#====================
+# Test associativity
+#====================
+
+def test_associativity():
+    # nested composites of the same kind must be flattened into one
+    class Op1(Operator):
+        pass
+
+    class Op2(Operator):
+        pass
+
+    class Op3(Operator):
+        pass
+
+    class Op4(Operator):
+        pass
+
+    # composite and operator
+    def func1(cls, op):
+        assert_is_instance(op, cls)
+        assert_eq(len(op.operands), 3)
+        if all(isinstance(o, c) for o, c in zip(op.operands, [Op2, Op3, Op1])):
+            raise SkipTest()  # commutative rules do not preserve order...
+        for o, c in zip(op.operands, [Op1, Op2, Op3]):
+            assert_is_instance(o, c)
+    for operation in (AdditionOperator, MultiplicationOperator,
+                      CompositionOperator):
+        yield func1, operation, operation([operation([Op1(), Op2()]), Op3()])
+        yield func1, operation, operation([Op1(), operation([Op2(), Op3()])])
+
+    # composite and composite
+    def func2(cls, op):
+        assert_is_instance(op, cls)
+        assert_eq(len(op.operands), 4)
+        for o, c in zip(op.operands, [Op1, Op2, Op3, Op4]):
+            assert_is_instance(o, c)
+    for operation in (AdditionOperator, MultiplicationOperator,
+                      CompositionOperator):
+        yield func2, operation, operation([operation([Op1(), Op2()]),
+                                           operation([Op3(), Op4()])])
+
+    a = GroupOperator([Op1(), Op2()])
+    b = GroupOperator([Op3(), Op4()])
+
+    def func3(o1, o2):  # a GroupOperator must stay a single operand
+        op = o1(o2)
+        assert_is_instance(op, CompositionOperator)
+        assert_eq(len(op.operands), 2)
+        assert_is(op.operands[0], o1)
+        assert_is(op.operands[1], o2)
+    for o1, o2 in [(Op1(), a), (a, Op1()), (a, b)]:
+        yield func3, o1, o2
+
+
+#================
+# Test composite
+#================
+
+def test_composite():  # operands are stored as a list for any iterable
+    operands = [Operator(shapein=2, flags='square'),
+                Operator(shapein=2, flags='square'),
+                Operator(shapein=2, flags='square')]
+
+    def func(cls, ops):  # build cls with the extra arguments it requires
+        if cls is BlockColumnOperator:
+            op = cls(ops, axisout=0)
+        elif cls in (BlockDiagonalOperator, BlockRowOperator):
+            op = cls(ops, axisin=0)
+        elif cls is BlockSliceOperator:
+            op = cls(ops, (slice(i, i + 2) for i in (0, 2, 4)))
+        else:
+            op = cls(ops)
+        assert_is_type(op.operands, list)
+
+    for cls in (
+            AdditionOperator, BlockColumnOperator, BlockDiagonalOperator,
+            BlockRowOperator, BlockSliceOperator, CompositionOperator,
+            GroupOperator, MultiplicationOperator):
+        for ops in operands, tuple(operands), (_ for _ in operands):
+            yield func, cls, ops  # ops is a list, a tuple, then a generator
+
+
+#==================
+# Test commutative
+#==================
+
+def test_addition():  # sums of operators and the pool buffers they leave
+    @flags.square
+    class Op(Operator):
+        def __init__(self, v, **keywords):
+            self.v = v
+            Operator.__init__(self, **keywords)
+
+        def direct(self, input, output):
+            np.multiply(input, self.v, output)  # output = v * input
+
+    op = np.sum([Op(v) for v in [1]])
+    assert_is(op.__class__, Op)  # a single operand is not wrapped
+
+    op = np.sum([Op(v) for v in [1, 2]])
+    assert_eq(op.__class__, AdditionOperator)
+
+    pool.clear()
+    assert_eq(op(1), 3)
+    assert_eq(len(pool), 1)  # one buffer is left in the pool
+
+    op = np.sum([Op(v) for v in [1, 2, 4]])
+    assert_is(op.__class__, AdditionOperator)
+
+    pool.clear()
+    input = np.array(1, int)
+    output = np.array(0, int)
+    assert_eq(op(input, output), 7)
+    assert_eq(input, 1)  # the input is left untouched
+    assert_eq(output, 7)
+    assert_eq(len(pool), 1)
+
+    pool.clear()
+    output = input  # in-place call: input and output are the same array
+    assert_eq(op(input, output), 7)
+    assert_eq(input, 7)
+    assert_eq(output, 7)
+    assert_eq(len(pool), 2)  # the in-place call leaves two buffers
+
+
+def test_addition_flags():  # a flag set on both operands is set on the sum
+    def func(f):
+        operands = [Operator(flags=f), Operator(flags=f)]
+        assert getattr(AdditionOperator(operands).flags, f)
+    for flag in 'linear real square symmetric hermitian separable'.split():
+        yield func, flag
+
+
+def test_multiplication():  # products of operators and pool buffers left
+    @flags.square
+    class Op(Operator):
+        def __init__(self, v, **keywords):
+            self.v = v
+            Operator.__init__(self, **keywords)
+
+        def direct(self, input, output):
+            np.multiply(input, self.v, output)  # output = v * input
+
+    pool.clear()
+    op = MultiplicationOperator([Op(v) for v in [1]])
+    assert_is(op.__class__, Op)  # a single operand is not wrapped
+
+    op = MultiplicationOperator([Op(v) for v in [1,2]])
+    assert_eq(op.__class__, MultiplicationOperator)
+    assert_eq(op(1), 2)
+    assert_eq(len(pool), 1)  # one buffer is left in the pool
+
+    op = MultiplicationOperator([Op(v) for v in [1,2,4]])
+    assert_is(op.__class__, MultiplicationOperator)
+
+    input = np.array(1, int)
+    output = np.array(0, int)
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 1)  # the input is left untouched
+    assert_eq(output, 8)
+    assert_eq(len(pool), 1)
+
+    output = input  # in-place call: input and output are the same array
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 8)
+    assert_eq(output, 8)
+    assert_eq(len(pool), 2)  # the pool is not cleared between the checks
+
+
+def test_multiplication_flags():
+    # a flag set on both operands must be set on their product
+    def func(f):
+        operands = [Operator(flags=f), Operator(flags=f)]
+        assert getattr(MultiplicationOperator(operands).flags, f)
+    for flag in ['real', 'square', 'separable']:
+        yield func, flag
+
+
+def test_commutative_shapes():
+    # NOTE(review): OPS names seem to be <output kind><input kind> - confirm
+    def func(cls, OP1, OP2):
+        n1 = OP1.__name__
+        n2 = OP2.__name__
+        op = cls([OP1(), OP2()])
+
+        shape_output = op.flags.shape_output  # checked against name[:4]
+        if 'Expl' in (n1[:4], n2[:4]):
+            assert shape_output == 'explicit'
+        elif n1[4:] == 'Expl' and n2[:4] == 'Impl' or \
+             n2[4:] == 'Expl' and n1[:4] == 'Impl':
+            assert shape_output == 'explicit'
+        elif 'Impl' in (n1[:4], n2[:4]):
+            assert shape_output == 'implicit'
+        else:
+            assert shape_output == 'unconstrained'
+
+        shape_input = op.flags.shape_input  # checked against name[4:]
+        if 'Expl' in (n1[4:], n2[4:]):
+            assert shape_input == 'explicit'
+        elif n1[:4] == 'Expl' and n2[4:] == 'Impl' or \
+             n2[:4] == 'Expl' and n1[4:] == 'Impl':
+            assert shape_input == 'explicit'
+        elif 'Impl' in (n1[4:], n2[4:]):
+            assert shape_input == 'implicit'
+        else:
+            assert shape_input == 'unconstrained'
+
+    for cls in (AdditionOperator, MultiplicationOperator):
+        for OP1, OP2 in itertools.product(OPS, repeat=2):  # ordered pairs
+            yield func, cls, OP1, OP2
+
+
+#==================
+# Test Block slice
+#==================
+
+def test_block_slice():  # each operand acts on its own slice of the input
+    size = 4
+
+    def func(o, input, expected):
+        actual = o(input)
+        assert_eq(actual, expected)
+        o(input, input)  # second pass: same check with an in-place call
+        assert_eq(input, expected)
+    for ndim in range(1, 5):
+        for nops in range(1, 5):
+            for Op in [HomothetyOperator, HomothetyOutplaceOperator]:
+                slices_ = [
+                    [split(size, nops, i) for i in range(nops)],
+                    [split(size, size, i) for i in range(nops)],
+                    [ndim * [slice(i, None, nops)] for i in range(nops)]]
+                for slices in slices_:
+                    input = np.zeros(ndim*(size,))
+                    expected = np.zeros_like(input)
+                    ops = [Op(i+1) for i in range(nops)]
+                    for i, s in enumerate(slices):
+                        input[s] = 10 * (i+1)
+                        expected[s] = input[s] * (i+1)  # operand i: x (i+1)
+                    o = BlockSliceOperator(ops, slices)
+                    assert o.flags.inplace is Op.flags.inplace
+                    yield func, o, input, expected
+
+
+def test_block_slice_rule_homothety():  # the scalar is absorbed per block
+    h3 = HomothetyOperator(3)
+    blocks = BlockSliceOperator([h3, h3], [slice(0, 10), slice(12, 14)])
+    scaled = HomothetyOperator(2) * blocks
+    assert_is_instance(scaled, BlockSliceOperator)
+    for operand in scaled.operands:
+        assert_is_instance(operand, HomothetyOperator)
+        assert_eq(operand.data, 6)
+
+
+#==================
+# Test composition
+#==================
+
+def test_composition1():
+    # the composition takes shapein from op1 and shapeout from op2
+    def func(op, shapein, shapeout):
+        assert_eq(op.shapein, shapein)
+        assert_eq(op.shapeout, shapeout)
+        if shapein is not None and shapein == shapeout:
+            assert_flags(op, 'square')
+    for shapein in SHAPES:
+        for shapemid in SHAPES:
+            if shapemid is None and shapein is not None:
+                continue  # this combination is not tested
+            op1 = Operator(shapein=shapein, shapeout=shapemid)
+            for shapeout in SHAPES:
+                if shapeout is None and shapemid is not None:
+                    continue  # this combination is not tested
+                op2 = Operator(shapein=shapemid, shapeout=shapeout)
+                op = op2(op1)
+                yield func, op, shapein, shapeout
+
+
+def test_composition2():  # composing with an operator that has reshapein
+    class Op(Operator):
+        def reshapein(self, shapein):
+            return 2*shapein  # tuple repetition: the shape is doubled
+
+    def func(op, shape):
+        assert op.shapein is None
+        assert op.shapeout == (2*shape if shape is not None else None)
+        assert_flags_false(op, 'square')
+    for shape in SHAPES:
+        op = Op()(Operator(shapeout=shape))
+        yield func, op, shape
+
+    op = Op()(Op())  # checked right here, not yielded as a test case
+    assert op.shapein is None
+    assert op.shapeout is None
+    assert_flags_false(op, 'square')
+
+
+def test_composition3():  # in-place compositions leave the pool empty
+    @flags.linear
+    @flags.square
+    @flags.inplace
+    class Op(Operator):
+        def __init__(self, v, **keywords):
+            self.v = v
+            Operator.__init__(self, **keywords)
+
+        def direct(self, input, output):
+            np.multiply(input, self.v, output)  # output = v * input
+
+    pool.clear()
+    op = np.product([Op(v) for v in [1]])
+    assert_is(op.__class__, Op)  # a single operand is not wrapped
+    op(1)
+    assert_eq(len(pool), 0)
+
+    pool.clear()
+    op = np.product([Op(v) for v in [1, 2]])
+    assert_is(op.__class__, CompositionOperator)
+    assert_eq(op(1), 2)
+    assert_eq(len(pool), 0)
+
+    pool.clear()
+    assert_eq(op([1]), 2)
+    assert_eq(len(pool), 0)
+
+    op = np.product([Op(v) for v in [1, 2, 4]])
+    assert_is(op.__class__, CompositionOperator)
+
+    pool.clear()
+    input = np.array(1, int)  # 0-d arrays, separate output
+    output = np.array(0, int)
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 1)
+    assert_eq(output, 8)
+    assert_eq(len(pool), 0)
+
+    pool.clear()
+    output = input  # 0-d arrays, in-place call
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 8)
+    assert_eq(len(pool), 0)
+
+    pool.clear()
+    input = np.array([1], int)  # 1-d arrays, separate output
+    output = np.array([0], int)
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 1)
+    assert_eq(output, 8)
+    assert_eq(len(pool), 0)
+
+    pool.clear()
+    output = input  # 1-d arrays, in-place call
+    assert_eq(op(input, output), 8)
+    assert_eq(input, 8)
+    assert_eq(output, 8)
+    assert_eq(len(pool), 0)
+
+
+def test_composition_flags():
+    def func1(f):  # flag set on both operands
+        o = CompositionOperator([Operator(flags=f), Operator(flags=f)])
+        assert getattr(o.flags, f)
+    for f in 'linear,real,square,separable'.split(','):
+        yield func1, f
+
+    def func2(f):  # input flags come from the last operand
+        o = CompositionOperator([Operator(), Operator(flags=f)])
+        assert getattr(o.flags, f)
+    for f in 'aligned_input,contiguous_input'.split(','):
+        yield func2, f
+
+    def func3(f):  # output flags come from the first operand
+        o = CompositionOperator([Operator(flags=f), Operator()])
+        assert getattr(o.flags, f)
+    for f in 'aligned_output,contiguous_output'.split(','):
+        yield func3, f
+
+    def func4(f):  # flag set only when the first operand is an OperatorIR
+        o = CompositionOperator([Operator(), Operator()])
+        assert not getattr(o.flags, f)
+        o = CompositionOperator([OperatorIR(), Operator()])
+        assert getattr(o.flags, f)
+    yield func4, 'update_output'
+
+
+def test_composition_shapes():
+    def func(OP1, OP2):  # NOTE(review): names look like <out><in> kinds
+        n1 = OP1.__name__
+        n2 = OP2.__name__
+        if n1[4:] == 'Expl' and n2[:4] == 'Expl':
+            op = OP1() * OP2(shapeout=3)
+        else:
+            op = OP1() * OP2()
+
+        shape_output = op.flags.shape_output  # driven first by OP1
+        if n1[:4] == 'Unco':
+            assert shape_output == 'unconstrained'
+        elif n1[:4] == 'Expl':
+            assert shape_output == 'explicit'
+        elif n2[:4] == 'Expl':
+            assert shape_output == 'explicit'
+        elif n2[:4] == 'Impl':
+            assert shape_output == 'implicit'
+        else:
+            assert shape_output == 'unconstrained'
+
+        shape_input = op.flags.shape_input  # driven first by OP2
+        if n2[4:] == 'Unco':
+            assert shape_input == 'unconstrained'
+        elif n2[4:] == 'Expl':
+            assert shape_input == 'explicit'
+        elif n1[4:] == 'Expl':
+            assert shape_input == 'explicit'
+        elif n1[4:] == 'Impl':
+            assert shape_input == 'implicit'
+        else:
+            assert shape_input == 'unconstrained'
+
+    for OP1, OP2 in itertools.product(OPS, repeat=2):
+        yield func, OP1, OP2
+
+
+def test_composition_get_requirements():
+    @flags.inplace
+    class I__(Operator):
+        pass
+
+    @flags.aligned
+    @flags.contiguous
+    class IAC(I__):
+        pass
+
+    class O____(Operator):
+        pass
+
+    @flags.aligned_input
+    @flags.contiguous_input
+    class O__AC(O____):
+        pass
+
+    @flags.aligned_output
+    @flags.contiguous_output
+    class OAC__(O____):
+        pass
+
+    @flags.aligned
+    @flags.contiguous
+    class OACAC(O____):
+        pass
+
+    Is = [I__(), IAC()]
+    Os = [O____(), O__AC(), OAC__(), OACAC()]
+
+    tests ={'I'  : [[0]],
+            'O'  : [[0], []],
+            'II' : [[0, 1]],
+            'IO' : [[0, 1], []],
+            'OI' : [[0], [1]],
+            'OO' : [[0], [1], []],
+            'III': [[0, 1, 2]],
+            'IIO': [[0, 1, 2], []],
+            'IOI': [[0, 1], [2]],
+            'IOO': [[0, 1], [2], []],
+            'OII': [[0], [1, 2]],
+            'OIO': [[0], [1, 2], []],
+            'OOI': [[0], [1], [2]],
+            'OOO': [[0], [1], [2], []]}
+
+    def get_requirements(ops, t, g):  # reference values for pattern t
+        rn = [len(_) for _ in g]
+        for i in range(len(rn)-1):
+            rn[i] -= 1
+
+        ra = [max(ops[i].flags.aligned_output for i in g[0])] + \
+             [max([ops[_[0]-1].flags.aligned_input] +
+                  [ops[i].flags.aligned_output for i in _]) for _ in g[1:-1]]+\
+             ([max(ops[i].flags.aligned_input for i in range(t.rfind('O'),
+              len(ops)))] if len(g) > 1 else [])
+        rc = [max(ops[i].flags.contiguous_output for i in g[0])] + \
+             [max([ops[_[0]-1].flags.contiguous_input] +
+                 [ops[i].flags.contiguous_output for i in _])for _ in g[1:-1]]+\
+             ([max(ops[i].flags.contiguous_input for i in  range(t.rfind('O'),
+              len(ops)))] if len(g) > 1 else [])
+        return rn, ra, rc
+
+    c = CompositionOperator(Is)  # its operands are replaced in the loop
+
+    def func(t, rn1, rn2, ra1, ra2, rc1, rc2):
+        assert rn1 == rn2
+        assert ra1 == ra2  # NOTE(review): rc1/rc2 are never compared
+    for t, g in tests.items():
+        it = [Is if _ == 'I' else Os for _ in t]
+        for ops in itertools.product(*it):
+            c.operands = ops
+            rn1, ra1, rc1 = c._get_requirements()
+            rn2, ra2, rc2 = get_requirements(ops, t, g)
+            yield func, t, rn1, rn2, ra1, ra2, rc1, rc2
+
+
+#====================
+# Test copy operator
+#====================
+
+def test_copy():  # out-of-place and in-place copies leave the data intact
+    copy_op = CopyOperator()
+    data = np.array([10, 20])
+    assert_equal(data, copy_op(data))
+    reference = data.copy()
+    copy_op(data, data)
+    assert_equal(data, reference)
+
+
+#========================
+# Test ReductionOperator
+#========================
+
+def test_reduction_operator1():  # reductions must agree with numpy's own
+    old_numpy = tuple(int(_) for _ in np.__version__.split('.')[:2]) < (1, 7)
+    def func(f, s, a):
+        op = ReductionOperator(f, axis=a)
+        v = np.arange(product(s)).reshape(s)
+        if not isinstance(f, np.ufunc):
+            expected = f(v, axis=a)
+        elif old_numpy and a is None:  # numeric, not lexical, version test
+            expected = f.reduce(v.flat, 0)
+        else:
+            expected = f.reduce(v, a)
+        assert_eq(op(v), expected)
+        out = np.empty_like(expected)
+        op(v, out)  # same check with a preallocated output
+        assert_eq(out, expected)
+    for f in (np.add, np.multiply, np.min, np.max, np.sum, np.prod):
+        for s in SHAPES[2:]:
+            for a in [None] + list(range(len(s))):
+                yield func, f, s, a
+
+
+def test_reduction_operator2():
+    for f in (np.cos, np.modf):  # these ufuncs must be rejected
+        assert_raises(TypeError, ReductionOperator, f)
+    f = np.add
+
+    def func(n, op):
+        v = np.empty(n * [2])  # n-dimensional input
+        assert_raises(TypeError if n == 0 else ValueError, op, v)
+    for a in (1, 2, 3):
+        op = ReductionOperator(f, axis=a)
+        for n in range(0, a+1):  # n <= a: axis a does not exist in v
+            yield func, n, op
+
+
+#=================
+# Test asoperator
+#=================
+
+def test_asoperator_scalar():
+    # 0-d arrays become homotheties, or constants when constant=True
+    def func1(s):
+        op = asoperator(s)
+        assert_is_instance(op, HomothetyOperator)
+
+    def func2(s):
+        op = asoperator(s, constant=True)
+        assert_is_instance(op, ConstantOperator)
+
+    for scalar in [np.array(1, dtype) for dtype in DTYPES]:
+        yield func1, scalar
+        yield func2, scalar
+
+
+def test_asoperator_ndarray():
+    values = ([1], [2], [1, 2], [[1]], [[1, 2]], [[1, 2], [2, 3]],
+              [[[1, 2], [2, 3]]],
+              [[[1, 2], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]])
+    cls = (IdentityOperator, HomothetyOperator, DiagonalOperator,
+           DenseOperator, DenseOperator)
+
+    def totuple(seq):  # recursively turn nested lists into nested tuples
+        if isinstance(seq, list):
+            return tuple(totuple(_) for _ in seq)
+        return seq
+
+    def func1(v, c, s):
+        o = asoperator(v)
+        assert_is_instance(o, c)
+        if len(s) > 1:
+            s = s[:-2] + (s[-1],)  # drop the second-to-last axis
+        assert_equal(o.shapein, s)
+
+    def func2(v, s):
+        o = asoperator(v, constant=True)
+        if isinstance(v, np.matrix):
+            assert_is_instance(o, DenseOperator)
+            assert_equal(np.array(v).shape, o.shape)
+        else:
+            assert_is_instance(o, ConstantOperator)
+            assert_equal(s, o.shapeout)
+    for v, c in zip(values, cls):  # NOTE(review): zip stops after 5 values
+        vt = totuple(v)
+        va = np.array(v)
+        s = va.shape
+        for data in v, vt, va:  # same data as list, tuple and ndarray
+            yield func1, data, c, s
+            yield func2, data, s
+
+
+def test_asoperator_func():  # a plain callable is wrapped as an operator
+    f = lambda x: x**2
+    wrapped = asoperator(f)
+    assert_is_instance(wrapped, Operator)
+    assert_flags(wrapped, 'inplace')
+
+    def func(v):
+        assert_eq(wrapped(v), f(np.array(v)))
+    for value in [2, [2], [2, 3]]:
+        yield func, value
diff --git a/test/test_criterions.py b/test/test_criterions.py
new file mode 100755
index 0000000..07fdf95
--- /dev/null
+++ b/test/test_criterions.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+import nose
+import numpy as np
+from numpy import testing
+
+import pyoperators
+from pyoperators.iterative import criterions
+
+sizes = (1, 4, 16, 100)
+values = (-10, -1, 0, 2)
+alist = (-2., -1, 0, 1., 2.)
+shapeins = ((1, ), (2, ), (2, 3))
+
+
+# norms
+def check_norm2(size, value):  # Norm2 of a constant vector: size * value**2
+    norm = criterions.Norm2()
+    assert norm(value * np.ones(size)) == size * value ** 2
+
+
+def test_norm2():  # one check per (size, value) pair
+    cases = [(size, value) for size in sizes for value in values]
+    for size, value in cases:
+        yield check_norm2, size, value
+
+
+def check_dnorm2(size, value):  # the derivative of Norm2 at x is 2 * x
+    norm = criterions.Norm2()
+    testing.assert_array_equal(norm.diff(value * np.ones(size)), 2 * value * np.ones(size))
+
+
+def test_dnorm2():  # one check per (size, value) pair
+    cases = [(size, value) for size in sizes for value in values]
+    for size, value in cases:
+        yield check_dnorm2, size, value
+
+
+def check_norm2_mul(a, value):  # (a * N)(x) must equal a * N(x)
+    norm = criterions.Norm2()
+    scaled = a * norm
+    x = value * np.ones(1)
+    assert a * norm(x) == scaled(x)
+
+
+def test_norm2_mul():  # one check per (scale factor, value) pair
+    cases = [(a, value) for a in alist for value in values]
+    for a, value in cases:
+        yield check_norm2_mul, a, value
+
+
+def check_dnorm2_mul(a, value):  # (a * N).diff(x) must equal a * N.diff(x)
+    norm = criterions.Norm2()
+    scaled = a * norm
+    x = value * np.ones(1)
+    testing.assert_array_equal(a * norm.diff(x), scaled.diff(x))
+
+
+def test_dnorm2_mul():  # one check per (scale factor, value) pair
+    cases = [(a, value) for a in alist for value in values]
+    for a, value in cases:
+        yield check_dnorm2_mul, a, value
+
+
+# criterion elements
+def check_elements(shapein):  # Norm2 of I applied to ones: element count
+    norm = criterions.Norm2()
+    identity = pyoperators.IdentityOperator(shapein=shapein)
+    element = criterions.CriterionElement(norm, identity)
+    assert element(np.ones(shapein)) == np.prod(shapein)
+
+
+def test_elements():  # one check per input shape
+    for shape in shapeins:
+        yield check_elements, shape
diff --git a/test/test_delete.py b/test/test_delete.py
new file mode 100644
index 0000000..429feb1
--- /dev/null
+++ b/test/test_delete.py
@@ -0,0 +1,40 @@
+from __future__ import division
+import numpy as np
+from numpy.testing import assert_raises
+from pyoperators import DiagonalOperator, HomothetyOperator, Operator, memory
+from pyoperators.utils import setting
+from pyoperators.core import DeletedOperator
+
+
+def test_init():  # DeletedOperator must refuse direct instantiation
+    assert_raises(NotImplementedError, DeletedOperator)
+
+
+def test_str():  # str and repr of an operator after delete()
+    deleted = Operator()
+    deleted.delete()
+    assert str(deleted) == 'deleted'
+    assert repr(deleted) == 'DeletedOperator()'
+
+
+def test_collection_reset():
+    counter = memory._gc_nbytes_counter  # value before the deletion
+    op = HomothetyOperator(2)
+    op.delete()
+    assert memory._gc_nbytes_counter - counter == 8  # deletion adds 8 bytes
+    memory.garbage_collect()
+    assert memory._gc_nbytes_counter == 0  # collecting resets the counter
+
+
+def test_collection():
+    with setting(memory, 'GC_NBYTES_THRESHOLD', 8000):
+        memory.garbage_collect()
+        counter = 0
+        for i in range(10):
+            data = np.arange(100)  # NOTE(review): 800 bytes iff 8-byte ints
+            counter += data.nbytes
+            op = DiagonalOperator(data)
+            op.delete()
+            if i < 9:  # before the last deletion the bytes accumulate
+                assert memory._gc_nbytes_counter == counter
+        assert memory._gc_nbytes_counter == 0  # counter reset by the end
diff --git a/test/test_dense.py b/test/test_dense.py
new file mode 100644
index 0000000..060bfa8
--- /dev/null
+++ b/test/test_dense.py
@@ -0,0 +1,151 @@
+from __future__ import division
+
+import numpy as np
+from numpy.testing import assert_equal, assert_raises, assert_warns
+from pyoperators import (
+    BlockDiagonalOperator, HomothetyOperator, PyOperatorsWarning)
+from pyoperators.linear import DenseOperator, DenseBlockDiagonalOperator
+from pyoperators.rules import rule_manager
+from pyoperators.utils import broadcast_shapes, product, reshape_broadcast
+from pyoperators.utils.testing import (assert_is_instance, assert_is_type,
+                                       assert_same)
+
+
+def test_dense1():  # DenseOperator and its .T / .H against np.dot
+    def func(m, d, v):
+        expected = np.dot(m, v)
+        assert_same(d(v), expected)
+        if d.flags.square:  # square operators are also applied in place
+            w = v.copy()
+            d(w, w)
+            assert_same(w, expected)
+
+    m = np.array([[1, 1j], [2, 2]])  # square complex matrix
+    d = DenseOperator(m)
+    for v in np.array([1+0j, 0]), np.array([0+0j, 1]):
+        yield func, m, d, v
+        yield func, m.T, d.T, v
+        yield func, m.T.conj(), d.H, v
+
+    m = np.array([[1, 2], [3, 4j], [5, 6]])  # 3x2 complex matrix
+    d = DenseOperator(m)
+    for v in np.array([1+0j, 0]), np.array([0+0j, 1]):
+        yield func, m, d, v
+    for v in (np.array([1+0j, 0, 0]), np.array([0j, 1, 0]),
+              np.array([0j, 0, 1])):
+        yield func, m.T, d.T, v
+        yield func, m.T.conj(), d.H, v
+
+
+def test_dense2():  # dense form must match a block-diagonal of the matrix
+    shapeins = ((2,), (3, 2), (3, 1, 2))
+    shapeouts = ((3,), (2, 3), (2, 1, 3))
+    extrainputs = ((), (5,), (3, 4))
+
+    def func(shapein, shapeout, extrainput):
+        datashape = shapeout + shapein
+        inputshape = extrainput + shapein
+        d = np.arange(product(datashape)).reshape(datashape)
+        b = DenseOperator(
+            d, naxesin=len(shapein), naxesout=len(shapeout),
+            shapein=inputshape)
+        bdense = b.todense()
+        n = product(extrainput)  # number of repeated blocks
+        d_ = d.reshape((product(shapeout), product(shapein)))
+        expected = BlockDiagonalOperator(n * [d_], axisin=0).todense()
+        assert_equal(bdense, expected)
+    for shapein in shapeins:
+        for shapeout in shapeouts:
+            for extrainput in extrainputs:
+                yield func, shapein, shapeout, extrainput
+
+
+def test_dense_error():
+    # applying either operator to a 3-element input must raise ValueError
+    def func(d):
+        dense = DenseOperator(d)
+        assert_raises(ValueError, dense, np.ones(3))
+
+    for shape in ((2,), (3, 2)):
+        matrix = np.arange(product(shape)).reshape(shape)
+        yield func, matrix
+
+
+def test_dense_rule_homothety():  # the scalar is folded into the matrix
+    matrix = np.array([[1, 2], [3, 4], [5, 6]])
+    real_prod = HomothetyOperator(2) * DenseOperator(matrix)
+    assert_is_type(real_prod, DenseOperator)
+    assert_same(real_prod.data, matrix * 2)
+    cplx_prod = HomothetyOperator(2j) * DenseOperator(matrix)
+    assert_is_type(cplx_prod, DenseOperator)
+    assert_same(cplx_prod.data, matrix * 2j)
+    assert_equal(cplx_prod.dtype, complex)
+
+
+def test_block_diagonal():  # dense form against explicit diagonal blocks
+    shapeins = (2,), (3, 2)
+    shapeouts = (3,), (2, 3)
+    extradatas = (4,), (2, 1), (2, 4)
+    extrainputs = (), (4,), (2, 4), (2, 1), (3, 1, 4)
+
+    def func(shapein, shapeout, extradata, extrainput):
+        datashape = extradata + shapeout + shapein
+        d = np.arange(product(datashape)).reshape(datashape)
+        b = DenseBlockDiagonalOperator(
+            d, naxesin=len(shapein), naxesout=len(shapeout))
+        new_shape = broadcast_shapes(extradata, extrainput)
+        bdense = b.todense(shapein=new_shape + shapein)
+        d_ = reshape_broadcast(d, new_shape + shapeout + shapein)
+        d_ = d_.reshape(-1, product(shapeout), product(shapein))
+        expected = BlockDiagonalOperator([_ for _ in d_], axisin=0).todense(
+            shapein=product(new_shape + shapein))
+        assert_same(bdense, expected)
+        bTdense = b.T.todense(shapein=new_shape + shapeout)
+        assert_same(bTdense, expected.T)  # the transpose is checked too
+    for shapein in shapeins:
+        for shapeout in shapeouts:
+            for extradata in extradatas:
+                for extrainput in extrainputs:
+                    yield func, shapein, shapeout, extradata, extrainput
+
+
+def test_morphing():  # some inputs change the class of the result
+    def func1(cls):
+        assert_is_type(cls(3.), HomothetyOperator)
+
+    for dense_cls in [DenseBlockDiagonalOperator, DenseOperator]:
+        yield func1, dense_cls
+
+    def func2(shape):
+        ones = np.ones(shape)
+        assert_is_type(DenseBlockDiagonalOperator(ones), DenseOperator)
+    for data_shape in [(3,), (1, 3), (2, 3)]:
+        yield func2, data_shape
+
+
+def test_warning():  # presumably warns on the non-contiguous swapped view
+    data = np.arange(24, dtype=float).reshape(2, 3, 4)
+    swapped = data.swapaxes(0, 1)
+    assert_warns(PyOperatorsWarning, DenseOperator, swapped, naxesin=2)
+
+
+def test_rule_mul():  # the product rule must match the rule-free product
+    shapes1 = (), (3,), (3,), (3,), (1,), (1, 3), (1, 3), (4, 1), (4, 1)
+    shapes2 = (3,), (), (3,), (1,), (3,), (4, 3), (4, 1), (4, 3), (1, 3)
+    mat_shapes1 = (1, 3), (2, 1), (2, 3)
+    mat_shapes2 = (3, 1), (1, 2), (3, 2)
+
+    def func(s1, s2, sm1, sm2):
+        shapein = broadcast_shapes(s1 + sm2[1:], s2 + sm2[1:])
+        data1 = np.arange(product(s1 + sm1)).reshape(s1 + sm1)
+        data2 = np.arange(product(s2 + sm2)).reshape(s2 + sm2)
+        op1 = DenseBlockDiagonalOperator(data1)
+        op2 = DenseBlockDiagonalOperator(data2)
+        comp1 = op1 * op2
+        assert_is_instance(comp1, DenseBlockDiagonalOperator)
+        with rule_manager(none=True):  # same product with rules disabled
+            comp2 = op1 * op2
+        assert_equal(comp1.todense(shapein), comp2.todense(shapein))
+    for s1, s2 in zip(shapes1, shapes2):
+        for sm1, sm2 in zip(mat_shapes1, mat_shapes2):
+            yield func, s1, s2, sm1, sm2
diff --git a/test/test_fft.py b/test/test_fft.py
new file mode 100644
index 0000000..5e545ed
--- /dev/null
+++ b/test/test_fft.py
@@ -0,0 +1,152 @@
+from __future__ import division
+
+import itertools
+import numpy as np
+import scipy.signal
+
+from pyoperators import (CompositionOperator, ConvolutionOperator,
+                         HomothetyOperator)
+from pyoperators.fft import _FFTWRealConvolutionOperator
+from pyoperators.utils.testing import (
+    assert_eq, assert_is_instance, assert_same)
+
+
+def test_convolution_real():
+    """Yield ConvolutionOperator checks for real kernels in 1, 2 and 3-D.
+
+    Every case compares the operator output with
+    ``scipy.signal.convolve(..., mode='same')`` and checks that the dense
+    matrix of the transposed operator is the transpose of the dense matrix.
+    """
+
+    def func(image, kernel):
+        ref = scipy.signal.convolve(image, kernel, mode='same')
+        convol = ConvolutionOperator(kernel, image.shape)
+        con = convol(image)
+        assert np.allclose(ref, con, atol=1.e-15)
+        assert np.allclose(convol.todense().T, convol.T.todense(), atol=1.e-15)
+
+    # Hand-built 3x3 kernel applied to a 7x7 impulse image.
+    imashape = (7, 7)
+    kershape = (3, 3)
+    kerorig = (np.array(kershape) - 1) // 2
+    kernel = np.zeros(kershape)
+    # Fill the 3x3 neighbourhood of the kernel origin, then overwrite
+    # individual entries; the statement order matters because of the
+    # in-place ``*=`` below.
+    kernel[kerorig[0]-1:kerorig[0]+2, kerorig[1]-1:kerorig[1]+2] = 0.5 ** 4
+    kernel[kerorig[0], kerorig[1]] = 0.5
+    kernel[kerorig[0]-1, kerorig[1]-1] *= 2
+    kernel[kerorig[0]+1, kerorig[1]+1] = 0
+    image = np.zeros(imashape)
+    image[3, 3] = 1.
+    yield func, image, kernel
+
+    # Integer image with a kernel given as a plain list.
+    image = np.array([0, 1, 0, 0, 0, 0, 0])
+    kernel = [1, 1, 0.5]
+    yield func, image, kernel
+
+    # 1-D: kernel lengths 1 and 3, image lengths 2*kx to 2*kx+2, with a
+    # single impulse at the middle index.
+    for kx in range(1, 4, 2):
+        kshape = (kx,)
+        kernel = np.ones(kshape)
+        kernel.flat[-1] = 0.5
+        for ix in range(kx*2, kx*2+3):
+            ishape = (ix,)
+            image = np.zeros(ishape)
+            image.flat[image.size//2] = 1.
+            yield func, image, kernel
+
+    # 2-D: kernel extents 1 or 3 per axis, image extents 2*k+1 or 2*k+2.
+    for kx in range(1, 4, 2):
+        for ky in range(1, 4, 2):
+            kshape = (kx, ky)
+            kernel = np.ones(kshape)
+            kernel.flat[-1] = 0.5
+            for ix in range(kx*2+1, kx*2+3):
+                for iy in range(ky*2+1, ky*2+3):
+                    ishape = (ix, iy)
+                    image = np.zeros(ishape)
+                    image[tuple([s//2 for s in image.shape])] = 1.
+                    yield func, image, kernel
+
+    # 3-D: same construction as the 2-D case with one more axis.
+    for kx in range(1, 4, 2):
+        for ky in range(1, 4, 2):
+            for kz in range(1, 4, 2):
+                kshape = (kx, ky, kz)
+                kernel = np.ones(kshape)
+                kernel.flat[-1] = 0.5
+                for ix in range(kx*2+1, kx*2+3):
+                    for iy in range(ky*2+1, ky*2+3):
+                        for iz in range(kz*2+1, kz*2+3):
+                            ishape = (ix, iy, iz)
+                            image = np.zeros(ishape)
+                            image[tuple([s//2 for s in image.shape])] = 1.
+                            yield func, image, kernel
+
+
+def test_convolution_complex():
+    """Yield convolution checks with complex kernels in 1 to 4 dimensions."""
+
+    def func(image, kernel):
+        expected = scipy.signal.fftconvolve(image, kernel, mode='same')
+        operator = ConvolutionOperator(kernel, image.shape)
+        assert np.allclose(expected, operator(image), atol=1.e-15)
+        # The .H operator must be the conjugate transpose of the dense matrix.
+        conjugate_transpose = operator.todense().T.conjugate()
+        assert np.allclose(conjugate_transpose, operator.H.todense(),
+                           atol=1.e-15)
+
+    for ndims in range(1, 5):
+        kernel = np.ones((3,) * ndims, complex)
+        kernel.flat[-1] = 0.5
+        image = np.zeros((6,) * ndims)
+        centre = tuple([extent // 2 for extent in image.shape])
+        image[centre] = 1.
+        yield func, image, kernel
+
+
+def test_convolution_rules_cmp():
+    """Yield checks on the composition of two convolution operators.
+
+    Real and complex versions of two 3x3 kernels are composed in both orders
+    and applied to an impulse image; the result is compared with two chained
+    ``scipy.signal.fftconvolve`` calls.
+    """
+    shape = (5, 5)
+    kernel1 = np.ones((3, 3), complex)
+    kernel1.flat[-1] = 0
+    kernel2 = np.ones((3, 3), complex)
+    kernel2[0, 0] = 0
+    image = np.zeros(shape, complex)
+    image[2, 2] = 1
+    first_pass = scipy.signal.fftconvolve(image, kernel1, mode='same')
+    ref = scipy.signal.fftconvolve(first_pass, kernel2, mode='same')
+    # Flush tiny values to zero before dropping the imaginary part.
+    ref[abs(ref) < 1e-15] = 0
+    ref = ref.real
+
+    def func(k1, k2):
+        c = ConvolutionOperator(k1, shape) * ConvolutionOperator(k2, shape)
+        if all(k.dtype.kind == 'f' for k in (k1, k2)):
+            assert_is_instance(c, _FFTWRealConvolutionOperator)
+        else:
+            assert_is_instance(c, CompositionOperator)
+            assert_eq(len(c.operands), 3)
+        assert np.allclose(c(image.real), ref)
+
+    for k1, k2 in itertools.product((kernel1.real, kernel1),
+                                    (kernel2.real, kernel2)):
+        # Exercise both composition orders.
+        for first, second in ((k1, k2), (k2, k1)):
+            yield func, first, second
+
+
+def test_convolution_rules_add():
+    """Yield checks that adding real convolutions gives a real convolution."""
+    shape = (5, 5)
+    c1 = ConvolutionOperator(np.ones((3, 3)), shape)
+    c2 = ConvolutionOperator(np.ones((2, 2)), shape)
+
+    def func(c1, c2):
+        total = c1 + c2
+        assert_is_instance(total, _FFTWRealConvolutionOperator)
+        assert_same(c1.todense() + c2.todense(), total.todense(), atol=5)
+
+    # Every combination of each operator and its transpose.
+    for pair in itertools.product((c1, c1.T), (c2, c2.T)):
+        yield (func,) + pair
+
+
+def test_convolution_rules_homothety():
+    """Yield checks on composing a convolution with a homothety."""
+    h = HomothetyOperator(2)
+    c = ConvolutionOperator(np.ones((3, 3)), (5, 5))
+    ref = c.todense() * h.data
+
+    def func(ops, r):
+        assert_same(CompositionOperator(ops).todense(), r, atol=5)
+
+    for op, r in ((c, ref), (c.T, ref.T)):
+        # Put the homothety on either side of the convolution.
+        for ops in ((op, h), (h, op)):
+            yield func, ops, r
diff --git a/test/test_flags.py b/test/test_flags.py
new file mode 100644
index 0000000..0fa6335
--- /dev/null
+++ b/test/test_flags.py
@@ -0,0 +1,23 @@
+import pyoperators
+
+
+def test_flags():
+    """Yield checks that each flag decorator matches the ``flags`` keyword.
+
+    For every name exported by ``pyoperators.flags`` that is also a field of
+    ``Flags`` (except the two shape flags), an operator built with the
+    ``flags`` keyword must carry the same flags as an instance of a subclass
+    decorated with the function of that name.
+    """
+    def func(flag):
+        # dir() also lists names that are not flag fields; ignore those.
+        if flag not in pyoperators.flags.Flags._fields:
+            return
+        if flag in ('shape_input', 'shape_output'):
+            return
+        if flag == 'outplace':
+            flags = {'outplace': False}
+        else:
+            flags = {flag: True}
+        o1 = pyoperators.Operator(flags=flags)
+
+        class O2(pyoperators.Operator):
+            pass
+        # Look the decorator up by name rather than eval()-ing source text.
+        O2 = getattr(pyoperators.flags, flag)(O2)
+        o2 = O2()
+        assert o1.flags == o2.flags
+    for flag in dir(pyoperators.flags):
+        yield func, flag
+
diff --git a/test/test_identity.py b/test/test_identity.py
new file mode 100644
index 0000000..4e40b0b
--- /dev/null
+++ b/test/test_identity.py
@@ -0,0 +1,44 @@
+from pyoperators import IdentityOperator
+from pyoperators.utils.testing import (
+    assert_eq, assert_is, assert_is_type)
+from .common import OPS, ndarray2, attr2
+# Every operator class from OPS, instantiated once with default arguments and
+# once with explicit 'linear' and 'contiguous_input' flags.
+ops = [cls() for cls in OPS] + [
+    cls(flags={'linear': False, 'contiguous_input': True}) for cls in OPS]
+
+
+def test_rule_right():
+    """Yield checks on ``identity(operator)`` for every operator in ``ops``.
+
+    The composition must keep the operator's type and its 'linear' and
+    'contiguous_input' flags, while taking the identity's ``classout`` and
+    merging the output attributes with the identity's values winning.
+    """
+    ids = (IdentityOperator(classout=ndarray2, attrout=attr2),
+           IdentityOperator(shapein=4, classout=ndarray2, attrout=attr2))
+
+    def func(id_, op_):
+        op = id_(op_)
+        assert_is_type(op, type(op_))
+        assert_is(op.classout, id_.classout)
+        # Later updates win, so the identity's attributes take precedence.
+        expected = {}
+        for source in (op_, id_):
+            expected.update(source.attrout)
+        assert_eq(op.attrout, expected)
+        for name in ('linear', 'contiguous_input'):
+            assert_eq(getattr(op.flags, name), getattr(op_.flags, name))
+
+    for id_ in ids:
+        for op_ in ops:
+            yield func, id_, op_
+
+
+def test_rule_left():
+    """Yield checks on ``operator(identity)`` for every operator in ``ops``.
+
+    The composition must keep the operator's type, ``classout`` and its
+    'linear' and 'contiguous_input' flags, and merge the output attributes
+    with the operator's values winning.
+    """
+    ids = (IdentityOperator(classout=ndarray2, attrout=attr2),
+           IdentityOperator(shapein=3, classout=ndarray2, attrout=attr2))
+
+    def func(op_, id_):
+        op = op_(id_)
+        assert_is_type(op, type(op_))
+        assert_is(op.classout, op_.classout)
+        # Later updates win, so the operator's attributes take precedence.
+        expected = {}
+        for source in (id_, op_):
+            expected.update(source.attrout)
+        assert_eq(op.attrout, expected)
+        for name in ('linear', 'contiguous_input'):
+            assert_eq(getattr(op.flags, name), getattr(op_.flags, name))
+
+    for op_ in ops:
+        for id_ in ids:
+            yield func, op_, id_
diff --git a/test/test_iterative.py b/test/test_iterative.py
new file mode 100644
index 0000000..2e0ced8
--- /dev/null
+++ b/test/test_iterative.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+"""
+Testing of the iterative module
+
+"""
+
+import numpy as np
+import pyoperators
+from numpy.testing import assert_allclose
+from pyoperators import IdentityOperator, iterative
+from pyoperators.utils.testing import assert_same, skiptest
+
+# Symmetric positive-definite linear operators the solvers are run on.
+operator_list = [
+    pyoperators.DiagonalOperator(np.random.rand(16)),
+    pyoperators.TridiagonalOperator(np.arange(1, 17), np.arange(1, 16)),
+]
+
+# Vectors used as reference solutions.
+vector_list = [np.ones(16), np.arange(1, 17)]
+
+# Old-style solver functions.
+methods = [iterative.algorithms.acg]
+
+# Current solvers: algorithm classes and their function wrappers.
+classes = [iterative.cg.PCGAlgorithm]
+solvers = [iterative.cg.pcg]
+
+
+@skiptest
+def test_methods_inv():
+    """Yield checks that each old-style solver recovers x from A * x.
+
+    The whole generator is decorated with ``skiptest``.
+    """
+    def func(m, A, x):
+        y = A * x
+        xe = m(A, y, maxiter=100, tol=1e-7)
+        assert_same(x, xe)
+    for A in operator_list:
+        for x in vector_list:
+            for m in methods:
+                yield func, m, A, x
+
+
+def test_classes_inv():
+    """Yield checks that each algorithm class recovers x from A(x)."""
+    def func(c, A, x):
+        rhs = A(x)
+        estimate = c(A, rhs, maxiter=100, tol=1e-7).run()
+        assert_allclose(x, estimate, rtol=1e-6)
+
+    for operator in operator_list:
+        for vector in vector_list:
+            for algorithm_class in classes:
+                yield func, algorithm_class, operator, vector
+
+
+def test_solution_as_x0():
+    """Yield checks that starting at the solution takes zero iterations."""
+    def func(s, v):
+        identity = IdentityOperator(shapein=v.shape)
+        # With the identity, the right-hand side v is its own solution.
+        result = s(identity, v, x0=v)
+        assert_same(result['nit'], 0)
+        assert_same(result['x'], v)
+
+    for solver in solvers:
+        for vector in vector_list:
+            yield func, solver, vector
diff --git a/test/test_linear.py b/test/test_linear.py
new file mode 100644
index 0000000..2004a56
--- /dev/null
+++ b/test/test_linear.py
@@ -0,0 +1,295 @@
+from __future__ import division
+
+import numpy as np
+import pyoperators
+
+from numpy.testing import assert_allclose
+from pyoperators import (
+    BlockColumnOperator, BlockDiagonalOperator, DegreesOperator,
+    DenseOperator, DiagonalOperator, DiagonalNumexprOperator,
+    DifferenceOperator, IntegrationTrapezeOperator, Operator, RadiansOperator,
+    Rotation2dOperator, Rotation3dOperator, TridiagonalOperator,
+    SymmetricBandToeplitzOperator, SumOperator)
+from pyoperators.utils import product
+from pyoperators.utils.testing import (
+    assert_eq, assert_is_instance, assert_is_none, assert_is_type,
+    assert_same)
+from .common import IdentityOutplaceOperator, assert_inplace_outplace
+
+# Array shapes shared by the parametrised tests below, from scalar to 3-D.
+SHAPES = ((), (1,), (3,), (2, 3), (2, 3, 4))
+
+
+def test_degrees():
+    """Yield a DegreesOperator check against np.degrees for each float type.
+
+    ``np.float128`` is included only where NumPy defines it, so the generator
+    no longer raises AttributeError on platforms without that type.
+    """
+    def func(dtype):
+        d = DegreesOperator(dtype=dtype)
+        assert_same(d(1), np.degrees(np.ones((), dtype=dtype)))
+    dtypes = [np.float16, np.float32, np.float64]
+    # Extended precision is platform dependent.
+    if hasattr(np, 'float128'):
+        dtypes.append(np.float128)
+    for dtype in dtypes:
+        yield func, dtype
+
+
+def test_degrees_rules():
+    """Check that the ``.I`` attribute of DegreesOperator is a RadiansOperator."""
+    assert_is_type(DegreesOperator().I, RadiansOperator)
+
+
+def test_diagonal_numexpr():
+    """Yield DiagonalNumexprOperator checks for each broadcast setting."""
+    diag = np.array([1, 2, 3])
+    expr = '(data+1)*3'
+
+    def func(broadcast, values):
+        if broadcast == 'rightward':
+            expected = (values.T*(diag.T+1)*3).T
+        else:
+            expected = values*(diag+1)*3
+        op = DiagonalNumexprOperator(diag, expr, broadcast=broadcast)
+        if broadcast not in ('leftward', 'rightward'):
+            # None and 'disabled' both report 'disabled' with fixed shapes.
+            assert op.broadcast == 'disabled'
+            assert_eq(op.shapein, diag.shape)
+            assert_eq(op.shapeout, diag.shape)
+        else:
+            assert op.broadcast == broadcast
+            assert_is_none(op.shapein)
+        assert_inplace_outplace(op, values, expected)
+
+    for broadcast in (None, 'rightward', 'leftward', 'disabled'):
+        broadcasting = broadcast not in (None, 'disabled')
+        for values in (np.array([3, 2, 1.]),
+                       np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5.]])):
+            # The 2-D input is only exercised when broadcasting is enabled.
+            if values.ndim > 1 and not broadcasting:
+                continue
+            yield func, broadcast, values
+
+
+def test_diagonal_numexpr2():
+    """Check the product of two numexpr diagonals and a block composition."""
+    rightward = DiagonalNumexprOperator(
+        [1, 2, 3], '(data+1)*3', broadcast='rightward')
+    plain = DiagonalNumexprOperator([3, 2, 1], '(data+2)*2')
+    combined = rightward * plain
+    assert_is_instance(combined, DiagonalOperator)
+    assert_eq(combined.broadcast, 'disabled')
+    # (1+1)*3 * (3+2)*2 = 60, (2+1)*3 * (2+2)*2 = 72, (3+1)*3 * (1+2)*2 = 72
+    assert_eq(combined.data, [60, 72, 72])
+    column = BlockColumnOperator(
+        3*[IdentityOutplaceOperator()], new_axisout=0)
+    v = [1, 2]
+    assert_inplace_outplace(rightward*column, v, rightward(column(v)))
+
+
+def test_diff_non_optimised():
+    """Yield DifferenceOperator checks against np.diff for every axis."""
+    def func(shape, axis):
+        operator = DifferenceOperator(axis=axis, shapein=shape)
+        data = np.arange(product(shape)).reshape(shape)
+        assert_eq(operator(data), np.diff(data, axis=axis))
+        # The dense transpose must agree with the transposed operator.
+        dense = operator.todense()
+        dense_of_transpose = operator.T.todense()
+        assert_eq(dense.T, dense_of_transpose)
+
+    for shape in ((3,), (3, 4), (3, 4, 5), (3, 4, 5, 6)):
+        for axis, _ in enumerate(shape):
+            yield func, shape, axis
+
+
+def test_integration_trapeze():
+    """Check IntegrationTrapezeOperator against ``np.trapz``."""
+    @pyoperators.flags.square
+    class Op(Operator):
+        """ output[i] = x ** (i + input[i]) """
+        def __init__(self, x):
+            Operator.__init__(self, dtype=float)
+            self.x = x
+
+        def direct(self, input, output):
+            exponents = np.arange(input.size) + input
+            output[...] = self.x ** exponents
+
+    value = range(3)
+    x = [0.5, 1, 2, 4]
+    # One operator per abscissa, stacked along a new leading output axis.
+    func_op = BlockColumnOperator([Op(xi) for xi in x], new_axisout=0)
+    samples = func_op(value)
+    expected = np.trapz(samples, x=x, axis=0)
+    integ = IntegrationTrapezeOperator(x)(func_op)
+    assert_same(integ(value), expected)
+
+
+def test_radians():
+    """Yield a RadiansOperator check against np.radians for each float type.
+
+    ``np.float128`` is included only where NumPy defines it, so the generator
+    no longer raises AttributeError on platforms without that type.
+    """
+    def func(dtype):
+        d = RadiansOperator(dtype=dtype)
+        assert_same(d(1), np.radians(np.ones((), dtype=dtype)))
+    dtypes = [np.float16, np.float32, np.float64]
+    # Extended precision is platform dependent.
+    if hasattr(np, 'float128'):
+        dtypes.append(np.float128)
+    for dtype in dtypes:
+        yield func, dtype
+
+
+def test_radians_rules():
+    """Check that the ``.I`` attribute of RadiansOperator is a DegreesOperator."""
+    assert_is_type(RadiansOperator().I, DegreesOperator)
+
+
+def test_rotation_2d():
+    """Yield checks that rotating [1, 0] gives (cos(angle), sin(angle))."""
+    def func(shape, degrees):
+        angle = np.arange(product(shape)).reshape(shape)
+        # Reference angles are always in radians, flattened to one axis.
+        radians = np.radians(angle) if degrees else angle
+        radians = radians.reshape(angle.size)
+        rotation = Rotation2dOperator(angle, degrees=degrees)
+        actual = rotation([1, 0]).reshape((angle.size, 2))
+        expected = np.array([np.cos(radians), np.sin(radians)]).T
+        assert_same(actual, expected)
+
+    for shape in SHAPES:
+        for degrees in (False, True):
+            yield func, shape, degrees
+
+
+def test_rotation_3d_1axis():
+    """Yield checks of 90-degree rotations about the X, Y and Z axes."""
+    rotations = [Rotation3dOperator(axis, 90, degrees=True) for axis in 'XYZ']
+    ref = [[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]]
+
+    # Expected result of applying each single-axis rotation to ``ref``.
+    exps = (
+        [[1, 0, 0], [0, 0, 1], [0, -1, 0]],
+        [[0, 0, -1], [0, 1, 0], [1, 0, 0]],
+        [[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
+
+    def func(rot, exp):
+        assert_allclose(rot(ref), exp, atol=1e-15)
+
+    for case in zip(rotations, exps):
+        yield (func,) + case
+
+
+def test_rotation_3d_2axis():
+    """Yield checks that two-axis rotations equal two chained rotations."""
+    ref = [[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]]
+    alpha = 0.1
+    beta = 0.2
+
+    # Intrinsic conventions: first axis times second axis.
+    def func(c):
+        combined = Rotation3dOperator(c, alpha, beta)
+        chained = (Rotation3dOperator(c[0], alpha) *
+                   Rotation3dOperator(c[1], beta))
+        assert_allclose(combined(ref), chained(ref))
+    for convention in ("XY'", "XZ'", "YX'", "YZ'", "ZX'", "ZY'"):
+        yield func, convention
+
+    # Extrinsic conventions: the product is taken in the reverse order.
+    def func(c):
+        combined = Rotation3dOperator(c, alpha, beta)
+        chained = (Rotation3dOperator(c[1], beta) *
+                   Rotation3dOperator(c[0], alpha))
+        assert_allclose(combined(ref), chained(ref))
+    for convention in ('XY', 'XZ', 'YX', 'YZ', 'ZX', 'ZY'):
+        yield func, convention
+
+
+def test_rotation_3d_3axis():
+    """Yield checks that three-axis rotations equal three chained rotations."""
+    ref = [[1, 0, 0],
+           [0, 1, 0],
+           [0, 0, 1]]
+    alpha = 0.1
+    beta = 0.2
+    gamma = 0.3
+
+    # Intrinsic conventions; index 3 skips the prime after the second axis.
+    def func(c):
+        combined = Rotation3dOperator(c, alpha, beta, gamma)
+        chained = (Rotation3dOperator(c[0], alpha) *
+                   Rotation3dOperator(c[1], beta) *
+                   Rotation3dOperator(c[3], gamma))
+        assert_allclose(combined(ref), chained(ref))
+    for convention in ("XZ'X''", "XZ'Y''",
+                       "XY'X''", "XY'Z''",
+                       "YX'Y''", "YX'Z''",
+                       "YZ'Y''", "YZ'X''",
+                       "ZY'Z''", "ZY'X''",
+                       "ZX'Z''", "ZX'Y''"):
+        yield func, convention
+
+    # Extrinsic conventions: the product is taken in the reverse order.
+    def func(c):
+        combined = Rotation3dOperator(c, alpha, beta, gamma)
+        chained = (Rotation3dOperator(c[2], gamma) *
+                   Rotation3dOperator(c[1], beta) *
+                   Rotation3dOperator(c[0], alpha))
+        assert_allclose(combined(ref), chained(ref))
+    for convention in ("XZX", "XZY",
+                       "XYX", "XYZ",
+                       "YXY", "YXZ",
+                       "YZY", "YZX",
+                       "ZYZ", "ZYX",
+                       "ZXZ", "ZXY"):
+        yield func, convention
+
+
+def test_sum_operator():
+    """Check SumOperator's dense form against its transpose for every axis."""
+    for shape in SHAPES[1:]:
+        # axis=None first, then each individual axis of the shape.
+        axes = [None]
+        axes.extend(range(len(shape)))
+        for axis in axes:
+            op = SumOperator(axis=axis)
+            dense = op.todense(shapein=shape)
+            dense_of_transpose = op.T.todense(shapeout=shape)
+            assert_eq(dense, dense_of_transpose.T)
+
+
+def test_symmetric_band_toeplitz_operator():
+    """Yield SymmetricBandToeplitzOperator checks against a dense reference.
+
+    The reference matrix is built entry by entry from the first row; for a
+    tuple ``n`` one reference block is built per first row and the blocks are
+    gathered in a BlockDiagonalOperator.
+    """
+    def totoeplitz(n, firstrow):
+        if isinstance(n, tuple):
+            n_ = n[-1]
+            return BlockDiagonalOperator(
+                [totoeplitz(n_, f_) for f_ in firstrow], new_axisin=0)
+        ncorr = len(firstrow) - 1
+        dense = np.zeros((n, n))
+        # range() instead of the Python 2-only xrange(), so the test also
+        # runs on Python 3.
+        for i in range(n):
+            for j in range(n):
+                if abs(i-j) <= ncorr:
+                    dense[i, j] = firstrow[abs(i-j)]
+        return DenseOperator(dense, shapein=dense.shape[1])
+
+    def func(n, firstrow):
+        s = SymmetricBandToeplitzOperator(n, firstrow)
+        # A single-coefficient first row must give a diagonal operator.
+        if firstrow == [1] or firstrow == [[2], [1]]:
+            assert_is_instance(s, DiagonalOperator)
+        assert_same(s.todense(), totoeplitz(n, firstrow).todense(), atol=1)
+
+    for n in [2, 3, 4, 5]:
+        for firstrow in ([1], [2, 1]):
+            yield func, n, firstrow
+    for n in ((2, _) for _ in [2, 3, 4, 5]):
+        for firstrow in ([[2], [1]], [[2, 1], [3, 2]]):
+            yield func, n, firstrow
+
+
+def test_tridiagonal_operator():
+    """Yield checks of TridiagonalOperator's dense form and its T, C and H."""
+    # Each case holds the three constructor arguments; the third may be None.
+    values = (
+        ([1, 1, 0], [2, 1], [2, 2]),
+        ([1, 1, 2], [2, 1], None),
+        ([1j, 1, 0], [2, 1], [-1j, 2]),
+        ([1, 1j, 2], [2j, 1], None))
+    expected = (
+        [[1, 2, 0], [2, 1, 2], [0, 1, 0]],
+        [[1, 2, 0], [2, 1, 1], [0, 1, 2]],
+        [[1j, -1j, 0], [2, 1, 2], [0, 1, 0]],
+        [[1, -2j, 0], [2j, 1j, 1], [0, 1, 2]])
+
+    def func(v, e):
+        diagonal, subdiagonal, superdiagonal = v
+        o = TridiagonalOperator(diagonal, subdiagonal, superdiagonal)
+        assert_eq(o.todense(), e)
+        assert_eq(o.T.todense(), e.T)
+        assert_eq(o.C.todense(), e.conj())
+        assert_eq(o.H.todense(), e.T.conj())
+
+    for case, dense in zip(values, expected):
+        yield func, case, np.array(dense)
diff --git a/test/test_memory.py b/test/test_memory.py
new file mode 100644
index 0000000..73ce545
--- /dev/null
+++ b/test/test_memory.py
@@ -0,0 +1,105 @@
+from __future__ import division
+
+import itertools
+import numpy as np
+from numpy.testing import assert_equal
+from pyoperators.memory import MemoryPool, empty, MEMORY_ALIGNMENT
+from pyoperators.utils import tointtuple
+
+# Candidate buffers: three taken whole or strided from the start of an
+# allocation, and two starting one element into an allocation.
+buffers = [
+    empty(10),
+    empty((5, 2)),
+    empty(20)[::2],
+    empty(11)[1:],
+    empty(21)[1:].reshape((10, 2))[::2, :],
+]
+# Expected alignment of each buffer, in the same order as above.
+aligned = [True, True, True, False, False]
+contiguous = [buf.flags.contiguous for buf in buffers]
+
+
+def assert_contiguous(x):
+    """Assert that the array ``x`` has its ``contiguous`` flag set."""
+    assert x.flags.contiguous
+
+
+def assert_aligned(x):
+    """Assert that the data address of ``x`` is a multiple of MEMORY_ALIGNMENT."""
+    assert address(x) % MEMORY_ALIGNMENT == 0
+
+
+def address(l):
+    """Return the data address of an array, or a list of them for a sequence.
+
+    A numpy array gives the integer address from its array interface; any
+    other iterable is processed element by element, recursively.
+    """
+    if not isinstance(l, np.ndarray):
+        return [address(item) for item in l]
+    return l.__array_interface__['data'][0]
+
+
+def test_empty():
+    """Yield checks on the shape, dtype and memory layout given by empty()."""
+    shapes = (10, (10,), (2, 10), (3, 3, 3))
+    dtypes = (float, np.int8, complex)
+
+    def func(v, s, d):
+        assert_equal(v.shape, tointtuple(s))
+        assert_equal(v.dtype, d)
+        assert_aligned(v)
+        assert_contiguous(v)
+
+    for shape, dtype in itertools.product(shapes, dtypes):
+        yield func, empty(shape, dtype), shape, dtype
+
+
+def test_set():
+    """Yield checks that pool.set() inserts a buffer only temporarily."""
+    pool = MemoryPool()
+    a = np.empty(9)
+    c = np.empty(11)
+    for permanent in (a, c):
+        pool.add(permanent)
+
+    def func(b):
+        # Inside the context, b is expected between the two other buffers.
+        assert address(pool._buffers) == address([a, b, c])
+
+    for candidate in buffers:
+        with pool.set(candidate):
+            yield func, candidate
+        # Leaving the context must restore the original pool content.
+        assert address(pool._buffers) == address([a, c])
+
+
+def test_get():
+    """Yield checks on which pool entry pool.get() hands out.
+
+    Each buffer from ``buffers`` is temporarily set in a pool that already
+    holds two buffers, and a (10,), (5, 2) or (2, 5) float array is requested
+    with every combination of the two boolean arguments. The check runs
+    while the requested array is still held.
+    """
+    pool = MemoryPool()
+    pa = empty(9)
+    pc = empty(11)
+    pool.add(pa)
+    pool.add(pc)
+
+    def func(v, b, bs, ba, bc, s, a, c):
+        # v: array obtained from the pool; b, bs, ba, bc: the candidate
+        # buffer with its shape, alignment and contiguity; s, a, c: the
+        # requested shape and the two booleans passed to pool.get.
+        assert_equal(v.shape, s)
+        if a:
+            assert_aligned(v)
+        if c:
+            assert_contiguous(v)
+        # Parsed as (a > ba) or (c and not bc) or (not bc and s != bs);
+        # presumably the cases where b cannot serve the request, so b stays
+        # in the pool and pc is taken instead -- confirm against MemoryPool.
+        if a > ba or c and not bc or not bc and s != bs:
+            assert address(pool._buffers) == address([pa, b])
+        else:
+            assert address(pool._buffers) == address([pa, pc])
+    for b, ba, bc in zip(buffers, aligned, contiguous):
+        with pool.set(b):
+            for (s, a, c) in itertools.product([(10,), (5, 2), (2, 5)],
+                                               [False, True],
+                                               [False, True]):
+                with pool.get(s, float, a, c) as v:
+                    yield func, v, b, b.shape, ba, bc, s, a, c
+                # Once released, all three buffers are back in the pool.
+                assert address(pool._buffers) == address([pa, b, pc])
+        assert address(pool._buffers) == address([pa, pc])
+
+
+def test_new_entry():
+    """Yield checks on the pool size after a series of buffer requests."""
+    pool = MemoryPool()
+    a = empty(12)
+    b = empty(20)
+    for buf in (a, b):
+        pool.add(buf)
+    shapes = ((4,), (15,), (30,))
+
+    def func(i, s, d=-1):
+        assert_equal(len(pool), 3 + i)
+
+    # Each of these requests is expected to add one entry to the pool.
+    for i, s in enumerate(shapes):
+        with pool.get(s, float):
+            pass
+        yield func, i, s
+    # The remaining requests must leave the pool at the size reached above.
+    last = len(shapes) - 1
+    for s in (a.shape, b.shape):
+        for d in (0, 1, 2):
+            with pool.get(s[0] - d, float):
+                pass
+            yield func, last, s, d
diff --git a/test/test_mpi.py b/test/test_mpi.py
new file mode 100644
index 0000000..5a49384
--- /dev/null
+++ b/test/test_mpi.py
@@ -0,0 +1,143 @@
+import numpy as np
+from pyoperators import (
+    IdentityOperator, MPIDistributionGlobalOperator,
+    MPIDistributionIdentityOperator, MPI)
+from pyoperators.utils import split
+from pyoperators.utils.mpi import (
+    DTYPE_MAP, OP_PY_MAP, OP_MPI_MAP, as_mpi, combine_shape, distribute_shape,
+    distribute_shapes, filter_comm)
+from pyoperators.utils.testing import assert_eq, assert_is_type
+from numpy.testing import assert_equal
+
+# World communicator shared by the tests, with the rank of this process and
+# the number of processes.
+comm = MPI.COMM_WORLD
+rank = comm.rank
+size = comm.size
+# Data types the parametrised tests iterate over.
+dtypes = DTYPE_MAP
+
+
+def test_allreduce():
+    """Yield Allreduce checks for every data type and reduction operation.
+
+    The MPI reduction of random data is compared with the matching Python
+    reduction applied to the arrays gathered from all processes.
+    """
+    n = 10
+
+    def func(x, xs, op):
+        op_py = OP_PY_MAP[op]
+        actual = np.empty_like(x)
+        comm.Allreduce(as_mpi(x), as_mpi(actual), op=OP_MPI_MAP[op])
+        assert_equal(actual, op_py(xs))
+
+    def draw(low, high):
+        return np.random.random_integers(low, high, size=n)
+
+    for dtype in dtypes:
+        kind = dtype.kind
+        if kind in 'ui':
+            # The bounds of int64 are used in place of those of uint64.
+            i = np.iinfo(dtype if dtype != np.uint64 else np.int64)
+            x = draw(i.min, i.max-1).astype(dtype)
+        elif kind == 'f':
+            x = draw(-100, 100).astype(dtype)
+        elif kind == 'c':
+            real_part = draw(-100, 100)
+            x = real_part + draw(-100, 100) * 1j
+        else:
+            raise TypeError()
+        xs = comm.allgather(x)
+        for op in OP_PY_MAP:
+            # min and max are skipped for complex data.
+            if kind == 'c' and op in ('min', 'max'):
+                continue
+            yield func, x, xs, op
+
+
+def test_collect():
+    """Yield checks that combine_shape() undoes distribute_shape()."""
+    def func(comm, s1, s2):
+        shape_global = (s1,) + s2
+        local = distribute_shape(shape_global, comm=comm)
+        assert shape_global == combine_shape(local, comm=comm)
+
+    for communicator in (MPI.COMM_SELF, MPI.COMM_WORLD):
+        for first_dim in range(size*2+1):
+            for trailing in ((), (2,), (2, 3)):
+                yield func, communicator, first_dim, trailing
+
+
+def test_distribute():
+    """Yield checks of distribute_shape(s) and split against a manual count.
+
+    The generator yields nothing when the test runs on more than one process.
+    """
+    class MyComm(object):
+        # Minimal stand-in exposing only the rank and size attributes.
+        def __init__(self, rank, size):
+            self.rank = rank
+            self.size = size
+    if size > 1:
+        return
+
+    def func(a, r, shape, shapes):
+        # The leading dimension for rank r must equal the manual count, and
+        # distribute_shapes must agree with distribute_shape.
+        assert_equal(a[r], shape[0])
+        assert_equal(shapes[r], shape)
+
+    for n in range(10):
+        for sz in range(1, 7):
+            # Assign the n items to the sz ranks in round-robin fashion.
+            work = np.zeros(n, int)
+            for i in range(n):
+                work[i] = i % sz
+            # a[r] is the number of items assigned to rank r.
+            a = np.zeros(sz, int)
+            for r in range(sz):
+                a[r] = sum(work == r)
+            # Contiguous [start, stop) bounds built from the per-rank counts.
+            stop = tuple(np.cumsum(a))
+            start = (0,) + stop[:-1]
+            comm = MyComm(0, sz)
+            for s in [(), (1,), (3, 4)]:
+                shapes = distribute_shapes((n,) + s, comm=comm)
+                for r in range(sz):
+                    shape = distribute_shape((n,) + s, rank=r, size=sz)
+                    yield func, a, r, shape, shapes
+                    # The slice comparison is only done for the 1-D case.
+                    if len(s) > 0:
+                        continue
+                    sl = slice(start[r], stop[r])
+                    yield assert_eq, sl, split(n, sz, r)
+
+
+def test_dgo():
+    """Yield MPIDistributionGlobalOperator checks for each shape and dtype."""
+    def func(shape, dtype):
+        d = MPIDistributionGlobalOperator(shape)
+        x_global = np.ones(shape, dtype)
+        # Part of the leading axis given to this process by split().
+        local_part = split(shape[0], size, rank)
+        x_local = d(x_global)
+        assert_eq(x_local, x_global[local_part])
+        assert_eq(d.T(x_local), x_global)
+
+    for shape in ((2,), (2, 3)):
+        for dtype in dtypes:
+            yield func, shape, dtype
+
+
+def test_dio():
+    """Yield MPIDistributionIdentityOperator checks for each shape and dtype."""
+    def func(shape, dtype):
+        x_global = np.ones(shape, dtype)
+        d = MPIDistributionIdentityOperator()
+        assert_eq(d(x_global), x_global)
+        # Every process contributes rank + 1, so the transpose is expected to
+        # return 1 + 2 + ... + size = size * (size + 1) // 2.
+        x_local = x_global * (rank + 1)
+        reduced = d.T(x_local)
+        assert_eq(reduced, np.ones(shape) * size * (size + 1) // 2)
+
+    for shape in ((2,), (2, 3)):
+        for dtype in dtypes:
+            yield func, shape, dtype
+
+
+def test_dio_morph():
+    """Check that the operator built on COMM_SELF is an IdentityOperator."""
+    assert_is_type(MPIDistributionIdentityOperator(MPI.COMM_SELF),
+                   IdentityOperator)
+
+
+def test_dio_inplace():
+    """Yield checks that in-place and out-of-place dense forms agree."""
+    d = MPIDistributionIdentityOperator()
+
+    def func(n):
+        # Compare both the operator and its transpose.
+        for op in (d, d.T):
+            assert_eq(op.todense(shapein=n),
+                      op.todense(shapein=n, inplace=True))
+
+    for n in range(10):
+        yield func, n
+
+
+def test_filter_comm():
+    """Yield checks that filter_comm() keeps only the selected processes."""
+    comm = MPI.COMM_WORLD
+
+    def func(nglobal):
+        rank_sum = np.array(comm.rank)
+        # Only the processes with rank < nglobal take part in the reduction.
+        with filter_comm(comm.rank < nglobal, comm) as newcomm:
+            if newcomm is not None:
+                newcomm.Allreduce(MPI.IN_PLACE, as_mpi(rank_sum))
+        rank_sum = comm.bcast(rank_sum)
+        participants = min(comm.size, nglobal)
+        # Sum of the ranks 0 .. participants - 1.
+        assert rank_sum == participants * (participants - 1) // 2
+
+    for nglobal in range(comm.size + 3):
+        yield func, nglobal
diff --git a/test/test_nbytes.py b/test/test_nbytes.py
new file mode 100644
index 0000000..e2b19e3
--- /dev/null
+++ b/test/test_nbytes.py
@@ -0,0 +1,64 @@
+from __future__ import division
+import numpy as np
+import scipy.sparse as sp
+from numpy.testing import assert_equal
+from pyoperators import (
+    AdditionOperator, BlockColumnOperator, BlockDiagonalOperator,
+    BlockRowOperator, CompositionOperator, MultiplicationOperator,
+    Operator, SparseOperator, rule_manager)
+
+# Composite operator classes whose nbytes attribute is checked below.
+COMPOSITES = (AdditionOperator, BlockColumnOperator, BlockDiagonalOperator,
+              BlockRowOperator, CompositionOperator, MultiplicationOperator)
+
+
+class Op1(Operator):
+    """Operator subclass whose nbytes class attribute is fixed to 4."""
+    nbytes = 4
+
+
+class Op2(Operator):
+    """Operator subclass whose nbytes class attribute is fixed to 8."""
+    nbytes = 8
+
+
+def test_sparse():
+    """Yield checks of SparseOperator.nbytes for several sparse formats."""
+    D = np.arange(15, dtype=float).reshape(3, 5)
+    # Sparse matrix format paired with the byte count expected for D.
+    cases = (
+        (sp.coo_matrix, 224),
+        (sp.bsr_matrix, 184),
+        (sp.csc_matrix, 192),
+        (sp.csr_matrix, 184),
+        (sp.dia_matrix, 308),
+        (sp.dok_matrix, 2688),
+    )
+
+    def func(matrix, expected):
+        assert_equal(SparseOperator(matrix(D)).nbytes, expected)
+
+    for matrix, expected in cases:
+        yield func, matrix, expected
+
+
+def test_composite():
+    """Yield checks that a composite's nbytes adds up its operands' nbytes."""
+    def func(cls):
+        # The block operators need to be told along which axis they act.
+        if cls is BlockRowOperator:
+            keywords = {'axisin': 0}
+        elif cls in (BlockColumnOperator, BlockDiagonalOperator):
+            keywords = {'axisout': 0}
+        else:
+            keywords = {}
+        flat = cls([Op1(), Op2()], **keywords)
+        assert_equal(flat.nbytes, 12)
+        # Build the nested composite under rule_manager(none=True).
+        with rule_manager(none=True):
+            nested = cls([flat, Op1(), Op2()], **keywords)
+        assert_equal(nested.nbytes, 24)
+
+    for composite_class in COMPOSITES:
+        yield func, composite_class
+
+
+def test_composite_unique():
+    """Yield checks that repeated operand instances are counted only once."""
+    def func(cls):
+        # The block operators need to be told along which axis they act.
+        if cls is BlockRowOperator:
+            keywords = {'axisin': 0}
+        elif cls in (BlockColumnOperator, BlockDiagonalOperator):
+            keywords = {'axisout': 0}
+        else:
+            keywords = {}
+        # The list holds the same two instances ten times over.
+        repeated = 10 * [Op1(), Op2()]
+        assert_equal(cls(repeated, **keywords).nbytes, 12)
+
+    for composite_class in COMPOSITES:
+        yield func, composite_class
+
diff --git a/test/test_nonlinear.py b/test/test_nonlinear.py
new file mode 100644
index 0000000..8174423
--- /dev/null
+++ b/test/test_nonlinear.py
@@ -0,0 +1,275 @@
+from __future__ import division
+
+import itertools
+import numpy as np
+from numpy.testing import assert_allclose, assert_equal, assert_raises
+from pyoperators import (
+    Cartesian2SphericalOperator, CompositionOperator, ConstantOperator,
+    HardThresholdingOperator, IdentityOperator, MultiplicationOperator,
+    NormalizeOperator, NumexprOperator, PowerOperator, ReciprocalOperator,
+    RoundOperator, SqrtOperator, SquareOperator, SoftThresholdingOperator,
+    Spherical2CartesianOperator)
+from pyoperators.utils import product
+from pyoperators.utils.testing import (
+    assert_is_instance, assert_is_type, assert_same)
+
+
+def test_cartesian_spherical():
+    """Yield cartesian -> spherical -> cartesian round-trip checks."""
+    vecs = ((1, 0, 0), (0, 1, 0), (0, 0, 1),
+            ((1, 0, 0), (0, 1, 0), (0, 0, 1)),
+            (((1, 0, 0), (0, 1, 0)),))
+    shapes = ((), (), (), (3,), (1, 2))
+
+    def func(c, v, s, d):
+        to_spherical = Cartesian2SphericalOperator(c, degrees=d)
+        to_cartesian = Spherical2CartesianOperator(c, degrees=d)
+        a = to_cartesian(to_spherical(v))
+        assert_equal(a.shape, s + (3,))
+        assert_allclose(a, v, atol=1e-16)
+    for c in Cartesian2SphericalOperator.CONVENTIONS:
+        for (v, s), d in itertools.product(zip(vecs, shapes), (False, True)):
+            yield func, c, v, s, d
+
+
+def test_cartesian_spherical_error():
+    assert_raises(TypeError, Cartesian2SphericalOperator, 3)
+    assert_raises(ValueError, Cartesian2SphericalOperator, 'bla')
+    op = Cartesian2SphericalOperator('zenith,azimuth')
+
+    def func(i, o):
+        if i.shape != (3,) or o.shape != (2,):
+            assert_raises(ValueError, op.__call__, i, o)
+        else:
+            op(i, o)  # only the (3,) -> (2,) shape pair must be accepted
+    for i, o in itertools.product((np.array(1.), np.zeros(2), np.zeros(3)),
+                                  (np.array(1.), np.zeros(2), np.zeros(3))):
+        yield func, i, o
+
+
+def test_cartesian_spherical_rules():
+    """Yield checks that C2S(S2C) simplifies only for equal conventions."""
+    conventions = 'zenith,azimuth', 'azimuth,elevation'
+
+    def func(c1, c2):
+        c2s = Cartesian2SphericalOperator(c1)
+        op = c2s(Spherical2CartesianOperator(c2))
+        expected = IdentityOperator if c1 == c2 else CompositionOperator
+        assert_is_type(op, expected)
+    for c1 in conventions:
+        c2s = Cartesian2SphericalOperator(c1)
+        assert_is_type(c2s.I, Spherical2CartesianOperator)
+        assert_equal(c2s.convention, c1)
+        for c2 in conventions:
+            yield func, c1, c2
+
+
+def test_spherical_cartesian():
+    """Yield conversion and round-trip checks for four angle conventions."""
+    dirs_za = ((0, 0), (20, 0), (130, 0), (10, 20), (20, 190),
+               ((0, 0), (20, 0), (130, 0), (10, 20), (20, 130)),
+               (((0, 0), (20, 200), (130, 300)),))
+    dirs_az = ((0, 0), (0, 20), (0, 130), (20, 10), (190, 20),
+               ((0, 0), (0, 20), (0, 130), (20, 10), (130, 20)),
+               (((0, 0), (200, 20), (300, 130)),))
+    dirs_ea = ((90, 0), (70, 0), (-40, 0), (80, 20), (70, 190),
+               ((90, 0), (70, 0), (-40, 0), (80, 20), (70, 130)),
+               (((90, 0), (70, 200), (-40, 300)),))
+    dirs_ae = ((0, 90), (0, 70), (0, -40), (20, 80), (190, 70),
+               ((0, 90), (0, 70), (0, -40), (20, 80), (130, 70)),
+               (((0, 90), (200, 70), (300, -40)),))
+    shapes = ((), (), (), (), (), (5,), (1, 3))
+
+    # the zenith,azimuth directions (in radians) give the reference vectors
+    op_ref = Spherical2CartesianOperator('zenith,azimuth')
+    refs = [op_ref(np.radians(v)) for v in dirs_za]
+
+    def func(c, v, s, d, r):
+        angles = v if d else np.radians(v)
+        s2c = Spherical2CartesianOperator(c, degrees=d)
+        c2s = Cartesian2SphericalOperator(c, degrees=d)
+        assert_allclose(s2c(angles), r)
+        a = c2s(s2c(angles))
+        if not d:
+            a = np.degrees(a)
+        assert_equal(a.shape, s + (2,))
+        assert_allclose(a, v, atol=1e-16)
+    for c, vs in (('zenith,azimuth', dirs_za),
+                  ('azimuth,zenith', dirs_az),
+                  ('elevation,azimuth', dirs_ea),
+                  ('azimuth,elevation', dirs_ae)):
+        for v, s, r in zip(vs, shapes, refs):
+            for d in (False, True):
+                yield func, c, v, s, d, r
+
+
+def test_spherical_cartesian_error():
+    assert_raises(TypeError, Spherical2CartesianOperator, 3)
+    assert_raises(ValueError, Spherical2CartesianOperator, 'bla')
+    op = Spherical2CartesianOperator('zenith,azimuth')
+
+    def func(i, o):
+        if i.shape != (2,) or o.shape != (3,):
+            assert_raises(ValueError, op.__call__, i, o)
+        else:
+            op(i, o)  # only the (2,) -> (3,) shape pair must be accepted
+    for i, o in itertools.product((np.array(1.), np.zeros(2), np.zeros(3)),
+                                  (np.array(1.), np.zeros(2), np.zeros(3))):
+        yield func, i, o
+
+
+def test_spherical_cartesian_rules():
+    """Yield checks that S2C(C2S) simplifies only for equal conventions."""
+    conventions = 'zenith,azimuth', 'azimuth,elevation'
+
+    def func(c1, c2):
+        s2c = Spherical2CartesianOperator(c1)
+        op = s2c(Cartesian2SphericalOperator(c2))
+        expected = IdentityOperator if c1 == c2 else CompositionOperator
+        assert_is_type(op, expected)
+    for c1 in conventions:
+        s2c = Spherical2CartesianOperator(c1)
+        assert_is_type(s2c.I, Cartesian2SphericalOperator)
+        assert_equal(s2c.convention, c1)
+        for c2 in conventions:
+            yield func, c1, c2
+
+
+def test_rounding():  # yields one equality check per enabled method
+    a = np.array([-3.5, -3, -2.6, -2.5, -2.4, 0, 0.2, 0.5, 0.9, 1, 1.5])
+    r = RoundOperator('rtz')
+    yield assert_equal, r(a), [-3, -3, -2, -2, -2, 0, 0, 0, 0, 1, 1]
+    #r = RoundOperator('rti')
+    #yield assert_equal, r(a), [-4, -3, -3, -3, -3, 0, 1, 1, 1, 2]
+    r = RoundOperator('rtmi')
+    yield assert_equal, r(a), [-4, -3, -3, -3, -3, 0, 0, 0, 0, 1, 1]
+    r = RoundOperator('rtpi')
+    yield assert_equal, r(a), [-3, -3, -2, -2, -2, 0, 1, 1, 1, 1, 2]
+    #r = RoundOperator('rhtz')
+    #yield assert_equal, r(a), [-3, -3, -3, -2, -2, 0, 0, 0, 1, 1, 1]
+    #r = RoundOperator('rhti')
+    #yield assert_equal, r(a), [-4, -3, -3, -3, -2, 0, 0, 1, 1, 2]
+    r = RoundOperator('rhtmi')
+    yield assert_equal, r(a), [-4, -3, -3, -3, -2, 0, 0, 0, 1, 1, 1]
+    r = RoundOperator('rhtpi')
+    yield assert_equal, r(a), [-3, -3, -3, -2, -2, 0, 0, 1, 1, 1, 2]
+    r = RoundOperator('rhte')
+    yield assert_equal, r(a), [-4, -3, -3, -2, -2, 0, 0, 0, 1, 1, 2]
+    #r = RoundOperator('rhto')
+    #yield assert_equal, r(a), [-3, -3, -3, -2, -2, 0, 0, 0, 1, 1, 1]
+    #r = RoundOperator('rhs')
+    #mask = np.array([True,True,False,True,True,True,False,True,True], np.bool)
+    #result = r(a)
+    #yield assert_equal, result[mask], [-3,-3,-2,0,0,1,1]
+    #yield assert_, result[2] in (-3,-2)
+    #yield assert_, result[-4] in (0,1)
+
+
+def test_normalize():
+    normalize = NormalizeOperator()
+
+    def func(shape):
+        vec = np.arange(product(shape)).reshape(shape)
+        unit = vec / np.sqrt(np.sum(vec ** 2, axis=-1))[..., None]
+        assert_same(normalize(vec), unit)  # unit norm along the last axis
+    for shape in ((2,), (4,), (2, 3), (4, 5, 2)):
+        yield func, shape
+
+
+def test_numexpr1():
+    offset = 7.  # bound to the name 'd' used inside the expression
+    expr = NumexprOperator('2.*exp(input)+d', {'d': offset})
+    assert expr(3.) == 2 * np.exp(3.) + offset
+
+
+def test_numexpr2():
+    summed = NumexprOperator('3*input') + NumexprOperator('2*input')
+    assert_equal(summed(np.arange(10)), 5 * np.arange(10))  # 3x + 2x == 5x
+
+
+def test_power():  # PowerOperator(n) must come back as the listed type
+    expected = ((-1, ReciprocalOperator), (0, ConstantOperator),
+                (0.5, SqrtOperator), (1, IdentityOperator),
+                (2, SquareOperator), (3, PowerOperator))
+
+    def func(n, c):
+        op = PowerOperator(n)
+        assert_is_type(op, c)
+        if isinstance(op, PowerOperator):
+            assert_equal(op.n, n)
+    for exponent, cls in expected:
+        yield func, exponent, cls
+
+
+def test_power_rule_comp():
+    powers = (ReciprocalOperator(), SqrtOperator(), SquareOperator(),
+              PowerOperator(2.5))
+    composed = CompositionOperator(powers)
+    assert_is_type(composed, PowerOperator)
+    assert_equal(composed.n, -2.5)  # exponents multiply: -1 * .5 * 2 * 2.5
+
+
+def test_power_rule_mul():
+    powers = (ReciprocalOperator(), SqrtOperator(), SquareOperator(),
+              PowerOperator(2.5))
+    multiplied = MultiplicationOperator(powers)
+    assert_is_type(multiplied, PowerOperator)
+    assert_equal(multiplied.n, 4)  # exponents add up: -1 + .5 + 2 + 2.5
+
+
+def test_hard_thresholding():
+    x = [-1., -0.2, -0.1, 0, 0.2, 0.1, 2, 3]
+    lbda = 0.2
+    H = HardThresholdingOperator(lbda)
+    expected = [-1, 0, 0, 0, 0, 0, 2, 3]  # |x| <= lbda is zeroed
+    assert_equal(H(x), expected)
+    x = np.array(x)
+    H(x, x)  # same array passed as input and as output
+    assert_equal(x, expected)
+    lbda2 = [0.3, 0.1, 2]
+    shape = np.asarray(lbda2).shape
+    G = HardThresholdingOperator(lbda2)
+    assert_equal(G.shapein, shape)
+    K = G(H)  # composition is expected to merge into a single threshold
+    assert_is_instance(K, HardThresholdingOperator)
+    assert_equal(K.a, np.maximum(lbda, lbda2))
+    assert_equal(K.shapein, shape)
+    K = H(G)  # same expectations with the operands swapped
+    assert_is_instance(K, HardThresholdingOperator)
+    assert_equal(K.a, np.maximum(lbda, lbda2))
+    assert_equal(K.shapein, shape)
+
+    H = HardThresholdingOperator([0, 0])  # all-zero threshold -> identity
+    assert_is_instance(H, IdentityOperator)
+    assert_equal(H.shapein, (2,))
+
+    H = HardThresholdingOperator(0)  # scalar zero: identity, implicit shapes
+    assert_is_instance(H, IdentityOperator)
+    assert H.flags.square
+    assert_equal(H.flags.shape_input, 'implicit')
+    assert_equal(H.flags.shape_output, 'implicit')
+
+
+def test_soft_thresholding():
+    x = [-1., -0.2, -0.1, 0, 0.1, 0.2, 2, 3]
+    lbda = np.array(0.2)  # an ndarray, so `lbda * list` below broadcasts
+    S = SoftThresholdingOperator(lbda)
+    expected = [-1, 0, 0, 0, 0, 0, 2, 3] - lbda * [-1, 0, 0, 0, 0, 0, 1, 1]
+    assert_equal(S(x), expected)
+    x = np.array(x)
+    S(x, x)  # same array passed as input and as output
+    assert_equal(x, expected)
+    lbda2 = [0.3, 0.1, 2]
+    shape = np.asarray(lbda2).shape
+    T = SoftThresholdingOperator(lbda2)
+    assert_equal(T.shapein, shape)
+
+    S = SoftThresholdingOperator([0, 0])  # all-zero threshold -> identity
+    assert_is_instance(S, IdentityOperator)
+    assert_equal(S.shapein, (2,))
+
+    S = SoftThresholdingOperator(0)  # scalar zero: identity, implicit shapes
+    assert_is_instance(S, IdentityOperator)
+    assert S.flags.square
+    assert_equal(S.flags.shape_input, 'implicit')
+    assert_equal(S.flags.shape_output, 'implicit')
diff --git a/test/test_partition.py b/test/test_partition.py
new file mode 100644
index 0000000..bac295a
--- /dev/null
+++ b/test/test_partition.py
@@ -0,0 +1,234 @@
+import numpy as np
+
+from pyoperators import (
+    flags, Operator, AdditionOperator, BlockColumnOperator,
+    BlockDiagonalOperator, BlockRowOperator,  CompositionOperator,
+    DiagonalOperator, HomothetyOperator, IdentityOperator,
+    MultiplicationOperator, I, asoperator)
+from pyoperators.core import BlockOperator
+from pyoperators.utils import merge_none
+from pyoperators.utils.testing import (
+    assert_eq, assert_is_instance, assert_raises, assert_is_type)
+from .common import Stretch
+
+
+def test_partition1():
+    """Yield block-diagonal checks against dense diag(1, 2, 2, 3, 3, 3)."""
+    o1, o2, o3 = [HomothetyOperator(k, shapein=k) for k in (1, 2, 3)]
+    r = DiagonalOperator([1, 2, 2, 3, 3, 3]).todense()
+
+    def func(ops, p):
+        op = BlockDiagonalOperator(ops, partitionin=p, axisin=0)
+        assert_eq(op.todense(6), r, str(op))
+    cases = (((o1, o2, o3), None), ((I, o2, o3), (1, 2, 3)),
+             ((o1, 2*I, o3), (1, 2, 3)),
+             ((o1, o2, 3*I), (1, 2, 3)))
+    for ops, p in cases:
+        yield func, ops, p
+
+
+def test_partition2():
+    # in some cases in this test, partitionout cannot be inferred from
+    # partitionin, because the former depends on the input rank
+    i = np.arange(3*4*5*6).reshape(3, 4, 5, 6)
+
+    def func(axisp, p, axiss):
+        op = BlockDiagonalOperator(3*[Stretch(axiss)], partitionin=p,
+                                   axisin=axisp)
+        assert_eq(op(i), Stretch(axiss)(i))
+    for axisp, p in zip(
+            (0, 1, 2, 3, -1, -2, -3),
+            ((1, 1, 1), (1, 2, 1), (2, 2, 1), (2, 3, 1), (2, 3, 1), (2, 2, 1),
+             (1, 2, 1), (1, 1, 1))):  # NOTE(review): 8th entry dropped by zip
+        for axiss in (0, 1, 2, 3):
+            yield func, axisp, p, axiss
+
+
+def test_partition3():
+    # TODO: test axisin != axisout (placeholder, nothing is checked yet)
+    pass
+
+
+def test_partition4():
+    """Check that mixing in a separable operator keeps a block diagonal."""
+
+    @flags.separable
+    class Op(Operator):
+        pass
+
+    blocks = [HomothetyOperator(k, shapein=k) for k in (1, 2, 3)]
+    op = Op()
+    p = BlockDiagonalOperator(blocks, axisin=0)
+    r = (op + p + op) * p
+    assert isinstance(r, BlockDiagonalOperator)
+
+
+def test_block1():
+    """Yield shape checks for stacking three (2, 2) blocks on a new axis."""
+    ops = [HomothetyOperator(i, shapein=(2, 2)) for i in range(1, 4)]
+    shapes = (3, 2, 2), (2, 3, 2), (2, 2, 3)
+
+    def func(axis, s):
+        op = BlockDiagonalOperator(ops, new_axisin=axis)
+        assert_eq(op.shapein, s)
+        assert_eq(op.shapeout, s)
+    # axes -3..-1 and 0..2 are expected to give the same three shapes
+    for axis, s in zip(range(-3, 3), 2 * shapes):
+        yield func, axis, s
+
+
+def test_block2():  # stacked Stretch blocks must act like one Stretch
+    shape = (3, 4, 5, 6)
+    i = np.arange(np.prod(shape)).reshape(shape)  # np.product is deprecated
+
+    def func(axisp, axiss):
+        op = BlockDiagonalOperator(shape[axisp]*[Stretch(axiss)],
+                                   new_axisin=axisp)
+        axisp_ = axisp if axisp >= 0 else axisp + 4  # non-negative block axis
+        axiss_ = axiss if axisp_ > axiss else axiss + 1  # shifted by new axis
+        assert_eq(op(i), Stretch(axiss_)(i))
+    for axisp in (0, 1, 2, 3, -1, -2, -3):
+        for axiss in (0, 1, 2):
+            yield func, axisp, axiss
+
+
+def test_block3():
+    # TODO: test new_axisin != new_axisout (placeholder, nothing checked yet)
+    pass
+
+
+def test_block4():
+    """Check that mixing in a separable operator keeps a block diagonal."""
+
+    @flags.separable
+    class Op(Operator):
+        pass
+
+    blocks = [HomothetyOperator(k, shapein=2) for k in (1, 2, 3)]
+    op = Op()
+    p = BlockDiagonalOperator(blocks, new_axisin=0)
+    r = (op + p + op) * p
+    assert isinstance(r, BlockDiagonalOperator)
+
+
+def test_block_column1():
+    I2, I3 = IdentityOperator(2), IdentityOperator(3)
+    for keyword in 'axisout', 'new_axisout':  # size 2 vs 3 must be refused
+        assert_raises(ValueError, BlockColumnOperator, [I2, 2*I3],
+                      **{keyword: 0})
+
+
+def test_block_column2():
+    """Check the dense form and the transpose of a column of two blocks."""
+    p = np.matrix([[1, 0], [0, 2], [1, 0]])
+    o = asoperator(np.matrix(p))
+    # the same expectations hold for both ways of giving the output axis
+    for keyword in 'axisout', 'new_axisout':
+        e = BlockColumnOperator([o, 2*o], **{keyword: 0})
+        assert_eq(e.todense(), np.vstack([p, 2*p]))
+        assert_eq(e.T.todense(), e.todense().T)
+
+
+def test_block_row1():
+    I2, I3 = IdentityOperator(2), IdentityOperator(3)
+    for keyword in 'axisin', 'new_axisin':  # size 2 vs 3 must be refused
+        assert_raises(ValueError, BlockRowOperator, [I2, 2*I3],
+                      **{keyword: 0})
+
+
+def test_block_row2():
+    """Check the dense form and the transpose of a row of two blocks."""
+    p = np.matrix([[1, 0], [0, 2], [1, 0]])
+    o = asoperator(np.matrix(p))
+    # the same expectations hold for both ways of giving the input axis
+    for keyword in 'axisin', 'new_axisin':
+        r = BlockRowOperator([o, 2*o], **{keyword: 0})
+        assert_eq(r.todense(), np.hstack([p, 2*p]))
+        assert_eq(r.T.todense(), r.todense().T)
+
+
+def test_partition_implicit_commutative():
+    partitions = (None, None), (2, None), (None, 3), (2, 3)
+    ops = [I, 2*I]
+
+    def func(op1, op2, p1, p2, cls):
+        op = operation([op1, op2])  # NOTE(review): closes over loop variable
+        assert type(op) is cls
+        if op.partitionin is None:
+            assert op1.partitionin is op2.partitionin is None
+        else:
+            assert op.partitionin == merge_none(p1, p2)
+        if op.partitionout is None:
+            assert op1.partitionout is op2.partitionout is None
+        else:
+            assert op.partitionout == merge_none(p1, p2)
+    for operation in (AdditionOperator, MultiplicationOperator):
+        for p1 in partitions:
+            for p2 in partitions:
+                for cls, aout, ain, pout1, pin1, pout2, pin2 in zip(
+                        (BlockRowOperator, BlockDiagonalOperator,
+                         BlockColumnOperator),
+                        (None, 0, 0), (0, 0, None), (None, p1, p1),
+                        (p1, p1, None), (None, p2, p2), (p2, p2, None)):
+                    op1 = BlockOperator(
+                        ops, partitionout=pout1, partitionin=pin1, axisin=ain,
+                        axisout=aout)
+                    op2 = BlockOperator(
+                        ops, partitionout=pout2, partitionin=pin2, axisin=ain,
+                        axisout=aout)
+                    yield func, op1, op2, p1, p2, cls
+
+
+def test_partition_implicit_composition():
+    partitions = (None, None), (2, None), (None, 3), (2, 3)
+    ops = [I, 2*I]
+
+    def func(op1, op2, pin1, pout2, cls):
+        op = op1 * op2
+        assert_is_instance(op, cls)
+        if not isinstance(op, BlockOperator):
+            return  # no partition attributes are checked on other results
+        pout = None if isinstance(op, BlockRowOperator) else \
+               merge_none(pin1, pout2)
+        pin = None if isinstance(op, BlockColumnOperator) else \
+              merge_none(pin1, pout2)
+        assert pout == op.partitionout
+        assert pin == op.partitionin
+    for pin1 in partitions:
+        for pout2 in partitions:
+            for cls1, cls2, cls, aout1, ain1, aout2, ain2, pout1, pin2, in zip(
+                    (BlockRowOperator, BlockRowOperator, BlockDiagonalOperator,
+                     BlockDiagonalOperator),
+                    (BlockDiagonalOperator, BlockColumnOperator,
+                     BlockDiagonalOperator, BlockColumnOperator),
+                    (BlockRowOperator, HomothetyOperator,
+                     BlockDiagonalOperator, BlockColumnOperator),
+                    (None, None, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
+                    (0, None, 0, None), (None, None, pin1, pin1),
+                    (pout2, None, pout2, None)):
+                op1 = BlockOperator(ops, partitionin=pin1, partitionout=pout1,
+                                    axisout=aout1, axisin=ain1)
+                op2 = BlockOperator(ops, partitionout=pout2, partitionin=pin2,
+                                    axisout=aout2, axisin=ain2)
+                yield func, op1, op2, pin1, pout2, cls
+
+
+def test_mul():
+    opnl = Operator(shapein=10, flags='square')
+    oplin = Operator(flags='linear,square', shapein=10)
+    clss = ((BlockRowOperator, BlockDiagonalOperator, BlockRowOperator),
+            3 * (BlockDiagonalOperator,),
+            (BlockDiagonalOperator, BlockColumnOperator, BlockColumnOperator),
+            (BlockRowOperator, BlockColumnOperator, AdditionOperator))
+
+    def func(op, cls1, cls2, cls3):
+        operation = CompositionOperator \
+                    if op.flags.linear else MultiplicationOperator
+        op1 = cls1(3*[op], axisin=0)
+        op2 = cls2(3*[op], axisout=0)
+        result = op1 * op2
+        assert_is_type(result, cls3)  # each clss row: (left, right, result)
+        assert_is_type(result.operands[0], operation)
+    for op in opnl, oplin:
+        for cls1, cls2, cls3 in clss:
+            yield func, op, cls1, cls2, cls3
diff --git a/test/test_proxy.py b/test/test_proxy.py
new file mode 100644
index 0000000..388e8a2
--- /dev/null
+++ b/test/test_proxy.py
@@ -0,0 +1,114 @@
+from __future__ import division
+
+import numpy as np
+from numpy.testing import assert_equal
+from pyoperators import AdditionOperator, CompositionOperator, Operator, flags
+from pyoperators.utils.testing import assert_same
+from pyoperators.proxy import proxy_group
+
+
+mat = np.array([[1, 1, 1j],
+                [0, 1, 1],
+                [0, 0, 1]])
+matI = np.linalg.inv(mat)
+
+global counter  # has no effect at module level
+counter = 0  # incremented by callback() each time it builds an operator
+
+
+@flags.linear
+@flags.square
+class MyOperator(Operator):  # i * mat, with explicit associated operators
+    def __init__(self, i, **keywords):
+        self.i = i  # scalar factor applied to mat
+        Operator.__init__(self, dtype=np.array(mat).dtype, **keywords)
+
+    def direct(self, x, out):
+        out[...] = self.i * np.dot(mat, x)
+
+    def conjugate(self, x, out):
+        out[...] = self.i * np.dot(mat.conjugate(), x)
+
+    def transpose(self, x, out):
+        out[...] = self.i * np.dot(x, mat)  # applies mat.T (x assumed 1-d)
+
+    def adjoint(self, x, out):
+        out[...] = self.i * np.dot(x, mat.conjugate())
+
+    def inverse(self, x, out):
+        out[...] = 1 / self.i * np.dot(matI, x)  # true division (__future__)
+
+    def inverse_conjugate(self, x, out):
+        out[...] = 1 / self.i * np.dot(matI.conjugate(), x)
+
+    def inverse_transpose(self, x, out):
+        out[...] = 1 / self.i * np.dot(x, matI)
+
+    def inverse_adjoint(self, x, out):
+        out[...] = 1 / self.i * np.dot(x, matI.conjugate())
+
+
+def callback(i):
+    global counter
+    counter += 1  # counts how many operators have been built
+    return MyOperator(i + 1, shapein=3)
+
+
+def get_operator(list, attr):
+    """Return the `attr` associated operator of every operator in list.
+
+    '' returns list itself; 'IC', 'IT' and 'IH' are read as .I.C, .I.T, .I.H.
+    """
+    if attr == '':
+        return list
+    if attr in ('IC', 'IT', 'IH'):
+        return [getattr(_.I, attr[1]) for _ in list]
+    return [getattr(_, attr) for _ in list]
+
+
+nproxy = 5
+ref_list = [callback(i) for i in range(nproxy)]  # eagerly built references
+proxy_list = proxy_group(nproxy, callback)  # proxies compared with ref_list
+
+
+def test_copy():
+    first = proxy_list[0]
+    assert first.copy().common is first.common  # a copy shares `common`
+
+
+def test():
+    """Yield dense comparisons of every proxy against its reference."""
+    def func(attr):
+        for o, r in zip(get_operator(proxy_list, attr),
+                        get_operator(ref_list, attr)):
+            assert_same(o.todense(), r.todense())
+    for attr in '', 'C', 'T', 'H', 'I', 'IC', 'IT', 'IH':
+        yield func, attr
+
+
+def test_addition():  # the sum of the proxies must match the sum of the refs
+    def func(attr):
+        op = AdditionOperator(get_operator(proxy_list, attr))
+        ref = AdditionOperator(get_operator(ref_list, attr))
+        assert_same(op.todense(), ref.todense())
+    for attr in '', 'C', 'T', 'H', 'I', 'IC', 'IT', 'IH':
+        yield func, attr
+
+
+def test_composite():
+    global counter
+    counter = 0  # restart the count of operators built by callback()
+    proxy_lists = [get_operator(proxy_list, attr)  # py3 needs the parens
+                   for attr in ('', 'C', 'T', 'H', 'I', 'IC', 'IT', 'IH')]
+    ref_lists = [get_operator(ref_list, attr)
+                 for attr in ('', 'C', 'T', 'H', 'I', 'IC', 'IT', 'IH')]
+
+    op = AdditionOperator(CompositionOperator(_) for _ in zip(*proxy_lists))
+    ref = AdditionOperator(CompositionOperator(_) for _ in zip(*ref_lists))
+    assert_same(op.todense(), ref.todense())
+    assert_equal(counter, nproxy * op.shapein[0])  # builds done by todense
+
+
+def test_getattr():  # attribute `i` must be readable through the proxies
+    assert_equal(sum(_.i for _ in proxy_list),
+                 sum(_.i for _ in ref_list))
diff --git a/test/test_pywt.py b/test/test_pywt.py
new file mode 100644
index 0000000..884268e
--- /dev/null
+++ b/test/test_pywt.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+import nose
+from numpy import testing
+
+try:
+    import pywt
+except ImportError:
+    from nose.plugins.skip import SkipTest
+    raise SkipTest
+
+from pyoperators.operators_pywt import WaveletOperator, Wavelet2dOperator
+
+sizes = ((32,),)
+shapes = ((4, 4), )
+wavelist = pywt.wavelist()
+levels = [2]
+
+
+def check_wavelet_transpose(w, l, s):
+    op = WaveletOperator(w, level=l, shapein=s)  # dense(op.T) == dense(op).T
+    testing.assert_array_almost_equal(op.todense(), op.T.todense().T)
+
+
+def test_wavelet_transpose():
+    """Yield one transpose check per size, wavelet name and level."""
+    for s, w, l in ((s, w, l) for s in sizes for w in wavelist
+                    for l in levels):
+        yield check_wavelet_transpose, w, l, s
+
+
+def check_wavelet2d_transpose(w, l, s):
+    op = Wavelet2dOperator(w, level=l, shapein=s, mode='per')
+    testing.assert_array_almost_equal(op.todense(), op.T.todense().T)
+
+
+def test_wavelet2d_transpose():
+    """Yield one 2d transpose check per shape, wavelet name and level."""
+    for s, w, l in ((s, w, l) for s in shapes for w in wavelist
+                    for l in levels):
+        yield check_wavelet2d_transpose, w, l, s
+
+
+if __name__ == "__main__":
+    nose.run(defaultTest=__file__)
diff --git a/test/test_reset.py b/test/test_reset.py
new file mode 100644
index 0000000..bc37624
--- /dev/null
+++ b/test/test_reset.py
@@ -0,0 +1,27 @@
+from __future__ import division
+from numpy.testing import assert_equal
+from pyoperators import Operator
+from .common import OPS
+
+
+def test():
+    """Yield checks that _reset with a shape marks that shape explicit."""
+    def func(Op):
+        for keyword, flag in (('shapein', 'shape_input'),
+                              ('shapeout', 'shape_output')):
+            op = Op()
+            op._reset(**{keyword: 3})
+            assert_equal(getattr(op.flags, flag), 'explicit')
+    for Op in OPS:
+        yield func, Op
+
+
+def test_square():
+    op = Operator(shapein=3, shapeout=3)
+    assert op.flags.square  # equal input and output shapes
+    op._reset(shapeout=4)
+    assert not op.flags.square  # the flag must follow the new output shape
+
+    op = Operator(shapein=3, shapeout=3)
+    op._reset(shapein=4)
+    assert not op.flags.square  # likewise when the input shape changes
diff --git a/test/test_rules.py b/test/test_rules.py
new file mode 100644
index 0000000..9869741
--- /dev/null
+++ b/test/test_rules.py
@@ -0,0 +1,430 @@
+import numpy as np
+import os
+import pyoperators
+import tempfile
+import warnings
+from nose import with_setup
+from numpy.testing import assert_equal, assert_raises, assert_warns
+from pyoperators import (
+    Operator, AdditionOperator, CompositionOperator, DiagonalOperator,
+    HomothetyOperator, IdentityOperator, MultiplicationOperator,
+    PyOperatorsWarning)
+from pyoperators.flags import linear
+from pyoperators.rules import BinaryRule, UnaryRule, RuleManager, rule_manager
+from pyoperators.utils import ndarraywrap
+from pyoperators.utils.testing import (
+    assert_eq, assert_is, assert_is_none, assert_is_not_none,
+    assert_is_instance)
+
+from .common import OPS, ndarray2, attr2
+
+op = Operator()
+ops = [OP() for OP in OPS]
+
+ids_left = (IdentityOperator(classout=ndarray2, attrout=attr2),
+            IdentityOperator(shapein=4, classout=ndarray2, attrout=attr2))
+ids_right = (IdentityOperator(classout=ndarray2, attrout=attr2),
+             IdentityOperator(shapein=3, classout=ndarray2, attrout=attr2))
+
+
+class Operator1(Operator):  # operand type for the rule tests (see op1)
+    pass
+
+
+class Operator2(Operator):  # operand type unrelated to Operator1 (see op2)
+    pass
+
+
+class Operator3(Operator):  # type of op3, the value returned by p2
+    pass
+
+
+class Operator4(Operator1):  # subclass of Operator1 (see test_binaryrule5)
+    pass
+
+op1 = Operator1()
+op2 = Operator2()
+op3 = Operator3()
+op4 = Operator4()
+
+
+class NonLinearOperator(Operator):  # declared without the linear flag
+    pass
+
+
+@linear
+class LinearOperator(Operator):  # counterpart of NonLinearOperator
+    pass
+
+
+def p1(o1, o2):  # predicate returning a pair: the right operand, then op1
+    return (o2, op1)
+
+
+def p2(o1, o2):  # predicate ignoring its operands: always returns op3
+    return op3
+
+
+def test_unaryrule1():  # '.' -> the operand itself, '1' -> an identity
+    def func(s, p):
+        result = UnaryRule(s, p)(op1)
+        if p != '.':
+            assert_is_instance(result, IdentityOperator)
+        else:
+            assert_is(result, op1)
+    for s in ('C', 'T', 'H', 'I', 'IC', 'IT', 'IH'):
+        for p in ('.', '1'):
+            yield func, s, p
+
+
+def test_unaryrule2():
+    """Check that invalid subject/predicate pairs raise ValueError."""
+    invalid = (('.', '.'), ('.T', '.'),
+               ('T', 'C'), ('T', 'T'),
+               ('T', 'H'), ('T', 'I'))
+    for subject, predicate in invalid:
+        assert_raises(ValueError, UnaryRule, subject, predicate)
+
+
+def test_binaryrule1():
+    op.T  # generate associated operators
+
+    def func(s1, s2, s3, o1, o2, ref):
+        rule = BinaryRule(s1 + ',' + s2, s3)
+        result = rule(o1, o2)
+        assert_is_not_none(result)
+        assert_is_instance(result, Operator)
+        if s3 == '1':  # predicate '1' must produce an identity
+            assert_is_instance(result, IdentityOperator)
+            return
+        o3 = getattr(ref, s3) if s3 != '.' else ref
+        assert_is(result, o3)
+    for s1 in ('.', 'C', 'T', 'H', 'I'):
+        o1 = getattr(op, s1) if s1 != '.' else op
+        for s2 in ('.', 'C', 'T', 'H', 'I'):
+            if '.' not in (s1, s2):  # only pairs with a '.' subject are tested
+                continue
+            o2 = getattr(op, s2) if s2 != '.' else op
+            ref = o1 if s2[-1] != '.' else o2
+            for s3 in ('1', '.', 'C', 'T', 'H', 'I'):
+                yield func, s1, s2, s3, o1, o2, ref
+
+
+def test_binaryrule2():
+    rule = BinaryRule('.,T', p1)
+    yield assert_is_none, rule(op1, op2)  # op2 is not op1.T: no match
+    yield assert_equal, rule(op1, op1.T), (op1.T, op1)  # p1's pair
+
+
+def test_binaryrule3():
+    rule = BinaryRule('.,T', p2)
+    yield assert_is_none, rule(op1, op2)  # op2 is not op1.T: no match
+    yield assert_is_instance, rule(op1, op1.T), Operator3  # p2 gives op3
+
+
+def test_binaryrule4():
+    rule = BinaryRule(('.', HomothetyOperator), p1)  # subject given as a type
+    yield assert_is_none, rule(op1, op2)  # op2 is not a HomothetyOperator
+    s = HomothetyOperator(2)
+    yield assert_equal, rule(op1, s), (s, op1)
+
+
+def test_binaryrule5():
+    rule = BinaryRule((type(op1), '.'), p2)
+    yield assert_equal, rule(op1, op1), op3
+    yield assert_is_none, rule(op2, op1)  # op2 is not an Operator1
+    yield assert_equal, rule(op4, op1), op3  # a subclass instance matches too
+
+
+def test_binaryrule_priority():
+    r1 = lambda o: None
+    r2 = lambda o: None
+
+    class Op1(Operator):
+        pass
+
+    class Op2(Op1):
+        pass
+
+    class Op3(Op2):
+        def __init__(self):
+            Op2.__init__(self)
+            self.set_rule(('.', OpA), r1, CompositionOperator)
+            self.set_rule(('.', Op3), r1, CompositionOperator)
+            self.set_rule('.,T', r1, CompositionOperator)
+            self.set_rule(('.', Op2), r1, CompositionOperator)
+            self.set_rule(('.', OpB), r1, CompositionOperator)
+            self.set_rule(('.', Op1), r1, CompositionOperator)
+            self.set_rule('.,H', r1, CompositionOperator)
+            self.set_rule(('.', Op4), r1, CompositionOperator)
+            self.set_rule(('.', Op2), r2, CompositionOperator)  # set again
+
+    class Op4(Op3):
+        pass
+
+    class OpA(Operator):
+        pass
+
+    class OpB(Operator):
+        pass
+
+    op = Op3()
+    act = [r.subjects for r in op.rules[CompositionOperator]['left']]
+    exp = [('.', 'H'), ('.', 'T'), ('.', OpB), ('.', Op4), ('.', Op3),
+           ('.', Op2), ('.', Op1), ('.', OpA)]  # expected stored order
+    for a, e in zip(act, exp):
+        yield assert_eq, a, e
+    assert op.rules[CompositionOperator]['left'][5].predicate is r2
+
+
+def test_merge_identity():
+    def func(op, op1, op2, op_ref):
+        assert_is_instance(op, type(op_ref))
+        attr = {}
+        attr.update(op2.attrout)
+        attr.update(op1.attrout)  # the left operand's attributes win
+        assert_equal(op.attrout, attr)
+        x = np.ones(op.shapein if op.shapein is not None else 3)
+        y = ndarraywrap(4)
+        op(x, y)
+        if op1.flags.shape_output == 'unconstrained' or \
+           op2.flags.shape_output == 'unconstrained':
+            y2_tmp = np.empty(3 if isinstance(op2, IdentityOperator) else 4)
+            y2 = np.empty(4)
+            op2(x, y2_tmp)  # explicit outputs: shapes cannot be inferred here
+            op1(y2_tmp, y2)
+        else:
+            y2 = op1(op2(x))
+        assert_equal(y, y2)
+        assert_is_instance(y, op1.classout)
+    for op1 in ops:
+        for op2 in ids_right:
+            op = op1 * op2
+            yield func, op, op1, op2, op1
+        for op2 in ids_left:
+            op = op2 * op1
+            yield func, op, op2, op1, op1
+
+
+def test_del_rule():
+    class Op(Operator):
+        def __init__(self):
+            Operator.__init__(self)
+            self.set_rule('T', '.')
+            self.set_rule('C', '1')
+            self.set_rule('.,T', '.', CompositionOperator)
+            self.set_rule('T,.', '.', CompositionOperator)
+            self.set_rule('.,C', '.I', AdditionOperator)
+            self.set_rule('H,.', '.I', AdditionOperator)
+            self.set_rule('.,C', '.I', MultiplicationOperator)
+            self.set_rule('H,.', '.I', MultiplicationOperator)
+
+    op = Op()
+    assert_equal(len(op.rules[None]), 2)  # unary rules are keyed by None
+    assert_equal(len(op.rules[CompositionOperator]['left']), 1)
+    assert_equal(len(op.rules[CompositionOperator]['right']), 2)
+    assert_equal(len(op.rules[AdditionOperator]), 2)
+    assert_equal(len(op.rules[MultiplicationOperator]), 2)
+
+    op.del_rule('T')
+    op.del_rule('C')
+    op.del_rule('.,T', CompositionOperator)
+    op.del_rule('T,.', CompositionOperator)
+    op.del_rule('C,.', AdditionOperator)  # swapped w.r.t. set_rule('.,C')
+    op.del_rule('.,H', AdditionOperator)  # swapped w.r.t. set_rule('H,.')
+    op.del_rule('.,C', MultiplicationOperator)
+    op.del_rule('H,.', MultiplicationOperator)
+
+    assert_equal(len(op.rules[None]), 0)
+    assert_equal(len(op.rules[CompositionOperator]['left']), 0)
+    assert_equal(len(op.rules[CompositionOperator]['right']), 1)
+    assert_equal(len(op.rules[AdditionOperator]), 0)
+    assert_equal(len(op.rules[MultiplicationOperator]), 0)
+
+
+def test_absorb_scalar():
+    """Check how homotheties are absorbed by neighbours in a composition.
+
+    Each row of `ops` is composed; the types and values of the resulting
+    operands are compared with the matching rows of `expected_types` and
+    `expected_values`.
+    """
+    h = HomothetyOperator(2)
+
+    # Operator with a composition rule for a homothety on its right.
+    @linear
+    class AbsorbRightOperator(Operator):
+        def __init__(self, value=3., **keywords):
+            self.value = np.array(value)
+            Operator.__init__(self, **keywords)
+            self.set_rule(('.', HomothetyOperator), lambda s, o:
+                          AbsorbRightOperator(s.value * o.data),
+                          CompositionOperator)
+
+    # Operator with a composition rule for a homothety on its left.
+    @linear
+    class AbsorbLeftOperator(Operator):
+        def __init__(self, value=3., **keywords):
+            self.value = np.array(value)
+            Operator.__init__(self, **keywords)
+            self.set_rule((HomothetyOperator, '.'), lambda o, s:
+                          AbsorbLeftOperator(s.value * o.data),
+                          CompositionOperator)
+    nl = NonLinearOperator()
+    l = LinearOperator()
+    ar = AbsorbRightOperator()
+    al = AbsorbLeftOperator()
+    # Operand sequences handed to CompositionOperator.
+    ops = [[h, nl, h, ar, nl, h, al, nl, h],
+           [h, nl, ar, h, nl, al, h, nl, h],
+           [h, ar, nl, h, al],
+           [ar, h, nl, al, h],
+           [h, ar, l, h, al],
+           [ar, h, l, al, h],
+           [h, l, ar],
+           [l, ar, h],
+           [h, l, al],
+           [l, al, h]]
+    # Expected operand types of the simplified composition, row by row.
+    expected_types = [
+        [HomothetyOperator, NonLinearOperator, AbsorbRightOperator,
+         NonLinearOperator, AbsorbLeftOperator, NonLinearOperator,
+         HomothetyOperator],
+        [HomothetyOperator, NonLinearOperator, AbsorbRightOperator,
+         NonLinearOperator, AbsorbLeftOperator, NonLinearOperator,
+         HomothetyOperator],
+        [AbsorbRightOperator, NonLinearOperator, AbsorbLeftOperator],
+        [AbsorbRightOperator, NonLinearOperator, AbsorbLeftOperator],
+        [AbsorbRightOperator, LinearOperator, AbsorbLeftOperator],
+        [AbsorbRightOperator, LinearOperator, AbsorbLeftOperator],
+        [LinearOperator, AbsorbRightOperator],
+        [LinearOperator, AbsorbRightOperator],
+        [LinearOperator, AbsorbLeftOperator],
+        [LinearOperator, AbsorbLeftOperator]]
+    # Expected get_val() of each operand (0 stands for nl and l).
+    expected_values = [[2, 0, 6, 0, 6, 0, 2],
+                       [2, 0, 6, 0, 6, 0, 2],
+                       [6, 0, 6],
+                       [6, 0, 6],
+                       [12, 0, 3],
+                       [12, 0, 3],
+                       [0, 6],
+                       [0, 6],
+                       [0, 6],
+                       [0, 6]]
+
+    def get_val(op):
+        # Scalar carried by an operand; 0 for operands that carry none.
+        if isinstance(op, (NonLinearOperator, LinearOperator)):
+            return 0
+        if isinstance(op, HomothetyOperator):
+            return op.data
+        return op.value
+
+    def func(ops, expected_types, expected_values):
+        op = CompositionOperator(ops)
+        assert_eq([type(o) for o in op.operands], expected_types)
+        assert_eq([get_val(o) for o in op.operands], expected_values)
+    for op, expected_type, expected_value in zip(ops, expected_types,
+                                                 expected_values):
+        yield func, op, expected_type, expected_value
+
+
+# State saved by setup_user_rules() and put back by teardown_user_rules().
+_old_local_path = None
+_old_triggers = None
+
+
+def setup_user_rules():
+    """Fixture: point LOCAL_PATH at the temp directory and write a rules.txt.
+
+    The previous LOCAL_PATH and a copy of the rule triggers are stored in
+    module globals. The file written contains valid assignments, comments
+    and deliberately malformed lines.
+    """
+    global _old_local_path, _old_triggers
+    _old_local_path = pyoperators.config.LOCAL_PATH
+    _old_triggers = pyoperators.rules._triggers.copy()
+    pyoperators.rules.rule_manager.clear()
+    new_local_path = tempfile.gettempdir()
+    pyoperators.config.LOCAL_PATH = new_local_path
+    with open(os.path.join(new_local_path, 'rules.txt'), 'w') as f:
+        f.write("""
+d1 = 3
+d2 = 'value2' # comment
+incorrect1
+
+# comment
+ # comment
+d3 = incorrect2
+d4 = 'value4' = incorrect3
+d1 = 4""")
+
+
+def teardown_user_rules():
+    pyoperators.config.LOCAL_PATH = _old_local_path
+    pyoperators.rules._triggers = _old_triggers
+    os.remove(os.path.join(tempfile.gettempdir(), 'rules.txt'))
+
+
+ at with_setup(setup_user_rules, teardown_user_rules)
+def test_manager():
+    path = os.path.join(pyoperators.config.LOCAL_PATH, 'rules.txt')
+    oldmod = os.stat(path)[0]
+    try:
+        os.chmod(path, 0)
+        assert_warns(PyOperatorsWarning, RuleManager)
+    finally:
+        os.chmod(path, oldmod)
+    pyoperators.rules.rule_manager.clear()
+
+    with warnings.catch_warnings(record=True) as w:
+        rule_manager = RuleManager()
+        assert_equal(sum(_.category is PyOperatorsWarning for _ in w), 3)
+    assert_equal(len(rule_manager), 4)
+    for key, default in pyoperators.rules._default_triggers.items():
+        assert_equal(rule_manager[key], default)
+    assert 'd1' in rule_manager
+    assert_equal(rule_manager['d1'], 4)
+    assert 'd2' in rule_manager
+    assert_equal(rule_manager['d2'], 'value2')
+    assert_equal(str(rule_manager),
+                 'd1      = 4         # \n'
+                 "d2      = 'value2'  # \n"
+                 'inplace = False     # Allow inplace simplifications\n'
+                 'none    = False     # Inhibit all rule simplifications')
+    rule_manager.register('new_rule', 20, 'my new rule')
+    assert 'new_rule' in rule_manager
+    assert_equal(rule_manager['new_rule'], 20)
+    assert_equal(pyoperators.rules._description_triggers['new_rule'],
+                 'my new rule')
+
+    _triggers = pyoperators.rules._triggers
+    assert_equal(rule_manager.get('d1'), _triggers.get('d1'))
+    assert_equal(rule_manager.items(), _triggers.items())
+    assert_equal(rule_manager.keys(), _triggers.keys())
+    assert_equal(rule_manager.pop('d1'), 4)
+    assert 'd1' not in _triggers
+    item = rule_manager.popitem()
+    assert item[0] not in _triggers
+
+
+def test_manager2():
+    """Check nested rule_manager contexts set and restore the 'none' rule."""
+    rule_manager['none'] = False
+    assert not rule_manager['none']
+    with rule_manager(none=True) as new_rule_manager:
+        assert rule_manager['none']
+        with new_rule_manager(none=False) as new_rule_manager2:
+            assert not rule_manager['none']
+            rule_manager['none'] = True
+            assert rule_manager['none']
+            # A context entered without keywords leaves the value unchanged.
+            with new_rule_manager2():
+                assert rule_manager['none']
+            rule_manager['none'] = False
+        # Leaving a context restores the value it had on entry, whatever
+        # was assigned inside.
+        assert rule_manager['none']
+    assert not rule_manager['none']
+
+
+def test_manager_errors():
+    assert_raises(KeyError, rule_manager, non_existent_rule=True)
+    assert_raises(KeyError, rule_manager.__getitem__, 'non_existent')
+    assert_raises(KeyError, rule_manager.__setitem__, 'non_existent', True)
+    assert_raises(TypeError, rule_manager.register, 32, 0, '')
+    assert_raises(TypeError, rule_manager.register, 'new_rule', 0, 0)
+
+
+def test_rule_manager_none():
+    op1 = DiagonalOperator([1, 2, 3])
+    op2 = 2
+
+    def func(cls, none):
+        with rule_manager(none=none):
+            op = cls([op1, op2])
+            if none:
+                assert_is_instance(op, cls)
+            else:
+                assert_is_instance(op, DiagonalOperator)
+    for cls in AdditionOperator, CompositionOperator, MultiplicationOperator:
+        for none in False, True:
+            yield func, cls, none
diff --git a/test/test_sparse.py b/test/test_sparse.py
new file mode 100644
index 0000000..9ab2e95
--- /dev/null
+++ b/test/test_sparse.py
@@ -0,0 +1,94 @@
+from __future__ import division
+
+import numpy as np
+import operator
+import scipy.sparse as sp
+from numpy.testing import assert_raises
+from pyoperators import SparseOperator
+from pyoperators.utils.testing import assert_same
+
+# Reference 4x4 matrix wrapped by the sparse operators under test.
+A = np.array([[1, 0, 2, 0],
+              [0, 0, 3, 0],
+              [4, 5, 6, 0],
+              [1, 0, 0, 1]])
+# The four canonical basis vectors, used as inputs.
+vecs = [[1, 0, 0, 0],
+        [0, 1, 0, 0],
+        [0, 0, 1, 0],
+        [0, 0, 0, 1]]
+# scipy.sparse formats under test; '<format>_matrix' is looked up in sp.
+formats = 'bsr,csr,csc,coo,dia,dok'.split(',')
+
+
+def test_matvec():
+    def func(format):
+        cls = getattr(sp, format + '_matrix')
+        so = SparseOperator(cls(A))
+        for vec in vecs:
+            assert_same(so(vec), np.dot(A, vec))
+            assert_same(so.T(vec), np.dot(A.T, vec))
+        assert_same(so.todense(), A)
+        assert_same(so.T.todense(), A.T)
+    for format in formats:
+        yield func, format
+
+
+def test_shapes():
+    def func(format):
+        cls = getattr(sp, format + '_matrix')
+        shapein = (2, 2)
+        shapeout = (1, 4, 1)
+        so = SparseOperator(cls(A), shapein=shapein, shapeout=shapeout)
+        for vec in vecs:
+            assert_same(so(np.reshape(vec, shapein)),
+                        np.dot(A, vec).reshape(shapeout))
+            assert_same(so.T(np.reshape(vec, shapeout)),
+                        np.dot(A.T, vec).reshape(shapein))
+    for format in formats:
+        yield func, format
+
+
+def test_update_output():
+    def func(format):
+        cls = getattr(sp, format + '_matrix')
+        so = SparseOperator(cls(A))
+        out = np.zeros(4, dtype=int)
+        outT = np.zeros(4, dtype=int)
+        for vec in vecs:
+            so(vec, out, operation=operator.iadd)
+            so.T(vec, outT, operation=operator.iadd)
+        assert_same(out, np.sum(A, axis=1))
+        assert_same(outT, np.sum(A, axis=0))
+    for format in formats:
+        yield func, format
+
+
+def test_error1():
+    values = (sp.lil_matrix(A), np.zeros((10, 4)),
+              np.matrix(np.zeros((10, 4))), 3)
+
+    def func(v):
+        assert_raises(TypeError, SparseOperator, v)
+    for v in values:
+        yield func, v
+
+
+def test_error2():
+    def func(format):
+        cls = getattr(sp, format + '_matrix')
+        sm = cls(A)
+        shapein = (2, 3)
+        shapeout = (1, 4, 2)
+        assert_raises(ValueError, SparseOperator, sm, shapein=shapein)
+        assert_raises(ValueError, SparseOperator, sm, shapeout=shapeout)
+    for format in formats:
+        yield func, format
+
+
+def test_error3():
+    def func(format):
+        cls = getattr(sp, format + '_matrix')
+        sm = cls(A)
+        so = SparseOperator(sm)
+        out = np.zeros(4, dtype=int)
+        assert_raises(ValueError, so, vecs[0], out, operation=operator.imul)
+    for format in formats:
+        yield func, format
diff --git a/test/test_stopconditions.py b/test/test_stopconditions.py
new file mode 100644
index 0000000..245b184
--- /dev/null
+++ b/test/test_stopconditions.py
@@ -0,0 +1,46 @@
+from __future__ import division
+
+import itertools
+
+from pyoperators.iterative.stopconditions import StopCondition
+from pyoperators.utils.testing import assert_eq, assert_raises
+
+
+class A():
+    """Empty attribute holder; the tests set a, b and c on its instances."""
+    pass
+
+# One stop condition per attribute: predicate "attribute > 2" plus a label.
+sc1 = StopCondition(lambda s: s.a > 2, 'a>2')
+sc2 = StopCondition(lambda s: s.b > 2, 'b>2')
+sc3 = StopCondition(lambda s: s.c > 2, 'c>2')
+
+
+def test_stop_condition():
+    values = (1, 3)
+
+    def func(v):
+        a = A()
+        a.a = v
+        if v > 2:
+            assert_raises(StopIteration, sc1, a)
+    for v in values:
+        yield func, v
+
+
+def test_stop_condition_or():
+    sc = sc1 or sc2 or sc2
+
+    def func(v):
+        a = A()
+        a.a, a.b, a.c = v
+        if any(_ > 2 for _ in v):
+            try:
+                sc(a)
+            except StopIteration as e:
+                if a.a > 2:
+                    assert_eq(str(e), str(sc1))
+                elif a.b > 2:
+                    assert_eq(str(e), str(sc2))
+                else:
+                    assert_eq(str(e), str(sc3))
+    for v in itertools.product((1, 3), repeat=3):
+        yield func, v
diff --git a/test/test_str.py b/test/test_str.py
new file mode 100644
index 0000000..51705aa
--- /dev/null
+++ b/test/test_str.py
@@ -0,0 +1,134 @@
+import itertools
+from nose import SkipTest
+from numpy.testing import assert_equal
+from pyoperators import (
+    CompositionOperator, PowerOperator, ProductOperator, Operator, flags)
+
+
+class NL1(Operator):
+    """Operator whose string form is the plain name 'n'."""
+    def __str__(self):
+        return 'n'
+
+
+class NL2(Operator):
+    """Operator whose string form is a call with a '...' placeholder."""
+    def __str__(self):
+        return 'a(..., z=1)'
+
+
+class NL3(Operator):
+    """Operator whose string form is an expression on a '...' placeholder."""
+    def __str__(self):
+        return '...**2'
+
+
+ at flags.linear
+class L(Operator):
+    pass
+
+
+def check(group, expected):
+    actual = str(CompositionOperator(group))
+    if '**2**2' in actual:
+        raise SkipTest
+    assert_equal(str(CompositionOperator(group)), expected)
+
+
+def test1():
+    """Check the string form of compositions of 1 to 4 operators n and l."""
+    n = NL1()
+    l = L()
+    # All sequences over (n, l) of length 1, 2, 3 and 4, in product order.
+    groups = itertools.chain(*[itertools.product([n, l], repeat=i)
+                               for i in range(1, 5)])
+    # '|'-separated expected strings, in the same order as `groups`.
+    expecteds = (
+        'n|l|'
+        'n(n)|n(l)|l(n)|l * l|'
+        'n(n(n))|n(n(l))|n(l(n))|n(l * l)|'
+        'l(n(n))|l(n(l))|(l * l)(n)|l * l * l|'
+        'n(n(n(n)))|n(n(n(l)))|n(n(l(n)))|n(n(l * l))|'
+        'n(l(n(n)))|n(l(n(l)))|n((l * l)(n))|n(l * l * l)|'
+        'l(n(n(n)))|l(n(n(l)))|l(n(l(n)))|l(n(l * l))|'
+        '(l * l)(n(n))|(l * l)(n(l))|(l * l * l)(n)|l * l * l * l')
+    for group, expected in zip(groups, expecteds.split('|')):
+        yield check, group, expected
+
+
+def test2():
+    """Check the string form of compositions that involve NL2 ('a(..., z=1)').
+
+    Only the sequences over (n, l, a) of length 1 to 3 that contain `a` are
+    checked; `expecteds` lists their expected strings in iteration order.
+    """
+    n = NL1()
+    a = NL2()
+    l = L()
+    groups = itertools.chain(*[itertools.product([n, l, a], repeat=i)
+                               for i in range(1, 4)])
+    expecteds = iter((
+        'a(..., z=1)|'
+        'n(a(..., z=1))|'
+        'l(a(..., z=1))|'
+        'a(n, z=1)|'
+        'a(l, z=1)|'
+        'a(a(..., z=1), z=1)|'
+        'n(n(a(..., z=1)))|'
+        'n(l(a(..., z=1)))|'
+        'n(a(n, z=1))|'
+        'n(a(l, z=1))|'
+        'n(a(a(..., z=1), z=1))|'
+        'l(n(a(..., z=1)))|'
+        '(l * l)(a(..., z=1))|'
+        'l(a(n, z=1))|'
+        'l(a(l, z=1))|'
+        'l(a(a(..., z=1), z=1))|'
+        'a(n(n), z=1)|'
+        'a(n(l), z=1)|'
+        'a(n(a(..., z=1)), z=1)|'
+        'a(l(n), z=1)|'
+        'a(l * l, z=1)|'
+        'a(l(a(..., z=1)), z=1)|'
+        'a(a(n, z=1), z=1)|'
+        'a(a(l, z=1), z=1)|'
+        'a(a(a(..., z=1), z=1), z=1)').split('|'))
+    for group in groups:
+        # An expected string is consumed only for groups that contain `a`.
+        if a not in group:
+            continue
+        expected = next(expecteds)
+        yield check, group, expected
+
+
+def test3():
+    """Check the string form of compositions that involve NL3 ('...**2').
+
+    Only the sequences over (n, l, a) of length 1 to 3 that contain `a` are
+    checked; `expecteds` lists their expected strings in iteration order.
+    """
+    n = NL1()
+    a = NL3()
+    l = L()
+    groups = itertools.chain(*[itertools.product([n, l, a], repeat=i)
+                               for i in range(1, 4)])
+    # The trailing '|' of the last entry adds an empty 26th item to the
+    # split; only 25 groups contain `a`, so it is never consumed.
+    expecteds = iter((
+        '...**2|'
+        'n(...**2)|'
+        'l(...**2)|'
+        'n**2|'
+        'l**2|'
+        '(...**2)**2|'
+        'n(n(...**2))|'
+        'n(l(...**2))|'
+        'n(n**2)|'
+        'n(l**2)|'
+        'n((...**2)**2)|'
+        'l(n(...**2))|'
+        '(l * l)(...**2)|'
+        'l(n**2)|'
+        'l(l**2)|'
+        'l((...**2)**2)|'
+        'n(n)**2|'
+        'n(l)**2|'
+        'n(...**2)**2|'
+        'l(n)**2|'
+        '(l * l)**2|'
+        'l(...**2)**2|'
+        '(n**2)**2|'
+        '(l**2)**2|'
+        '((...**2)**2)**2|').split('|'))
+    for group in groups:
+        if a not in group:
+            continue
+        expected = next(expecteds)
+        yield check, group, expected
+
+
+def test4():
+    """Skipped test for the string form of PowerOperator(ProductOperator)."""
+    # The unconditional skip makes the assertion below unreachable.
+    raise SkipTest
+    assert str(PowerOperator(3)(ProductOperator(axis=2))) == \
+           'product(..., axis=2)**3'
diff --git a/test/test_ufuncs.py b/test/test_ufuncs.py
new file mode 100644
index 0000000..924984c
--- /dev/null
+++ b/test/test_ufuncs.py
@@ -0,0 +1,52 @@
+import itertools
+import numpy as np
+from pyoperators.utils import pi
+from pyoperators.utils.testing import assert_eq, assert_same
+from pyoperators.utils.ufuncs import abs2, masking, multiply_conjugate
+from .common import DTYPES, COMPLEX_DTYPES
+
+
+def test_abs2():
+    x = np.array([pi(np.float128) + 1j, pi(np.float128)*1j, 3])
+
+    def func(d):
+        x_ = np.array(x if d.kind == 'c' else x.real, dtype=d)
+        actual = abs2(x_)
+        expected = np.abs(x_**2)
+        assert_same(actual, expected)
+        abs2(x_, actual)
+        assert_same(actual, expected)
+    for dtype in DTYPES:
+        yield func, dtype
+
+
+def test_masking():
+    def func(a, mask):
+        actual = masking(a, mask)
+        expected = a.copy()
+        expected[mask] = 0
+        assert_eq(actual, expected)
+        masking(a, mask, a)
+        assert_eq(a, expected)
+    for dtype in DTYPES:
+        a = np.arange(4, dtype=dtype)
+        mask = np.array([True, False, False, True], dtype=bool)
+        yield func, a, mask
+
+
+def test_multiply_conjugate():
+    def func(x1, x2, cdtype):
+        result = multiply_conjugate(x1, x2)
+        expected = x1 * x2.conjugate()
+        assert_eq(result, expected)
+        result[...] = 0
+        multiply_conjugate(x1, x2, result)
+        assert_eq(result, expected)
+    for dtype, cdtype in itertools.product(DTYPES, COMPLEX_DTYPES):
+        x1 = np.array([1+1j, 1+1j, 3+1j])
+        if dtype.kind == 'c':
+            x1 = x1.astype(dtype)
+        else:
+            x1 = x1.real.astype(dtype)
+        x2 = np.array(1-1j, dtype=cdtype)
+        yield func, x1, x2, cdtype
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..56c1797
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,529 @@
+import itertools
+import numpy as np
+import os
+import time
+
+from contextlib import contextmanager
+from numpy.testing import assert_equal
+from pyoperators import Operator
+from pyoperators.utils import (
+    broadcast_shapes, cast, complex_dtype, first, first_is_not, float_dtype,
+    ifirst, ifirst_is_not, ilast, ilast_is_not, groupbykey,
+    inspect_special_values, interruptible, isscalarlike, izip_broadcast, last,
+    last_is_not, least_greater_multiple, one, omp_num_threads, pi,
+    pool_threading, product, reshape_broadcast, setting, settingerr, split,
+    strenum, strplural, strshape, Timer, uninterruptible, zero)
+from pyoperators.utils.testing import (
+    assert_eq, assert_not_in, assert_raises, assert_same)
+
+# Boolean, integer, floating-point and complex dtypes used by the
+# parametrised tests of this module.
+dtypes = [np.dtype(t) for t in (np.bool8, np.uint8, np.int8, np.uint16,
+          np.int16, np.uint32, np.int32, np.uint64, np.int64, np.float32,
+          np.float64, np.float128, np.complex64, np.complex128, np.complex256)]
+
+
+def assert_dtype(a, d):
+    if a is None:
+        return
+    assert_eq(a.dtype, d)
+
+
+def test_broadcast_shapes():
+    """Check broadcast_shapes on tuples of shapes, empty shapes included."""
+    # Each entry is the argument tuple passed to broadcast_shapes.
+    shapes = [((),), ((), ()), ((), (), ()),
+              ((1,),), ((), (1,)), ((), (1,), (1,)),
+              ((2,),), ((), (2,)), ((1,), (2,)),
+              ((), (2,), (2,)), ((), (2,), (1,)), ((2,), (1,), ()),
+              ((1,), (2, 1)), ((), (2, 1)), ((1,), (2, 1), ()),
+              ((1,), (1, 2)), ((), (1, 2)), ((1,), (1, 2), ()),
+              ((2,), (2, 1)), ((), (2,), (2, 1)), ((2,), (2, 1), ()),
+              ((1, 2), (2, 1)), ((), (1, 2), (2, 1)), ((1, 2), (2, 1), ()),
+              ((1, 1, 4), (1, 3, 1), (2, 1, 1), (), (1, 1, 1))]
+    # Expected broadcast shape for the entry at the same position.
+    expecteds = [(), (), (),
+                 (1,), (1,), (1,),
+                 (2,), (2,), (2,),
+                 (2,), (2,), (2,),
+                 (2, 1), (2, 1), (2, 1),
+                 (1, 2), (1, 2), (1, 2),
+                 (2, 2), (2, 2), (2, 2),
+                 (2, 2), (2, 2), (2, 2),
+                 (2, 3, 4)]
+
+    def func(shape, expected):
+        assert_equal(broadcast_shapes(*shape), expected)
+    for shape, expected in zip(shapes, expecteds):
+        yield func, shape, expected
+
+
+def test_cast():
+    dtypes = (float, complex, None)
+
+    def func(d1, d2):
+        a1 = None if d1 is None else np.array(1, dtype=d1)
+        a2 = None if d2 is None else np.array(1, dtype=d2)
+        if d1 is None and d2 is None:
+            assert_raises(ValueError, cast, [a1, a2])
+            return
+        expected = d1 if d2 is None else d2 if d1 is None else \
+            np.promote_types(d1, d2)
+        a1_, a2_ = cast([a1, a2])
+        assert_dtype(a1_, expected)
+        assert_dtype(a2_, expected)
+    for d1, d2 in itertools.product(dtypes, repeat=2):
+        yield func, d1, d2
+
+
+def test_complex_dtype():
+    dtypes = (str, bool, int, np.uint32, np.float16, np.float32, np.float64,
+              np.float128)
+    expecteds = (None, complex, complex, complex, complex, np.complex64,
+                 np.complex128, np.complex256)
+
+    def func(dtype, expected):
+        if expected is None:
+            assert_raises(TypeError, complex_dtype, dtype)
+        else:
+            actual = complex_dtype(dtype)
+            assert_eq(actual, expected)
+    for dtype, expected in zip(dtypes, expecteds):
+        yield func, dtype, expected
+
+
+def test_float_dtype():
+    dtypes = (str, bool, int, np.uint32, np.float16, np.float32, np.float64,
+              np.float128)
+    expecteds = (None, float, float, float, np.float16, np.float32, np.float64,
+                 np.float128)
+
+    def func(dtype, expected):
+        if expected is None:
+            assert_raises(TypeError, float_dtype, dtype)
+        else:
+            actual = float_dtype(dtype)
+            assert_eq(actual, expected)
+    for dtype, expected in zip(dtypes, expecteds):
+        yield func, dtype, expected
+
+
+def test_first1():
+    assert first([1, 2, 3], lambda x: x > 1.5) == 2
+    assert last([1, 2, 3], lambda x: x > 1.5) == 3
+
+
+def test_first2():
+    assert_raises(ValueError, first, [1, 2, 3], lambda x: x > 3)
+    assert_raises(ValueError, last, [1, 2, 3], lambda x: x > 3)
+
+
+def test_ifirst1():
+    assert ifirst([1, 2, 3], lambda x: x > 1.5) == 1
+    assert ilast([1, 2, 3], lambda x: x > 1.5) == 2
+
+
+def test_ifirst2():
+    assert_raises(ValueError, ifirst, [1, 2, 3], lambda x: x > 3)
+    assert_raises(ValueError, ilast, [1, 2, 3], lambda x: x > 3)
+
+
+def test_ifirst3():
+    assert ifirst([1, 2, 2, 3], 2.) == 1
+    assert ilast([1, 2, 2, 3], 2.) == 2
+
+
+def test_ifirst4():
+    assert_raises(ValueError, ifirst, [1, 2, 3], 4)
+    assert_raises(ValueError, ilast, [1, 2, 3], 4)
+
+
+def test_first_is_not():
+    """Check first_is_not/last_is_not, including the no-match fallback."""
+    assert first_is_not([1, 2], 1) == 2
+    assert first_is_not([None, None, {}], None) == {}
+    # With no qualifying item, the excluded value itself is returned.
+    assert first_is_not([], None) is None
+    assert first_is_not([None, None], None) is None
+
+    assert last_is_not([1, 2], 2) == 1
+    assert last_is_not([{}, None, None], None) == {}
+    assert last_is_not([], None) is None
+    assert last_is_not([None, None], None) is None
+
+
+def test_ifirst_is_not():
+    """Check the index variants ifirst_is_not/ilast_is_not."""
+    assert ifirst_is_not([1, 2, 2], 2) == 0
+    assert ifirst_is_not([2, 1, 1], 2) == 1
+    assert ifirst_is_not([{}, None, None], None) == 0
+    # With no qualifying item, the index variants raise ValueError.
+    assert_raises(ValueError, ifirst_is_not, [], None)
+    assert_raises(ValueError, ifirst_is_not, [None, None], None,)
+
+    assert ilast_is_not([1, 2, 2], 2) == 0
+    assert ilast_is_not([2, 1, 1], 2) == 2
+    assert ilast_is_not([{}, None, None], None) == 0
+    assert_raises(ValueError, ilast_is_not, [], None)
+    assert_raises(ValueError, ilast_is_not, [None, None], None,)
+
+
+def test_groupbykey():
+    vals = ['a', 'b', 'c', 'd']
+    keys = itertools.combinations_with_replacement([1, 2, 3, 4], 4)
+
+    def func(k):
+        result = list(groupbykey(vals, k))
+        expected = [(k, tuple(i[0] for i in it)) for k, it in
+                    itertools.groupby(zip(vals, k), lambda x: x[1])]
+        assert_equal(result, expected)
+    for k in keys:
+        yield func, k
+
+
+def test_inspect_special_values():
+    """Compare inspect_special_values with a pure-Python reference."""
+    def ref(x):
+        # Reference: counts of -1, 0 and 1, a flag for any other value,
+        # and whether all elements equal the first one.
+        nones = nzeros = nmones = nothers = 0
+        for value in x.flat:
+            if value == 0:
+                nzeros += 1
+            elif value == 1:
+                nones += 1
+            elif value == -1:
+                nmones += 1
+            else:
+                nothers += 1
+        # The counts are zeroed as soon as another value is present.
+        if nothers > 0:
+            nmones = nzeros = nones = 0
+        return nmones, nzeros, nones, nothers > 0, np.all(x == x.flat[0])
+
+    def func(x):
+        assert_eq(inspect_special_values(x), ref(x))
+    for d in dtypes:
+        for x in ((1, 1.1, 0, -1, -1), (-1, -1), (0, 0), (1, 1), (2, 2),
+                  (2, 1), np.random.random_integers(-1, 1, size=10)):
+            x = np.asarray(x).astype(d)
+            yield func, x
+
+
+def test_interruptible():
+    """Check the SIGINT handler inside nested (un)interruptible contexts."""
+    import signal
+
+    def func_interruptible():
+        # The default SIGINT handler must be installed.
+        assert signal.getsignal(signal.SIGINT) is signal.default_int_handler
+
+    def func_uninterruptible():
+        # Any handler other than the default one must be installed.
+        assert signal.getsignal(signal.SIGINT) is not \
+            signal.default_int_handler
+
+    # Leaving each context must restore the state of the enclosing one.
+    with interruptible():
+        func_interruptible()
+        with uninterruptible():
+            func_uninterruptible()
+            with uninterruptible():
+                func_uninterruptible()
+                with interruptible():
+                    func_interruptible()
+                    with interruptible():
+                        func_interruptible()
+                    func_interruptible()
+                func_uninterruptible()
+            func_uninterruptible()
+        func_interruptible()
+    func_interruptible()
+
+
+def test_is_scalar():
+    def func(x):
+        assert isscalarlike(x)
+    for x in (True, 1, 1., 'lkj', u'jj', np.array(1)):
+        yield func, x
+
+
+def test_is_not_scalar():
+    def func(x):
+        assert not isscalarlike(x)
+    for x in ([], (), np.ones((0, 1)), np.ones(1), np.ones(2), object, np.int8,
+              slice, Operator()):
+        yield func, x
+
+
+def test_izip_broadcast1():
+    """Check izip_broadcast with length-1, length-3 and unbounded inputs."""
+    def g():
+        # Unbounded counter: 0, 1, 2, ...
+        i = 0
+        while True:
+            yield i
+            i += 1
+    # a, b and c have length 1; d, e and f have length 3.
+    a = [1]
+    b = (np.sin,)
+    c = np.arange(3).reshape((1, 3))
+    d = ('x', 'y', [])
+    e = ['a', 'b', 'c']
+    f = np.arange(6).reshape((3, 2))
+
+    aa = []; bb = []; cc = []; dd = []; ee = []; ff = []; gg = []
+    for a_, b_, c_, d_, e_, f_, g_ in izip_broadcast(a, b, c, d, e, f, g()):
+        aa.append(a_)
+        bb.append(b_)
+        cc.append(c_)
+        dd.append(d_)
+        ee.append(e_)
+        ff.append(f_)
+        gg.append(g_)
+    # Length-1 inputs are expected to be repeated three times.
+    assert_eq(aa, 3 * a)
+    assert_eq(bb, list(3 * b))
+    assert_eq(cc, [[0, 1, 2], [0, 1, 2], [0, 1, 2]])
+    assert_eq(dd, list(_ for _ in d))
+    assert_eq(ee, list(_ for _ in e))
+    assert_eq(ff, list(_ for _ in f))
+    # The unbounded generator is expected to stop after three items.
+    assert_eq(gg, [0, 1, 2])
+
+
+def test_izip_broadcast2():
+    """Check izip_broadcast when every input has length 1."""
+    a = [1]
+    b = (np.sin,)
+    c = np.arange(3).reshape((1, 3))
+    aa = []; bb = []; cc = []
+    for a_, b_, c_ in izip_broadcast(a, b, c):
+        aa.append(a_)
+        bb.append(b_)
+        cc.append(c_)
+    # Each collected list is expected to match its input, i.e. one item.
+    assert_eq(aa, a)
+    assert_eq(tuple(bb), b)
+    assert_eq(cc, c)
+
+
+def test_least_greater_multiple():
+    """Check least_greater_multiple on values of the form 2**a * 3**b * 5**c."""
+    def func(lgm, expected):
+        assert_eq(lgm, expected)
+    # Open grid of exponents 4..10 along three axes.
+    a, b, c = np.ogrid[[slice(4, 11) for m in range(3)]]
+    expected = 2**a * 3**b * 5**c
+    # Such a value, and the value just below it, must both map to it.
+    yield func, least_greater_multiple(expected, [2, 3, 5]), expected
+    yield func, least_greater_multiple(expected-1, [2, 3, 5]), expected
+
+
+def test_one_pi_zero():
+    expected = 1, 4 * np.arctan(np.array(1, np.float128)), 0
+
+    def func(f, dtype, exp):
+        assert_same(f(dtype), np.array(exp, dtype=dtype))
+    for f, exp in zip((one, pi, zero), expected):
+        for dtype in (np.float16, np.float32, np.float64, np.float128,
+                      np.complex64, np.complex128, np.complex256):
+            yield func, f, dtype, exp
+
+
+def test_pool_threading():
+    try:
+        import mkl
+    except ImportError:
+        mkl = None
+    mkl_nthreads = mkl.get_max_threads()
+    counter = None
+
+    def func_thread(i):
+        global counter
+        counter += 1
+
+    @contextmanager
+    def get_env(value):
+        try:
+            del os.environ['OMP_NUM_THREADS']
+        except KeyError:
+            pass
+        if value is not None:
+            os.environ['OMP_NUM_THREADS'] = str(value)
+        yield
+        if value is not None:
+            del os.environ['OMP_NUM_THREADS']
+
+    def func(env):
+        global counter
+        with env:
+            omp_num_threads = os.getenv('OMP_NUM_THREADS')
+            expected = omp_num_threads()
+            with pool_threading() as pool:
+                assert_equal(int(os.environ['OMP_NUM_THREADS']), 1)
+                if mkl is not None:
+                    assert_equal(mkl.get_max_threads(), 1)
+                counter = 0
+                pool.map(func_thread, xrange(pool._processes))
+            assert_equal(os.getenv('OMP_NUM_THREADS'), omp_num_threads)
+            if mkl is not None:
+                assert_equal(mkl.get_max_threads(), mkl_nthreads)
+            assert_equal(counter, expected)
+        assert_not_in('OMP_NUM_THREADS', os.environ)
+
+    for env in get_env(None), get_env(1), get_env(3):
+        yield func, env
+
+
+def test_product():
+    def func(o):
+        assert o == 1
+    for o in ([], (), (1,), [1], [2, 0.5], (2, 0.5), np.array(1),
+              np.array([2, 0.5])):
+        yield func, product(o)
+
+
+def test_reshape_broadcast():
+    data = np.arange(20)
+    shapes = (4, 5), (1, 4, 5), (4, 1, 5), (4, 5, 1)
+    new_shapess = (
+        ((4, 5), (1, 4, 5), (2, 4, 5), (2, 3, 4, 5)),
+        ((1, 4, 5), (2, 4, 5), (1, 2, 4, 5), (2, 2, 4, 5), (2, 3, 2, 4, 5)),
+        ((4, 1, 5), (4, 2, 5), (1, 4, 2, 5), (2, 4, 2, 5), (2, 3, 4, 2, 5)),
+        ((4, 5, 1), (4, 5, 2), (1, 4, 5, 2), (2, 4, 5, 2), (2, 3, 4, 5, 2)))
+
+    def func(shape, new_shape):
+        data_ = data.reshape(shape)
+        expected = np.empty(new_shape)
+        expected[...] = data_
+        actual = reshape_broadcast(data_, new_shape)
+        assert_equal(actual, expected)
+    for shape, new_shapes in zip(shapes, new_shapess):
+        for new_shape in new_shapes:
+            yield func, shape, new_shape
+
+
+def test_setting():
+    """Check that setting() sets an attribute and restores it on exit."""
+    class Obj():
+        pass
+    obj = Obj()
+    obj.myattr = 'old'
+    # Existing attribute: every nested exit restores the previous value.
+    with setting(obj, 'myattr', 'mid'):
+        assert obj.myattr == 'mid'
+        with setting(obj, 'myattr', 'new'):
+            assert obj.myattr == 'new'
+        assert obj.myattr == 'mid'
+    assert obj.myattr == 'old'
+
+    # Attribute that did not exist: it must be removed on exit.
+    with setting(obj, 'otherattr', 'mid'):
+        assert obj.otherattr == 'mid'
+        with setting(obj, 'otherattr', 'new'):
+            assert obj.otherattr == 'new'
+            with setting(obj, 'anotherattr', 'value'):
+                assert obj.anotherattr == 'value'
+            assert not hasattr(obj, 'anotherattr')
+        assert obj.otherattr == 'mid'
+    assert not hasattr(obj, 'otherattr')
+
+
+def test_settingerr():
+    """Check that settingerr() sets and restores NumPy's error handling."""
+    # np.seterr() without arguments returns the current settings.
+    ref1 = np.seterr()
+    ref2 = {'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore',
+            'under': 'ignore'}
+    ref3 = {'divide': 'raise', 'invalid': 'ignore', 'over': 'warn',
+            'under': 'ignore'}
+
+    with settingerr(all='ignore'):
+        assert_eq(np.seterr(), ref2)
+        # The inner context overrides only 'divide' and 'over'.
+        with settingerr(divide='raise', over='warn'):
+            assert_eq(np.seterr(), ref3)
+        assert_eq(np.seterr(), ref2)
+    assert_eq(np.seterr(), ref1)
+
+
+def test_split():
+    def func(n, m):
+        slices = split(n, m)
+        assert_eq(len(slices), m)
+        x = np.zeros(n, int)
+        for s in slices:
+            x[s] += 1
+        assert_same(x, 1, broadcasting=True)
+        assert_eq([split(n, m, i) for i in range(m)], slices)
+    for n in range(4):
+        for m in range(1, 6):
+            yield func, n, m
+
+
+def test_strenum():
+    assert_eq(strenum(['blue', 'red', 'yellow'], 'or'),
+              "'blue', 'red' or 'yellow'")
+
+
+def test_strplural():
+    """Check strplural for n in (0, 1, 2) over its nonumber and s options."""
+    def func(n, nonumber, s, expected):
+        assert_eq(strplural(n, 'cat', nonumber=nonumber, s=s), expected)
+    # The four zipped sequences all have 12 items: n cycles through
+    # (0, 1, 2), nonumber switches every 3 cases and s every 6 cases.
+    for n, nonumber, s, expected in zip(
+            4*(0, 1, 2),
+            3*(True,) + 3*(False,) + 3*(True,) + 3*(False,),
+            6*('',) + 6*(':',),
+            ('cat', 'cat', 'cats', 'no cat', '1 cat', '2 cats',
+             'cat', 'cat:', 'cats:', 'no cat', '1 cat:', '2 cats:')):
+        yield func, n, nonumber, s, expected
+
+
+def test_strshape():
+    shapes = (None, (), (1,), (2, 3))
+    broadcasts = None, 'leftward', 'rightward'
+    expectedss = [('None', '()', '1', '(2,3)'),
+                  ('None', '(...)', '(...,1)', '(...,2,3)'),
+                  ('None', '(...)', '(1,...)', '(2,3,...)')]
+
+    def func(shape, broadcast, expected):
+        assert_equal(strshape(shape, broadcast=broadcast), expected)
+    for broadcast, expecteds in zip(broadcasts, expectedss):
+        for shape, expected in zip(shapes, expecteds):
+            yield func, shape, broadcast, expected
+
+
+def test_strshape_error():
+    def func(x):
+        assert_raises(TypeError, strshape, x)
+    for x in 1, object(), [1]:
+        yield func, x
+
+
+def test_timer1():
+    """Check Timer.elapsed inside and after a context, over two runs.
+
+    The assertions allow 1 ms of deviation from the nominal sleep times,
+    so the test depends on the scheduling accuracy of the machine.
+    """
+    t = Timer()
+    with t:
+        time.sleep(0.01)
+        delta1 = t.elapsed
+        time.sleep(0.01)
+        delta2 = t.elapsed
+    # Time spent outside the context must not be counted.
+    time.sleep(0.01)
+    delta3 = t.elapsed
+    assert abs(delta1 - 0.01) < 0.001
+    assert abs(delta2 - 0.02) < 0.001
+    assert abs(delta3 - 0.02) < 0.001
+    # Second run with the same timer: the same values are expected again.
+    with t:
+        time.sleep(0.01)
+        delta1 = t.elapsed
+        time.sleep(0.01)
+        delta2 = t.elapsed
+    time.sleep(0.01)
+    delta3 = t.elapsed
+    assert abs(delta1 - 0.01) < 0.001
+    assert abs(delta2 - 0.02) < 0.001
+    assert abs(delta3 - 0.02) < 0.001
+
+
+def test_timer2():
+    """Check Timer(cumulative=True): elapsed time adds up across contexts.
+
+    The assertions allow 1 ms of deviation from the nominal sleep times.
+    """
+    t = Timer(cumulative=True)
+    with t:
+        time.sleep(0.01)
+        delta1 = t.elapsed
+        time.sleep(0.01)
+        delta2 = t.elapsed
+    time.sleep(0.01)
+    delta3 = t.elapsed
+
+    assert abs(delta1 - 0.01) < 0.001
+    assert abs(delta2 - 0.02) < 0.001
+    assert abs(delta3 - 0.02) < 0.001
+    # The second context continues from the 0.02 s already accumulated.
+    with t:
+        time.sleep(0.01)
+        delta1 = t.elapsed
+        time.sleep(0.01)
+        delta2 = t.elapsed
+    time.sleep(0.01)
+    delta3 = t.elapsed
+    assert abs(delta1 - 0.03) < 0.001
+    assert abs(delta2 - 0.04) < 0.001
+    assert abs(delta3 - 0.04) < 0.001
+
+
+def test_timer3():
+    """Check that Timer stops when an exception leaves its context."""
+    t = Timer()
+    try:
+        with t:
+            time.sleep(0.01)
+            raise RuntimeError()
+    except RuntimeError:
+        pass
+    time.sleep(0.01)
+    # The level must be back to 0 and the later sleep must not be counted.
+    assert_equal(t._level, 0)
+    assert abs(t.elapsed - 0.01) < 0.001
diff --git a/test/test_zero.py b/test/test_zero.py
new file mode 100644
index 0000000..ef40842
--- /dev/null
+++ b/test/test_zero.py
@@ -0,0 +1,160 @@
+from __future__ import division
+import numpy as np
+from numpy.testing import assert_equal
+from pyoperators import (
+    CompositionOperator, ConstantOperator, Operator, ZeroOperator, flags,
+    rule_manager, O)
+from pyoperators.utils import ndarraywrap
+from pyoperators.utils.testing import (
+    assert_is, assert_is_instance, assert_is_none, assert_same, assert_is_type)
+from .common import OPS, ndarray2, attr2
+
+op = Operator()  # NOTE(review): not referenced by the tests visible in this file (they bind a local `op`) - confirm before removing
+ops = [_() for _ in OPS] + [_(flags={'linear': False}) for _ in OPS]  # each class in OPS, once with defaults and once with linear=False
+zeros_left = (  # zero operators used as the outer operand op1 in test_merge_zero_left
+    ZeroOperator(classout=ndarray2, attrout=attr2),
+    ZeroOperator(shapein=4, classout=ndarray2, attrout=attr2))
+zeros_right = (  # zero operators used as the inner operand op2 in test_merge_zero_right
+    ZeroOperator(classout=ndarray2, attrout=attr2),
+    ZeroOperator(classout=ndarray2, attrout=attr2, flags='square'),
+    ZeroOperator(shapein=3, classout=ndarray2, attrout=attr2),
+    ZeroOperator(shapein=3, shapeout=3, classout=ndarray2, attrout=attr2))
+
+
+def test_zero1():  # shapeless zero operator applied to an operator with explicit shapes
+    z = ZeroOperator()
+    o = Operator(shapein=3, shapeout=6)
+    zo = z(o)  # calling an operator on another operator builds their product here
+    assert_is_instance(zo, ZeroOperator)
+    assert_equal(zo.shapein, o.shapein)  # input shape is taken from the inner operator
+    assert_is_none(zo.shapeout)  # z declares no output shape, so none is expected
+
+
+def test_zero2():  # zero operator with explicit shapes applied to a shapeless operator
+    z = ZeroOperator(shapein=3, shapeout=6)
+    o = Operator()
+    zo = z(o)
+    assert_is_instance(zo, ZeroOperator)
+    assert_is_none(zo.shapein, 'in')  # trailing string is an extra argument, presumably the failure message - confirm
+    assert_equal(zo.shapeout, z.shapeout, 'out')  # output shape is kept from z
+
+
+def test_zero3():  # shaped zero operator multiplied by an operator flagged 'square'
+    z = ZeroOperator(shapein=3, shapeout=6)
+    o = Operator(flags='square')
+    zo = z*o
+    assert_is_instance(zo, ZeroOperator)
+    assert_equal(zo.shapein, z.shapein, 'in')  # both shapes of the product are expected to match z
+    assert_equal(zo.shapeout, z.shapeout, 'out')
+
+
+def test_zero4():  # product of a zero operator and a linear operator, in either order
+    z = ZeroOperator()
+    o = Operator(flags='linear')
+    assert_is_instance(z*o, ZeroOperator)  # zero on the left
+    assert_is_instance(o*z, ZeroOperator)  # zero on the right
+
+
+def test_zero5():  # shapes of the products of a shapeless zero and a shaped linear operator
+    z = ZeroOperator()
+    o = Operator(shapein=3, shapeout=6, flags='linear')
+    zo = z*o
+    oz = o*z
+    assert_is_instance(zo, ZeroOperator, 'zo')
+    assert_equal(zo.shapein, o.shapein, 'zo in')  # z*o: input shape comes from o
+    assert_is_none(zo.shapeout, 'zo out')  # z*o: no output shape expected
+    assert_is_instance(oz, ZeroOperator, 'oz')
+    assert_is_none(oz.shapein, 'oz, in')  # o*z: no input shape expected
+    assert_equal(oz.shapeout, o.shapeout, 'oz, out')  # o*z: output shape comes from o
+
+
+def test_zero6():  # dense forms of zero*Op and Op*zero, and of their transposes, against numpy references
+    @flags.linear
+    class Op(Operator):  # local linear operator whose output is twice as long as its input
+        def direct(self, input, output):
+            output[:] = np.concatenate([input, 2*input])  # input followed by 2*input
+
+        def transpose(self, input, output):
+            output[:] = input[0:output.size]  # keep only the leading output.size entries
+
+        def reshapein(self, shapein):
+            return (2 * shapein[0],)  # output length from input length
+
+        def reshapeout(self, shapeout):
+            return (shapeout[0] // 2,)  # input length from output length
+    z = ZeroOperator(flags='square')
+    o = Op()
+    od = o.todense(shapein=4)  # presumably an 8x4 matrix, given the 8x8 and 4x4 factors below - confirm
+    zo = z * o
+    zod_ref = np.dot(np.zeros((8, 8)), od)  # reference for z*o
+    assert_same((z * o).todense(shapein=4), zod_ref)
+    oz = o * z
+    ozd_ref = np.dot(od, np.zeros((4, 4)))  # reference for o*z
+    assert_same((o * z).todense(shapein=4), ozd_ref)
+    assert_same(zo.T.todense(shapein=8), zod_ref.T)  # transposes are compared with the transposed references
+    assert_same(oz.T.todense(shapein=8), ozd_ref.T)
+
+
+def test_zero7():  # a zero operator multiplied by itself
+    z = ZeroOperator()
+    assert_equal(z*z, z)  # the product must compare equal to z
+
+
+def test_zero8():  # adding O to an operator must give back an object of the operator's own class
+    class Op(Operator):  # trivial subclass, so the exact result type can be checked
+        pass
+    o = Op()
+    assert_is_type(o + O, Op)  # O is imported from pyoperators; presumably the zero operator - confirm
+
+
+def test_merge_zero_left():  # generator test: each zero in zeros_left applied on top of each operator in ops
+    def func(op1, op2):  # op1 is the zero operator, op2 the operator it is applied to
+        op = op1(op2)
+        assert_is_instance(op, ZeroOperator)
+        attr = {}  # expected attrout: op2's entries, overridden by op1's
+        attr.update(op2.attrout)
+        attr.update(op1.attrout)
+        assert_equal(op.attrout, attr)
+        x = np.ones(3)
+        y = ndarraywrap(4)  # output buffer handed to the merged operator
+        op(x, y)
+        y2_tmp = np.empty(4)  # intermediate buffer for the reference computation
+        y2 = np.empty(4)
+        op2(x, y2_tmp)  # reference: apply the two operators one after the other
+        op1(y2_tmp, y2)
+        assert_equal(y, y2)
+        assert_is_instance(y, op1.classout)  # the output must be an instance of op1's classout
+    for op1 in zeros_left:
+        for op2 in ops:
+            yield func, op1, op2
+
+
+def test_merge_zero_right():  # generator test: each operator in ops applied on top of each zero in zeros_right
+    def func(op1, op2):  # op1 is the outer operator, op2 the zero operator it is applied to
+        op = op1(op2)
+        attr = {}  # expected attrout: op2's entries, overridden by op1's
+        attr.update(op2.attrout)
+        attr.update(op1.attrout)
+        assert_equal(op.attrout, attr)
+        assert_is(op.classout, op1.classout)
+        if op1.flags.linear:  # linear outer operator: the result must be exactly a ZeroOperator
+            assert_is_type(op, ZeroOperator)
+            assert_same(op.todense(shapein=3, shapeout=4), np.zeros((4, 3)))
+            return
+        if op1.flags.shape_output == 'unconstrained' or \
+           op1.flags.shape_input != 'explicit' and \
+           op2.flags.shape_output != 'explicit':
+            assert_is_type(op, CompositionOperator)  # condition parses as A or (B and C): `and` binds tighter than `or`
+        else:
+            assert_is_type(op, ConstantOperator)
+
+        if op1.flags.shape_input == 'unconstrained' and \
+           op2.flags.shape_output == 'unconstrained':
+            return  # the dense comparison below is skipped for this combination
+        with rule_manager(none=True):
+            op_ref = op1(op2)  # reference built under rule_manager(none=True), presumably with all rules disabled - confirm
+        assert_same(op.todense(shapein=3, shapeout=4),
+                    op_ref.todense(shapein=3, shapeout=4))
+    for op1 in ops:
+        for op2 in zeros_right:
+            yield func, op1, op2

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyoperators.git