[vspline] 02/72: Initial commit of vspline's source code and examples

Sun Jul 2 09:02:37 UTC 2017

This is an automated email from the git hooks/post-receive script.

kfj-guest pushed a commit to branch master
in repository vspline.

commit ac366e41ae127b574f7ee324c1e8240db1b09f14
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date:   Wed Oct 12 17:56:13 2016 +0200

    Initial commit of vspline's source code and examples
---
 LICENSE                 |   27 +
 basis.h                 |  236 ++
 brace.h                 |  542 +++++
 bspline.h               |  534 +++++
 common.h                |  530 +++++
 doxy.h                  |  300 +++
 eval.h                  | 1099 ++++++++++
 example/eval.cc         |  132 ++
 example/gradient.cc     |   90 +
 example/pano_extract.cc |  776 +++++++
 example/roundtrip.cc    |  405 ++++
 example/times.txt       | 5502 +++++++++++++++++++++++++++++++++++++++++++++++
 mapping.h               | 1555 ++++++++++++++
 poles.cc                |  660 ++++++
 prefilter.h             | 1915 +++++++++++++++++
 prefilter_poles.cc      |  174 ++
 remap.h                 |  883 ++++++++
 vspline.doxy            | 2303 ++++++++++++++++++++
 vspline.h               |   38 +
 19 files changed, 17701 insertions(+)

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d4ebd66
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+vspline - generic C++ code for creation and evaluation
+          of uniform b-splines
+
+        Copyright 2015, 2016 by Kay F. Jahnke
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/basis.h b/basis.h
new file mode 100644
index 0000000..d517d9f
--- /dev/null
+++ b/basis.h
@@ -0,0 +1,236 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file basis.h
+
+    \brief Code to calculate the value of the B-spline basis function
+    and its derivatives.
+
+    There are several variants in here. First, there is a perfectly general
+    routine, using the Cox-de Boor recursion. While this is 'nice to have',
+    vspline does not actually use it (except as a reference in unit testing).
+
+    vspline only needs evaluation of the B-spline basis function at multiples
+    of 0.5. With these values it can construct its evaluators which in turn
+    are capable of evaluating the spline at real coordinates.
+    
+    So next is a specialized routine using an adapted version of the recursion
+    to calculate the basis function's value for integral operands. This isn't
+    used in vspline either - instead vspline uses a third version which abbreviates
+    the recursion by relying on precomputed values for the basis function with
+    derivative 0, which the recursion reaches after as many levels as the
+    requested derivative, so seldom deeper than 2. That makes it very fast.
+
+    For comparison there is also a routine calculating an approximation of the
+    basis function's value (only derivative 0) by means of a gaussian. This
+    routine isn't currently used in vspline.
+
+    for a discussion of the b-spline basis function, have a look at
+    http://www.cs.mtu.edu/~shene/COURSES/cs3621/NOTES/spline/B-spline/bspline-basis.html
+*/
+
+#ifndef VSPLINE_BASIS_H
+#define VSPLINE_BASIS_H
+
+// poles.cc has precomputed basis function values sampled at n * 1/2
+
+#include <vspline/poles.cc>
+
+namespace vspline {
+
+/// Implementation of the Cox-de Boor recursion formula to calculate
+/// the value of the bspline basis function. This code is taken from vigra
+/// but modified to take the spline degree as a parameter. This makes it
+/// easier to handle, since we don't need a vigra::BSpline object of a specific
+/// order to call it. This code is quite expensive for higher spline orders
+/// because the routine calls itself twice per recursion level, so the cost grows
+/// like 2^N with the spline's degree N. Luckily there are ways around using this routine
+/// at all - whenever we need the b-spline basis function value in vspline, it is
+/// at multiples of 1/2, and poles.cc has precomputed values for all spline
+/// degrees covered by vspline. I leave the code in here for reference purposes.
+
+template < class real_type >
+real_type gen_bspline_basis ( real_type x , int degree , int derivative )
+{
+  // half a unit step: each recursion level shifts the argument by +/- 1/2
+  const real_type half ( 0.5 ) ;
+  // recursion floor: the degree-0 basis function is the box function on
+  // [-0.5, 0.5); all of its derivatives are taken to be zero.
+  if ( degree == 0 )
+  {
+    if ( derivative != 0 )
+      return real_type(0.0) ;
+    const bool inside = ( real_type(-0.5) <= x ) && ( x < half ) ;
+    return inside ? real_type(1.0) : real_type(0.0) ;
+  }
+
+  if ( derivative != 0 )
+  {
+    // a derivative is the difference of two shifted lower-degree basis functions
+    const int lower = derivative - 1 ;
+    return   gen_bspline_basis<real_type> ( x + half , degree - 1 , lower )
+           - gen_bspline_basis<real_type> ( x - half , degree - 1 , lower ) ;
+  }
+
+  // Cox-de Boor step: blend the two shifted basis functions of degree - 1
+  const real_type n12 = real_type((degree + 1.0) / 2.0) ;
+  const real_type up = gen_bspline_basis<real_type> ( x + half , degree - 1 , 0 ) ;
+  const real_type down = gen_bspline_basis<real_type> ( x - half , degree - 1 , 0 ) ;
+  return ( ( n12 + x ) * up + ( n12 - x ) * down ) / degree ;
+}
+
+/// this routine is a helper routine to cdb_bspline_basis, the
+/// modified Cox-de Boor recursion formula to calculate the b-spline basis function
+/// for integral operands, operating in int as long as possible. This is achieved by
+/// working with 'x2', the doubled x value. Since in the 'real' recursion, the next
+/// iteration is called with x +/- 1/2, we can call the 'doubled' version with x +/- 1.
+/// This routine recurses all the way down to degree 0, so the result is, disregarding
+/// arithmetic errors, the same as the result obtained with the general routine.
+
+template < class real_type >
+real_type cdb_bspline_basis_2 ( int x2 , int degree , int derivative )
+{
+  // recursion floor: in doubled coordinates the degree-0 box function
+  // covers -1 <= x2 < 1; all of its derivatives are taken to be zero.
+  if ( degree == 0 )
+  {
+    if ( derivative != 0 )
+      return real_type(0.0) ;
+    const bool inside = ( -1 <= x2 ) && ( x2 < 1 ) ;
+    return inside ? real_type(1.0) : real_type(0.0) ;
+  }
+
+  if ( derivative != 0 )
+  {
+    // a derivative is the difference of two shifted lower-degree basis
+    // functions; the shift is 1 here because x2 is the doubled coordinate
+    const int lower = derivative - 1 ;
+    return   cdb_bspline_basis_2<real_type> ( x2 + 1 , degree - 1 , lower )
+           - cdb_bspline_basis_2<real_type> ( x2 - 1 , degree - 1 , lower ) ;
+  }
+
+  // Cox-de Boor step in doubled form: the weights n122 +/- x2 stay in int,
+  // and the doubling is undone by dividing by 2 * degree
+  const int n122 = degree + 1 ;
+  const real_type up = cdb_bspline_basis_2<real_type> ( x2 + 1 , degree - 1 , 0 ) ;
+  const real_type down = cdb_bspline_basis_2<real_type> ( x2 - 1 , degree - 1 , 0 ) ;
+  return ( ( n122 + x2 ) * up + ( n122 - x2 ) * down ) / ( 2 * degree ) ;
+}
+
+/// modified Cox-de Boor recursion formula to calculate the b-spline basis function
+/// for integral operands, delegates to the 'doubled' routine above
+
+template < class real_type >
+real_type cdb_bspline_basis ( int x , int degree , int derivative = 0 )
+{
+  return cdb_bspline_basis_2<real_type> ( 2 * x , degree , derivative ) ; // doubled x
+}
+
+/// see bspline_basis() below!
+///
+/// this helper routine works with the doubled value of x, so it can capture calls equivalent
+/// to basis ( x + .5 ) or basis ( x - .5 ) as basis2 ( x + 1 ) and basis2 ( x - 1 )
+/// having precalculated the basis function at .5 steps, we can therefore avoid
+/// using the general recursion formula. This is a big time-saver for high degrees.
+
+template < class real_type >
+real_type bspline_basis_2 ( int x2 , int degree , int derivative )
+{
+  // degree 0: box function covering -1 <= x2 < 1 in doubled coordinates,
+  // with all derivatives taken to be zero
+  if ( degree == 0 )
+  {
+    if ( derivative != 0 )
+      return real_type(0.0) ;
+    const bool inside = ( -1 <= x2 ) && ( x2 < 1 ) ;
+    return inside ? real_type(1.0) : real_type(0.0) ;
+  }
+
+  if ( derivative != 0 )
+  {
+    // recurse until derivative reaches 0, where the table lookup below applies
+    const int lower = derivative - 1 ;
+    return   bspline_basis_2<real_type> ( x2 + 1 , degree - 1 , lower )
+           - bspline_basis_2<real_type> ( x2 - 1 , degree - 1 , lower ) ;
+  }
+  // derivative 0: precomputed values, indexed by |x2|; zero beyond 'degree'
+  const int distance = abs ( x2 ) ;
+  if ( distance > degree )
+    return real_type ( 0 ) ;
+  const long double * row = precomputed_basis_function_values [ degree ] ;
+  return row [ distance ] ;
+}
+
+/// bspline_basis produces the value of the b-spline basis function for
+/// integral operands, the given degree 'degree' and the desired derivative.
+/// It turns out that this is all we ever need inside vspline, the calculation
+/// of the basis function at arbitrary points is performed via the matrix
+/// multiplication in the weight generating functor, and this functor sets
+/// it's internal matrix up with bspline basis function values at integral
+/// locations.
+///
+/// bspline_basis delegates to bspline_basis_2 above, which picks precomputed
+/// values as soon as derivative becomes 0. This abbreviates the recursion
+/// a lot, since usually the derivative requested is 0 or a small integer.
+/// all internal calculations in vspline accessing b-spline basis function
+/// values are currently using this routine, not the general routine.
+///
+/// Due to the precalculation with long double arithmetic, the precomputed
+/// values aren't precisely equal to the result of running the recursive
+/// routines above on the same arguments.
+
+template < class real_type >
+real_type bspline_basis ( int x , int degree , int derivative = 0 )
+{
+  return bspline_basis_2<real_type> ( 2 * x , degree , derivative ) ; // doubled x
+}
+
+/// Gaussian approximation to B-spline basis function. This routine
+/// approximates the basis function of degree spline_degree for real x.
+/// I checked for all degrees up to 20. The partition of unity quality of the
+/// resulting reconstruction filter is okay for larger degrees, the cumulated
+/// error over the covered interval is quite low. Still, as the basis function
+/// is never actually evaluated in vspline (whenever it's needed, it is needed
+/// at n * 1/2 and we have precomputed values for that) there is not much point
+/// in having this function around. I leave the code in for now.
+
+template < typename real_type >
+real_type gaussian_bspline_basis_approximation ( real_type x , int degree )
+{
+  // 'sigma' enters both terms un-squared, i.e. it is used like a variance
+  const real_type sigma = ( degree + 1 ) / 12.0 ;
+  const auto peak = real_type(1.0) / sqrt ( real_type(2.0 * M_PI) * sigma ) ;
+  return peak * exp ( - ( x * x ) / ( real_type(2.0) * sigma ) ) ;
+}
+
+} ; // end of namespace vspline
+
+#endif // #define VSPLINE_BASIS_H
diff --git a/brace.h b/brace.h
new file mode 100644
index 0000000..409cae4
--- /dev/null
+++ b/brace.h
@@ -0,0 +1,542 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file brace.h
+
+    \brief This file provides code for 'bracing' the spline coefficient array.
+
+    Inspired by libeinspline, I wrote code to 'brace' the spline coefficients. The concept is
+    this: while the IIR filter used to calculate the coefficients has infinite support (though
+    arithmetic precision limits this in real-world applications), the evaluation of the spline
+    at a specific location only looks at a small window of coefficients (compact, finite support).
+    This fact can be exploited by taking note of how large the support area is and providing
+    a few more coefficients in a frame around the 'core' coefficients to allow the evaluation
+    to proceed without having to check for boundary conditions. While the difference is not
+    excessive (the main computational cost is the actual evaluation itself), it's still
+    nice to be able to code the evaluation without boundary checking, which makes the code
+    very straightforward and legible.
+
+    There is another aspect to bracing: In my implementation of vectorized evaluation,
+    the window into the coefficient array used to pick out coefficients to evaluate at
+    a specific location is coded as a set of offsets from its 'low' corner. This way,
+    several such windows can be processed in parallel. This mechanism can only function
+    efficiently in a braced coefficient array, since it would otherwise have to give up
+    if any of the windows accessed by the vector of coordinates had members outside the
+    (unbraced) coefficient array and submit the coordinate vector to individual processing.
+    I consider the logic to code this and the loss in performance too much of a bother
+    to go down this path; all my evaluation code uses braced coefficient arrays. Of course
+    the user is free to omit bracing, but then they have to use their own evaluation
+    code.
+
+    What's in the brace? Of course this depends on the boundary conditions chosen.
+    In vspline, I offer code for several boundary conditions, but most have something
+    in common: the original, finite sequence is extrapolated into an infinite periodic
+    signal. With straight PERIODIC boundary conditions, the initial sequence is
+    immediately followed and preceded by copies of itself. The other boundary conditions
+    mirror the signal in some way and then repeat the mirrored signal periodically.
+    Using boundary conditions like these, both the extrapolated signal and the
+    coefficients share the same periodicity and mirroring.
+    
+    There are two ways of
+    arriving at a braced coefficient array: We can start from the extrapolated signal,
+    pick a section large enough to make margin effects vanish (due to limited arithmetic
+    precision), prefilter it and pick out a subsection containing the 'core' coefficients
+    and their support. Alternatively, we can work only on the core coefficients, calculate
+    suitable initial causal and anticausal coefficients (where the calculation considers
+    the extrapolated signal, which remains implicit), apply the filter and *then* surround
+    the core coefficient array with more coefficients (the brace) following the same
+    extrapolation pattern as we imposed on the signal, but now on the coefficients
+    rather than on the initial knot point values.
+  
+    The bracing can be performed without any solver-related maths by simply copying
+    (possibly trivially modified) slices of the core coefficients to the margin area.
+
+    Following the 'implicit' scheme, my default modus operandi braces after the
+    prefiltering. Doing so, it is possible to calculate the initial causal and anticausal
+    coefficients for the prefilter exactly. But this exactness is still, eventually,
+    subject to discretization and can only be represented after quantization. If,
+    instead, we prefilter a suitably extrapolated signal, now with arbitrary boundary
+    conditions, the margin effects will vanish towards the center (due to the characteristics
+    of the filter), and the 'core' coefficients will end up the same as in the first
+    approach. So we might as well extrapolate the signal 'far enough', pick any boundary
+    conditions we like (even zero padding), prefilter, and discard the margin outside the
+    area which is unaffected by margin effects. The result is, within arithmetic precision,
+    the same. Both approaches have advantages and disadvantages:
+
+    Implicit extrapolation needs less memory - we only need to provide storage for the
+    core coefficients, which is just as much as we need for the original signal, so we can
+    operate in-place. The disadvantage of the implicit scheme is that we have to capture
+    the implicit extrapolation in code to calculate the initial causal/anticausal coefficients,
+    which is non-trivial and requires separate routines for each case, as can be seen in my
+    prefiltering code. And if, after prefiltering, we want to brace the core coefficients
+    for efficient evaluation, we still need additional memory, which, if it hasn't been
+    allocated around the core before prefiltering, even requires us to copy the data out
+    into a larger memory area.
+
+    Explicit extrapolation needs more memory. A typical scheme would be to anticipate the
+    space needed for the explicit extrapolation, allocate enough memory for the extrapolated
+    signal, place the same into the center of the allocated memory, perform the extrapolation
+    and then prefilter. The advantage is that we can run the prefilter with arbitrary initial
+    causal/anticausal coefficients. No matter what the extrapolation looks like, we can always
+    use the same code. And we can extrapolate in any way we see fit, without having to produce
+    code to deal with our choice. If we pick the frame of extrapolated values large enough,
+    we can even pick out the 'braced' coefficient array from the result of the filter.
+
+    Obviously, there is no one 'right' way of doing this. Offered several choices
+    of implicit extrapolation, the user can choose between the implicit and explicit scheme.
+    The code in this file is useful for both choices: for the implicit scheme, bracing is
+    applied after prefiltering to enable evaluation with vspline. For the explicit scheme,
+    bracing may be used on the original data before prefiltering with arbitrary boundary
+    conditions, if the user's extrapolation scheme is covered by the code given here.
+
+    When using the higher-level access methods (via bspline objects), using the explicit or
+    implicit scheme becomes a matter of passing in the right flag, so at this level, a deep
+    understanding of the extrapolation mechanism isn't needed at all. I use the implicit scheme
+    as the default, because it needs slightly less memory and CPU cycles.
+
+    Since the bracing mainly requires copying data or trivial maths we can do the operations
+    on higher-dimensional objects, like slices of a volume. To efficiently code these operations
+    we make use of vigra's multi-math facility and its bindAt array method, which makes
+    these subarrays easily available.
+
+    TODO: while this is convenient, it's not too fast, as it's neither multithreaded nor
+    vectorized. Still in most 'normal' scenarios the execution time is negligible...
+
+    TODO: there are 'pathological' cases where one brace is larger than the other brace
+    and the width of the core together. These cases can't be handled for all bracing modes
+    and will result in an exception.
+*/
+
+#ifndef VSPLINE_BRACE_H
+#define VSPLINE_BRACE_H
+
+#include <vigra/multi_array.hxx>
+#include <vigra/multi_iterator.hxx>
+#include <vigra/multi_math.hxx>
+#include "common.h"
+
+namespace vspline {
+
+using namespace vigra::multi_math;
+
+/// class bracer encodes the entire bracing process. It also gives the metrics
+/// for the size of the braces expected by the evaluation code.
+
+template < class view_type >
+struct bracer
+{
+  typedef typename view_type::value_type value_type ;
+  typedef typename ExpandElementResult<value_type>::type ele_type ;
+  enum { dimension = view_type::actual_dimension } ;
+  typedef typename view_type::difference_type shape_type ;
+
+  /// calculates the size of the left brace for a given spline degree. In most cases,
+  /// this is the size of the support of the reconstruction filter, rounded up to the
+  /// next integer. So for an even spline, we get the same left brace size as for the
+  /// odd spline one degree higher.
+  ///
+  /// reflect boundary conditions work slightly differently: we somehow have to access
+  /// the area around the point of reflection - coordinates -1 .. 0 and M-1 .. M, which
+  /// are part of the extrapolated signal, so we need a wider brace - 0.5 at either end.
+  /// For an even spline, this results in the same value as for other boundary conditions,
+  /// since the even spline has smaller support (0.5 precisely). But for odd splines,
+  /// we need another coefficient on either end.
+
+  static int left_brace_size ( int spline_degree , bc_code bc )
+  {
+    // REFLECT and SPHERICAL use ( degree + 1 ) / 2, every other boundary
+    // condition uses degree / 2 (both in int arithmetic)
+    const bool wide = ( bc == REFLECT ) || ( bc == SPHERICAL ) ;
+    return ( wide ? spline_degree + 1 : spline_degree ) / 2 ;
+  }
+
+/// The right-hand side bracing differs between periodic and mirrored splines, due to
+/// the amount of initial data: when specifying knot point data for a periodic spline,
+/// the first repetition is omitted (as it is the same value as at coordinate 0), but
+/// for evaluation, it has to be present, so the bracing code will produce it.
+///
+/// initially I was using the minimal bracing possible by coding:
+/// return left_brace_size ( spline_degree , bc ) + ( bc == PERIODIC ? 1 : 0 ) ;
+/// This has the disadvantage that for odd splines it requires checking if incoming
+/// coordinates are precisely at the right end of the defined range and splitting these
+/// coordinates to M-2, 1.0 instead of M-1, 0.0. Being more generous with the right brace
+/// and adding another layer makes this check unnecessary. Since this check is inner loop
+/// stuff where every clock cycle counts, I now use the more generous bracing.
+/// Note that for the periodic case I assume silently that incoming
+/// coordinates won't ever be M-1, as these can be mapped to 0.0 (due to the periodicity,
+/// this is equivalent) - so here I need an extended brace to capture the last unit spacing
+/// of the spline, but I don't need the additional extension to safeguard against v == M-1.
+/// If you use foreign evaluation routines you may want an additional coefficient here.
+  
+  static int right_brace_size ( int spline_degree , bc_code bc )
+  {
+    const bool extra = ( spline_degree % 2 != 0 ) || ( bc == PERIODIC ) ; // odd degree or periodic
+    return left_brace_size ( spline_degree , bc ) + ( extra ? 1 : 0 ) ;
+  }
+
+/// this method gives the shape of the braced array, given the unbraced array's shape,
+/// the BC codes and the spline degree. For the shapes involved, this relation holds true:
+/// target_shape = left_corner + core_shape + right_corner
+/// So, for the implicit scheme, to evaluate from a braced spline, target_shape is the minimal
+/// coefficient array size needed by vspline's evaluation code. For the explicit scheme, this
+/// is the section of the coefficient array the evaluation code will look at.
+
+  static shape_type target_shape ( shape_type source_shape ,
+                                   TinyVector < bc_code , dimension > bcv ,
+                                   int spline_degree )
+  {
+    // source_shape is a by-value copy, so it can be grown in place:
+    // every axis gains the width of its left and of its right brace
+    for ( int axis = 0 ; axis < dimension ; ++axis )
+      source_shape[axis] +=   left_brace_size ( spline_degree , bcv[axis] )
+                            + right_brace_size ( spline_degree , bcv[axis] ) ;
+    return source_shape ;
+  }
+
+/// convenience variant of the previous routine using the same BC for all axes
+
+  static shape_type target_shape ( shape_type source_shape ,
+                                   bc_code bc , int spline_degree )
+  {
+    // build a per-axis BC vector from the single code, then defer to the overload above
+    typedef TinyVector < bc_code , dimension > bcv_type ;
+    return target_shape ( source_shape , bcv_type ( bc ) , spline_degree ) ;
+  }
+  
+/// this method gives the left offset to the 'core' subarray (array minus bracing),
+/// given the BC codes and the spline degree
+
+  static shape_type left_corner ( TinyVector < bc_code , dimension > bcv ,
+                                  int spline_degree )
+  {
+    shape_type offset ; // per-axis width of the left brace
+    for ( int axis = 0 ; axis < dimension ; ++axis )
+      offset[axis] = left_brace_size ( spline_degree , bcv[axis] ) ;
+    return offset ;
+  }
+  
+/// this method gives the right offset to the 'core' subarray (array minus bracing),
+/// given the BC codes and the spline degree
+
+  static shape_type right_corner ( TinyVector < bc_code , dimension > bcv ,
+                                   int spline_degree )
+  {
+    shape_type offset ; // per-axis width of the right brace
+    for ( int axis = 0 ; axis < dimension ; ++axis )
+      offset[axis] = right_brace_size ( spline_degree , bcv[axis] ) ;
+    return offset ;
+  }
+  
+/// given a braced array, return the size of its 'core', the array without applied bracing
+
+  static shape_type core_shape ( view_type& a ,
+                          TinyVector < bc_code , dimension > bcv ,
+                          int spline_degree )
+  {
+    // the result is a shape, not a view: braced shape minus both corners
+    return   a.shape()
+           - ( right_corner ( bcv , spline_degree ) + left_corner ( bcv , spline_degree ) ) ;
+  }
+
+/// produce a view to the core
+
+  static view_type core_view ( view_type& a ,
+                               TinyVector < bc_code , dimension > bc , int spline_degree )
+  {
+    // the two corners handed to subarray: left brace width, and shape minus right brace width
+    const shape_type lower = left_corner ( bc , spline_degree ) ;
+    const shape_type upper = a.shape() - right_corner ( bc , spline_degree ) ;
+    return a.subarray ( lower , upper ) ;
+  }
+
+  /// for spherical images, we require special treatment for two-dimensional
+  /// input data, because we need to shift the values by 180 degrees, or half
+  /// the margin's width. But to compile, we also have to give a procedure
+  /// for the other cases (not 2D), so this is first:
+  
+  template < typename value_type > void shift_assign ( value_type target , value_type source )
+  {
+    // deliberately empty fallback for the non-2D cases; should not ever get used, really...
+  }
+
+  /// specialized routine for the 2D case (the slice itself is 1D)
+
+  template < typename value_type > void shift_assign ( MultiArrayView < 1 , value_type > target ,
+                                                       MultiArrayView < 1 , value_type > source )
+  {
+    // Fills target from source, reading from source's midpoint onwards and
+    // wrapping to its start at the end. For odd source sizes the midpoint is
+    // rounded down, which is imprecise; full sphericals are 2N * N, so even.
+    auto si = source.begin() + source.size() / 2 ;
+    auto se = source.end() ;
+    for ( auto& ti : target )
+    {
+      ti = *si ;
+      ++si ;
+      if ( si >= se )
+        si = source.begin() ; // wrap around
+    }
+  }
+
+/// apply the bracing to the array, performing the required copy/arithmetic operations
+/// to the 'frame' around the core. This routine performs the operation along axis dim.
+/// This variant takes the sizes of the left and right brace without any reference to
+/// a spline's degree, so it can be fed arbitrary values. This is the most general bracing
+/// routine, which is used by the routines below which derive the brace's size from the
+/// spline's degree. It's also the routine to be used for explicitly extrapolating a signal:
+/// you place the data into the center of a larger array, and pass in the sizes of the 'empty'
+/// space which is to be filled with the extrapolated data.
+///
+/// the bracing is done one-left-one-right, to avoid corner cases as best as possible.
+
+  void apply ( view_type & a , // containing array
+              bc_code bc ,    // boundary condition code
+              int lsz ,       // space to the left which needs to be filled
+              int rsz ,       // ditto, to the right
+              int axis )      // axis along which to apply bracing 
+  {
+    int w = a.shape ( axis ) ; // width of containing array along axis 'axis'
+    int m = w - ( lsz + rsz ) ;    // width of 'core' array
+
+    if ( m < 1 )                   // has to be at least 1
+      throw shape_mismatch ( "combined brace sizes must be at least one less than container size" ) ;
+
+    if (    ( lsz > m + rsz )
+         || ( rsz > m + lsz ) )
+    {
+      // not enough data to fill brace
+      if ( bc == PERIODIC || bc == NATURAL || bc == MIRROR || bc == REFLECT )
+        throw std::out_of_range ( "each brace must be smaller than the sum of it's opposite brace and the core's width" ) ;
+    }
+
+    int l0 = lsz - 1 ; // index of innermost empty slice on the left; like begin()
+    int r0 = lsz + m ; // ditto, on the right
+
+    int lp = l0 + 1 ;  // index of leftmost occupied slice (p for pivot)
+    int rp = r0 - 1 ;  // index of rightmost occupied slice
+
+    int l1 = -1 ;     // index one before outermost empty slice to the left; like end()
+    int r1 = w ;      // index one after outermost empty slice on the right
+
+    int lt = l0 ;     // index to left target slice
+    int rt = r0 ;     // index to right target slice ;
+
+    int ls , rs ;     // indices to left and right source slice, will be set below
+
+    int ds = 1 ;      // step for source index, +1 == forẃard, used for all mirroring modes
+                      // for periodic bracing, it's set to -1.
+
+    switch ( bc )
+    {
+      case PERIODIC :
+      {
+        ls = l0 + m ;
+        rs = r0 - m ;
+        ds = -1 ;      // step through source in reverse direction
+        break ;
+      }
+      case NATURAL :
+      case MIRROR :
+      {
+        ls = l0 + 2 ;
+        rs = r0 - 2 ;
+        break ;
+      }
+      case CONSTANT :
+      case SPHERICAL :
+      case REFLECT :
+      {
+        ls = l0 + 1 ;
+        rs = r0 - 1 ;
+        break ;
+      }
+      case ZEROPAD :
+      {
+        break ;
+      }
+      case IGNORE :
+      case IDENTITY :
+      {
+        // these modes perform no bracing, return prematurely
+        return ;
+      }
+      default:
+      {
+        cerr << "bracing for BC code " << bc_name[bc] << " is not supported" << endl ;
+        break ;
+      }
+    }
+
+    for ( int i = max ( lsz , rsz ) ; i > 0 ; --i )
+    {
+      if ( lt > l1 )
+      {
+        switch ( bc )
+        {
+          case PERIODIC :
+          case MIRROR :
+          case REFLECT :
+          {
+            // with these three bracing modes, we simply copy from source to target
+            a.bindAt ( axis , lt ) = a.bindAt ( axis , ls ) ;
+            break ;
+          }
+          case NATURAL :
+          {
+            // here, we subtract the source slice from twice the 'pivot'
+            // easiest would be:
+            // a.bindAt ( axis , lt ) = a.bindAt ( axis , lp ) * value_type(2) - a.bindAt ( axis , ls ) ;
+            // but this fails in 1D TODO: why?
+            auto target = a.bindAt ( axis , lt ) ; // get a view to left targte slice
+            target = a.bindAt ( axis , lp ) ;      // assign value of left pivot slice
+            target *= ele_type(2) ;                // double that
+            target -= a.bindAt ( axis , ls ) ;     // subtract left source slice
+            break ;
+          }
+          case CONSTANT :
+          {
+            // here, we repeat the 'pivot' slice
+            a.bindAt ( axis , lt ) = a.bindAt ( axis , lp ) ;
+            break ;
+          }
+          case ZEROPAD :
+          {
+            // fill with 0
+            a.bindAt ( axis , lt ) = 0 ;
+            break ;
+          }
+          case SPHERICAL : // needs special treatment
+          {
+            shift_assign ( a.bindAt ( axis , lt ) , a.bindAt ( axis , ls ) ) ;
+            break ;
+          }
+          default :
+            // default: leave untouched
+            break ;
+        }
+        --lt ;
+        ls += ds ;
+      }
+      if ( rt < r1 )
+      {
+        // essentially the same, but with rs instead of ls, etc.
+        switch ( bc )
+        {
+          case PERIODIC :
+          case MIRROR :
+          case REFLECT :
+          {
+            // with these three bracing modes, we simply copy from source to target
+            a.bindAt ( axis , rt ) = a.bindAt ( axis , rs ) ;
+            break ;
+          }
+          case NATURAL :
+          {
+            // here, we subtract the source slice from twice the 'pivot'
+            // the easiest would be:
+            // a.bindAt ( axis , rt ) = a.bindAt ( axis , rp ) * value_type(2) - a.bindAt ( axis , rs ) ;
+            // but this fails in 1D TODO: why?
+            auto target = a.bindAt ( axis , rt ) ; // get a view to right targte slice
+            target = a.bindAt ( axis , rp ) ;      // assign value of pivot slice
+            target *= ele_type(2) ;                // double that
+            target -= a.bindAt ( axis , rs ) ;     // subtract source slice
+            break ;
+          }
+          case CONSTANT :
+          {
+            // here, we repeat the 'pivot' slice
+            a.bindAt ( axis , rt ) = a.bindAt ( axis , rp ) ;
+            break ;
+          }
+          case ZEROPAD :
+          {
+            // fill with 0
+            a.bindAt ( axis , rt ) = 0 ;
+            break ;
+          }
+          case SPHERICAL : // needs special treatment
+          {
+            shift_assign ( a.bindAt ( axis , rt ) , a.bindAt ( axis , rs ) ) ;
+            break ;
+          }
+          default :
+            // default: leave untouched
+            break ;
+        }
+        ++rt ;
+        rs -= ds ;
+      }
+    }
+  }
+  
+/// This variant of apply braces along all axes in one go, in ascending order of the axes.
+  static void apply ( view_type& a ,          ///< target array, containing the core and (empty) frame
+               TinyVector < bc_code , dimension > bcv ,      ///< boundary condition codes
+               TinyVector < int , dimension > left_corner ,  ///< sizes of left braces
+               TinyVector < int , dimension > right_corner ) ///< sizes of right braces
+  {
+    bracer br ; // the per-axis apply is not static, so an object is needed to call it from here
+    for ( int dim = 0 ; dim < dimension ; dim++ )
+      br.apply ( a , bcv[dim] , left_corner[dim] , right_corner[dim] , dim ) ;
+  }
+
+/// per-axis bracing with the brace sizes derived from the spline degree: a convenience
+/// wrapper which looks up the left and right brace size for the given degree and
+/// boundary condition and hands them on to apply(), so the caller need not call
+/// left_brace_size and right_brace_size explicitly.
+
+  void operator() ( view_type& a ,           ///< target array, containing the core and (empty) frame
+                    bc_code bc ,         ///< boundary condition code
+                    int spline_degree ,  ///< degree of the spline
+                    int dim )            ///< axis along which to brace
+  {
+    // the widths of the brace on either side follow from degree and boundary
+    // condition; the left one is determined first, then the right one
+    const int left_width = left_brace_size ( spline_degree , bc ) ;
+    const int right_width = right_brace_size ( spline_degree , bc ) ;
+
+    apply ( a , bc , left_width , right_width , dim ) ; // the general routine does the work
+  }
+  
+/// This variant braces all axes, one after the other and with the same boundary condition
+/// code, deriving the brace sizes from the spline's degree
+  void operator() ( view_type& a ,          ///< target array, containing the core and (empty) frame
+                    bc_code bc ,        ///< boundary condition code, applied to every axis
+                    int spline_degree ) ///< degree of the spline
+  {
+    for ( int axis = 0 ; axis != dimension ; ++axis )
+      this->operator() ( a , bc , spline_degree , axis ) ; // delegate to the per-axis variant
+  }
+} ;
+
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_BRACE_H
diff --git a/bspline.h b/bspline.h
new file mode 100644
index 0000000..6efe450
--- /dev/null
+++ b/bspline.h
@@ -0,0 +1,534 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file bspline.h
+    \brief defines class bspline
+
+  class bspline is the most convenient access to vspline's functionality.
+  It attempts to do 'the right thing' by automatically creating suitable helper
+  objects and parametrization so that the spline does what it's supposed to do.
+  Most users will not need anything else, and using class bspline is quite
+  straightforward. It's quite possible to have a b-spline up and running with
+  a few lines of code without even having to make choices concerning its
+  parametrization, since there are sensible defaults for everything. At the same
+  time, pretty much everything *can* be parametrized even at this level.
+  bspline objects can be used without any knowledge of their internals,
+  e.g. as parameters to the remap functions.
+
+  class bspline handles several views to the coefficients it operates on, these
+  are realized as vigra::MultiArrayViews, and they all share the same storage:
+
+  - the 'core', which is a view to an array of data precisely the same shape as
+    the knot point data over which the spline is calculated.
+
+  - 'coeffs', which is a view to the core, plus 'bracing' needed to evaluate
+    the spline with vspline's evaluation code. 'coeffs' contains 'core'.
+    I refer to this view as the 'braced coefficients' as well.
+
+  - 'container', which contains the two views above plus an additional frame
+    of coefficients used for the 'explicit' scheme of extrapolation before
+    prefiltering. 
+
+  Using class bspline, there is a choice of 'strategy'. The simplest strategy is
+  'UNBRACED'. With this strategy, after putting the knot point data into the bspline's 'core'
+  area and calling prefilter(), the core area will contain the b-spline coefficients.
+  The resulting b-spline object can't be evaluated with the code in eval.h, this
+  mode of operation is intended for users who want to do their own processing of the
+  coefficients and don't need the code in eval.h. prefiltering is done using an
+  implicit scheme as far as the border conditions are concerned.
+  
+  The 'standard' strategy is 'BRACED'. Here, after prefiltering, the view 'coeffs'
+  in the bspline object will contain the b-spline coefficients, surrounded by a 'brace'
+  of coefficients which allows code in eval.h to process them without special treatment
+  for values near the border (the brace covers what support is needed by marginal
+  coefficients). Again, an implicit scheme is used.
+  
+  The third strategy, 'EXPLICIT', extrapolates the knot point data in the 'core' area
+  sufficiently far to suppress margin effects when the prefiltering is performed without
+  initial coefficient calculation. If the 'frame' of extrapolated data is large enough,
+  the result is just the same. The inner part of the frame is taken as the brace, so no
+  bracing needs to be performed explicitly. The resulting b-spline object will work with
+  vspline's evaluation code. Note that the explicit scheme uses 'IGNORE' boundary conditions
+  on the (framed) array, which is equivalent to zero-padding.
+
+  Also note that the additional memory needed for the 'frame' will be held throughout the bspline
+  object's life, the only way to 'shrink' the coefficient array to the size of the braced or core
+  coefficients is by copying them out to a smaller array.
+
+  The fourth strategy, 'MANUAL', is identical to 'EXPLICIT', except that automatic extrapolation
+  of the core data to the frame is not performed. Instead, this strategy relies on the user to
+  fill the frame with extrapolated data. This is to allow for the user to apply custom
+  extrapolation schemes. The procedure would be to create the bspline object, fill the core,
+  apply the extrapolation, then call prefilter.
+
+  Probably the most common scenario is that the source data for the spline are available from
+  someplace like a file. Instead of reading the file's contents into memory first and passing
+  the memory to class bspline, there is a more efficient way: a bspline object is set up
+  first, with the specification of the size of the incoming data and the intended mode of
+  operation. The bspline object allocates the memory it will need for the purpose, but
+  doesn't do anything else. The 'empty' bspline object is then 'filled' by the user
+  by putting data into its 'core' area. Subsequently, prefilter() is called, which converts
+  the data to b-spline coefficients. This way, only one block of memory is used throughout,
+  the initial data are overwritten by the coefficients, operation is in-place and most efficient.
+
+  If this pattern can't be followed, there are alternatives:
+
+  - if a view to an array at least the size of the container array is passed into bspline's
+    constructor, this view is 'adopted' and all operations will use the data it refers to.
+    The caller is responsible for keeping these data alive while the bspline object exists,
+    and relinquishes control over the data, which may be changed by the bspline object.
+    Note that there is a convenience method, 'container_size', which can calculate the
+    shape of a container suitable for the purpose.
+
+  - if data are passed to prefilter(), they will be taken as containing the knot point data,
+    rather than expecting the knot point data to be in the bspline object's memory already.
+    This can also be used to reuse a bspline object with new data. The data passed in will
+    not be modified. This is most efficient when using an implicit scheme; when used together
+    with EXPLICIT, the data are (automatically) copied into the core area before prefiltering,
+    which is unnecessary with the implicit schemes - they can 'pull in' data in the course
+    of their operation.
+
+  While there is no explicit code to create a 'smoothing spline' - a b-spline evaluating
+  the source data without prefiltering them - this can be achieved simply by creating a b-spline
+  object with spline degree 0 and 'shifting' it to the desired degree for evaluation. Note that
+  you'll need the EXPLICIT strategy for the purpose, because otherwise the spline won't have
+  enough 'headroom' for shifting.
+
+  With shifting, you can also create a 'poor man's pyramid'. While using no additional storage,
+  you can extract smoothed data from the spline by shifting it up. This only goes so far, though,
+  because even a degree-20 b-spline reconstruction kernel's equivalent gaussian doesn't have a very
+  large standard deviation, and evaluation times become very long. From the gaussian approximation
+  of the b-spline basis function, it can be seen that the equivalent gaussian's variance is
+  ( degree + 1 ) / 12.0, so a quintic spline will have a standard deviation of sqrt ( 0.5 ), roughly 0.71, only.
+*/
+
+#ifndef VSPLINE_BSPLINE_H
+#define VSPLINE_BSPLINE_H
+
+#include "prefilter.h"
+#include "brace.h"
+
+namespace vspline {
+
+/// struct bspline is a convenience class which bundles a coefficient array (and its creation)
+/// with a set of metadata describing the parameters used to create the coefficients and the
+/// resulting data. I have chosen to implement class bspline so that there is only a minimal
+/// set of template arguments, namely the spline's data type (like pixels etc.) and its dimension.
+/// All other parameters relevant to the spline's creation are passed in at construction time.
+/// This way, if explicit specialization becomes necessary (like, to interface to code which
+/// can't use templates) the number of specializations remains manageable. This design decision
+/// pertains specifically to the spline's degree, which can also be implemented as a template
+/// argument, allowing for some optimization by making some members static. Yet going down this
+/// path requires explicit specialization for every spline degree used and the performance gain
+/// I found doing so was hardly measurable, while automatic testing became difficult and compilation
+/// times grew.
+///
+/// I chose making bspline a struct for now, but messing with the data inside is probably
+/// not a good idea...
+
+template < class value_type , int _dimension >
+struct bspline
+{
+  /// pull the template arg into an enum, so that it is accessible as a member
+  enum { dimension = _dimension } ;
+  /// if the coefficients are owned, this array holds the data
+  typedef vigra::MultiArray < dimension , value_type > array_type ;
+  /// data are read and written to vigra MultiArrayViews
+  typedef vigra::MultiArrayView < dimension , value_type > view_type ;
+  /// multidimensional index type
+  typedef typename view_type::difference_type shape_type ;
+  /// nD type for one boundary condition per axis
+  typedef TinyVector < bc_code , dimension > bcv_type ;
+
+  /// type for pointer to a prefiltering method; the notes name what prefilter() passes in
+  typedef void (*p_solve) ( view_type& , // the data to process
+                            view_type& , // the view receiving the result
+                            bcv_type ,   // boundary condition codes
+                            int ,        // spline degree
+                            int ) ;      // number of threads
+
+  /// elementary type of value_type, float or double
+  typedef typename ExpandElementResult < value_type >::type real_type ;
+
+private:
+
+  array_type _coeffs ;           ///< holds the coefficients if the constructor allocated them itself
+  prefilter_strategy strategy ;  ///< prefiltering strategy, set by the constructor
+
+public:
+
+  view_type container ;     ///< view to container array
+  view_type coeffs ;        ///< view to the braced coefficient array
+  view_type core ;          ///< view to the core part of the coefficient array
+  int spline_degree ;       ///< degree of the spline (3 == cubic spline)
+  bcv_type bcv ;            ///< boundary condition, see common.h
+  bool braced ;             ///< whether coefficient array is 'braced' or not
+  int horizon ;             ///< additional frame size for explicit scheme
+  shape_type left_brace ;   ///< width(s) of the left handside bracing
+  shape_type right_brace ;  ///< width(s) of the right handside bracing
+  shape_type left_frame ;   ///< width(s) of the left handside frame (the brace, plus horizon for the explicit scheme)
+  shape_type right_frame ;  ///< width(s) of the right handside frame (the brace, plus horizon for the explicit scheme)
+  shape_type container_shape ;   ///< shape of the container array
+  shape_type core_shape ;   ///< shape of the core coefficient array
+  shape_type braced_shape ; ///< shape of the coefficient array + bracing
+
+  /// setup_metrics determines the shapes of the three views (container, braced
+  /// coefficients, core) and the widths of any braces/frames from core_shape, bcv,
+  /// spline_degree, horizon and strategy, which all have to be set beforehand.
+  void setup_metrics()
+  {
+    switch ( strategy )
+    {
+      case UNBRACED:
+        // UNBRACED is simple: all internal views are the same. prefiltering will
+        // be done using an implicit scheme.
+        container_shape = braced_shape = core_shape ;
+        left_brace = right_brace = left_frame = right_frame = shape_type() ;
+        braced = false ;
+        break ;
+      case BRACED:
+        // again an implicit prefiltering scheme will be used, but here we add
+        // a 'brace' to the core data, which makes the resulting bspline object
+        // suitable to work with vspline's evaluation code. The container array's
+        // size is the same as the braced core's size.
+        braced_shape = bracer<view_type>::target_shape ( core_shape , bcv , spline_degree ) ;
+        left_brace = bracer<view_type>::left_corner ( bcv , spline_degree ) ;
+        right_brace = bracer<view_type>::right_corner ( bcv , spline_degree ) ;
+        left_frame = left_brace ;
+        right_frame = right_brace ;
+        container_shape = braced_shape ;
+        braced = true ;
+        break ;
+      case EXPLICIT:
+      case MANUAL:
+        // both strategies use the same layout; they only differ in who fills the frame.
+        // The 'frame' takes the extrapolated data before the whole lot is prefiltered. It is
+        // applied in excess of the bracing, to make sure all coefficients inside the brace
+        // meet the precision requirements expressed by the choice of 'horizon'.
+        braced_shape = bracer<view_type>::target_shape ( core_shape , bcv , spline_degree ) ;
+        left_brace = bracer<view_type>::left_corner ( bcv , spline_degree ) ;
+        right_brace = bracer<view_type>::right_corner ( bcv , spline_degree ) ;
+        left_frame = left_brace + horizon ;
+        right_frame = right_brace + horizon ;
+        container_shape = core_shape + left_frame + right_frame ;
+        braced = true ;
+        break ;
+    }
+  }
+
+  /// this method calculates the shape of the container needed by a bspline object with
+  /// the given parameters, for use cases where the memory is allocated externally and passed
+  /// in. MANUAL is sized like EXPLICIT; a negative horizon selects the constructor's heuristic.
+  /// Note that the default horizon given here is a fixed value, not that heuristic.
+  static shape_type container_size ( shape_type core_shape ,  ///< shape of knot point data
+            int spline_degree = 3 ,                  ///< spline degree with reasonable default
+            bcv_type bcv = bcv_type ( MIRROR ) ,   ///< boundary conditions and common default
+            prefilter_strategy strategy = BRACED , ///< default strategy is the 'implicit' scheme
+            int horizon = sizeof(real_type) * 3 )  ///< width of frame for explicit scheme (heuristic)
+  {
+    if ( strategy == UNBRACED )
+      return core_shape ; // neither brace nor frame
+
+    shape_type braced_shape = bracer<view_type>::target_shape ( core_shape , bcv , spline_degree ) ;
+    if ( strategy == BRACED )
+      return braced_shape ; // the container is just the braced core
+
+    // what's left are the strategies which put a frame around the brace, EXPLICIT and
+    // MANUAL. Returning unconditionally below makes sure no path runs off the function's end.
+    if ( horizon < 0 )
+      horizon = log2 ( max ( spline_degree , 1 ) ) * 3 * sizeof ( real_type ) ;
+
+    braced_shape += 2 * horizon ; // the frame extends 'horizon' beyond the brace on both sides
+    return braced_shape ;
+  }
+
+  /// construct a bspline object with appropriate storage space to contain and process an array
+  /// of knot point data with shape core_shape. Depending on the strategy chosen and the other
+  /// parameters passed, more space than core_shape may be allocated. Once the bspline object
+  /// is ready, it has to be filled with the knot point data and then the prefiltering needs
+  /// to be done. This sequence assures that the knot point data are present in memory only once,
+  /// the prefiltering is done in-place. So the user can create the bspline, fill in data (like,
+  /// from a file), prefilter, and then evaluate.
+  ///
+  /// It's possible to pass in a view to an array providing space for the coefficients,
+  /// or even the coefficients themselves. This is done via the parameter _space. This has
+  /// to be an array of the same or larger shape than the container array would end up having
+  /// given all the other parameters. This view is then 'adopted' and subsequent processing
+  /// will operate on it's data. container_size can be used to get the precise shape of the memory
+  /// needed with the given parameters.
+  ///
+  /// with the EXPLICIT scheme, the horizon is set by default to a value which is
+  /// deemed to be 'sufficiently large' to keep the error 'low enough'. the expression
+  /// used here produces a frame which is roughly the size needed to make any margin
+  /// effects vanish by the time the prefilter hits the core, but it's a bit 'rule of thumb'.
+  
+  // TODO: when bracing/framing is applied, we might widen the array size to a
+  // multiple of the Vc:Vector's Size for the given data type to have better-aligned
+  // access. This may or may not help, has to be tested. We might also want to position
+  // the origin of the brace to an aligned position, since evaluation is based there.
+  
+  bspline ( shape_type _core_shape ,  ///< shape of knot point data
+            int _spline_degree = 3 , ///< spline degree with reasonable default
+            bcv_type _bcv = bcv_type ( MIRROR ) ,   ///< boundary conditions and common default
+            prefilter_strategy _strategy = BRACED , ///< default strategy is the 'implicit' scheme
+            int _horizon = -1 ,                     ///< width of frame for explicit scheme
+            view_type _space = view_type()          ///< coefficient storage to 'adopt'
+          )
+  : core_shape ( _core_shape ) ,
+    spline_degree ( _spline_degree ) ,
+    bcv ( _bcv ) ,
+    strategy ( _strategy )
+  {
+    // a non-negative horizon is taken over as passed in. Any negative value requests
+    // the built-in heuristic, which derives the frame width from the spline's degree
+    // and the size of the elementary type.
+    if ( _horizon >= 0 )
+      horizon = _horizon ;
+    else
+      horizon = log2 ( max ( spline_degree , 1 ) ) * 3 * sizeof ( real_type ) ;
+
+    // with all parameters in place, work out the shapes of the views and the
+    // widths of brace and frame
+    setup_metrics() ;
+
+    // next, provide the memory 'container' refers to
+    if ( ! _space.hasData() )
+    {
+      // nothing was passed in to adopt, so the bspline object allocates an array
+      // of container_shape itself. The fresh array is swapped into the member
+      // _coeffs, so that the memory is automatically deallocated when the
+      // bspline object is destroyed.
+      array_type fresh ( container_shape ) ;
+      _coeffs.swap ( fresh ) ;
+      container = _coeffs ;
+    }
+    else
+    {
+      // the caller's memory is adopted, provided it is at least as large as
+      // container_shape
+      if ( ! ( allGreaterEqual ( _space.shape() , container_shape ) ) )
+        throw shape_mismatch ( "the intended container shape does not fit into the shape of the storage space passed in" ) ;
+      // 'container' was default-constructed, so this assignment makes it a view
+      // to (not a copy of) the part of _space between shape_type() and
+      // container_shape. If the data _space refers to change or are deallocated,
+      // the bspline becomes invalid as well. _coeffs is not used in this case.
+      container = _space.subarray ( shape_type() , container_shape ) ;
+    }
+
+    // the remaining two views are cut out of the container: first the braced
+    // coefficients, then, inside these, the core
+    const shape_type braced_begin = left_frame - left_brace ;
+    const shape_type braced_end = left_frame + core_shape + right_brace ;
+    coeffs = container.subarray ( braced_begin , braced_end ) ;
+    core = coeffs.subarray ( left_brace , left_brace + core_shape ) ;
+  } ;
+
+  /// prefilter converts the knot point data in the 'core' area into b-spline
+  /// coefficients. Depending on the strategy chosen in the b-spline object's
+  /// constructor, bracing/framing may be applied.
+  ///
+  /// If data are passed in, they have to have precisely the shape
+  /// we have set up in core (_core_shape passed into the constructor).
+  /// These data will then be used in place of any data present in the
+  /// bspline object.
+
+  void prefilter ( bool use_vc = true ,     ///< use Vc by default
+                   int nthreads = ncores , ///< number of threads to use
+                   view_type data = view_type() ) ///< view to knot point data to use instead of 'core'
+  {
+    if ( data.hasData() )
+    {
+      // if the user has passed in data, they have to have precisely the shape
+      // we have set up in core (_core_shape passed into the constructor).
+      // This can have surpising effects if the container array isn't owned by the
+      // spline but constitutes a view to data kept elsewhere (by passing _space to the
+      // constructor).
+      if ( data.shape() != core_shape )
+        throw shape_mismatch ( "when passing data to prefilter, they have to have precisely the core's shape" ) ;
+      if ( strategy == EXPLICIT )
+      {
+        // the explicit scheme requires the data and frame to be together in the
+        // containing array, so we have to copy the data into the core.
+        core = data ;
+      }
+      // the other strategies can move the data from 'data' into the spline's memory
+      // during coefficient generation, so we needn't copy them in first.
+    }
+    else
+    {
+      // otherwise, we assume data are already in 'core' and we operate in-place
+      // note, again, the semantics of the assignment here: since 'data' has no data,
+      // the assignment results in 'adopting' the data in core rather than copying them
+      data = core ;
+    }
+
+    // we call the solver via a function pointer
+    p_solve solve ;
+
+    // for simplicity's sake, if USE_VC isn't defined we use solve_vigra
+    // always and simply ignore the flag use_vc.
+
+#ifdef USE_VC
+    // we have two variants, one is using Vc and the other doesn't
+    if ( use_vc )
+      solve = & solve_vc < view_type , view_type > ;
+    else
+      solve = & solve_vigra < view_type , view_type > ;
+#else
+    solve = & solve_vigra < view_type , view_type > ;
+#endif
+
+    // per default the output will be braced. This does require the output
+    // array to be sufficiently larger than the input; class bracer has code
+    // to provide the right sizes
+
+    bracer<view_type> br ;
+
+    // for the explicit schemes, we use bc code IGNORE
+    bcv_type explicit_bcv ( IGNORE ) ;
+
+    switch ( strategy )
+    {
+      case UNBRACED:
+        // only call the solver, don't do any bracing
+        solve ( data ,
+                core ,
+                bcv ,
+                spline_degree ,
+                nthreads
+              ) ;
+        break ;
+      case BRACED:
+        // solve first, passing in BC codes to pick out the appropriate functions to
+        // calculate the initial causal and anticausal coefficient, then brace result
+        solve ( data ,
+                core ,
+                bcv ,
+                spline_degree ,
+                nthreads
+              ) ;
+        for ( int d = 0 ; d < dimension ; d++ )
+          br ( coeffs , bcv[d] , spline_degree , d ) ;
+        break ;
+      case EXPLICIT:
+        // apply bracing with BC codes passed in, then solve with BC code IGNORE
+        // this automatically fills the brace, as well, since it's part of the frame.
+        for ( int d = 0 ; d < dimension ; d++ )
+          br.apply ( container , bcv[d] , left_frame[d] , right_frame[d] , d ) ;
+        solve ( container ,
+                container ,
+                explicit_bcv ,
+                spline_degree ,
+                nthreads
+              ) ;
+        break ;
+      case MANUAL:
+        // like EXPLICIT, but don't apply a frame, assume a frame was applied
+        // by external code. process whole container with IGNORE BC. For cases
+        // where the frame can't be constructed by applying any of the stock bracing
+        // modes. Note that if any data were passed into this routine, in this case
+        // they will be silently ignored (makes no sense overwriting the core after
+        // having manually framed it in some way)
+        solve ( container ,
+                container ,
+                explicit_bcv ,
+                spline_degree ,
+                nthreads
+              ) ;
+        break ;
+    }
+  }
+
+  /// shift will change the interpretation of the data in a bspline object.
+  /// d is taken as a difference to add to the current spline degree. The coefficients
+  /// remain the same, but creating an evaluator from the shifted spline will make
+  /// the evaluator produce data *as if* the coefficients were those of a spline
+  /// of the changed order. Shifting with positive d will effectively blur the
+  /// interpolated signal, shifting with negative d will sharpen it.
+  /// For shifting to work, the spline has to have enough 'headroom', meaning that
+  /// spline_degree + d, the new spline degree, has to be greater than or equal to 0
+  /// and smaller than the largest supported spline degree (lower twenties)
+  /// This is a quick-shot solution to the problem of scaled-down interpolated
+  /// results; it may be faster in some situations to shift the spline up and
+  /// evaluate than to apply smoothing to the source data.
+  /// TODO: look into the transfer function
+
+  int shift ( int d )
+  {
+    int new_degree = spline_degree + d ;
+    if ( new_degree < 0 )
+      return 0 ;
+
+    bracer<view_type> br ;
+    shape_type new_left_brace = br.left_corner ( bcv , new_degree ) ;
+    shape_type new_right_brace = br.right_corner ( bcv , new_degree ) ;
+    if ( ! (    allLessEqual ( new_left_brace , left_frame )
+             && allLessEqual ( new_right_brace , right_frame ) ) )
+      return 0 ;
+
+    spline_degree = new_degree ;
+    left_brace = new_left_brace ;
+    right_brace = new_right_brace ;
+    braced_shape = core_shape + left_brace + right_brace ;
+
+    shape_type coefs_offset = left_frame - new_left_brace ;
+    coeffs.reset() ;
+    coeffs = container.subarray ( coefs_offset , coefs_offset + braced_shape ) ;
+    return d ;
+  }
+
+  /// helper function to << a bspline object to an ostream
+
+  friend ostream& operator<< ( ostream& osr , const bspline& bsp )
+  {
+    osr << "dimension:.................. " << bsp.dimension << endl ;
+    osr << "degree:..................... " << bsp.spline_degree << endl ;
+    osr << "boundary conditions:........" ;
+    for ( auto bc : bsp.bcv )
+      osr << " " << bc_name [ bc ] ;
+    osr << endl ;
+    osr << "shape of container array:... " << bsp.container.shape() << endl ;
+    osr << "shape of braced coefficients " << bsp.coeffs.shape() << endl ;
+    osr << "shape of core:.............. " << bsp.core.shape() << endl ;
+    osr << "braced:..................... " << ( bsp.braced ? std::string ( "yes" ) : std::string ( "no" ) ) << endl ;
+    osr << "left brace:................. " << bsp.left_brace << endl ;
+    osr << "right brace:................ " << bsp.right_brace << endl ;
+    osr << "left frame:................. " << bsp.left_frame << endl ;
+    osr << "right frame:................ " << bsp.right_frame << endl ;
+    osr << ( bsp._coeffs.hasData() ? "bspline object owns data" : "data are owned externally" ) << endl ;
+    return osr ;
+  }
+
+} ;
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_BSPLINE_H
\ No newline at end of file
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..49e7c0b
--- /dev/null
+++ b/common.h
@@ -0,0 +1,530 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file common.h
+    \brief definitions common to all files in this project, utility code
+    
+  Currently there isn't much code here - the only thing used throughout is the definition
+  of the boundary condition codes/mapping codes, which are lumped together for now in one
+  enum. I have also started putting some utility code here.
+  
+  TODO The exceptions need some work.
+*/
+
+#ifndef VSPLINE_COMMON
+#define VSPLINE_COMMON
+
+#include <vigra/multi_array.hxx>
+#include <thread>
+
+#ifdef USE_VC
+#include <Vc/Vc>
+#endif
+
+namespace vspline {
+
+/// dimension-mismatch is thrown if two arrays have different dimensions
+/// which should have the same dimensions.
+
+struct dimension_mismatch
+: std::invalid_argument
+{
+  dimension_mismatch ( const char * msg )
+  : std::invalid_argument ( msg ) { }  ;
+} ;
+
+/// shape mismatch is the exception which is thrown if the shapes of an input array and
+/// an output array do not match.
+
+struct shape_mismatch
+: std::invalid_argument
+{
+  shape_mismatch  ( const char * msg )
+  : std::invalid_argument ( msg ) { }  ;
+} ;
+
+/// exception which is thrown if an operation is requested which vspline does not support
+
+struct not_supported
+: std::invalid_argument
+{
+  not_supported  ( const char * msg )
+  : std::invalid_argument ( msg ) { }  ;
+} ;
+
+/// exception which is thrown by mapping mode REJECT for out-of-bounds coordinates
+/// this exception is left without a message, it only has a very specific application,
+/// and there it may be thrown often, so we don't want anything slowing it down.
+
+struct out_of_bounds
+{
+} ;
+
+/// This enumeration is used for codes connected to boundary conditions. There are
+/// three aspects to boundary conditions: During prefiltering, if the implicit scheme is used,
+/// the initial causal and anticausal coefficients have to be calculated in a way specific to
+/// the chosen boundary conditions. Bracing, both before prefiltering when using the explicit
+/// scheme, and after prefiltering when using the implicit scheme, also needs these codes to
+/// pick the appropriate extrapolation code to extend the knot point data/coefficients beyond
+/// the core array. Finally, mapping of coordinates into the defined range uses some of the
+/// same codes.
+
+typedef enum { PERIODIC,   ///< periodic boundary conditions / periodic mapping
+               NATURAL,    ///< natural boundary conditions / uses constant mapping
+               MIRROR ,    ///< mirror on the bounds, both bc and mapping
+               REFLECT ,   ///< reflect, so  that f(-1) == f(0), both bc and mapping
+               CONSTANT ,  ///< used for framing, with explicit prefilter scheme, and for mapping
+               ZEROPAD ,   ///< used for boundary condition, bracing
+               IGNORE ,    ///< used for boundary condition, bracing
+               IDENTITY ,  ///< used as solver argument, mostly internal use
+               SPHERICAL , ///< intended use for spherical panoramas, needs work
+               REJECT ,    ///< mapping mode, throws out_of_bounds for out-of-bounds coordinates
+               LIMIT ,     ///< mapping mode, maps out-of-bounds coordinates to left/right boundary
+               RAW         ///< mapping mode, processes coordinates unchecked, may crash the program
+} bc_code;
+
+/// This enumeration is used by the convenience class 'bspline' to determine the prefiltering
+/// scheme to be used.
+
+typedef enum { UNBRACED , ///< implicit scheme, no bracing applied
+               BRACED ,   ///< implicit scheme, bracing will be applied
+               EXPLICIT , ///< explicit scheme, frame with extrapolated signal, brace
+               MANUAL     ///< like explicit, but don't frame before filtering
+} prefilter_strategy  ;
+
+/// bc_name is for diagnostic output of bc codes
+
+std::vector < std::string > bc_name =
+{
+  "PERIODIC",
+  "NATURAL",
+  "MIRROR" ,
+  "REFLECT" ,
+  "CONSTANT" ,
+  "ZEROPAD" ,
+  "IGNORE" ,
+  "IDENTITY" ,
+  "SPHERICAL" ,
+  "REJECT" ,
+  "LIMIT" ,
+  "RAW"
+} ;
+
+/// number of CPU cores in the system; per default, multithreading routines
+/// will perform their task with as many threads.
+
+const int ncores = std::thread::hardware_concurrency() ;
+
+/// split_array_to_chunks partitions an array. This is used for multithreading:
+/// It's possible to distribute the processing to several threads by splitting
+/// the array to chunks so that the axis which is processed is not broken up.
+/// That's what the 'forbid' parameter is for: it keeps the routine from wrongly
+/// splitting the array along a specific axis. With the default of -1, all axes
+/// are considered valid candidates for splitting.
+/// The return value indicates into how many chunks the input array was actually
+/// split. Splitting sounds like an expensive operation, but it is not. All processing
+/// is done with views, the data aren't copied.
+
+template < class view_t >
+int split_array_to_chunks ( view_t & a ,                   ///< view to array to be split n-ways
+                            std::vector < view_t > & res , ///< vector to accomodate the chunks
+                            int n ,                        ///< intended number of chunks
+                            int forbid = -1 )              ///< axis which shouldn't be split
+{
+  const int dim = view_t::actual_dimension ;
+  typedef typename view_t::difference_type shape_t ;
+  
+  // find outermost dimension which can be split n-ways
+  shape_t shape = a.shape() ;
+  
+  // find the outermost dimension that can be split n ways, and its extent
+  int split_dim = -1 ;
+  int max_extent = -1 ;
+  for ( int md = dim - 1 ; md >= 0 ; md-- )
+  {
+    if (    md != forbid
+         && shape[md] > max_extent
+         && shape[md] >= n )
+    {
+      max_extent = shape[md] ;
+      split_dim = md ;
+      break ;
+    }
+  }
+  
+  // if the search did not yet succeed:
+  if ( max_extent == -1 )
+  {
+    // repeat process with relaxed conditions: now the search will also succeed
+    // if there is an axis which can be split less than n ways
+    for ( int md = dim - 1 ; md >= 0 ; md-- )
+    {
+      if (    md != forbid
+           && shape[md] > 1 )
+      {
+        max_extent = shape[md] ;
+        split_dim = md ;
+        break ;
+      }
+    }
+  }
+  
+  if ( split_dim != -1 ) // we have found a dimension for splitting
+  {
+    int w = shape [ split_dim ] ;  // extent of the dimension we can split
+    n = std::min ( n , w ) ;       // just in case, if that is smaller than n
+    
+    int cut [ n ] ;   // where to chop up this dimension
+    
+    for ( int i = 0 ; i < n ; i++ )
+      cut[i] = ( (i+1) * w ) / n ;   // roughly equal chunks, but certainly last cut == a.end()
+
+    shape_t start , end = shape ;
+
+    for ( int i = 0 ; i < n ; i++ )
+    {
+      end [ split_dim ] = cut [ i ];                  // apply the cut locations
+      res.push_back ( a.subarray ( start , end ) ) ;
+      start [ split_dim ] = end [ split_dim ] ;
+    }
+  }
+  else // no luck, fill the vector with the initial array as it's sole member
+  {
+    res.push_back ( a ) ;
+    n = 1 ;
+  }
+  return n ; // return the number of chunks
+}
+
+/// divide_and_conquer tries to split an array into chunks and processes each chunk
+/// in a separate thread with the functor passed as 'func'. This functor takes a reference
+/// to a chunk of data and its offset in the original array. There's a variant of run()
+/// taking a point function and a variant taking a vectorized point function
+
+template < class view_type > // type of input array
+struct divide_and_conquer
+{
+  enum { dim = view_type::actual_dimension } ;
+  typedef typename view_type::value_type value_type ;
+  enum { channels = vigra::ExpandElementResult < value_type > :: size } ;
+  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+  typedef typename view_type::difference_type shape_type ;
+
+  /// callable chunk_func is used to process each individual chunk in its
+  /// separate thread. Alternatively, if splitting fails, it is used on the whole
+  /// data set. The additional shape which is passed in can be used by the chunk
+  /// processing function to determine its position inside the 'whole' array.
+
+  typedef std::function < void ( view_type & , // chunk of data to process
+                                 shape_type )  // offset of chunk in 'whole' data set
+                        > chunk_func ;
+
+  /// run tries to split 'data' into 'nthread' views. If 'forbid' refers to an axis
+  /// of 'data', this axis will not be split. If splitting fails, 'func' is applied
+  /// to 'data', otherwise, 'func' is applied to all chunks that resulted from splitting,
+  /// concurrently, in separate threads.
+
+  static void run ( view_type & data ,   // array of data to divide (into chunks)
+                    chunk_func func ,    // functor to apply to each chunk
+                    int forbid = -1 ,    // axis that should not be split (default: no restriction)
+                    int nthreads = ncores )
+  {
+    if ( nthreads > 1 )
+    {
+      // we split the result array into equal chunks
+      // to process them by one thread each
+
+      std::vector<view_type> dn ; // space for chunks
+      nthreads = split_array_to_chunks <view_type> ( data , dn , nthreads , forbid ) ;
+      std::thread * t[nthreads] ;
+      shape_type offset ;
+
+      for ( int s = 0 ; s < nthreads ; s++ )
+      {
+        t[s] = new std::thread ( func , std::ref(dn[s]) , offset ) ;
+        for ( int d = 0 ; d < dim ; d ++ )
+        {
+          if ( dn[s].shape ( d ) != data.shape(d) )
+            offset[d] += dn[s].shape(d) ;
+        }
+      }
+      for ( int s = 0 ; s < nthreads ; s++ )
+      {
+        t[s]->join() ;
+        delete t[s] ;
+      }
+    }
+    else // if we're single-threaded we call func in this thread
+    {
+      func ( data , shape_type() ) ;
+    }
+  }
+
+  /// next we have a few methods for applying point functors to a MultiArrayView.
+  /// this is done by building a suitable chunk_func from a point functor and an
+  /// 'applicator' routine, and then passing this chunk_func to run() above, to
+  /// have it applied in multiple threads. First we define the mechanism for
+  /// point functors operating on value_type:
+  ///
+  /// callable point_func 'treats' a value_type passed in by reference
+
+  typedef std::function < void ( value_type & ) > point_func ;
+
+  /// apply is a helper function that applies 'f' to each value in 'data'.
+  /// By binding its first argument to a specific point function, we receive
+  /// a functor that satisfies the chunk_func signature.
+
+  static void apply ( point_func f , view_type & data , shape_type offset )
+  {
+    for ( auto& value : data )
+      f ( value ) ;
+  }
+
+  /// overload of 'run' applying a point function to all values in all chunks
+  /// this is where the chunk_func needed for run() above is built and used
+
+  static void run ( view_type & data ,   // array of data to divide (into chunks)
+                    point_func func ,    // point functor to apply to each value
+                    int nthreads = ncores )
+  {
+    using namespace std::placeholders ;
+
+    // we use bind to make a chunk_func by binding the point function argument
+    // to apply() to 'func'. The resulting two-argument functor satisfies the
+    // chunk_func signature and can be passed to run() above
+
+    chunk_func treat = std::bind ( apply ,    // use apply to apply
+                                   func ,     // this point function
+                                   _1 ,       // to this chunk of data
+                                   _2 ) ;     // located at this offset inside 'data'
+
+    // the resulting functor is passed to the version of run() taking a chunk_func
+
+    run ( data , treat , -1 , nthreads ) ; // delegate processing to run variant above
+  }
+  
+#ifdef USE_VC
+
+  typedef Vc::Vector < ele_type > value_v ;
+  enum { vsize = value_v::Size } ;
+
+  /// now we repeat the pattern for vectorized operation.
+  /// callable v_point_func 'treats' a value_v passed in by reference.
+
+  typedef std::function < void ( value_v & ) > v_point_func ;
+  
+  /// again we have a helper function. We'll use bind on the first
+  /// argument to generate a functor satisfying the chunk_func signature.
+  /// vapply reads individual elements into a vector-sized buffer and calls
+  /// the vectorized point function vf on them.
+  
+  static void vapply ( v_point_func vf ,
+                       view_type & data ,
+                       shape_type offset )
+  {
+    int aggregates = data.elementCount() / vsize ;             // number of full vectors
+    int leftovers = data.elementCount() - aggregates * vsize ; // and of any leftover single values
+    value_v buffer ;
+    auto it = data.begin() ;
+
+    if ( data.isUnstrided() )
+    {
+      typedef Vc::SimdArray < int , vsize > ic_v ;
+      typedef vigra::TinyVector < ic_v , channels > mc_ic_v ;
+
+      // if data is unstrided, we can process its contents by gather/scatter
+      // operations to vectors which we process with the functor vf. This is
+      // about as fast as we can go.
+
+      mc_ic_v mc_interleave ;
+      for ( int ch = 0 ; ch < channels ; ch++ )
+      {
+        mc_interleave[ch] = ic_v::IndexesFromZero() ;
+        mc_interleave[ch] *= channels ;
+        mc_interleave[ch] += ch ;
+      }
+
+      value_type * destination = data.data() ;
+
+      for ( int a = 0 ; a < aggregates ; a++ )
+      {
+        for ( int ch = 0 ; ch  < channels ; ch++ )
+        {
+          buffer.gather ( (ele_type*) destination , mc_interleave [ ch ] ) ;
+          vf ( buffer ) ;
+          buffer.scatter ( (ele_type*) destination , mc_interleave [ ch ] ) ;
+        }
+        destination += vsize ;
+      }
+      it += aggregates * vsize ;
+    }
+    else
+    {
+      // if the data are strided, we 'manually' fill a vector at a time
+      for ( int a = 0 ; a < aggregates ; a++ )
+      {
+        for ( int ch = 0 ; ch < channels ; ch++ )
+        {
+          for ( int e = 0 ; e < vsize ; e++ )            // fill vector
+            buffer[e] = it[e][ch] ;
+          vf ( buffer ) ;                                // call functor
+          for ( int e = 0 ; e < vsize ; e++ )
+            it[e][ch] = buffer[e] ;                      // unpack vector
+        }
+        it += vsize ;
+      }
+    }
+
+    // process leftovers, if any
+
+    if ( leftovers )
+    {
+      for ( int ch = 0 ; ch < channels ; ch++ )
+      {
+        int e ;
+        for ( e = 0 ; e < leftovers ; e++ )
+          buffer[e] = it[e][ch] ;          // fill in leftover values
+
+        for ( ; e < vsize ; e++ )          // just in case no aggregates were processed previously:
+          buffer[e] = it[0][ch] ;          // fill buffer up with suitable values
+
+        vf ( buffer ) ;                    // process buffer
+
+        for ( e = 0 ; e < leftovers ; e++ )
+          it[e][ch] = buffer[e] ;          // write back processed values
+      }
+    }
+  }
+
+  /// overload of 'run' applying a vectorized point function to all elements in all chunks
+
+  static void run ( view_type & data ,   // array of data to divide (into chunks)
+                    v_point_func vfunc , // vectorized point functor
+                    int nthreads = ncores )
+  {
+    using namespace std::placeholders ;
+
+    // again we use bind to create a functor satisfying the chunk_func signature,
+    // this time on vapply, where we bind the first argument to 'vfunc' and leave
+    // the chunk and its offset as placeholders.
+
+    chunk_func treat = std::bind ( vapply ,   // use vapply to apply
+                                   vfunc ,    // this vectorized point functor
+                                   _1 ,       // to this chunk of data
+                                   _2 ) ;     // which is at this offset inside 'data'
+
+    // the resulting functor is delegated to the version of run() taking a chunk_func
+
+    run ( data , treat , -1 , nthreads ) ;
+  }
+  
+
+#endif
+
+} ;
+
+/// divide_and_conquer_2 works just like divide_and_conquer, but operates on
+/// two arrays synchronously.
+
+template < class view_type ,   // type of first array
+           class view_type_2 > // type of second array
+struct divide_and_conquer_2
+{
+  typedef typename view_type::value_type value_type ;
+  enum { dim = view_type::actual_dimension } ;
+  typedef typename view_type::difference_type shape_type ;
+
+  // run() takes two MultiArrayViews of the same shape, splits them
+  // in the same way, and applies a functor to each resulting pair of chunks. This is
+  // needed for functions like remap().
+  // TODO: we might even formulate a general manifold by accepting a vector of arrays,
+  // but currently there is no code in vspline which uses more than two views together.
+
+  typedef std::function < void ( view_type & ,   // chunk of data to process
+                                 view_type_2 & , // chunk of data2 to process
+                                 shape_type )    // offset of chunk in 'whole' data set
+                        > chunk_func_2 ;
+
+  static void run ( view_type & data ,   // array of data to divide (into chunks)
+                    view_type_2 & data2 , // second array to coprocess with data
+                    chunk_func_2 func ,  // functor to apply to each pair of chunks
+                    int forbid = -1 ,    // axis that should not be split (default: no restriction)
+                    int nthreads = ncores )
+  {
+    if ( view_type_2::actual_dimension != dim )
+      throw ( dimension_mismatch ( "both views must have the same dimension" ) ) ;
+
+    // make sure that data and data2 are shape-compatible
+    // we silently assume that we can coiterate both arrays in scan order.
+    if ( data.shape() != data2.shape() )
+    {
+      throw shape_mismatch ( "both views must have the same shape" ) ;
+    }
+
+    if ( nthreads > 1 )
+    {
+      // we split both arrays into equal chunks
+      // to coprocess them by one thread each
+
+      std::vector<view_type> dn ; // space for chunks of data
+      nthreads = split_array_to_chunks <view_type> ( data , dn , nthreads , forbid ) ;
+
+      std::vector<view_type_2> d2n ; // space for chunks of data2
+      nthreads = split_array_to_chunks <view_type_2> ( data2 , d2n , nthreads , forbid ) ;
+
+      std::thread * t[nthreads] ;
+      shape_type offset ;
+
+      for ( int s = 0 ; s < nthreads ; s++ )
+      {
+        t[s] = new std::thread ( func , std::ref(dn[s]) , std::ref(d2n[s]), offset ) ;
+        for ( int d = 0 ; d < dim ; d ++ )
+        {
+          if ( dn[s].shape ( d ) != data.shape(d) )
+            offset[d] += dn[s].shape(d) ;
+        }
+      }
+      for ( int s = 0 ; s < nthreads ; s++ )
+      {
+        t[s]->join() ;
+        delete t[s] ;
+      }
+    }
+    else // if we're single-threaded we call func in this thread
+    {
+      func ( data , data2 , shape_type() ) ;
+    }
+  }
+} ;
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_COMMON
\ No newline at end of file
diff --git a/doxy.h b/doxy.h
new file mode 100644
index 0000000..f42dcfb
--- /dev/null
+++ b/doxy.h
@@ -0,0 +1,300 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*                     of uniform b-splines                             */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+// This header doesn't contain any code, only the text for the main page of the documentation.
+
+/*! \mainpage
+
+ \section intro_sec Introduction
+
+ vspline is a header-only generic C++ library for the creation and processing of uniform B-splines.
+ It aims to be as comprehensive as feasibly possible, yet at the same time producing code
+ which performs well, so that it can be used in production.
+ 
+ vspline was developed on a Linux system using gcc. It has not been tested with other
+ systems or compilers, and as of this writing I am aware that the code probably isn't portable.
+ My code uses elements from the C++11 standard (mainly the auto keyword and range-based for loops).
+ 
+ vspline relies heavily on two other libraries:
+ 
+ - <a href="http://ukoethe.github.io/vigra/">VIGRA</a>, mainly for handling of multidimensional
+ arrays and general signal processing
+ 
+ - <a href="https://compeng.uni-frankfurt.de/index.php?id=vc">Vc</a>, for the use of the CPU's
+ vector units
+ 
+ I find VIGRA indispensable; omitting it from vspline is not really an option. It is possible
+ not to use Vc: either its use can be disabled at compile time (see 'Compilation' below), or
+ the higher-level routines can be called with the flag use_vc set to false, which will prevent
+ vector code from being used even if it has been compiled in. This is for situations where
+ the binary can't be modified but the vectorized code doesn't work on the target system.
+ 
+ I have made an attempt to generalize the code so that it can handle
+
+ - arbitrary real data types and their aggregates
+ 
+ - a reasonable selection of boundary conditions
+ 
+ - prefiltering with implicit and explicit extrapolation schemes
+ 
+ - arbitrary spline orders
+ 
+ - arbitrary dimensions of the spline
+ 
+ - in multithreaded code
+ 
+ - using the CPU's vector units if possible
+
+On the evaluation side I provide
+
+ - evaluation of the spline at point locations in the defined range
+ 
+ - evaluation of the spline's derivatives
+
+ - mapping of arbitrary coordinates into the defined range
+ 
+ - evaluation of nD arrays of coordinates (generalized remap() function)
+ 
+ - transformation functor based remap function
+ 
+ \section install_sec Installation
+ 
+ vspline is header-only, so it's sufficient to place the headers where your code can access them.
+ VIGRA and Vc are supposed to be installed in a location where they can be found so that includes
+ along the lines of #include <vigra/...> succeed.
+
+ \section compile_sec Compilation
+ 
+ To compile software using vspline, I use this g++ call:
+ 
+ g++ -D USE_VC -pthread -O3 -march=native -fabi-version=6 --std=c++11 your_code.cc -lVc -lvigraimpex
+ 
+ where the -lvigraimpex can be omitted if vigraimpex (VIGRA's image import/export library)
+ is not used.
+ 
+ The -fabi-version=6 suppresses certain issues with Vc
+ 
+ Please note that an executable using Vc produced on your system may likely not work on
+ a machine with another CPU. It's best to compile on the intended target. Alternatively,
+ the target architecture can be passed explicitly to gcc, please refer to gcc's manual.
+ 'Not work' in this context means that it may as well crash due to an illegal instruction
+ or wrong alignment.
+ 
+ If you can't use Vc, the code can be made to compile without Vc by omitting -D USE_VC
+ and other flags relevant for Vc:
+ 
+ g++ -pthread -O3 --std=c++11 your_code.cc -lvigraimpex
+ 
+ All access to Vc in the code is inside #ifdef USE_VC .... #endif blocks, so not
+ defining USE_VC will effectively prevent its use.
+ 
+ For simplicity's sake, even if the code isn't compiled to use Vc, the higher level code
+ will still accept the common use_vc flag in the call signatures, but its value won't have an
+ effect. The documentation is built to contain text for vectorized operation, if this
+ is unwanted, change the doxy file.
+ 
+ \section license_sec License
+ 
+ vspline is free software, licensed under this license:
+ 
+    vspline - a set of generic tools for creation and evaluation
+              of uniform b-splines
+
+            Copyright 2015, 2016 by Kay F. Jahnke
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation
+    files (the "Software"), to deal in the Software without
+    restriction, including without limitation the rights to use,
+    copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following
+    conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the
+    Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+    OTHER DEALINGS IN THE SOFTWARE.
+
+ \section quickstart_sec Quickstart
+ 
+ TODO slightly out of date, bspline constructs differently now
+ 
+ If you stick with the high-level code, using class bspline or the remap function,
+ most of the parametrization is easy. Here are a few examples what you can do.
+ 
+ Let's suppose you have data in a 2D vigra MultiArray 'a'. vspline can handle float
+ and double values, and also their 'aggregates', meaning data types like pixels or
+ vigra's TinyVector. But for now, let's assume you have plain float data. Creating
+ the bspline object is easy:
+ 
+ typedef vspline::bspline < float , 2 > spline_type ; // fix the type of the spline
+ 
+ spline_type bspl ( a.shape() ) ; // create bspline object 'bspl' suitable for your data
+ 
+ bspl.core = a ;         // copy the source data into the bspline object's 'core' area
+ 
+ bspl.prefilter() ; // run prefilter() to convert original data to b-spline coefficients
+ 
+ Now obviously many things have been done by default here: The default spline degree
+ was used - it's 3, for a cubic spline. Also, boundary treatment mode 'MIRROR' was
+ used per default. Further default parameters cause the spline to be 'braced' so that
+ it can be evaluated with vspline's evaluation routines, Vc (if compiled in) was
+ used for prefiltering, and the code used as many threads as your system has physical cores.
+ You could have passed different values for all the parameters I have mentioned to the
+ constructor - the only template arguments are the value type and the number of dimensions,
+ which have to be known at compile time.
+ 
+ While the sequence of operations indicated here looks a bit verbose (why not create the
+ bspline object by a call like bspl ( a ) ?), in 'real' code you can use bspl.core straight
+ away as the space to contain your data - you might get the data from a file or by some other
+ process.
+ 
+ Next you may want to evaluate the spline at some pair of coordinates x, y.
+ To do so, you can use this idiom:
+ 
+ typedef evaluator_type < 2 , float , float > eval_type ; // get the appropriate evaluator type
+ 
+ eval_type ev ( bspl ) ;                                  // create the evaluator
+ 
+ Now, assuming you have float x and y:
+ 
+ float result = ev ( { x , y } ) ; // evaluate at (x,y)
+ 
+ The braces are needed because the evaluator expects a 'multi_coordinate' which is a
+ vigra::TinyVector of as many real values as the spline has dimensions. You will encounter
+ parameters of this type throughout: vspline's code is dimension-agnostic, and parameters
+ passed in often have to have as many components as there are dimensions in the concrete
+ object you are handling.
+ 
+ Again, some things have happened by default. The evaluator was constructed with a
+ bspline object, making sure that the evaluator is compatible. vspline can also
+ calculate the spline's derivatives. The default is plain evaluation, but you can pass
+ a request for production of derivatives to the evaluator's constructor. Let's assume you
+ want the first derivative along axis 0 (the x axis):
+ 
+ eval_type eval_dx ( bspl , { 1 , 0 } ) ; // ask for an evaluator producing dx
+ 
+ float dx = eval_dx ( { x , y } ) ;      // use the evaluator
+ 
+ For every constellation of derivatives you'll have to create a distinct evaluator.
+ This is not an expensive operation - the same coefficients are used in all cases, only
+ the weight functors used internally differ. Calculating the spline's derivatives is even
+ slightly faster than plain evaluation, since there are fewer multiplications to perform.
+ 
+ What about the remap functions? The little introduction demonstrated how you can evaluate
+ the spline at a single location. Most of the time, though, you'll require evaluation at
+ many coordinates. This is what remap does. Instead of a single multi_coordinate, you pass
+ a whole vigra::MultiArrayView full of multi_coordinates to it - and another MultiArrayView
+ of the same dimension and shape to accept the results of evaluating the spline at every
+ multi_coordinate in the first array. Here's a simple example, using the same array 'a' as above:
+ 
+ // create a 1D array containing (2D) coordinates into 'a'
+ vigra::MultiArray < 1 , vigra::TinyVector < float , 2 > > coordinate_array ( 3 ) ;
+ 
+ ... // fill in the coordinates
+
+ // create an array to accommodate the result of the remap operation
+ vigra::MultiArray < 1 , float > target_array ( 3 ) ;
+ 
+ // perform the remap
+ remap < float , float , 2 , 1 > ( a , coordinate_array , target_array ) ;
+ 
+ Now the three resultant values are in the target array.
+ 
+ And that's about it - vspline aims to provide all possible variants of b-splines, code to
+ create and evaluate them and to do so for arrays of coordinates. So if you dig deeper into
+ the code base, you'll find that you can stray off the default path, but there should rarely
+ be any need not to use the high-level object 'bspline' or the remap function, or its relative,
+ which doesn't construct the spline internally but takes a bspline as a parameter. The
+ transformation-based remap function is an alternative if the coordinates at which the source
+ array is to be sampled are the result of a mathematical operation (the transformation function)
+ on the target coordinates. In this case you don't need the array of coordinates.
+ 
+ While one might argue that the remap routine I present shouldn't be lumped together with the
+ 'proper' b-spline code, I feel that only by tightly coupling it with the b-spline code I can
+ make it really fast. And only by processing several coordinates at once (by multithreading and
+ vectorization) the hardware can be exploited fully. I might even make remap a method of the
+ b-spline evaluator - yet another variant of operator(), processing whole arrays of coordinates
+ instead of merely aggregates of a handful.
+ 
+ \section design_sec Design
+ 
+ You can do everything vspline does with other software - there are several freely available
+ implementations of b-spline interpolation and remap routines. What I wanted to create was
+ an implementation which was as general as possible and at the same time as fast as possible,
+ and, on top of that, comprehensive.
+
+ These demands are not easy to satisfy at the same time, but I feel that my design comes 
+ close. While generality is achieved by generic programming, speed needs exploitation of hardware
+ features, and merely relying on the compiler is not enough. The largest speedup I saw was
+ simply multithreading the code. This may seem like a trivial observation, but my design
+ is influenced by it: in order to efficiently multithread, the problem has to be partitioned
+ so that it can be processed by independent threads. You can see the partitioning both in
+ prefiltering and later in the remap routine, in fact, both even share code to do so. Currently
+ only multidimensional splines benefit from multithreading, but I dare say that huge 1D splines are
+ probably not *that* common - and partitioning them is trivial, if one uses a bit of overlap.
+ TODO: make no assumptions, just code the 1D partitioning
+ 
+ Another speedup method is data-parallel processing. This is often thought to be the domain of
+ GPUs, but modern CPUs also offer it in the form of vector units. I chose implementing data-parallel
+ processing in the CPU, as it offers tight integration with unvectorized CPU code. It's familiar
+ terrain, and the way from writing conventional CPU code to vector unit code is not too far,
+ when using tools like Vc, which abstract the hardware away.
+
+ Using both techniques together makes vspline fast. The target I was roughly aiming at was to
+ achieve frame rates of ca. 50 fps in float RGB and full HD, producing the images via remap from
+ a precalculated warp array. On my system, I have almost reached that goal - my remap times are
+ around 21 msec (for a cubic spline). The idea is to exploit the CPU fully, leaving the GPU, if
+ present, free to do something else, instead of letting the CPU idle while the GPU performs the
+ remapping. Keeping only the CPU busy also reduces overall traffic on the system. To really benefit
+ from my scheme, you have to have a reasonably modern CPU, though.
+
+ \section literature_sec Literature
+ 
+ There is a large amount of literature on b-splines available online. Here's a pick:
+ 
+ http://bigwww.epfl.ch/thevenaz/interpolation/
+ 
+ http://soliton.ae.gatech.edu/people/jcraig/classes/ae4375/notes/b-splines-04.pdf
+ 
+ http://www.cs.mtu.edu/~shene/COURSES/cs3621/NOTES/spline/B-spline/bspline-basis.html
+ 
+ http://www.cs.mtu.edu/~shene/COURSES/cs3621/NOTES/spline/B-spline/bspline-ex-1.html
+*/
diff --git a/eval.h b/eval.h
new file mode 100644
index 0000000..a3bc39c
--- /dev/null
+++ b/eval.h
@@ -0,0 +1,1099 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file eval.h
+
+    \brief code to evaluate uniform b-splines
+
+    This body of code contains the central class evaluator and auxiliary classes
+    which are needed for its smooth operation.
+
+    The evaluation is a reasonably straightforward process: A subset of the coefficient
+    array, containing coefficients 'near' the point of interest, is picked out, and
+    a weighted summation over this subset produces the result of the evaluation.
+    The complex bit is to have the right coefficients in the first place
+    (this is what prefiltering does), and to use the appropriate weights on
+    the coefficient window. For b-splines, there is an efficient method to
+    calculate the weights by means of a matrix multiplication, which is easily
+    extended to handle b-spline derivatives as well. Since this code lends itself
+    to a generic implementation, and it can be parametrized by the spline's order,
+    and since the method performs well, I use it here in preference to the code
+    which Thevenaz uses (which is, for the orders of splines it encompasses, the matrix
+    multiplication written out with a few optimizations, like omitting multiplications
+    with zero, and slightly more concise calculation of powers)
+*/
+
+#ifndef VSPLINE_EVAL_H
+#define VSPLINE_EVAL_H
+
+#include <thread>
+#include <math.h>
+#include <complex>
+#include <cmath>
+#include <iostream>
+#include <array>
+#include <assert.h>
+
+#include <vigra/multi_array.hxx>
+#include <vigra/multi_iterator.hxx>
+#include <vigra/multi_math.hxx>
+
+#include "basis.h"
+#include "mapping.h"
+#include "bspline.h"
+
+namespace vspline {
+
+using namespace std ;
+using namespace vigra ;
+using namespace vigra::multi_math;
+
+/// The routine 'calculate_weight_matrix' originates from vigra. I took the original
+/// routine BSplineBase<ORDER, T>::calculateWeightMatrix() from vigra and changed it
+/// in several ways:
+///
+/// - the spline degree is now a runtime parameter, not a template argument
+/// - the derivative degree is passed in as an additional parameter, directly
+///   yielding the appropriate weight matrix needed to calculate a b-spline's derivative
+///   with only a slight modification to the original code
+/// - the code uses my modified bspline basis function which takes the degree as a
+///   run time parameter instead of a template argument and works with integral
+///   operands and precalculated values, which makes it very fast, even for high
+///   spline degrees. bspline_basis() is in basis.h.
+
+template < class target_type >
+MultiArray < 2 , target_type > calculate_weight_matrix ( int degree , int derivative )
+{
+  // a spline of the given degree needs degree + 1 weights per evaluation
+  const int order = degree + 1 ;
+
+  // impossible parameters: there is nothing to calculate for derivatives
+  // of 'order' or beyond, so an empty array is returned
+  if ( ! ( derivative < order ) )
+    return MultiArray < 2 , target_type >() ;
+
+  // number of rows left over once 'derivative' rows have been dropped
+  const int rows = order - derivative ;
+
+  // space for the weight matrix: 'order' columns by 'rows' rows
+  MultiArray < 2 , target_type > res ( order , rows ) ;
+
+  // running factorial of the row number ( 0! == 1! == 1 )
+  long double faculty = 1.0 ;
+
+  for ( int row = 0 ; row < rows ; row++ )
+  {
+    if ( row > 1 )
+      faculty *= row ;
+
+    // res is indexed as ( column , row ) and filled one complete row after
+    // the other. The basis function is sampled at whole numbers only, starting
+    // at degree / 2 (integer division) and stepping down by one per column.
+    // bspline_basis() lives in basis.h; passing the argument as an int selects
+    // its variant for integral operands.
+
+    for ( int column = 0 ; column < order ; ++column )
+    {
+      const int x = degree / 2 - column ;
+
+      res ( column , row )
+        = bspline_basis<long double> ( x , degree , row + derivative ) / faculty ;
+    }
+  }
+
+  return res ;
+}
+
+/// while we deal with B-splines here, there is no need to limit the evaluator
+/// code only to B-spline basis functions. The logic is the same for any type of evaluation
+/// which functions like a separable convolution of an equilateral subarray of the coefficient
+/// array, and the only thing specific to b-splines is the weight generation.
+///
+/// So I coded the general case, which can use any weight generation function. Coding this
+/// introduces a certain degree of complexity, which I feel is justified for the flexibility
+/// gained. And it may turn out useful if we employ gaussians as basis functions to
+/// approximate high-order b-splines. The complexity is mainly due to the fact that, while
+/// we can write a simple (templated) function to generate weights (as above), we can't pass
+/// such a template as an object to a function. Instead we use an abstract base class for
+/// the weight functor and inherit from it for specific weight generation methods.
+///
+/// I made some investigations towards coding evaluation of splines with different orders
+/// along the different axes, but this introduced too much extra complexity for my taste and
+/// took me too far away from simply providing efficient code for b-splines, so I abandoned
+/// the attempts. Therefore the weight functors for a specific spline all have to have a common
+/// ORDER and generate ORDER weights. The only way to force lesser order weight functors into
+/// this scheme if it has to be done is to set some of the weights to zero. Weight functors
+/// of higher ORDER than the spline can't be accommodated, if that should be necessary, the ORDER
+/// of the entire spline has to be raised.
+///
+/// Note the use of 'delta' in the functions below. this is due to the fact that these functors
+/// are called with the fractional part of a real valued coordinate.
+///
+/// first we define a base class for (multi-)functors calculating weights.
+/// this base class can accommodate weight calculation with any weight generation
+/// function using the same signature. It is not specific to b-splines.
+/// We access the weight functors via a pointer to this base class in the code below.
+
+template < typename rc_type >
+struct weight_functor_base
+{
+  // weight functors are accessed via pointers to this base class, so the
+  // destructor is virtual: destroying a derived functor through a base class
+  // pointer is then well-defined.
+
+  virtual ~weight_functor_base() { }
+
+  // we define two pure virtual overloads for operator(), one for unvectorized
+  // and one for vectorized operation. In case the scope of evaluation is extended
+  // to other types of values, we'll have to add the corresponding signatures here.
+  //
+  // result - points to the space the weights are written to
+  // delta  - the fractional part of a real valued coordinate
+  
+  virtual void operator() ( rc_type* result , const rc_type& delta ) = 0 ;
+  
+#ifdef USE_VC
+
+  // vectorized equivalent of rc_type
+  typedef Vc::Vector < rc_type >       rc_type_v ;
+
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta ) = 0 ;
+
+#endif
+} ;
+
+/// this functor calculates weights for a b-spline or its derivatives.
+/// with d == 0, the weights are calculated for plain evaluation.
+/// Initially I implemented weight_matrix as a static member, hoping the code
+/// would perform better, but I could not detect significant benefits. Letting
+/// the constructor choose the derivative gives more flexibility and less type
+/// proliferation.
+
+template < typename target_type ,   // type for weights (may be a Vc::Vector)
+           typename rc_type >       // single real value
+
+struct bspline_derivative_weights
+#ifdef USE_VC
+: public Vc::VectorAlignedBase
+#endif
+{
+  typedef typename MultiArray<2,rc_type>::iterator wm_iter ;
+
+  MultiArray<2,rc_type> weight_matrix ; // result of calculate_weight_matrix()
+  const int degree ;                    // degree of the spline
+  const int derivative ;                // which derivative the weights are for
+  const int columns ;                   // number of weights produced: degree + 1
+  wm_iter wm_begin ;                    // start of weight_matrix
+  wm_iter wm_end ;                      // end of weight_matrix
+
+  // set up the weight matrix for the given degree and derivative and
+  // remember where it starts and ends.
+
+  bspline_derivative_weights ( int _degree , int _derivative = 0 )
+  : weight_matrix ( calculate_weight_matrix<rc_type> ( _degree , _derivative ) ) ,
+    degree ( _degree ) ,
+    derivative ( _derivative ) ,
+    columns ( _degree + 1 )
+  { 
+    wm_begin = weight_matrix.begin() ;
+    wm_end = weight_matrix.end() ;
+  } ;
+
+  // copying must not simply copy wm_begin and wm_end: they would keep
+  // referring to the weight matrix of the object copied from. Instead they
+  // are taken afresh from this object's own copy of the matrix.
+
+  bspline_derivative_weights ( const bspline_derivative_weights & other )
+  : weight_matrix ( other.weight_matrix ) ,
+    degree ( other.degree ) ,
+    derivative ( other.derivative ) ,
+    columns ( other.columns )
+  {
+    wm_begin = weight_matrix.begin() ;
+    wm_end = weight_matrix.end() ;
+  }
+  
+  // calculate the weights for 'delta' and store them to result[0] ... result[columns-1].
+  // Row k of the weight matrix is multiplied with delta^k and the products are summed up.
+  // The weight matrix must not be empty: calculate_weight_matrix() returns an empty
+  // array if derivative > degree, and in that case this operator must not be called.
+
+  void operator() ( target_type* result , const target_type & delta )
+  {
+    wm_iter factor_it = wm_begin ;
+    const wm_iter factor_end = wm_end ;
+
+    // the result is initialized with the first row of the 'weight matrix'.
+    // We save ourselves multiplying it with delta^0.
+ 
+    for ( int c = 0 ; c < columns ; c++ )
+    {
+      result[c] = *factor_it ;
+      ++factor_it ;
+    }
+    
+    // if the matrix has just this one row we're done. This is the case for
+    // degree 0, but also whenever derivative == degree, so we test the iterator
+    // rather than the degree - otherwise the loop below would read beyond the
+    // end of the matrix and never meet its termination criterion.
+
+    if ( factor_it == factor_end )
+      return ;
+
+    target_type power = delta ;
+
+    for ( ; ; )
+    {
+      for ( int c = 0 ; c < columns ; c++ )
+      {
+        result[c] += power * *factor_it ;
+        ++factor_it ;
+      }
+      if ( factor_it == factor_end ) // avoid multiplication if exhausted, break now
+        break ;
+      power *= delta ;               // otherwise produce next power(s) of delta(s)
+    }
+  }
+} ;
+
+/// we derive from the weight functor base class to obtain a (multi-) functor
+/// specifically for (derivatives of) a b-spline :
+
+template < typename rc_type >
+struct bspline_derivative_weight_functor
+: public weight_functor_base < rc_type >
+{
+  using base_class = weight_functor_base < rc_type > ;
+
+  // the work is done by bspline_derivative_weights objects held as members:
+  // one producing single weights ...
+
+  bspline_derivative_weights < rc_type , rc_type >  weights ;
+
+#ifdef USE_VC
+
+  // ... and, if Vc is used, one producing vectors of weights
+
+  using typename base_class::rc_type_v ;
+
+  bspline_derivative_weights < rc_type_v , rc_type >  weights_v ;
+
+#endif
+
+  // degree is the degree of the spline, d the derivative to produce weights
+  // for (the default, 0, is plain evaluation). Both are passed on unchanged
+  // to the member functors.
+
+  bspline_derivative_weight_functor ( int degree , int d = 0 )
+  : weights ( degree , d )
+#ifdef USE_VC
+  , weights_v ( degree , d )
+#endif
+  { }
+
+  // unvectorized weight calculation: delegate to 'weights'
+
+  virtual void operator() ( rc_type* result , const rc_type& delta )
+  { weights ( result , delta ) ; }
+
+#ifdef USE_VC
+
+  // vectorized weight calculation: delegate to 'weights_v'
+
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta )
+  { weights_v ( result , delta ) ; }
+
+#endif
+} ;
+
+// not a very useful weight function, but it suits to prove the concept of plug-in
+// weight functions works as intended. Instead of calculating the weights, as in the functor
+// above, this one simply returns equal weights. The result is that, no matter what delta
+// is passed in, the weights are the same and average over the coefficients to which they
+// are applied.
+// Note here that one important property of the weights is that they constitute
+// a partition of unity. Both the (derivative) b-spline weights and this simple
+// weight functor share this property.
+// currently unused, but left in for now
+
+/*
+
+template < typename rc_type >
+struct average_weight_functor
+: public weight_functor_base < rc_type >
+{
+  typedef weight_functor_base < rc_type > base_class ;
+  using typename base_class::rc_type_v ;
+
+  const rc_type weight ;
+  const int order ;
+  
+  average_weight_functor ( int degree )
+  : weight ( rc_type ( 1.0 ) / rc_type ( degree + 1 ) ) ,
+    order ( degree + 1 )
+  { } ;
+  
+  virtual void operator() ( rc_type* result , const rc_type& delta )
+  {
+    for ( int e = 0 ; e < order ; e++ )
+      result[e] = weight ;
+  }
+  
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta )
+  {
+    for ( int e = 0 ; e < order ; e++ )
+      result[e] = weight ;
+  }  
+} ;
+
+// here's another example, using a gaussian to approximate the b-spline basis function
+
+template < typename rc_type >
+struct gaussian_weight_functor
+: public weight_functor_base < rc_type >
+{
+  typedef weight_functor_base < rc_type > base_class ;
+
+  const int degree ;
+  const int order ;
+  
+  gaussian_weight_functor ( int _degree , int d = 0 )
+  : degree ( _degree ) ,
+    order ( _degree + 1 )
+  { } ;
+  
+  virtual void operator() ( rc_type* result , const rc_type& delta )
+  {
+    rc_type x = - degree / 2 - delta ; // only odd splines for now
+    for ( int e = 0 ; e < order ; e++ , x += rc_type(1.0) )
+      result[e] = gaussian_bspline_basis_approximation<rc_type> ( x , degree ) ;
+  }
+
+#ifdef USE_VC
+
+  using typename base_class::rc_type_v ;
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta )
+  {
+    rc_type_v x = rc_type_v ( - degree / 2 ) - delta ; // only odd splines for now
+    for ( int e = 0 ; e < order ; e++ , x += rc_type(1.0) )
+      result[e] = gaussian_bspline_basis_approximation<rc_type_v> ( x , degree ) ;
+  }
+
+#endif
+} ;
+
+*/
+
+/*
+// for speed tests we duplicate above weight generation functor definitions
+// to see if calculating weights for all dimensions at once is faster
+
+template < typename rc_type , typename rc_type_v = rc_type >
+struct alt_weight_functor_base
+{
+  // we define two pure virtual overloads for operator(), one for unvectorized
+  // and one for vectorized operation. In case the scope of evaluation is extended
+  // to other types of values, we'll have to add the corresponding signatures here.
+  
+  virtual void operator() ( rc_type* result , const rc_type& delta ) = 0 ;
+  
+#ifdef USE_VC
+
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta ) = 0 ;
+
+#endif
+} ;
+
+/// we derive from the weight functor base class to obtain a (multi-) functor
+/// specifically for (derivatives of) a b-spline :
+
+template < typename rc_type , typename rc_type_v = rc_type >
+struct alt_bspline_derivative_weight_functor
+: public alt_weight_functor_base < rc_type , rc_type_v >
+{
+  typedef alt_weight_functor_base < rc_type , rc_type_v > base_class ;
+
+  // set up the fully specialized functors to which operator() delegates:
+
+  bspline_derivative_weights < rc_type , rc_type >  weights ;
+
+#ifdef USE_VC
+
+  bspline_derivative_weights < rc_type_v , rc_type >  weights_v ; 
+
+#endif
+
+  alt_bspline_derivative_weight_functor ( int degree , int d = 0 )
+  : weights ( degree , d )
+#ifdef USE_VC
+  , weights_v ( degree , d )
+#endif
+  {
+  }
+  
+  // handle the weight calculation by delegation to the functors set up at construction
+  
+  virtual void operator() ( rc_type* result , const rc_type& delta )
+  {
+    weights ( result , delta ) ;
+  }
+
+#ifdef USE_VC
+  virtual void operator() ( rc_type_v* result , const rc_type_v& delta )
+  {
+    weights_v ( result , delta ) ;
+  }
+#endif
+} ;
+*/
+
+/// class evaluator encodes evaluation of a B-spline. This is coded so that it is
+/// agnostic of the spline's dimension, order, and data type. While the typedefs are many and
+/// look difficult, they are mainly used to use concise names in the actual calculation
+/// routines, which are by contrast simple.
+/// I have already hinted at the process used, but here it is again in a nutshell:
+/// The coordinate at which the spline is to be evaluated is split into its integral part
+/// and a remaining fraction. The integral part defines the location where a window from the
+/// coefficient array is taken, and the fractional part defines the weights to use in calculating
+/// a weighted sum over this window. This weighted sum represents the result of the evaluation.
+/// Coordinate splitting (and mapping to the spline's defined range) is done with mapping
+/// objects, as defined above. The generation of the weights to be applied to the window
+/// of coefficients is performed by employing the weight functors above. What's left to do is
+/// to bring all the components together, which happens in class evaluator. The workhorse
+/// code in the subclasses _eval and _v_eval takes care of performing the necessary operations
+/// recursively over the dimensions of the spline.
+
+// TODO: if evaluator objects are only ever used one per thread, the 'workspace' for the
+// weights might as well be made a member of class evaluator. The current implementation is
+// thread-safe, though, and allows the same evaluator object to be used from several threads.
+
+template < int dimension ,         ///< dimension of the spline
+           class value_type ,      ///< type of spline coefficients/result (pixels etc.)
+           class rc_type = float , ///< singular real coordinate, float or double
+           class ic_type = int >   ///< singular integral coordinate, currently only int
+
+class evaluator
+#ifdef USE_VC
+: public Vc::VectorAlignedBase
+#endif
+{
+public:
+  
+  enum { level = dimension - 1 } ;
+
+  /// value_type is the data type of the coefficients the evaluator processes,
+  /// and also the data type for the results it produces. These values may be
+  /// aggregates like pixels or TinyVectors. For certain operations these values
+  /// are taken apart into their components, using vigra's ExpandElement mechanism.
+  /// This mechanism can be used to introduce aggregate types that aren't already
+  /// known to vigra. ele_type is the type of an individual element of such an
+  /// aggregate; if value_type isn't an aggregate, it's the same as value_type.
+  /// If a value_type is used which isn't known to vigra's ExpandElement
+  /// mechanism already, the expansion has to be defined in order to make this
+  /// value_type work with this code.
+
+  enum { channels = ExpandElementResult < value_type > :: size } ;
+
+  typedef typename ExpandElementResult < value_type > :: type      ele_type ;
+
+  /// array_type is used for the coefficient array
+
+  typedef MultiArrayView < dimension , value_type >                array_type ;
+
+  /// type used for nD integral coordinates, array shapes
+
+  typedef typename array_type::difference_type                     shape_type ;
+
+  /// type for a multidimensional real coordinate
+
+  typedef TinyVector < rc_type , dimension >                       nd_rc_type ;
+
+  /// type for a 'split' coordinate, mainly used internally in the evaluator, but
+  /// also in 'warp arrays' used for remapping
+
+  typedef split_type < dimension , ic_type , rc_type >             split_coordinate_type ;
+
+  /// the evaluator can handle raw coordinates, but it needs a mapping to do so.
+ 
+  typedef typename MultiArray < 1 , ic_type > :: iterator          offset_iterator ;
+  typedef MultiArrayView < dimension + 1 , ele_type >              component_view_type ;
+  typedef typename component_view_type::difference_type            expanded_shape_type ;
+  typedef TinyVector < ele_type* , channels >                      component_base_type ;
+
+#ifdef USE_VC
+
+  // for vectorized operation, we need a few extra typedefs
+  // I use a _v suffix to indicate vectorized types and the prefixes
+  // mc_ for multichannel and nd_ for multidimensional
+
+  // TODO: while the use of SimdArrays is convenient, I have reason to believe
+  // it's significantly slower than using Vc::Vectors. So I'd like a mechanism
+  // to use Vc::Vectors if the size lends itself to it (float/int combination)
+  // and only use SimdArrays when necessary (double data, float coordinates)
+
+  // TODO: investigate Vc's simdize functionality; the explicit coding of the
+  // vectorized types below might be simplified by 'simdizing' singular types
+  
+  /// a vector of the elementary type of value_type,
+  /// which is used for coefficients and results:
+
+  typedef Vc::Vector < ele_type > ele_v ;
+  
+  /// element count of Simd data types, always the same as the number of elements in a Vc::Vector
+  /// of ele_type and used throughout. If elementary types of a size different from ele_type's
+  /// are used, they are vectorized by using SimdArrays.
+  
+  enum { vsize = ele_v::Size } ;
+
+  /// compatible-sized SimdArray of vsize ic_type
+
+  typedef Vc::SimdArray < ic_type , vsize > ic_v ;
+
+  /// compatible-sized SimdArray of vsize rc_type
+
+  typedef Vc::SimdArray < rc_type , vsize > rc_v ;
+
+  /// multichannel value as SoA, for pixels etc.
+
+  typedef TinyVector < ele_v , channels > mc_ele_v ;
+
+  /// SoA for nD coordinates/components
+
+  typedef TinyVector < rc_v , dimension > nd_rc_v ;
+
+  /// SoA for nD shapes (or multidimensional indices)
+
+  typedef TinyVector < ic_v , dimension > nd_ic_v ;
+
+  /// SoA for multichannel indices (used for gather/scatter operations)
+
+  typedef TinyVector < ic_v , channels >  mc_ic_v ;
+
+#else
+  
+  enum { vsize = 1 } ;
+  
+#endif // USE_VC
+  
+  typedef nd_mapping < split_coordinate_type , dimension , vsize > nd_mapping_type ;
+
+  typedef weight_functor_base < ele_type > weight_functor_base_type ;
+
+  /// in the context of b-spline calculation, this is the weight generating
+  /// functor which will be used:
+
+  typedef bspline_derivative_weight_functor < ele_type > bspline_weight_functor_type ;
+  
+//   to try out gaussian weights, one might instead use
+//   typedef gaussian_weight_functor < ele_type > bspline_weight_functor_type ;
+  
+  /// we need one functor per dimension:
+    
+  typedef TinyVector < weight_functor_base_type* , dimension > nd_weight_functor ;
+  
+  // while in the context of B-splines the weight functors are, of course, functors which
+  // calculate the weights via the B-spline basis functions, the formulation we use here
+  // allows us to use any set of functors that satisfy the argument type. With this
+  // flexible approach, trying out other basis functions becomes simple: just write the
+  // functor and pass it in, it doesn't have to have anything to do with B-splines at all.
+  // But currently we limit the evaluator to use b-spline weights.
+  
+private:
+  
+  nd_weight_functor fweight ;       ///< set of pointers to weight functors, one per dimension
+  const array_type& coefficients ;  ///< b-spline coefficient array
+  const expanded_shape_type expanded_stride ;        ///< strides in terms of expanded value_type
+  MultiArray < 1 , ic_type > offsets ;               ///< offsets in terms of value_type
+  MultiArray < 1 , ic_type > component_offsets ;     ///< offsets in terms of ele_type, for vc op
+  component_base_type component_base ;
+  component_view_type component_view ;
+  nd_mapping_type mmap ;                 ///< mapping of real coordinates to spline coordinates
+  bspline_weight_functor_type wfd0 ;    ///< default weight functor: underived bspline
+  const int spline_degree ;
+  const int ORDER ;
+
+public:
+
+#ifdef USE_VC
+  nd_ic_v nd_interleave ;            ///< gather/scatter indexes for interleaving nD
+  mc_ic_v mc_interleave ;            ///< and mc vectors
+#endif
+
+  /// this constructor is the most flexible variant.
+
+  evaluator ( const array_type& _coefficients ,
+              nd_mapping_type _mmap ,
+              int _spline_degree ,
+              TinyVector < int , dimension > _derivative = TinyVector < int , dimension >(0) )
+  : coefficients ( _coefficients ) ,
+    spline_degree ( _spline_degree ) ,
+    ORDER ( _spline_degree + 1 ) ,
+    component_view ( _coefficients.expandElements ( 0 ) ) ,
+    expanded_stride ( _coefficients.expandElements ( 0 ).stride() ) ,
+    wfd0 ( _spline_degree , 0 ) ,
+    mmap ( _mmap )               // I enforce the passing in of a mapping, even though it
+                                 // may not be used at all. TODO: can I do better?
+  {
+    // initialize the weight functors. In this constructor, we use only bspline weight
+    // functors, even though the evaluator can operate with all weight functors
+    // filling in the right number of basis values given a delta. To make use of this
+    // flexibility, one would derive from this class or write another constructor.
+    // Note how we code so that the default case (plain evaluation with no derivatives)
+    // results in use of only one single weight functor.
+
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      if ( _derivative[d] )
+      {
+        fweight[d] = new bspline_weight_functor_type ( _spline_degree , _derivative[d] ) ;
+      }
+      else
+      {
+        fweight[d] = &wfd0 ; // pick the default if derivative is 0
+      }
+    }
+    
+    // calculate the number of offsets needed and create the array to hold them
+    // The evaluation forms a weighted sum of a window into the coefficient array.
+    // The sequence of offsets we calculate here is the set of pointer differences
+    // from the first element in that window to all elements in the window. It's
+    // another way of coding this window, where all index calculations have already
+    // been done beforehand rather than performing it during the traversal of the
+    // window by means of stride/shape arithmetic. Coding the window in this fashion
+    // also makes it easy to vectorize the code.
+    
+    int noffsets = ORDER ;
+    for ( int exp = 1 ; exp < dimension ; exp++ )
+      noffsets *= ORDER ;
+    
+    offsets = MultiArray < 1 , ptrdiff_t > ( noffsets ) ;
+    component_offsets = MultiArray < 1 , ptrdiff_t > ( noffsets ) ;
+    
+    // we fill the offset array in a simple fashion: we do a traversal of the window
+    // and calculate the pointer difference for every element reached. We use the
+    // same loop to code the corresponding offsets to elementary values (ele_type)
+  
+    auto sample = coefficients.subarray ( shape_type() , shape_type(ORDER) ) ;
+    auto base = sample.data() ;
+    auto target = offsets.begin() ;
+    auto component_target = component_offsets.begin() ;
+
+    for ( auto &e : sample )
+    {
+      *target = &e - base ;
+      *component_target = channels * *target ;
+      ++target ;
+      ++component_target ;
+    }
+
+    // set up a set of base addresses for the component channels. This is needed
+    // for the vectorization, as the vector units can only work on elementary types
+    // (ele_type) and not on aggregates, like pixels.
+    
+    expanded_shape_type eshp ;
+    for ( int i = 0 ; i < channels ; i++ )
+    {
+      eshp[0] = i ;
+      component_base[i] = &(component_view[eshp]) ;
+    }
+    
+#ifdef USE_VC
+
+    // fill the gather/scatter information for vectorized operation
+    // we only need to do this once on evaluator creation. For now this
+    // code is limited to use ints to scatter/gather. This limits the
+    // array size which can be processed, but in real applications this
+    // should rarely become a problem. Still a thing to keep in mind.
+    // TODO this should be factored out and be static
+
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      nd_interleave[d] = ic_v::IndexesFromZero() ;
+      nd_interleave[d] *= ic_type ( dimension ) ;
+      nd_interleave[d] += ic_type ( d ) ;
+    }
+    
+    for ( int ch = 0 ; ch < channels ; ch++ )
+    {
+      mc_interleave[ch] = ic_v::IndexesFromZero() ;
+      mc_interleave[ch] *= ic_type ( channels ) ;
+      mc_interleave[ch] += ic_type ( ch ) ;
+    }
+    
+#endif
+
+  } ;
+
+
+  /// simplified constructor from a bspline object
+  
+  evaluator ( const bspline < value_type , dimension > & bspl ,
+              TinyVector < int , dimension > _derivative = TinyVector < int , dimension >(0) )
+  : evaluator ( bspl.coeffs ,
+                nd_mapping_type ( bspl ) ,
+                bspl.spline_degree ,
+                _derivative )
+  {
+    if ( bspl.spline_degree > 1 && ! bspl.braced )
+      throw not_supported ( "for spline degree > 1: evaluation needs braced coefficients" ) ;
+  } ;
+  
+  nd_mapping_type& get_mapping()
+  {
+    return mmap ;
+  }
+  
+  int workspace_size()
+  {
+    // number of dtype elements needed to hold the weights (bases) for all dimensions
+    return ORDER * dimension ;
+  }
+
+  // TODO: docu here needs to be adapted, there is no more basis iterator, instead
+  // p_workspace is used
+  
+  /// fill_multi_basis calculates the weights to be applied to a section of the coefficients from
+  /// the fractional parts of the split coordinates. What is calculated here is the evaluation
+  /// of the spline's basis function at dx, dx+1 , dx+2..., but doing it naively is computationally
+  /// expensive, as the evaluation of the spline's basis function at arbitrary values has to
+  /// look at the value, find out the right interval, and then calculate the value with the
+  /// appropriate function. But we always have to calculate the basis function for *all*
+  /// intervals anyway, and the method used here performs this task effectively using a
+  /// vector/matrix multiplication.
+  ///
+  /// If the spline is more than 1-dimensional, we need a set of weights for every dimension.
+  /// We use a TinyVector of basis_type for the purpose, and iterate over it recursively
+  /// in the evaluation process.
+  ///
+  /// Contrary to my initial implementation, I fill the 'workspace' in order ascending with
+  /// the axis, so now weights for axis 0 are first etc.. This results in a slightly funny-looking
+  /// initial call to _eval, but the confusion from reverse-filling the workspace was probably worse.
+  
+  template < typename dtype ,
+             typename nd_rc_type >
+  void fill_multi_basis ( dtype * p_workspace ,
+                          const nd_rc_type& c )
+  {
+    auto ci = c.cbegin() ;
+    for ( int axis = 0 ; axis < dimension ; ++ci , ++axis )
+      (*(fweight[axis])) ( p_workspace + axis * ORDER , *ci ) ;
+  }
+
+  /// _eval is the workhorse routine and implements the recursive arithmetic needed to
+  /// evaluate the spline. The weights (the 'basis') for the current dimension are
+  /// read from p_workspace (there is no basis iterator any more); each weight is
+  /// multiplied with the result of recursively calling _eval for the next
+  /// lower dimension and the products summed up to produce the return value.
+  /// The scheme of using a recursive evaluation has several benefits:
+  /// - it needs no explicit intermediate storage of partial sums (uses stack instead)
+  /// - it makes the process dimension-agnostic in an elegant way
+  ///
+  /// this _eval works with a base pointer and an iterator over offsets, just like the vectorized version.
+  /// note that this routine is used for operation on braced splines, with the sequence of offsets to be
+  /// visited fixed at the evaluator's construction. But in this non-vectorized routine, passing in a
+  /// different sequence of offsets for evaluation in an area where boundary conditions apply would be
+  /// feasible (akin to Thevenaz' indexing), even though it would require a lot of new logic, since
+  /// currently the bracing takes care of the boundary conditions.
+  ///  
+  /// I'd write a plain function template and partially specialize it, but that's not allowed,
+  /// so I use a functor instead:
+  
+  template < int level , class dtype >
+  struct _eval
+  {
+    dtype operator() ( const dtype* & pdata ,
+                       offset_iterator & ofs ,
+                       const ele_type * p_workspace ,
+                       const int& ORDER
+                     )
+    {
+      dtype result = dtype() ;
+      for ( int i = 0 ; i < ORDER ; i++ )
+      {
+        result +=   p_workspace[i]
+                  * _eval < level - 1 , dtype >() ( pdata , ofs , p_workspace - ORDER , ORDER ) ;
+      }
+      return result ;
+    }
+  } ;
+
+  /// at level 0 the recursion ends, now we finally apply the weights for axis 0
+  /// to the window of coefficients. Note how ofs is passed in by reference. This looks
+  /// wrong, but it's necessary: When, in the course of the recursion, the level 0
+  /// routine is called again, it needs to access the next bunch of ORDER coefficients.
+  /// Just incrementing the reference saves us incrementing higher up.
+  
+  template < class dtype >
+  struct _eval < 0 , dtype >
+  {
+    dtype operator() ( const dtype* & pdata ,
+                       offset_iterator & ofs ,
+                       const ele_type * p_workspace ,
+                       const int& ORDER
+                     )
+    {
+      dtype result = dtype() ;
+      for ( int i = 0 ; i < ORDER ; i++ )
+      {
+        result += pdata [ *ofs ] * p_workspace[i] ;
+        ++ofs ;
+      }
+      return result ;
+    }
+  } ;
+
+#ifdef USE_VC
+
+  /// vectorized version of _eval
+  /// to operate with vsize values synchronously, we need a bit more indexing than in the
+  /// non-vectorized version. the second parameter, origin, constitutes a gather operand
+  /// which, applied to the base address, handles a set of windows to be processed in parallel.
+  /// if the gather operation is repeated with offsetted base addresses, the result vector is
+  /// built in the same way as the single result value in the unvectorized code above.
+  /// note that the vectorized routine can't function like this when it comes to
+  /// evaluating unbraced splines: it relies on the bracing and can't do without it, because
+  /// it works with a fixed sequence of offsets, whereas the evaluation of an unbraced spline
+  /// might use a different offset sequence for values affected by the boundary condition.
+  /// Nevertheless I have chosen this implementation, as the speed gain by vectorization is
+  /// so large that the extra memory needed for the bracing seems irrelevant. I have to concede,
+  /// though, that this rules out in-place spline generation on a packed array of knot point values -
+  /// either the knot point data come in with bracing space already present, or the operation has
+  /// to use a separate target array.
+
+  template < class dtype , int level >
+  struct _v_eval
+  {
+    dtype operator() ( const component_base_type& base , ///< base adresses of components
+                       const ic_v& origin ,              ///< offsets to evaluation window origins
+                       offset_iterator & ofs ,           ///< offsets to coefficients inside this window
+                       const ele_v * p_workspace ,       ///< vectorized weights
+                       const int& ORDER )
+    {
+      dtype sum = dtype() ;    ///< to accumulate the result
+      dtype subsum ; ///< to pick up the result of the recursive call
+
+      for ( int i = 0 ; i < ORDER ; i++ )
+      {
+        subsum = _v_eval < dtype , level - 1 >() ( base , origin , ofs , p_workspace - ORDER , ORDER );
+        for ( int ch = 0 ; ch < channels ; ch++ )
+        {
+          sum[ch] += p_workspace[i] * subsum[ch] ;
+        }
+      }
+      return sum ;
+    }  
+  } ;
+
+  /// the level 0 routine terminates the recursion
+  
+  template < class dtype >
+  struct _v_eval < dtype , 0 >
+  {
+    dtype operator() ( const component_base_type& base , ///< base adresses of components
+                       const ic_v& origin ,              ///< offsets to evaluation window origins
+                       offset_iterator & ofs ,           ///< offsets to coefficients in this window
+                       const ele_v * p_workspace ,       ///< vectorized weights
+                       const int& ORDER )
+    {
+      dtype sum = dtype() ;
+
+      for ( int i = 0 ; i < ORDER ; i++ )
+      {
+        for ( int ch = 0 ; ch < channels ; ch++ )
+        {
+          sum[ch] += p_workspace[i] * ele_v ( base[ch] , origin + *ofs ) ;
+        }
+        ++ofs ;
+      }
+      return sum ;
+    }  
+  } ;
+
+#endif // USE_VC
+
+  // next are the variants of operator(). there are quite a few, since I've coded for operation
+  // from different starting points and both for vectorized and nonvectorized operation.
+  
+  /// variant of operator() which takes a shape and a multi_basis. This is the final delegate
+  /// calling the recursive _eval method. Note how we pass _eval the workspace, increasing the
+  /// pointer by level * ORDER. By doing so we address the weights for the highest dimension,
+  /// which is processed at the highest level of the recursion. The recursive call to _eval
+  /// *decreases* p_workspace by ORDER, etc. until for processing along the 0 (or x) axis
+  /// the workspace pointer is equal again to what this here eval receives as p_workspace.
+
+  value_type eval ( const shape_type& s ,     // lower corner of the subarray
+                    ele_type * p_workspace )  // precalculated multi_basis
+  {
+    const value_type * base = & coefficients [ s ] ;
+    auto ofs = offsets.begin() ;                // offsets reflects the positions inside the subarray
+    return _eval<level,value_type>() ( base , ofs , p_workspace + level * ORDER , ORDER ) ;
+  }
+
+  /// this variant of eval takes a split coordinate:
+  
+  value_type operator() ( const split_coordinate_type& sc ,  // presplit coordinate
+                          ele_type * p_workspace )           // space for weights
+  {
+    fill_multi_basis ( p_workspace , sc.tune ) ;
+    return eval ( sc.select , p_workspace ) ;
+  }
+
+  /// this variant takes a coordinate which hasn't been mapped yet, so it uses
+  /// the nd_mapping, mmap, and applies it. It's the most convenient form but
+  /// also the slowest, due to the mapping, which is quite expensive computationally.
+  
+  value_type operator() ( const nd_rc_type& c ,      /// coordinate
+                          ele_type * p_workspace )   /// space for weights
+  {
+    split_coordinate_type sc ;
+    mmap ( c , sc ) ;  /// apply the mappings
+    fill_multi_basis ( p_workspace , sc.tune ) ;
+    return eval ( sc.select , p_workspace ) ;
+  }
+
+  /// variant to use if the caller doesn't provide a workspace
+  /// this is less efficient than the above version, so for mass evaluations, like in the
+  /// remap routine, it isn't used, but it's nice to have for ad-hoc use where one doesn't
+  /// want to look into the workings of the code.
+
+  value_type operator() ( const nd_rc_type& c )   // coordinate
+  {
+    ele_type p_workspace[workspace_size()] ;      // provide space for weights
+    return operator() ( c , p_workspace ) ;       // delegate to above routine
+  }
+
+  /* currently unused:
+  /// we also allow passing in of a single real value which is used to
+  /// construct a nd_rc_type. beware! might cause hard-to-track bugs
+
+  value_type operator() ( const rc_type& c )
+  {
+    return operator() ( nd_rc_type(c) ) ;
+  }
+  */
+
+#ifdef USE_VC
+
+  /// vectorized variants of the previous routines
+  
+  mc_ele_v operator() ( nd_ic_v& s ,           // lower corners of the subarrays
+                        ele_v * p_workspace )  // precalculated weights
+  {
+    // first we sum up the coordinates in all dimensions into origin values. this is analogous
+    // to forming origin = & coefficients [ ... ] - coefficients.data() in unvectorized code
+    // but since we can't do vectorized address arithmetic (TODO can we?) ...
+    // note that we use both the strides and offsets appropriate for an expanded array,
+    // and component_base has pointers to the component type.
+    
+    ic_v origin = s[0] * ic_type ( expanded_stride [ 1 ] ) ;
+    for ( int d = 1 ; d < dimension ; d++ )
+      origin += s[d] * ic_type ( expanded_stride [ d + 1 ] ) ;
+    
+    // to iterate over the positions of all coefficients in the window over which the weighted sum
+    // is formed, we use this iterator:
+    
+    auto ofs = component_offsets.begin() ;
+    
+    // now we can call the recursive _v_eval routine
+    
+    return _v_eval < mc_ele_v , level >() ( component_base , origin , ofs ,
+                                            p_workspace + level * ORDER , ORDER ) ;
+  }
+
+  /// here we take the approach to require the calling function to present pointers to
+  /// vsize input and vsize output values, stored contiguously, so that we can use
+  /// 'standard' gather/scatter operations here with precomputed indexes. Performing the
+  /// (de)interleaving here simplifies matters for the calling code if it has the data
+  /// in contiguous memory. But this is not always the case, for example when the
+  /// data are strided.
+  
+  void operator() ( const split_coordinate_type* const psc , // pointer to vsize presplit coordinates
+                    value_type* result ,                     // pointer to vsize result values
+                    ele_v * p_workspace )                    // space for weight vectors
+  {
+    nd_ic_v select ;
+    nd_rc_v tune ;
+
+    // this is awkward, but if we get split coordinates interleaved like this,
+    // we have to manually deinterleave them unless we make potentially unsafe
+    // assumptions about the size of the components (if they are the same and packed,
+    // we might gather instead...)
+    
+    for ( int vi = 0 ; vi < vsize ; vi++ )
+    {
+      for ( int d = 0 ; d < dimension ; d++ )
+      {
+        select[d][vi] = int ( psc[vi].select[d] ) ;
+        tune[d][vi] = rc_type ( psc[vi].tune[d] ) ;
+      }
+    }
+
+    // calculate the result
+    
+    fill_multi_basis ( p_workspace , tune ) ;
+
+    mc_ele_v v_result = operator() ( select , p_workspace ) ;
+
+    // and deposit it in the memory the caller provides
+    for ( int ch = 0 ; ch < channels ; ch++ )
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+  }
+
+  /// This variant of operator() works directly on vector data (of unsplit coordinates)
+  /// This burdens the calling code with (de)interleaving the data. But often the calling
+  /// code performs a traversal of a large body of data and is therefore in a better position
+  /// to perform the (de)interleaving e.g. by a gather/scatter operation.
+  
+  void operator() ( const nd_rc_v & input ,  // number of dimensions * coordinate vectors
+                    mc_ele_v & result ,      // number of channels * value vectors
+                    ele_v * p_workspace )    // space for weight vectors
+  {
+    nd_ic_v select ;
+    nd_rc_v tune ;
+
+    // map the incoming coordinates to the spline's range
+
+    mmap ( input , select , tune ) ;
+
+    // calculate the weights
+
+    fill_multi_basis ( p_workspace , tune ) ;
+
+    // delegate
+
+    result = operator() ( select , p_workspace ) ;
+  }
+
+  /// This variant operates on unsplit coordinates. Here again we require the calling function
+  /// to pass pointers to vsize input and output values in contiguous memory. The data are
+  /// (de)interleaved in this routine, the actual evaluation is delegated to the variant working
+  /// on vectorized data.
+  
+  void operator() ( const nd_rc_type* const pmc ,  // pointer to vsize muli_coordinates
+                    value_type* result ,           // pointer to vsize result values
+                    ele_v * p_workspace )          // space for weight vectors
+  {
+    nd_rc_v input ;
+    mc_ele_v v_result ;
+
+    // gather the incoming (interleaved) coordinates
+    for ( int d = 0 ; d < dimension ; d++ )
+      input[d].gather ( (const rc_type* const)pmc , nd_interleave[d] ) ;
+
+    // call operator() for vectorized data
+    operator() ( input , v_result , p_workspace ) ;
+
+    // and deposit it in the memory the caller provides
+    for ( int ch = 0 ; ch < channels ; ch++ )
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+  }
+
+  /// mixed form, where input is a vectorized coordinate
+  /// and output goes to interleaved memory
+
+  void operator() ( const nd_rc_v & input ,  // number of dimensions * coordinate vectors
+                    value_type* result ,     // pointer to vsize result values
+                    ele_v * p_workspace )    // space for weight vectors
+  {
+    mc_ele_v v_result ;
+
+    // call operator() for vectorized data
+
+    operator() ( input , v_result , p_workspace ) ;
+
+    // and deposit result in the memory the caller provides
+
+    for ( int ch = 0 ; ch < channels ; ch++ )
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+  }
+
+#endif // USE_VC
+
+  ~evaluator()
+  {
+    // we don't want a memory leak!
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      if ( fweight[d] != &wfd0 )
+        delete fweight[d] ;
+    }
+  }
+} ;
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_EVAL_H
diff --git a/example/eval.cc b/example/eval.cc
new file mode 100644
index 0000000..1577993
--- /dev/null
+++ b/example/eval.cc
@@ -0,0 +1,132 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// eval.cc
+///
+/// takes a set of knot point values from cin, calculates a 1D b-spline
+/// over them, and evaluates it at coordinates taken from cin 
+
+#include <vspline/vspline.h>
+#include <iomanip>
+
+using namespace std ;
+using namespace vigra ;
+using namespace vspline ;
+
+/// Interactive demo: reads the spline degree, a boundary condition and a set
+/// of knot point values from cin, creates and prefilters a 1D b-spline over
+/// them, prints the coefficients and then evaluates the spline at every
+/// coordinate read from cin.
+///
+/// Returns 0 on success and 1 if the initial input can't be read.
+
+int main ( int argc , char * argv[] )
+{
+  // get the spline degree and boundary conditions from the console
+
+  cout << "enter spline degree: " ;
+  int spline_degree ;
+  if ( ! ( cin >> spline_degree ) )
+  {
+    cerr << "could not read the spline degree" << endl ;
+    return 1 ;
+  }
+  
+  int bci = -1 ;
+  bc_code bc = MIRROR ;
+  
+  while ( bci < 1 || bci > 4 )
+  {
+    cout << "choose boundary condition" << endl ;
+    cout << "1) MIRROR" << endl ;
+    cout << "2) REFLECT" << endl ;
+    cout << "3) NATURAL" << endl ;
+    cout << "4) PERIODIC" << endl ;
+
+    // a failed extraction (EOF or non-numeric input) leaves cin in a failed
+    // state, so every further read would fail as well. Without this check
+    // the menu would be printed in an endless loop.
+    if ( ! ( cin >> bci ) )
+    {
+      cerr << "could not read the boundary condition" << endl ;
+      return 1 ;
+    }
+  }
+  
+  switch ( bci )
+  {
+    case 1 :
+      bc = MIRROR ;
+      break ;
+    case 2 :
+      bc = REFLECT ;
+      break ;
+    case 3 :
+      bc = NATURAL ;
+      break ;
+    case 4 :
+      bc = PERIODIC ;
+      break ;
+  }
+  // put the BC code into a TinyVector
+  TinyVector < bc_code , 1 > bcv ( bc ) ;
+
+  // obtain knot point values
+
+  double v ;
+  std::vector<double> dv ;
+  cout << "enter knot point values (end with EOF)" << endl ;
+  while ( cin >> v )
+    dv.push_back ( v ) ;
+
+  // the loop above ends with cin in a failed state; reset it so that
+  // the coordinates can be read further down
+  cin.clear() ;
+
+  // there is nothing to build a spline over without knot point values
+  if ( dv.empty() )
+  {
+    cerr << "no knot point values given" << endl ;
+    return 1 ;
+  }
+  
+  // put the size into a TinyVector
+  TinyVector < int , 1 > shape ( dv.size() ) ;
+  
+  // fix the type for the bspline object
+  typedef bspline < double , 1 > spline_type ;
+  spline_type bsp  ( shape , spline_degree , bcv , EXPLICIT ) ;
+  cout << "created bspline object:" << endl << bsp << endl ;
+
+  // fill the data into the spline's 'core' area
+  for ( size_t i = 0 ; i < dv.size() ; i++ )
+    bsp.core[i] = dv[i] ;
+
+  // prefilter the data
+  bsp.prefilter() ;
+  
+  cout << fixed << showpoint << setprecision(12) ;
+  cout << "spline coefficients (with frame)" << endl ;
+  for ( auto& coeff : bsp.container )
+    cout << " " << coeff << endl ;
+
+  // fix the type for the evaluator and create it
+  typedef evaluator < 1 , double , double , int > eval_type ;
+  eval_type ev ( bsp ) ;
+  auto map = ev.get_mapping() ;
+  int ic ;
+  double rc ;
+
+  cout << "enter coordinates to evaluate (end with EOF)" << endl ;
+
+  // test the extraction itself rather than cin.eof(): eof() only becomes
+  // true after a read has failed, so testing it up front would process
+  // the previous value of v once more after the last coordinate
+  while ( cin >> v )
+  {
+    // evaluate the spline at the coordinate
+    double res = ev ( v ) ;
+    // apply the mapping to the coordinate to output that as well
+    map ( v , ic , rc , 0 ) ;
+
+    cout << v << " -> ( " << ic << " , " << rc << " ) -> " << res << endl ;
+  }
+}
diff --git a/example/gradient.cc b/example/gradient.cc
new file mode 100644
index 0000000..2a787f3
--- /dev/null
+++ b/example/gradient.cc
@@ -0,0 +1,90 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// gradient.cc
+///
+/// If we create a b-spline over an array containing, at each grid point,
+/// the sum of the grid point's coordinates, each 1D row, column, etc will
+/// hold a linear gradient with first derivative == 1. If we use NATURAL
+/// BCs, evaluating the spline with real coordinates anywhere inside the
+/// defined range should produce precisely the sum of the coordinates.
+/// This is a good test for both the precision of the evaluation and its
+/// correct functioning, particularly with higher-D arrays. 
+
+#include <vspline/vspline.h>
+
+using namespace std ;
+
+/// Test program: sets up a 5D cubic b-spline with NATURAL boundary conditions
+/// over an array holding, at every grid point, the sum of the grid point's
+/// coordinates. The spline is then evaluated at 10000 random coordinates
+/// inside the defined range, and every evaluation which deviates from the
+/// sum of the coordinate's components by more than 2.0e-14 is printed.
+
+int main ( int argc , char * argv[] )
+{
+  typedef vspline::bspline < double , 5 > spline_type ;
+  typedef typename spline_type::shape_type shape_type ;
+  typedef typename spline_type::view_type view_type ;
+  typedef typename spline_type::bcv_type bcv_type ;
+  
+  shape_type core_shape = { 10 , 4 , 13 , 9 , 6 } ;
+
+  // note how with these small array dimensions, we can't use the EXPLICIT scheme:
+  // the horizon, 12, is wider than the smallest extent, and bracing (with the current
+  // bracing code) will produce erroneous results.
+
+  spline_type bspl ( core_shape , 3 , bcv_type ( vspline::NATURAL ) ) ;
+  view_type core = bspl.core ;
+  
+  // add, for every axis d, the coordinate along d to all values in the
+  // corresponding slice; in sum every grid point receives the sum of
+  // its coordinates
+  for ( int d = 0 ; d < bspl.dimension ; d++ )
+  {
+    for ( int c = 0 ; c < core_shape[d] ; c++ )
+      core.bindAt ( d , c ) += c ;
+  }
+  
+  bspl.prefilter() ;
+
+  typedef vspline::evaluator < bspl.dimension , double , double > evaluator_type ;
+  typedef typename evaluator_type::nd_rc_type coordinate_type ;
+  
+  evaluator_type ev ( bspl ) ;
+  
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  // std::mt19937 gen(12345);   // fix starting value for reproducibility
+
+  coordinate_type c ;
+  
+  for ( int times = 0 ; times < 10000 ; times++ )
+  {
+    // pick a random coordinate inside the defined range
+    for ( int d = 0 ; d < bspl.dimension ; d++ )
+      c[d] = ( core_shape[d] - 1 ) * std::generate_canonical<double, 20>(gen) ;
+
+    double delta = ev(c) - sum(c) ;
+
+    // the result may be off in either direction, so the magnitude
+    // of the deviation has to be tested
+    if ( delta > 2.0e-14 || delta < -2.0e-14 )
+      cout << c << " -> delta = " << delta << endl ;
+  }
+}
\ No newline at end of file
diff --git a/example/pano_extract.cc b/example/pano_extract.cc
new file mode 100644
index 0000000..4eb792e
--- /dev/null
+++ b/example/pano_extract.cc
@@ -0,0 +1,776 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform rational b-splines                           */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// \file pano_extract.cc
+///
+/// \brief demonstration of transformation-based remap
+///
+/// This program extracts a rectilinear image with a given size,
+/// horizontal field of view, yaw, pitch and roll
+/// from a (full) spherical panorama.
+
+#include <cmath>
+
+#include <vspline/vspline.h>
+
+#include <vigra/multi_math.hxx>
+
+using namespace vigra ;
+using namespace vigra::multi_math;
+using namespace std ;
+using namespace vspline ;
+
+#include <vigra/stdimage.hxx>
+#include <vigra/imageinfo.hxx>
+#include <vigra/impex.hxx>
+#include <vigra/impexalpha.hxx>
+
+#define PRINT_ELAPSED
+
+#ifdef PRINT_ELAPSED
+#include <ctime>
+#include <chrono>
+#endif
+
+#include <vigra/quaternion.hxx>
+
+/// combine yaw, pitch and roll (all in radians) into a single quaternion.
+/// We use a right-handed coordinate system: the positive x axis points towards us,
+/// the positive y axis to the right and the positive z axis up. The subjective
+/// point of view is taken to be at the origin, so the view directly ahead is
+/// along the negative x-axis.
+
+template < typename rc_type >
+Quaternion<rc_type> get_rotation_q ( rc_type yaw , rc_type pitch , rc_type roll )
+{
+  typedef Quaternion<rc_type> quaternion ;
+
+  // for a right-handed system and clockwise rotations, a rotation by theta
+  // around the axis (ux, uy, uz) is given by
+  // q = cos ( theta / 2 ) + ( ux * i + uy * j + uz * k ) * sin ( theta / 2 )
+  // so all three component quaternions are built from half angles:
+
+  const auto half_yaw = yaw / 2.0 ;
+  const auto half_pitch = pitch / 2.0 ;
+  const auto half_roll = roll / 2.0 ;
+
+  // roll is taken to be a clockwise rotation around the line of sight (-1, 0, 0).
+  // To rotate the section taken out of the panorama clockwise, the target has
+  // to be rotated counterclockwise, which yields -sin ( -roll / 2.0 ) - the
+  // two negative signs cancel out.
+  const quaternion q_roll ( cos ( half_roll ) , sin ( half_roll ) , 0.0 , 0.0 ) ;
+
+  // for a target in the line of sight, pitch moves it upwards: a clockwise
+  // rotation around the positive y axis (0 , 1, 0)
+  const quaternion q_pitch ( cos ( half_pitch ) , 0.0 , sin ( half_pitch ) , 0.0 ) ;
+
+  // for a target in the line of sight, yaw moves it to the right, which is
+  // counterclockwise around the z axis, or clockwise around the negative
+  // z axis (0, 0, -1). This quaternion is the starting point for the
+  // concatenation:
+  quaternion q_total ( cos ( half_yaw ) , 0.0 , 0.0 , - sin ( half_yaw ) ) ;
+
+  // concatenate the component rotations by multiplication: yaw * pitch * roll
+  q_total *= q_pitch ;
+  q_total *= q_roll ;
+
+  return q_total ;
+}
+
+/// rotate the 3D point v by the rotation encoded in the quaternion q.
+/// The result is the vector part of q * v * conj ( q ), with v taken as
+/// a quaternion with real part zero.
+
+template < typename rc_type >
+TinyVector < rc_type , 3 >
+rotate_q ( const TinyVector < rc_type , 3 > & v , Quaternion<rc_type> q )
+{
+  typedef Quaternion<rc_type> quaternion ;
+
+  // the conjugate has to be taken before q is modified below
+  const quaternion q_conjugate = conj ( q ) ;
+
+  // the point, expressed as a quaternion with real part zero
+  const quaternion q_point ( 0.0 , v[0] , v[1] , v[2] ) ;
+
+  // q is our own copy, so we can do the multiplications in-place
+  q *= q_point ;
+  q *= q_conjugate ;
+
+  // components 1 to 3 hold the rotated point
+  return TinyVector < rc_type , 3 > ( q[1] , q[2] , q[3] ) ;
+}
+
+/// tf_spherical_rectilinear transforms incoming (target) image coordinates to the
+/// corresponding coordinates into the spherical panoramic image. The main coding
+/// effort for the intended image transformation is in this functor. The strategy
+/// is this: with the transformation-based remap, we receive 2D coordinates into the
+/// target image. Everything which can be derived from the constructor parameters is
+/// precalculated and stored in the functor, leaving only the steps which depend on
+/// the incoming coordinates to the functor's operator(). Instead of rotating every
+/// incoming coordinate with yaw, pitch and roll, only three points are rotated:
+/// the top left, lower left and upper right corner of the target image. Since we
+/// are working in cartesian coordinates, the transformed cartesian coordinates can
+/// then be generated by following straight lines, with steps derived from the three
+/// rotated points. Once we have the cartesian coordinates, they only have to be
+/// converted to spherical coordinates, scaled and shifted to obtain coordinates
+/// into the panoramic source image. Note that this code models pixels as small
+/// rectangular shapes. If an image's width is w, it has w pixels along the x axis.
+/// The 'position' of a pixel is identified with its center. If the pixels are dx
+/// apart, the 'positions' of the pixels are dx/2, dx+dx/2, 2dx+dx/2 etc. dx is
+/// equal to the image width divided by w.
+
+template < class rc_type >
+class tf_spherical_rectilinear
+{
+  typedef Quaternion<rc_type> quaternion ;
+  typedef TinyVector<rc_type,3> p3d ;
+  typedef TinyVector < rc_type , 2 > coordinate_type ;
+  
+  p3d s0 ;          // 3D position of the target's sample (0,0)
+  p3d dh ;          // 3D step from one target pixel to its horizontal neighbour
+  p3d dv ;          // 3D step from one target pixel to its vertical neighbour
+  rc_type scale_x ; // factor to scale theta to pano pixel units
+  rc_type scale_y ; // factor to scale phi to pano pixel units
+
+  /// convert an angle given in degrees to radians
+
+  static rc_type to_radians ( rc_type degrees )
+  {
+    return degrees * rc_type ( M_PI ) / rc_type ( 180.0 ) ;
+  }
+
+public:
+  
+  /// the constructor precalculates s0, dh, dv and the scaling factors
+  /// from the geometry of the panorama and of the section to extract.
+
+  tf_spherical_rectilinear ( int w_pano ,    // width of the (spherical) panorama
+                          int h_pano ,    // height of same
+                          rc_type _hfov , // horizontal field of view of section (in degrees)
+                          int w_out ,     // width of the section
+                          int h_out ,     // height of the section
+                          rc_type _yaw ,   // yaw of extracted section (in degrees)
+                          rc_type _pitch , // pitch of same
+                          rc_type _roll )  // roll of same
+  {
+    // all angles come in in degrees, we need radians
+    const rc_type hfov = to_radians ( _hfov ) ;
+    const rc_type yaw = to_radians ( _yaw ) ;
+    const rc_type pitch = to_radians ( _pitch ) ;
+    const rc_type roll = to_radians ( _roll ) ;
+
+    // the positioning of the target is expressed as a 3D rotation from an
+    // initial position centered at the spherical image's center; this
+    // rotation is held in a quaternion
+    const quaternion qr = get_rotation_q ( yaw , pitch , roll ) ;
+
+    // inverse aspect ratio of the target
+    const rc_type inverse_aspect_ratio = rc_type ( h_out ) / rc_type ( w_out ) ;
+
+    // the target is initially positioned so that its center touches the unit
+    // sphere at its equator. half_width is the cartesian distance from the
+    // target's center to its left/right edge, half_height the distance to
+    // its upper/lower edge
+    const rc_type half_width = tan ( hfov / rc_type ( 2.0 ) ) ;
+    const rc_type half_height = half_width * inverse_aspect_ratio ;
+
+    // initial positions of the top left corner of the target (origin),
+    // and of the upper right and lower left corner
+    p3d origin = { rc_type ( -1.0 ) , - half_width ,   half_height } ;
+    p3d ur     = { rc_type ( -1.0 ) ,   half_width ,   half_height } ;
+    p3d ll     = { rc_type ( -1.0 ) , - half_width , - half_height } ;
+
+    // move the three corners to their final position by applying the rotation
+    origin = rotate_q ( origin , qr ) ;
+    ur = rotate_q ( ur , qr ) ;
+    ll = rotate_q ( ll , qr ) ;
+    
+    // step from a target pixel to its horizontal/vertical neighbour
+    // TODO: can we lower the error here????
+
+    dh = ( ur - origin ) / rc_type ( w_out ) ;
+    dv = ( ll - origin ) / rc_type ( h_out ) ;
+    
+    // the center of the target's (0,0) pixel is half a step from the origin
+    s0 = origin + rc_type ( 0.5 ) * dh + rc_type ( 0.5 ) * dv ;
+
+    // factors to scale angles in radians to pixel units of the panorama:
+    // the panorama's width corresponds to 2 * pi, its height to pi
+    scale_x = w_pano / rc_type ( 2.0 * M_PI ) ;
+    scale_y = h_pano / rc_type ( M_PI ) ;
+  }
+
+  /// operator() of the transformation receives an incoming 2D coordinate into the
+  /// target image and produces a 2D coordinate into the source image (the panorama).
+  /// The intermediate 3D processing is encapsulated in the functor.
+
+  void operator() ( const coordinate_type & c_in ,
+                          coordinate_type & c_out )
+  {
+    // incoming coordinates (into the target)
+    const rc_type column = c_in[0] ;
+    const rc_type row = c_in[1] ;
+
+    // the target pixel's 3D location
+    const p3d location = s0 + column * dh + row * dv ;
+
+    // distance of this location from the origin
+    const rc_type distance = sqrt (   location[0] * location[0]
+                                    + location[1] * location[1]
+                                    + location[2] * location[2] ) ;
+
+    // now for the spherical coordinates. Mind the sign: we're going
+    // clockwise, atan2 ( y , x ) is going counterclockwise
+    const rc_type theta = - atan2 ( location[1] , location[0] ) ;
+
+    const rc_type phi = acos ( location[2] / distance ) ;
+
+    // scale/shift the spherical coordinates to the size of the panoramic image
+    // and store the result in the output coordinate. Why -0.5? If we were to
+    // 'land' at an angle of 0.0, this would be flush with the image's margin,
+    // .5 pixels to the 'left' of the first pixel's center.
+    c_out[0] = theta * scale_x - rc_type ( 0.5 ) ;
+    c_out[1] = phi * scale_y - rc_type ( 0.5 );
+  }
+  
+#ifdef USE_VC
+
+  /// operator() for vectorized operation. The Simd type is a template argument, so
+  /// it need not be made explicit here as long as all operations we use are supported.
+  /// It's easier this way, since the Simd type is a SimdArray the size of which is
+  /// determined by the coordinate type (float or double) used in process_image.
+  ///
+  /// The code is slightly more involved than the scalar version above, also because
+  /// it makes the 3 components of the 3D variables explicit. Vc doesn't offer a
+  /// vectorized acos function, so asin is used instead.
+  ///
+  /// The vectorized code produces a measurable speedup, most likely due to the
+  /// vectorized transcendental functions.
+
+  template < class rc_v >
+  void operator() ( const TinyVector < rc_v , 2 > & c_in ,
+                          TinyVector < rc_v , 2 > & c_out )
+  {
+    // incoming coordinates (into the target)
+    rc_v column = c_in[0] ;
+    rc_v row = c_in[1] ;
+
+    // the target pixels' 3D location, one vector per component,
+    // starting out from the position of sample (0,0)
+    rc_v loc_x ( s0[0] ) ;
+    rc_v loc_y ( s0[1] ) ;
+    rc_v loc_z ( s0[2] ) ;
+
+    loc_x += column * dh[0] + row * dv[0] ;
+    loc_y += column * dh[1] + row * dv[1] ;
+    loc_z += column * dh[2] + row * dv[2] ;
+
+    // distance of the locations from the origin
+    rc_v distance = sqrt ( loc_x * loc_x + loc_y * loc_y + loc_z * loc_z ) ;
+
+    // same sign flip as in the scalar code
+    rc_v theta = - atan2 ( loc_y , loc_x ) ;
+
+    // acos ( loc_z / distance ) would be nice here, but Vc doesn't have
+    // acos, so we use acos ( x ) = pi / 2 - asin ( x ) instead
+    rc_v phi ( rc_v ( M_PI / 2.0 ) ) ;
+    phi -= asin ( loc_z / distance ) ;
+
+    // scale and shift to pano pixel coordinates, as in the scalar code
+    c_out[0] = theta * scale_x - rc_type ( 0.5 ) ;
+    c_out[1] = phi * scale_y - rc_type ( 0.5 ) ;
+  }
+
+#endif
+
+} ;
+
+// To present a program which is useful and 'correct' in photography terms,
+// we have to deal with gamma and colour space. We make the silent assumption
+// that incomning data with unsigned char and unsigned short pixels are in sRGB,
+// while other incoming types are linear RGB already.
+
+#include <vigra/colorconversions.hxx>
+
+/// convert a UINT8 RGB pixel from sRGB to linear RGB, in-place.
+/// The conversion is delegated to vigra's sRGB2RGBFunctor, which
+/// in turn uses a lookup table and is therefore fast.
+
+void degamma ( TinyVector<unsigned char,3> & v )
+{
+  typedef vigra::sRGB2RGBFunctor< unsigned char , unsigned char > functor_type ;
+
+  // the functor is set up on the first call and reused by all later calls
+  static functor_type to_linear ;
+
+  v = to_linear ( v ) ;
+} 
+
+/// convert a pixel from sRGB to linear RGB, in-place. Here the conversion
+/// is actually calculated; this routine is used for pixels other than UINT8.
+/// _max is the value corresponding to full intensity.
+
+template < typename value_type , int _max = 255 >
+void to_l_rgb ( value_type& v )
+{
+  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+  const ele_type full_scale ( _max ) ;
+
+  // scale the pixel so that full intensity corresponds to 1.0
+  v /= full_scale ;
+
+  for ( auto & component : v )
+  {
+    if ( component <= 0.04045 )
+    {
+      // linear segment of the sRGB transfer function
+      component = full_scale * component / 12.92 ;
+    }
+    else
+    {
+      // power-law segment of the sRGB transfer function
+      component = full_scale * pow ( (component + 0.055) / 1.055 , 2.4 ) ;
+    }
+  }
+}
+
+#ifdef USE_VC
+
+/// convert a vector of single sRGB values to linear RGB, in-place.
+/// This is the vectorized equivalent of to_l_rgb; _max is the value
+/// corresponding to full intensity.
+
+template < typename entry_type , int _max = 255 >
+void v_to_l_rgb ( Vc::Vector < entry_type > & v )
+{
+  const entry_type full_scale ( _max ) ;
+  
+  // scale the values so that full intensity corresponds to 1.0
+  v *= entry_type ( 1.0 / full_scale ) ;
+
+  // mask of the lanes which fall into the linear segment
+  // of the sRGB transfer function
+  auto is_linear = ( v <= entry_type ( 0.04045 ) ) ;
+
+  // linear segment: a plain multiplication will do
+  v ( is_linear ) *= entry_type ( full_scale / 12.92 ) ;
+
+  // all other lanes: power-law segment. Vc does not provide pow(),
+  // so we use pow ( x , y ) = exp ( y * log ( x ) ) instead of
+  //
+  //   pow ( ( v + entry_type ( 0.055 ) ) * entry_type ( 1.0 / 1.055 ) ,
+  //         entry_type ( 2.4 ) )
+  //
+  // The right hand side is calculated for all lanes, but only the
+  // lanes selected by the inverted mask are written.
+  v ( ! is_linear ) = entry_type ( full_scale )
+                      * exp (    entry_type ( 2.4 )
+                               * log ( ( v + entry_type ( 0.055 ) ) * entry_type ( 1.0 / 1.055 ) )
+                            ) ;
+}
+
+#endif
+
+/// process_image is the workhorse routine bringing it all together.
+///
+/// reading the image is reasonably involved, since it tries to cover all sorts of
+/// image input:
+///
+/// - images with 8 bit, 16 bit and real-valued pixels
+/// - images with or without alpha channel
+///
+/// BC code SPHERICAL can not be processed with an implicit scheme, so we have to set
+/// up the b-spline to use an explicit scheme. The difference, from the user side, is
+/// marginal.
+///
+/// Once the spline is ready, the transformation functor is set up with the parameters
+/// passed into main. Finally, tf_remap is called to do the processing.
+///
+/// process_image also handles images with an alpha channel. The alpha channel is also
+/// mapped to the target image (if it's present), but for the alpha channel it only uses
+/// linear interpolation (a b-spline of degree 1).
+
+template < class rc_type >
+void process_image ( char * name ,
+                     rc_type roll ,
+                     rc_type pitch ,
+                     rc_type yaw ,
+                     int width ,
+                     int height ,
+                     rc_type hfov ,
+                     int spline_degree )
+{
+  // first we have to read and preprocess the source data. The naive way of
+  // doing this would be to read the image into some buffer and then erect a
+  // b-spline over this buffer. But class b-spline is designed to offer a better
+  // way: we create the b-spline object *first*, then acquire the data straight
+  // into the b-spline object's 'core' area.
+
+  cout << fixed << showpoint ; //  << setprecision(32) ;
+
+  vigra::ImageImportInfo imageInfo(name);
+  // print some information
+  std::cout << "Image information:\n";
+  std::cout << "  file format: " << imageInfo.getFileType() << std::endl;
+  std::cout << "  width:       " << imageInfo.width() << std::endl;
+  std::cout << "  height:      " << imageInfo.height() << std::endl;
+  std::cout << "  pixel type:  " << imageInfo.getPixelType() << std::endl;
+  std::cout << "  color image: ";
+  if (imageInfo.isColor())    std::cout << "yes (";
+  else                        std::cout << "no  (";
+  std::cout << "number of channels: " << imageInfo.numBands() << ")\n";
+
+  int extra_bands = imageInfo.numExtraBands() ;
+
+  if ( extra_bands > 1 )
+  {
+    // are there really images with more than one extra channel?
+    throw not_supported ( "can only process at most one extra channel" ) ;
+  }
+
+  typedef vigra::RGBValue<rc_type,0,1,2> pixel_type; 
+  typedef vigra::RGBValue<UInt16,0,1,2> pixel_type_ui16 ; 
+  typedef vigra::MultiArray<2, pixel_type> array_type ;
+  typedef vigra::MultiArrayView<2, pixel_type> view_type ;
+  typedef typename view_type::difference_type shape_type ;
+
+  TinyVector < bc_code , 2 > bcv = { PERIODIC , SPHERICAL } ;
+
+  // find out the image's shape
+  shape_type core_shape = imageInfo.shape() ;
+  // create a suitable bspline object
+  bspline < pixel_type , 2 > bspl ( core_shape , spline_degree , bcv , EXPLICIT ) ;
+  // get the view to the core coefficient area (to put the image data there)
+  view_type core = bspl.core ;
+
+  cout << "created bspline object:" << endl << bspl << endl ;
+
+  // now we're being tightfisted with memory. We could just go ahead and create
+  // a b-spline object for the alpha channel no matter if it's actually present, but
+  // instead we only set up the empty shell of the storage needed and only fill it
+  // with life (memory) if we have to:
+
+  // potential storage for alpha channel plus brace, empty for now
+  vigra::MultiArray<2,float> alpha_channel ;
+  // view to alpha channel's core area, empty for now
+  vigra::MultiArrayView<2,float> alpha_view ;
+
+  if ( extra_bands )
+  {
+    // if there is an alpha channel, we want to load it into the core area of an array
+    // which we can use as a container for a degree-1 b-spline. This container array
+    // will be a bit larger than the core shape, due to the boundary conditions used.
+    // we have a convenience function yielding this shape:
+    shape_type alpha_shape = bspline<float,2>::container_size ( core_shape , 1 , bcv ) ;
+    // only if the alpha channel is actually present, we fill the array and the view
+    // above with life:
+    vigra::MultiArray<2,float> target ( alpha_shape ) ;
+    alpha_channel.swap ( target ) ; // swap data into alpha_channel
+    // get a view to it's core area
+    shape_type left_corner = bracer<view_type>::left_corner ( bcv , 1 ) ;
+    alpha_view = alpha_channel.subarray ( left_corner , left_corner + core_shape ) ;
+  }
+
+  
+  cout << "reading panorama data... " << endl ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
+#endif
+  
+  auto input_pixel_type = imageInfo.getPixelType() ;
+  auto enum_pixel_type = imageInfo.pixelType() ;
+  
+  TinyVector < rc_type , 3 > min_value ;
+  TinyVector < rc_type , 3 > max_value ;
+  int component_max ;
+  
+  if ( enum_pixel_type == vigra::ImageImportInfo::UINT8 )
+  {
+    // for UINT8 pixels, we can degamma with a lookup table, which is less
+    // precise but much faster, but we have to read to a char buffer first:
+
+    typedef vigra::RGBValue<unsigned char,0,1,2> uc_pixel ;
+
+    vigra::MultiArray < 2 , uc_pixel > buffer ( core_shape ) ;
+    typedef vigra::MultiArrayView < 2 , uc_pixel > buffer_view ;
+
+    if ( extra_bands )
+      vigra::importImageAlpha ( imageInfo , buffer , alpha_view ) ;
+    else
+      vigra::importImage ( imageInfo , buffer ) ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
+  cout << "reading took "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+
+    // vectorization doesn't work for unsigned char, but we can multithread:
+
+    divide_and_conquer < buffer_view > :: run ( buffer , degamma ) ;
+
+    // still we need to work on floating point data
+
+    auto it = core.begin() ;
+    for ( auto & pix : buffer )
+    {
+      *it = pix ;
+      ++it ;
+    }
+
+    component_max = 255 ;
+  }
+  else if ( enum_pixel_type == vigra::ImageImportInfo::UINT16 )
+  {
+    // for UINT16, we degamma in floating point. We do this in-place,
+    // and we can use 'divide_and_conquer'
+
+    if ( extra_bands )
+      vigra::importImageAlpha ( imageInfo , core , alpha_view ) ;
+    else
+      vigra::importImage ( imageInfo , core ) ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
+  cout << "reading took "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+
+    // perform conversion from sRGB to linear RGB.
+    // silently assumes input actually is sRGB...
+    // this conversion takes quite a lot of time!
+
+#ifdef USE_VC
+    divide_and_conquer < decltype(core) > :: run ( core , v_to_l_rgb<rc_type,65535> ) ;
+#else
+    divide_and_conquer < decltype(core) > :: run ( core , to_l_rgb<pixel_type,65535> ) ;
+#endif
+    component_max = 65535 ;
+  }
+  else
+  {
+    // whatever pixel type this is, just import, don't degamma
+
+    if ( extra_bands )
+      vigra::importImageAlpha ( imageInfo , core , alpha_view ) ;
+    else
+      vigra::importImage ( imageInfo , core ) ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
+  cout << "reading took "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+
+  }
+
+  // we determine minimum and maximum values for every channel to use saturation
+  // arithmetics further down
+  // TODO: this also looks at transparent pixels...
+
+  for ( int ch = 0 ; ch < 3 ; ch++ )
+  {
+    vigra::FindMinMax<rc_type> minmax;   // functor to find range
+    vigra::inspectMultiArray ( core.bindElementChannel(ch) , minmax ) ;
+    min_value[ch] = minmax.min ;
+    max_value[ch] = minmax.max ;
+    cout << "incoming: ch " << ch << " minimum " << minmax.min << " maximum " << minmax.max << endl ;
+  }
+  rc_type rgb_min = min ( min_value ) ;
+  rc_type rgb_max = max ( max_value ) ;
+  cout << "RGB min: " << rgb_min << " RGB max: " << rgb_max << endl ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
+  cout << "converting to linear RGB took "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  cout << "creating b-spline... " << std::flush ;
+  
+  // perform prefiltering
+  bspl.prefilter() ;
+
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+  shape_type out_shape = shape_type ( width , height ) ;
+  array_type result ( out_shape ) ;
+  vigra::MultiArray<2,float> alpha_result ;
+
+  // now we set up the transformation functor
+
+  tf_spherical_rectilinear<rc_type> tf_se ( core_shape[0] ,    // width of the (spherical) panorama
+                                            core_shape[1] ,    // height of same
+                                            hfov ,             // horizontal field of view of section (in degrees)
+                                            width ,            // width of same
+                                            height ,           // height of same
+                                            yaw ,              // yaw of extracted section
+                                            pitch ,            // pitch of same
+                                            roll ) ;           // roll of same
+
+#ifdef USE_VC
+
+  // this looks funny - passing in the same transform twice, both for scalar and for vector
+  // operation? but tf_se is a functor with overloaded operator(), and as the accepted types
+  // of std::function are encoded in vspline::transformation's two-argument constructor's
+  // signature, the correct overload is picked for each case. Note that using the single-argument
+  // constructor would also work here, but result in broadcasting the scalar routine to the vector
+  // data, resulting in slower operation. The one-argument form is only a crutch if vectorized
+  // code can't be had.
+
+  vspline::transformation < pixel_type , rc_type , 2 , 2 >
+    tf ( tf_se , tf_se ) ;
+
+#else
+
+  // ... or if Vc can't be used. In this case only the single-argument constructor
+  // can be used:
+
+  vspline::transformation < pixel_type , rc_type , 2 , 2 >
+    tf ( tf_se ) ;
+
+#endif
+
+  // this usually takes the least amount of time:
+
+  cout << "creating target image... " << std::flush ;
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < 1 ; times++ )
+  {
+    vspline::tf_remap < pixel_type , rc_type , 2 , 2 >
+      ( bspl , tf , result ) ;
+  }
+
+  if ( extra_bands )
+  {
+    // create a b-spline over the alpha channel, using the data in alpha_channel.
+    // this way we have the alpha channel data from the image in alpha_channel's core
+    // area, and the only thing left to do is the prefiltering, which amounts to
+    // bracing the data, since we're only using a degree 1 spline (linear interpolation)
+    // on the alpha channel.
+
+    bspline < float , 2 > bspl_alpha ( core_shape , 1 , bcv , BRACED , -1 , alpha_channel ) ;
+    bspl_alpha.prefilter() ;
+
+    // swap data into alpha_result (fill it with life, it was created empty)
+    vigra::MultiArray<2,float> target ( out_shape ) ;
+    alpha_result.swap ( target ) ;
+
+    // we need a slightly different transformation here, working on single floats
+    // instead of pixels of three RGB values
+
+#ifdef USE_VC
+    vspline::transformation < float , rc_type , 2 , 2 >
+      tf_alpha ( tf_se , tf_se ) ;
+#else
+    vspline::transformation < float , rc_type , 2 , 2 >
+      tf_alpha ( tf_se ) ;
+#endif
+
+    // now we do a transformation-based remap of the alpha channel
+    vspline::tf_remap < float , rc_type , 2 , 2 >
+      ( bspl_alpha , tf_alpha , alpha_result ) ;
+  }
+
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+       << " ms" << endl ;
+#endif
+
+  // b-splines may produce overshoots, especially if the input data have
+  // numerical flaws, like sharp discontinuities. so we cut off any values
+  // exceeding the original image's extremal values (saturation arithmetics)
+
+  for ( int ch = 0 ; ch < 3 ; ch++ )
+  {
+    vigra::FindMinMax<rc_type> minmax;   // functor to find range
+    vigra::inspectMultiArray ( result.bindElementChannel(ch) , minmax ) ;
+    cout << "result: ch " << ch << " minimum " << minmax.min << " maximum " << minmax.max << endl ;
+    cout << "values outside incoming range (overshoot) will be clipped" << endl ;
+    for ( auto& pix : result.bindElementChannel(ch) )
+    {
+      if ( pix > max_value[ch] )
+        pix = max_value[ch] ;
+      if ( pix < min_value[ch] )
+        pix = min_value[ch] ;
+    }
+  }
+
+  // next we convert back from linear RGB to sRGB using vigra's functor for convenience.
+  // TODO: for now, this is neither multithreaded nor vectorized...
+
+  if (    enum_pixel_type == vigra::ImageImportInfo::UINT8
+       || enum_pixel_type == vigra::ImageImportInfo::UINT16 )
+  {
+    vigra::RGB2sRGBFunctor < rc_type , rc_type > to_gamma ( component_max ) ;
+    for ( auto & pix : result )
+      pix = to_gamma ( pix ) ;
+  }
+  // and export with a forced range mapping to avoid vigra's automatic mapping of the
+  // brightness values to 0...max.
+  // TODO .setForcedRangeMapping is not in vigra documentation
+
+  cout << "saving result to extract.tif" << endl ;
+
+  if ( extra_bands )
+    vigra::exportImageAlpha ( result ,
+                              alpha_result ,
+                              vigra::ImageExportInfo ( "extract.tif" )
+                              .setPixelType(input_pixel_type)
+                              .setCompression("DEFLATE")
+                              .setForcedRangeMapping ( rgb_min , rgb_max , rgb_min , rgb_max ) );
+  else
+    vigra::exportImage ( result ,
+                         vigra::ImageExportInfo ( "extract.tif" )
+                         .setPixelType(input_pixel_type)
+                         .setCompression("DEFLATE")
+                         .setForcedRangeMapping ( rgb_min , rgb_max , rgb_min , rgb_max ) );
+}
+
+/// main only takes the parameters, rejects values which can't work and passes
+/// the rest on to process_image. Admittedly the UI is a bit spartan.
+int main ( int argc , char * argv[] )
+{
+  if ( argc < 9 )
+  {
+    cout << std::string ( argv[0] ) << " - extract rectilinear section from spherical image" << endl ;
+    cout << "parameters: spherical image, roll, pitch, yaw," << endl ;
+    cout << "rectilinear image width, height, horizontal field of view, spline degree" << endl ;
+    cout << "all angles in degrees" << endl ;
+    cout << "example: pano_extract pano.tif 0.0 -90.0 0.0 2000 2000 60.0 3" << endl ;
+    cout << "creates a 60 degree wide nadir image from pano.tif" << endl ;
+    cout << "output is written to extract.tif" << endl ;
+    cout << "It's assumed images are sRGB if pixels are UCHAR8 or UCHAR16" << endl ;
+    cout << "or linear RGB if otherwise. Monochrome is not supported." << endl ;
+    cout << "alpha channel will be honoured if present." << endl ;
+    return 1 ;
+  }
+
+  cout << fixed << showpoint ;
+  float r = atof ( argv[2] ) ;
+  float p = atof ( argv[3] ) ;
+  float y = atof ( argv[4] ) ;
+  int w = atoi ( argv[5] ) ;
+  int h = atoi ( argv[6] ) ;
+  float hfov = atof ( argv[7] ) ;
+  int d = atoi ( argv[8] ) ;
+  if ( w < 1 || h < 1 || d < 0 )  // atoi yields 0 for text which isn't a number
+  {
+    std::cerr << "width and height must be at least 1, spline degree at least 0" << endl ;
+    return 1 ;
+  }
+  cout << "image " << std::string(argv[1]) << endl ;
+  cout << "roll " << r << " pitch " << p << " yaw " << y << endl ;
+  cout << "output width: " << w << " height: " << h << endl ;
+  cout << "output hfov: " << hfov << endl ;
+  cout << "spline degree: " << d << endl ;
+  process_image<float> ( argv[1] , r , p , y , w , h , hfov , d ) ; 
+  return 0 ;
+}
+
+
+
diff --git a/example/roundtrip.cc b/example/roundtrip.cc
new file mode 100644
index 0000000..e0e60a0
--- /dev/null
+++ b/example/roundtrip.cc
@@ -0,0 +1,405 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// roundtrip.cc
+///
+/// load an image, create a b-spline from it, and restore the original data,
+/// both by normal evaluation and by convolution with the restoration kernel.
+/// all of this is done 10 times each with different boundary conditions,
+/// spline degrees and in float and double arithmetic, the processing times
+/// and differences between input and restored signal are printed to cout.
+///
+/// compile with:
+/// g++ -std=c++11 -o roundtrip -O3 -pthread -DUSE_VC=1 roundtrip.cc -lvigraimpex -lVc
+
+#include <vspline/vspline.h>
+
+#include <vigra/stdimage.hxx>
+#include <vigra/imageinfo.hxx>
+#include <vigra/impex.hxx>
+#include <vigra/accumulator.hxx>
+#include <vigra/multi_math.hxx>
+
+#define PRINT_ELAPSED
+
+#ifdef PRINT_ELAPSED
+#include <ctime>
+#include <chrono>
+#endif
+
+using namespace std ;
+using namespace vigra ;
+using namespace vigra::acc;
+using namespace vigra::multi_math;
+
+/// tf_identity is the coordinate transformation used for the functor-based
+/// remaps below: it hands the incoming coordinate on unchanged, so evaluating
+/// the spline at the transformed coordinates should reproduce the input.
+
+template < typename rc_type > // float or double for coordinates
+void tf_identity ( const TinyVector < rc_type , 2 > & c_in ,
+                   TinyVector < rc_type , 2 > & c_out )
+{
+  c_out = c_in ;
+}
+
+#ifdef USE_VC
+
+// same as tf_identity, templated on rc_v; in this file it's only named in a comment
+template < typename rc_v >
+void vtf_identity ( const TinyVector < rc_v , 2 > & c_in , TinyVector < rc_v , 2 > & c_out )
+{
+  c_out = c_in ;
+}
+
+#endif
+
+/// check_diff prints the square root of the mean and of the maximum of the
+/// squared norms of v1's elements. Callers pass the difference of two images.
+template < class array_type , typename real_type >
+void check_diff ( const array_type v1 )
+{
+  vigra::MultiArray < 2 , real_type > sq_norm ( vigra::multi_math::squaredNorm ( v1 ) ) ;
+  AccumulatorChain < real_type , Select < Mean , Maximum > > stats ;
+  extractFeatures ( sq_norm.begin() , sq_norm.end() , stats ) ;
+  std::cout << "warped image diff Mean: " << sqrt ( get<Mean> ( stats ) ) << std::endl ;
+  std::cout << "warped image diff Maximum: " << sqrt ( get<Maximum> ( stats ) ) << std::endl ;
+}
+
+/// roundtrip creates a b-spline of degree DEGREE over 'data', using boundary
+/// condition bc along both axes, and then times prefiltering and several remap
+/// variants which evaluate the spline at the original pixel positions. After
+/// most of the remaps the deviation from 'data' is printed via check_diff.
+/// use_vc is handed on to the vspline routines. Every timed operation is run
+/// TIMES times and the average time is printed; note that the labels in the
+/// output say '10 x', which is only accurate for the default.
+template < class view_type , typename real_type , typename rc_type >
+void roundtrip ( view_type & data ,
+                 vspline::bc_code bc ,
+                 int DEGREE ,
+                 bool use_vc ,
+                 int TIMES = 10 )
+{
+  typedef typename view_type::value_type pixel_type ;
+  typedef typename view_type::difference_type Shape;
+  typedef MultiArray < 2 , pixel_type > array_type ;
+  typedef int int_type ;
+
+#ifdef USE_VC
+  
+  const int vsize = Vc::Vector < real_type > :: Size ;
+  
+#else
+  
+  const int vsize = 1 ;
+  
+#endif
+
+  TinyVector < vspline::bc_code , 2 > bcv ( bc ) ;
+  
+  int Nx = data.width() ;
+  int Ny = data.height() ;
+
+  vspline::bspline < pixel_type , 2 > bsp ( data.shape() , DEGREE , bcv ) ; // , vspline::EXPLICIT ) ;
+  bsp.core = data ;
+  
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
+#endif
+  
+  // the first TIMES - 1 runs only serve the timing; the core is reloaded
+  // from data before the final run, whose result is used from here on
+  for ( int times = 0 ; times < TIMES - 1 ; times++ )
+    bsp.prefilter ( use_vc ) ;
+
+  bsp.core = data ;
+  bsp.prefilter ( use_vc ) ;
+
+#ifdef PRINT_ELAPSED
+  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
+  cout << "avg 10 x prefilter:........................ "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+       
+  // get a view to the core coefficients (those which aren't part of the brace)
+  view_type cfview = bsp.core ;
+
+  // the evaluator type matching the spline's data and the coordinate type
+  typedef vspline::evaluator<2,pixel_type,rc_type,int_type> eval_type ;
+
+  // create the evaluator
+  eval_type ev ( bsp ) ;
+
+  // get the coordinate and split coordinate types from the evaluator
+  typedef typename eval_type::nd_rc_type coordinate_type ;
+  typedef typename eval_type::split_coordinate_type split_type ;
+  
+  typedef vspline::nd_mapping < split_type , 2 , vsize > mmap ;
+  
+  // obtain the mapping from the evaluator:
+  mmap map = ev.get_mapping() ;
+
+  typedef vigra::MultiArray<2, coordinate_type> coordinate_array ;
+  
+  typedef vigra::MultiArray<2, split_type> warp_array ;
+  typedef vigra::MultiArrayView<2, split_type> warp_view ;
+
+  int Tx = Nx ;
+  int Ty = Ny ;
+
+  // now we create a warp array of coordinates at which the spline will be evaluated.
+  // We create two versions of the warp array: one storing ordinary real coordinates
+  // (fwarp) and the other storing pre-split coordinates. Also create a target array
+  // to contain the result.
+
+  coordinate_array fwarp ( Shape(Tx,Ty) ) ;
+  warp_array _warp ( Shape(Tx+5,Ty+4) ) ;
+  warp_view warp = _warp.subarray ( Shape(2,1) , Shape(-3,-3) ) ;
+  array_type target ( Shape(Tx,Ty) ) ;
+  rc_type dfx = 0.0 , dfy = 0.0 ;
+//   if ( bcv[0] == vspline::REFLECT )
+//     dfx = .5 ;
+//   if ( bcv[1] == vspline::REFLECT )
+//     dfy = .5 ;
+  
+  for ( int times = 0 ; times < 1 ; times++ )
+  {
+    for ( int y = 0 ; y < Ty ; y++ )
+    {
+      rc_type fy = (rc_type)(y) + dfy ;
+      for ( int x = 0 ; x < Tx ; x++ )
+      {
+        rc_type fx = (rc_type)(x) + dfx ;
+        // store the 'ordinary' coordinate to fwarp[x,y]
+        fwarp [ Shape ( x , y ) ] = coordinate_type ( fx , fy ) ;
+        // and apply the mapping to (fx, fy), storing the result to warp[x,y]
+        map ( fx , warp [ Shape ( x , y ) ] , 0 ) ;
+        map ( fy , warp [ Shape ( x , y ) ] , 1 ) ;
+      }
+    }
+  }
+  
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < TIMES ; times++ )
+    vspline::remap<pixel_type,split_type,2,2>
+      ( bsp , warp , target , use_vc ) ;
+
+  
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << "avg 10 x remap1 from pre-split coordinates: "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+  
+  check_diff<array_type,real_type> ( target - data ) ;
+  
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < TIMES ; times++ )
+    vspline::remap<pixel_type,coordinate_type,2,2>
+      ( bsp , fwarp , target , use_vc ) ;
+
+  
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << "avg 10 x remap1 from unsplit coordinates:.. "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+       
+  check_diff<array_type,real_type> ( target - data ) ;
+  
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < TIMES ; times++ )
+    vspline::remap<pixel_type,coordinate_type,2,2>
+      ( data , fwarp , target , bcv , DEGREE , use_vc ) ;
+
+ 
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << "avg 10 x remap with internal spline:....... "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+
+  check_diff<array_type,real_type> ( target - data ) ;
+
+#ifdef USE_VC
+
+  vspline::transformation < pixel_type , rc_type , 2 , 2 >
+    tf ( tf_identity<rc_type> ) ; // , vtf_identity<rc_v>  ) ;
+
+#else
+ 
+  // using this call when USE_VC is defined will result in broadcasting
+  // of the single-element coordinate transform. The effect is the same,
+  // but the code is potentially slower.
+
+  vspline::transformation < pixel_type , rc_type , 2 , 2 >
+    tf ( tf_identity<rc_type> ) ;
+
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < TIMES ; times++ )
+    vspline::tf_remap < pixel_type , rc_type , 2 , 2 >
+      ( data , tf , target , bcv , DEGREE , use_vc ) ;
+
+ 
+  // note:: with REFLECT this doesn't come out right, because of the .5 difference!
+      
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << "avg 10 x remap with functor & internal bspl "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+
+#ifdef PRINT_ELAPSED
+  start = std::chrono::system_clock::now();
+#endif
+  
+  for ( int times = 0 ; times < TIMES ; times++ )
+    vspline::tf_remap < pixel_type , rc_type , 2 , 2 >
+      ( bsp , tf , target , use_vc ) ;
+
+  // note:: with REFLECT this doesn't come out right, because of the .5 difference!
+      
+#ifdef PRINT_ELAPSED
+  end = std::chrono::system_clock::now();
+  cout << "avg 10 x remap with functor & external bspl "
+       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / double ( TIMES )
+       << " ms" << endl ;
+#endif
+
+  check_diff < array_type , real_type > ( target - data ) ;
+
+  cout << "difference original data/restored data:" << endl ;
+  vspline::restore_from_braced<view_type> ( bsp.coeffs , bsp.coeffs , DEGREE ) ;
+  array_type diff ( cfview - data ) ;
+  check_diff<view_type,real_type> ( diff ) ;
+  cout << endl ;
+}
+
+/// process_image loads the image file 'name' as RGB data with real_type
+/// components and runs roundtrip on it for every combination of the four
+/// boundary conditions listed below, spline degrees 0 to 7 and use_vc off/on.
+template < class real_type , class rc_type >
+void process_image ( char * name )
+{
+  cout << fixed << showpoint ;
+
+  // the import and info-displaying code is taken from vigra
+  vigra::ImageImportInfo imageInfo ( name ) ;
+  std::cout << "Image information:\n" ;
+  std::cout << "  file format: " << imageInfo.getFileType() << std::endl ;
+  std::cout << "  width:       " << imageInfo.width() << std::endl ;
+  std::cout << "  height:      " << imageInfo.height() << std::endl ;
+  std::cout << "  pixel type:  " << imageInfo.getPixelType() << std::endl ;
+  std::cout << "  color image: " << ( imageInfo.isColor() ? "yes (" : "no  (" ) ;
+  std::cout << "number of channels: " << imageInfo.numBands() << ")\n" ;
+
+  typedef vigra::RGBValue < real_type , 0 , 1 , 2 > pixel_type ;
+  typedef vigra::MultiArray < 2 , pixel_type > array_type ;
+  typedef vigra::MultiArrayView < 2 , pixel_type > view_type ;
+
+  // the pixels live in pixel_store; roundtrip gets to see them through a view.
+  // to check that strided data are handled correctly, the view can instead be
+  // made an inner subarray of a larger array, e.g.
+  //   array_type pixel_store ( imageInfo.shape() + vigra::Shape2 ( 3 , 5 ) ) ;
+  //   view_type imageArray = pixel_store.subarray ( vigra::Shape2(1,2) , vigra::Shape2(-2,-3) ) ;
+
+  array_type pixel_store ( imageInfo.shape() ) ;
+  view_type imageArray ( pixel_store ) ;
+  vigra::importImage ( imageInfo , imageArray ) ;
+
+  // test these bc codes:
+  const vspline::bc_code bcs[] = { vspline::MIRROR , vspline::REFLECT , vspline::NATURAL , vspline::PERIODIC } ;
+
+  for ( vspline::bc_code bc : bcs )
+  {
+    for ( int spline_degree = 0 ; spline_degree < 8 ; spline_degree++ )
+    {
+      for ( int pass = 0 ; pass < 2 ; pass++ )
+      {
+        const bool use_vc = ( pass == 1 ) ;
+        cout << "testing bc code " << vspline::bc_name[bc]
+             << " spline degree " << spline_degree ;
+        if ( use_vc )
+          cout << " using Vc" ;
+        cout << endl ;
+        roundtrip < view_type , real_type , rc_type > ( imageArray , bc , spline_degree , use_vc ) ;
+      }
+    }
+  }
+}
+
+int main ( int argc , char * argv[] )
+{
+  // argv[1] names the image to test with and must not be used if it's missing
+  if ( argc < 2 )
+  {
+    std::cerr << "usage: roundtrip <image file>" << endl ;
+    return 1 ;
+  }
+
+  cout << fixed << showpoint ;
+
+  // all four combinations of float/double data and coordinates are tested.
+  // with double coordinates gcc was seen to emit a note for vigra/tinyvector.hxx
+  // (UnrollLoop<LEVEL>::assignScalar with Vc_1::SimdArray<double, 8ul, ...>):
+  // "The ABI for passing parameters with 64-byte alignment has changed in GCC 4.6"
+
+  cout << "testing float data, float coordinates" << endl ;
+  process_image<float,float> ( argv[1] ) ;
+
+  cout << endl << "testing double data, double coordinates" << endl ;
+  process_image<double,double> ( argv[1] ) ;
+
+  cout << "testing float data, double coordinates" << endl ;
+  process_image<float,double> ( argv[1] ) ;
+
+  cout << endl << "testing double data, float coordinates" << endl ;
+  process_image<double,float> ( argv[1] ) ;
+}
diff --git a/example/times.txt b/example/times.txt
new file mode 100644
index 0000000..52ffd3e
--- /dev/null
+++ b/example/times.txt
@@ -0,0 +1,5502 @@
+testing float data, float coordinates
+Image information:
+  file format: JPEG
+  width:       1920
+  height:      1079
+  pixel type:  UINT8
+  color image: yes (number of channels: 3)
+testing bc code MIRROR spline degree 0
+avg 10 x prefilter:........................ 4.700000 ms
+avg 10 x remap1 from pre-split coordinates: 15.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 26.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 32.400000 ms
+avg 10 x remap with functor & external bspl 23.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 0 using Vc
+avg 10 x prefilter:........................ 5.200000 ms
+avg 10 x remap1 from pre-split coordinates: 6.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 6.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 14.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 15.600000 ms
+avg 10 x remap with functor & external bspl 6.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 28.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 34.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 41.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 43.400000 ms
+avg 10 x remap with functor & external bspl 37.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1 using Vc
+avg 10 x prefilter:........................ 4.900000 ms
+avg 10 x remap1 from pre-split coordinates: 9.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 18.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 17.600000 ms
+avg 10 x remap with functor & external bspl 9.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2
+avg 10 x prefilter:........................ 14.500000 ms
+avg 10 x remap1 from pre-split coordinates: 47.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 54.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 75.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 71.700000 ms
+avg 10 x remap with functor & external bspl 55.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000070
+
+testing bc code MIRROR spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.400000 ms
+avg 10 x remap1 from pre-split coordinates: 14.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 15.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 27.300000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 28.400000 ms
+avg 10 x remap with functor & external bspl 14.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000070
+
+testing bc code MIRROR spline degree 3
+avg 10 x prefilter:........................ 14.500000 ms
+avg 10 x remap1 from pre-split coordinates: 73.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap1 from unsplit coordinates:.. 82.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with internal spline:....... 99.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with functor & internal bspl 99.000000 ms
+avg 10 x remap with functor & external bspl 86.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000117
+
+testing bc code MIRROR spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.200000 ms
+avg 10 x remap1 from pre-split coordinates: 21.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap1 from unsplit coordinates:.. 23.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with internal spline:....... 34.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with functor & internal bspl 34.800000 ms
+avg 10 x remap with functor & external bspl 22.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000117
+
+testing bc code MIRROR spline degree 4
+avg 10 x prefilter:........................ 23.800000 ms
+avg 10 x remap1 from pre-split coordinates: 109.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 116.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 145.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 147.200000 ms
+avg 10 x remap with functor & external bspl 116.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+
+testing bc code MIRROR spline degree 4 using Vc
+avg 10 x prefilter:........................ 12.800000 ms
+avg 10 x remap1 from pre-split coordinates: 30.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 32.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 45.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 45.100000 ms
+avg 10 x remap with functor & external bspl 31.300000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+
+testing bc code MIRROR spline degree 5
+avg 10 x prefilter:........................ 24.600000 ms
+avg 10 x remap1 from pre-split coordinates: 152.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap1 from unsplit coordinates:.. 161.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with internal spline:....... 182.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with functor & internal bspl 185.000000 ms
+avg 10 x remap with functor & external bspl 158.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000101
+
+testing bc code MIRROR spline degree 5 using Vc
+avg 10 x prefilter:........................ 12.800000 ms
+avg 10 x remap1 from pre-split coordinates: 41.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap1 from unsplit coordinates:.. 40.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with internal spline:....... 68.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with functor & internal bspl 56.800000 ms
+avg 10 x remap with functor & external bspl 42.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000101
+
+testing bc code MIRROR spline degree 6
+avg 10 x prefilter:........................ 36.600000 ms
+avg 10 x remap1 from pre-split coordinates: 197.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 207.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 246.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 243.800000 ms
+avg 10 x remap with functor & external bspl 206.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000111
+
+testing bc code MIRROR spline degree 6 using Vc
+avg 10 x prefilter:........................ 12.900000 ms
+avg 10 x remap1 from pre-split coordinates: 58.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 59.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 69.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 73.100000 ms
+avg 10 x remap with functor & external bspl 55.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000111
+
+testing bc code MIRROR spline degree 7
+avg 10 x prefilter:........................ 36.800000 ms
+avg 10 x remap1 from pre-split coordinates: 252.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap1 from unsplit coordinates:.. 266.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap with internal spline:....... 298.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap with functor & internal bspl 302.100000 ms
+avg 10 x remap with functor & external bspl 269.100000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000112
+
+testing bc code MIRROR spline degree 7 using Vc
+avg 10 x prefilter:........................ 13.600000 ms
+avg 10 x remap1 from pre-split coordinates: 71.400000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap1 from unsplit coordinates:.. 70.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap with internal spline:....... 87.500000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap with functor & internal bspl 87.700000 ms
+avg 10 x remap with functor & external bspl 70.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000142
+
+testing bc code MIRROR spline degree 8
+avg 10 x prefilter:........................ 48.300000 ms
+avg 10 x remap1 from pre-split coordinates: 325.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap1 from unsplit coordinates:.. 323.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with internal spline:....... 378.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with functor & internal bspl 375.200000 ms
+avg 10 x remap with functor & external bspl 332.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000135
+
+testing bc code MIRROR spline degree 8 using Vc
+avg 10 x prefilter:........................ 16.100000 ms
+avg 10 x remap1 from pre-split coordinates: 89.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap1 from unsplit coordinates:.. 90.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with internal spline:....... 109.200000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with functor & internal bspl 114.800000 ms
+avg 10 x remap with functor & external bspl 93.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 0
+avg 10 x prefilter:........................ 4.400000 ms
+avg 10 x remap1 from pre-split coordinates: 16.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.300000 ms
+avg 10 x remap with functor & external bspl 23.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0 using Vc
+avg 10 x prefilter:........................ 4.800000 ms
+avg 10 x remap1 from pre-split coordinates: 6.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 7.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 15.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 13.800000 ms
+avg 10 x remap with functor & external bspl 7.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 28.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 37.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 42.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 44.400000 ms
+avg 10 x remap with functor & external bspl 37.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.300000 ms
+avg 10 x remap1 from pre-split coordinates: 9.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 18.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 19.000000 ms
+avg 10 x remap with functor & external bspl 9.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2
+avg 10 x prefilter:........................ 14.700000 ms
+avg 10 x remap1 from pre-split coordinates: 47.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap1 from unsplit coordinates:.. 58.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with internal spline:....... 69.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with functor & internal bspl 71.200000 ms
+avg 10 x remap with functor & external bspl 55.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000078
+
+testing bc code REFLECT spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.100000 ms
+avg 10 x remap1 from pre-split coordinates: 14.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap1 from unsplit coordinates:.. 14.600000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with internal spline:....... 27.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with functor & internal bspl 27.300000 ms
+avg 10 x remap with functor & external bspl 14.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000078
+
+testing bc code REFLECT spline degree 3
+avg 10 x prefilter:........................ 15.300000 ms
+avg 10 x remap1 from pre-split coordinates: 73.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap1 from unsplit coordinates:.. 83.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with internal spline:....... 97.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with functor & internal bspl 104.200000 ms
+avg 10 x remap with functor & external bspl 82.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 3 using Vc
+avg 10 x prefilter:........................ 9.800000 ms
+avg 10 x remap1 from pre-split coordinates: 22.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap1 from unsplit coordinates:.. 21.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with internal spline:....... 36.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with functor & internal bspl 35.100000 ms
+avg 10 x remap with functor & external bspl 21.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 4
+avg 10 x prefilter:........................ 25.500000 ms
+avg 10 x remap1 from pre-split coordinates: 106.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 116.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 142.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 146.500000 ms
+avg 10 x remap with functor & external bspl 116.900000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000147
+
+testing bc code REFLECT spline degree 4 using Vc
+avg 10 x prefilter:........................ 11.600000 ms
+avg 10 x remap1 from pre-split coordinates: 32.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 31.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 45.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 45.800000 ms
+avg 10 x remap with functor & external bspl 31.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000147
+
+testing bc code REFLECT spline degree 5
+avg 10 x prefilter:........................ 25.100000 ms
+avg 10 x remap1 from pre-split coordinates: 151.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap1 from unsplit coordinates:.. 161.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with internal spline:....... 183.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with functor & internal bspl 185.800000 ms
+avg 10 x remap with functor & external bspl 163.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000129
+
+testing bc code REFLECT spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.500000 ms
+avg 10 x remap1 from pre-split coordinates: 41.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap1 from unsplit coordinates:.. 47.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with internal spline:....... 56.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with functor & internal bspl 55.700000 ms
+avg 10 x remap with functor & external bspl 42.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000129
+
+testing bc code REFLECT spline degree 6
+avg 10 x prefilter:........................ 36.100000 ms
+avg 10 x remap1 from pre-split coordinates: 198.100000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 206.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 250.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 245.200000 ms
+avg 10 x remap with functor & external bspl 207.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000163
+
+testing bc code REFLECT spline degree 6 using Vc
+avg 10 x prefilter:........................ 13.700000 ms
+avg 10 x remap1 from pre-split coordinates: 58.900000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 62.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 73.900000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 71.000000 ms
+avg 10 x remap with functor & external bspl 55.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000163
+
+testing bc code REFLECT spline degree 7
+avg 10 x prefilter:........................ 36.400000 ms
+avg 10 x remap1 from pre-split coordinates: 254.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 267.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 299.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 310.000000 ms
+avg 10 x remap with functor & external bspl 260.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000165
+
+testing bc code REFLECT spline degree 7 using Vc
+avg 10 x prefilter:........................ 13.600000 ms
+avg 10 x remap1 from pre-split coordinates: 71.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap1 from unsplit coordinates:.. 72.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap with internal spline:....... 91.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap with functor & internal bspl 86.000000 ms
+avg 10 x remap with functor & external bspl 72.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000201
+
+testing bc code REFLECT spline degree 8
+avg 10 x prefilter:........................ 48.800000 ms
+avg 10 x remap1 from pre-split coordinates: 316.200000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 324.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 381.200000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 378.700000 ms
+avg 10 x remap with functor & external bspl 323.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000179
+
+testing bc code REFLECT spline degree 8 using Vc
+avg 10 x prefilter:........................ 15.400000 ms
+avg 10 x remap1 from pre-split coordinates: 89.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 92.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 106.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 112.100000 ms
+avg 10 x remap with functor & external bspl 89.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000179
+
+testing bc code PERIODIC spline degree 0
+avg 10 x prefilter:........................ 4.800000 ms
+avg 10 x remap1 from pre-split coordinates: 15.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 31.600000 ms
+avg 10 x remap with functor & external bspl 23.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0 using Vc
+avg 10 x prefilter:........................ 5.000000 ms
+avg 10 x remap1 from pre-split coordinates: 7.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 7.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 14.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 14.800000 ms
+avg 10 x remap with functor & external bspl 7.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1
+avg 10 x prefilter:........................ 4.400000 ms
+avg 10 x remap1 from pre-split coordinates: 27.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 34.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 42.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 45.800000 ms
+avg 10 x remap with functor & external bspl 35.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.000000 ms
+avg 10 x remap1 from pre-split coordinates: 9.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 17.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 17.700000 ms
+avg 10 x remap with functor & external bspl 9.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2
+avg 10 x prefilter:........................ 15.300000 ms
+avg 10 x remap1 from pre-split coordinates: 52.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap1 from unsplit coordinates:.. 52.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with internal spline:....... 71.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with functor & internal bspl 71.800000 ms
+avg 10 x remap with functor & external bspl 55.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000076
+
+testing bc code PERIODIC spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.300000 ms
+avg 10 x remap1 from pre-split coordinates: 13.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap1 from unsplit coordinates:.. 14.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with internal spline:....... 28.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with functor & internal bspl 27.600000 ms
+avg 10 x remap with functor & external bspl 16.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000076
+
+testing bc code PERIODIC spline degree 3
+avg 10 x prefilter:........................ 15.300000 ms
+avg 10 x remap1 from pre-split coordinates: 76.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap1 from unsplit coordinates:.. 79.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with internal spline:....... 98.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with functor & internal bspl 103.400000 ms
+avg 10 x remap with functor & external bspl 80.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000094
+
+testing bc code PERIODIC spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.500000 ms
+avg 10 x remap1 from pre-split coordinates: 21.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap1 from unsplit coordinates:.. 22.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with internal spline:....... 35.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with functor & internal bspl 36.700000 ms
+avg 10 x remap with functor & external bspl 22.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000094
+
+testing bc code PERIODIC spline degree 4
+avg 10 x prefilter:........................ 24.700000 ms
+avg 10 x remap1 from pre-split coordinates: 108.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 118.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 150.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 143.000000 ms
+avg 10 x remap with functor & external bspl 116.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000143
+
+testing bc code PERIODIC spline degree 4 using Vc
+avg 10 x prefilter:........................ 11.700000 ms
+avg 10 x remap1 from pre-split coordinates: 32.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 32.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 46.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 46.200000 ms
+avg 10 x remap with functor & external bspl 32.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000143
+
+testing bc code PERIODIC spline degree 5
+avg 10 x prefilter:........................ 24.900000 ms
+avg 10 x remap1 from pre-split coordinates: 148.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 162.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 181.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 186.600000 ms
+avg 10 x remap with functor & external bspl 156.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000114
+
+testing bc code PERIODIC spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.200000 ms
+avg 10 x remap1 from pre-split coordinates: 46.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 45.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 57.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 57.800000 ms
+avg 10 x remap with functor & external bspl 42.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000114
+
+testing bc code PERIODIC spline degree 6
+avg 10 x prefilter:........................ 35.800000 ms
+avg 10 x remap1 from pre-split coordinates: 202.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap1 from unsplit coordinates:.. 207.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with internal spline:....... 252.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with functor & internal bspl 245.100000 ms
+avg 10 x remap with functor & external bspl 206.900000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000132
+
+testing bc code PERIODIC spline degree 6 using Vc
+avg 10 x prefilter:........................ 17.100000 ms
+avg 10 x remap1 from pre-split coordinates: 56.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap1 from unsplit coordinates:.. 56.100000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with internal spline:....... 70.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with functor & internal bspl 71.100000 ms
+avg 10 x remap with functor & external bspl 55.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000132
+
+testing bc code PERIODIC spline degree 7
+avg 10 x prefilter:........................ 37.200000 ms
+avg 10 x remap1 from pre-split coordinates: 255.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap1 from unsplit coordinates:.. 266.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap with internal spline:....... 298.500000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap with functor & internal bspl 305.500000 ms
+avg 10 x remap with functor & external bspl 260.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000159
+
+testing bc code PERIODIC spline degree 7 using Vc
+avg 10 x prefilter:........................ 13.400000 ms
+avg 10 x remap1 from pre-split coordinates: 69.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap1 from unsplit coordinates:.. 71.500000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap with internal spline:....... 90.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap with functor & internal bspl 87.400000 ms
+avg 10 x remap with functor & external bspl 76.000000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000160
+
+testing bc code PERIODIC spline degree 8
+avg 10 x prefilter:........................ 50.800000 ms
+avg 10 x remap1 from pre-split coordinates: 317.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap1 from unsplit coordinates:.. 326.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with internal spline:....... 375.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with functor & internal bspl 382.600000 ms
+avg 10 x remap with functor & external bspl 323.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000167
+
+testing bc code PERIODIC spline degree 8 using Vc
+avg 10 x prefilter:........................ 15.500000 ms
+avg 10 x remap1 from pre-split coordinates: 91.600000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap1 from unsplit coordinates:.. 89.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with internal spline:....... 121.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with functor & internal bspl 109.600000 ms
+avg 10 x remap with functor & external bspl 91.400000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000167
+
+testing bc code NATURAL spline degree 0
+avg 10 x prefilter:........................ 4.400000 ms
+avg 10 x remap1 from pre-split coordinates: 14.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 30.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.000000 ms
+avg 10 x remap with functor & external bspl 23.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0 using Vc
+avg 10 x prefilter:........................ 5.000000 ms
+avg 10 x remap1 from pre-split coordinates: 7.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 7.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 16.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 14.500000 ms
+avg 10 x remap with functor & external bspl 6.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1
+avg 10 x prefilter:........................ 4.400000 ms
+avg 10 x remap1 from pre-split coordinates: 28.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 36.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 41.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 43.700000 ms
+avg 10 x remap with functor & external bspl 34.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1 using Vc
+avg 10 x prefilter:........................ 4.800000 ms
+avg 10 x remap1 from pre-split coordinates: 9.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 9.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 17.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 21.600000 ms
+avg 10 x remap with functor & external bspl 9.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2
+avg 10 x prefilter:........................ 13.500000 ms
+avg 10 x remap1 from pre-split coordinates: 49.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 54.200000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 70.200000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 70.800000 ms
+avg 10 x remap with functor & external bspl 55.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000075
+
+testing bc code NATURAL spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.300000 ms
+avg 10 x remap1 from pre-split coordinates: 13.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 16.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 27.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 28.200000 ms
+avg 10 x remap with functor & external bspl 13.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000075
+
+testing bc code NATURAL spline degree 3
+avg 10 x prefilter:........................ 16.500000 ms
+avg 10 x remap1 from pre-split coordinates: 74.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap1 from unsplit coordinates:.. 80.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with internal spline:....... 101.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with functor & internal bspl 98.600000 ms
+avg 10 x remap with functor & external bspl 81.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000144
+
+testing bc code NATURAL spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.500000 ms
+avg 10 x remap1 from pre-split coordinates: 20.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap1 from unsplit coordinates:.. 21.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with internal spline:....... 35.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with functor & internal bspl 33.900000 ms
+avg 10 x remap with functor & external bspl 22.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000144
+
+testing bc code NATURAL spline degree 4
+avg 10 x prefilter:........................ 24.300000 ms
+avg 10 x remap1 from pre-split coordinates: 109.300000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap1 from unsplit coordinates:.. 115.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with internal spline:....... 151.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with functor & internal bspl 144.400000 ms
+avg 10 x remap with functor & external bspl 115.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000198
+
+testing bc code NATURAL spline degree 4 using Vc
+avg 10 x prefilter:........................ 12.400000 ms
+avg 10 x remap1 from pre-split coordinates: 30.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap1 from unsplit coordinates:.. 31.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with internal spline:....... 47.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with functor & internal bspl 48.600000 ms
+avg 10 x remap with functor & external bspl 31.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000198
+
+testing bc code NATURAL spline degree 5
+avg 10 x prefilter:........................ 25.200000 ms
+avg 10 x remap1 from pre-split coordinates: 155.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap1 from unsplit coordinates:.. 154.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with internal spline:....... 180.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with functor & internal bspl 185.700000 ms
+avg 10 x remap with functor & external bspl 166.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000180
+
+testing bc code NATURAL spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.800000 ms
+avg 10 x remap1 from pre-split coordinates: 42.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap1 from unsplit coordinates:.. 42.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with internal spline:....... 56.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with functor & internal bspl 56.100000 ms
+avg 10 x remap with functor & external bspl 42.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000180
+
+testing bc code NATURAL spline degree 6
+avg 10 x prefilter:........................ 37.000000 ms
+avg 10 x remap1 from pre-split coordinates: 201.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 208.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 241.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 245.700000 ms
+avg 10 x remap with functor & external bspl 211.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000174
+
+testing bc code NATURAL spline degree 6 using Vc
+avg 10 x prefilter:........................ 13.800000 ms
+avg 10 x remap1 from pre-split coordinates: 56.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 55.100000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 69.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 73.100000 ms
+avg 10 x remap with functor & external bspl 56.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000174
+
+testing bc code NATURAL spline degree 7
+avg 10 x prefilter:........................ 36.400000 ms
+avg 10 x remap1 from pre-split coordinates: 254.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap1 from unsplit coordinates:.. 264.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap with internal spline:....... 298.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap with functor & internal bspl 304.200000 ms
+avg 10 x remap with functor & external bspl 260.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+
+testing bc code NATURAL spline degree 7 using Vc
+avg 10 x prefilter:........................ 13.200000 ms
+avg 10 x remap1 from pre-split coordinates: 72.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap1 from unsplit coordinates:.. 74.500000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap with internal spline:....... 89.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap with functor & internal bspl 92.000000 ms
+avg 10 x remap with functor & external bspl 71.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000307
+
+testing bc code NATURAL spline degree 8
+avg 10 x prefilter:........................ 47.400000 ms
+avg 10 x remap1 from pre-split coordinates: 317.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap1 from unsplit coordinates:.. 327.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with internal spline:....... 373.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with functor & internal bspl 385.700000 ms
+avg 10 x remap with functor & external bspl 322.200000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000258
+
+testing bc code NATURAL spline degree 8 using Vc
+avg 10 x prefilter:........................ 16.700000 ms
+avg 10 x remap1 from pre-split coordinates: 91.100000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap1 from unsplit coordinates:.. 87.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with internal spline:....... 111.100000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with functor & internal bspl 106.500000 ms
+avg 10 x remap with functor & external bspl 90.400000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000258
+
+
+testing double data, float coordinates
+Image information:
+  file format: JPEG
+  width:       1920
+  height:      1079
+  pixel type:  UINT8
+  color image: yes (number of channels: 3)
+testing bc code MIRROR spline degree 0
+avg 10 x prefilter:........................ 8.400000 ms
+avg 10 x remap1 from pre-split coordinates: 13.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 18.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 32.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 35.600000 ms
+avg 10 x remap with functor & external bspl 20.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 0 using Vc
+avg 10 x prefilter:........................ 8.900000 ms
+avg 10 x remap1 from pre-split coordinates: 11.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 26.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 24.500000 ms
+avg 10 x remap with functor & external bspl 10.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1
+avg 10 x prefilter:........................ 8.300000 ms
+avg 10 x remap1 from pre-split coordinates: 19.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 26.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 40.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 43.100000 ms
+avg 10 x remap with functor & external bspl 28.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1 using Vc
+avg 10 x prefilter:........................ 9.700000 ms
+avg 10 x remap1 from pre-split coordinates: 14.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 15.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.200000 ms
+avg 10 x remap with functor & external bspl 14.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2
+avg 10 x prefilter:........................ 17.700000 ms
+avg 10 x remap1 from pre-split coordinates: 29.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 37.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 60.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 62.400000 ms
+avg 10 x remap with functor & external bspl 38.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2 using Vc
+avg 10 x prefilter:........................ 18.900000 ms
+avg 10 x remap1 from pre-split coordinates: 22.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 23.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 47.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 47.300000 ms
+avg 10 x remap with functor & external bspl 23.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 3
+avg 10 x prefilter:........................ 17.300000 ms
+avg 10 x remap1 from pre-split coordinates: 51.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 53.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 77.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.300000 ms
+avg 10 x remap with functor & external bspl 56.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.300000 ms
+avg 10 x remap1 from pre-split coordinates: 32.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 32.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 56.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 58.600000 ms
+avg 10 x remap with functor & external bspl 32.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 4
+avg 10 x prefilter:........................ 18.700000 ms
+avg 10 x remap1 from pre-split coordinates: 68.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 79.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 99.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 101.400000 ms
+avg 10 x remap with functor & external bspl 76.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 4 using Vc
+avg 10 x prefilter:........................ 22.500000 ms
+avg 10 x remap1 from pre-split coordinates: 45.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 45.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 71.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 74.700000 ms
+avg 10 x remap with functor & external bspl 46.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 5
+avg 10 x prefilter:........................ 18.200000 ms
+avg 10 x remap1 from pre-split coordinates: 98.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 100.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 125.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 127.300000 ms
+avg 10 x remap with functor & external bspl 104.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 5 using Vc
+avg 10 x prefilter:........................ 21.500000 ms
+avg 10 x remap1 from pre-split coordinates: 60.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 61.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 95.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 89.100000 ms
+avg 10 x remap with functor & external bspl 61.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 6
+avg 10 x prefilter:........................ 21.700000 ms
+avg 10 x remap1 from pre-split coordinates: 125.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 130.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 157.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 167.100000 ms
+avg 10 x remap with functor & external bspl 133.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 6 using Vc
+avg 10 x prefilter:........................ 24.700000 ms
+avg 10 x remap1 from pre-split coordinates: 79.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 80.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 113.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 111.400000 ms
+avg 10 x remap with functor & external bspl 85.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 7
+avg 10 x prefilter:........................ 20.200000 ms
+avg 10 x remap1 from pre-split coordinates: 163.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 165.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 192.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 201.400000 ms
+avg 10 x remap with functor & external bspl 169.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 7 using Vc
+avg 10 x prefilter:........................ 24.500000 ms
+avg 10 x remap1 from pre-split coordinates: 101.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 102.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 130.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 136.500000 ms
+avg 10 x remap with functor & external bspl 100.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 8
+avg 10 x prefilter:........................ 26.000000 ms
+avg 10 x remap1 from pre-split coordinates: 202.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 207.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 239.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 240.300000 ms
+avg 10 x remap with functor & external bspl 210.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 8 using Vc
+avg 10 x prefilter:........................ 29.600000 ms
+avg 10 x remap1 from pre-split coordinates: 127.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 124.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 159.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 163.400000 ms
+avg 10 x remap with functor & external bspl 124.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0
+avg 10 x prefilter:........................ 8.900000 ms
+avg 10 x remap1 from pre-split coordinates: 12.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 33.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 33.700000 ms
+avg 10 x remap with functor & external bspl 19.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0 using Vc
+avg 10 x prefilter:........................ 9.600000 ms
+avg 10 x remap1 from pre-split coordinates: 11.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 26.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 24.200000 ms
+avg 10 x remap with functor & external bspl 12.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1
+avg 10 x prefilter:........................ 8.400000 ms
+avg 10 x remap1 from pre-split coordinates: 19.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 26.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 40.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 42.300000 ms
+avg 10 x remap with functor & external bspl 27.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1 using Vc
+avg 10 x prefilter:........................ 8.900000 ms
+avg 10 x remap1 from pre-split coordinates: 14.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 17.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 28.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 32.100000 ms
+avg 10 x remap with functor & external bspl 15.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2
+avg 10 x prefilter:........................ 18.400000 ms
+avg 10 x remap1 from pre-split coordinates: 30.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 36.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 61.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 61.100000 ms
+avg 10 x remap with functor & external bspl 39.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2 using Vc
+avg 10 x prefilter:........................ 18.800000 ms
+avg 10 x remap1 from pre-split coordinates: 22.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 47.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 47.400000 ms
+avg 10 x remap with functor & external bspl 23.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 3
+avg 10 x prefilter:........................ 17.000000 ms
+avg 10 x remap1 from pre-split coordinates: 47.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 55.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 78.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.600000 ms
+avg 10 x remap with functor & external bspl 55.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 3 using Vc
+avg 10 x prefilter:........................ 19.000000 ms
+avg 10 x remap1 from pre-split coordinates: 35.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 32.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 56.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 61.900000 ms
+avg 10 x remap with functor & external bspl 32.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 4
+avg 10 x prefilter:........................ 19.500000 ms
+avg 10 x remap1 from pre-split coordinates: 68.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 75.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 98.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 103.700000 ms
+avg 10 x remap with functor & external bspl 77.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 4 using Vc
+avg 10 x prefilter:........................ 23.500000 ms
+avg 10 x remap1 from pre-split coordinates: 46.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 45.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 74.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 72.100000 ms
+avg 10 x remap with functor & external bspl 45.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 5
+avg 10 x prefilter:........................ 19.800000 ms
+avg 10 x remap1 from pre-split coordinates: 95.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 103.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 124.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 131.600000 ms
+avg 10 x remap with functor & external bspl 103.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 5 using Vc
+avg 10 x prefilter:........................ 22.100000 ms
+avg 10 x remap1 from pre-split coordinates: 59.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 62.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 88.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 90.500000 ms
+avg 10 x remap with functor & external bspl 60.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 6
+avg 10 x prefilter:........................ 20.000000 ms
+avg 10 x remap1 from pre-split coordinates: 134.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 130.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 156.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 160.000000 ms
+avg 10 x remap with functor & external bspl 137.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 6 using Vc
+avg 10 x prefilter:........................ 23.600000 ms
+avg 10 x remap1 from pre-split coordinates: 84.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 79.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 109.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 109.800000 ms
+avg 10 x remap with functor & external bspl 79.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 7
+avg 10 x prefilter:........................ 22.000000 ms
+avg 10 x remap1 from pre-split coordinates: 162.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 174.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 190.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 195.800000 ms
+avg 10 x remap with functor & external bspl 170.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 7 using Vc
+avg 10 x prefilter:........................ 25.500000 ms
+avg 10 x remap1 from pre-split coordinates: 105.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 100.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 130.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 130.800000 ms
+avg 10 x remap with functor & external bspl 101.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 8
+avg 10 x prefilter:........................ 25.200000 ms
+avg 10 x remap1 from pre-split coordinates: 205.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 204.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 237.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 246.300000 ms
+avg 10 x remap with functor & external bspl 209.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 8 using Vc
+avg 10 x prefilter:........................ 28.100000 ms
+avg 10 x remap1 from pre-split coordinates: 124.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 125.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 166.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 158.900000 ms
+avg 10 x remap with functor & external bspl 123.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0
+avg 10 x prefilter:........................ 8.300000 ms
+avg 10 x remap1 from pre-split coordinates: 12.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 19.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 30.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 34.300000 ms
+avg 10 x remap with functor & external bspl 19.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0 using Vc
+avg 10 x prefilter:........................ 9.100000 ms
+avg 10 x remap1 from pre-split coordinates: 12.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 25.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 25.900000 ms
+avg 10 x remap with functor & external bspl 11.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1
+avg 10 x prefilter:........................ 8.200000 ms
+avg 10 x remap1 from pre-split coordinates: 20.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 24.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 39.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 43.500000 ms
+avg 10 x remap with functor & external bspl 26.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1 using Vc
+avg 10 x prefilter:........................ 8.900000 ms
+avg 10 x remap1 from pre-split coordinates: 14.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 16.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 28.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.000000 ms
+avg 10 x remap with functor & external bspl 15.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2
+avg 10 x prefilter:........................ 18.200000 ms
+avg 10 x remap1 from pre-split coordinates: 30.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 35.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 62.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 60.900000 ms
+avg 10 x remap with functor & external bspl 38.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2 using Vc
+avg 10 x prefilter:........................ 18.700000 ms
+avg 10 x remap1 from pre-split coordinates: 23.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 47.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 46.900000 ms
+avg 10 x remap with functor & external bspl 23.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 3
+avg 10 x prefilter:........................ 17.100000 ms
+avg 10 x remap1 from pre-split coordinates: 49.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 52.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 75.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.200000 ms
+avg 10 x remap with functor & external bspl 54.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.500000 ms
+avg 10 x remap1 from pre-split coordinates: 32.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 33.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 57.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 57.800000 ms
+avg 10 x remap with functor & external bspl 33.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 4
+avg 10 x prefilter:........................ 20.500000 ms
+avg 10 x remap1 from pre-split coordinates: 68.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 79.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 98.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 100.700000 ms
+avg 10 x remap with functor & external bspl 75.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 4 using Vc
+avg 10 x prefilter:........................ 21.200000 ms
+avg 10 x remap1 from pre-split coordinates: 45.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 44.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 76.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 74.200000 ms
+avg 10 x remap with functor & external bspl 46.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 5
+avg 10 x prefilter:........................ 19.800000 ms
+avg 10 x remap1 from pre-split coordinates: 99.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 99.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 123.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 127.000000 ms
+avg 10 x remap with functor & external bspl 103.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 5 using Vc
+avg 10 x prefilter:........................ 22.500000 ms
+avg 10 x remap1 from pre-split coordinates: 60.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 61.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 91.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 89.400000 ms
+avg 10 x remap with functor & external bspl 60.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 6
+avg 10 x prefilter:........................ 21.900000 ms
+avg 10 x remap1 from pre-split coordinates: 125.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 131.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 156.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 165.500000 ms
+avg 10 x remap with functor & external bspl 132.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 6 using Vc
+avg 10 x prefilter:........................ 24.000000 ms
+avg 10 x remap1 from pre-split coordinates: 80.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 78.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 112.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 108.900000 ms
+avg 10 x remap with functor & external bspl 84.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 7
+avg 10 x prefilter:........................ 20.400000 ms
+avg 10 x remap1 from pre-split coordinates: 162.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 164.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 193.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 202.600000 ms
+avg 10 x remap with functor & external bspl 168.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 7 using Vc
+avg 10 x prefilter:........................ 25.300000 ms
+avg 10 x remap1 from pre-split coordinates: 100.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 102.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 131.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 136.400000 ms
+avg 10 x remap with functor & external bspl 101.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 8
+avg 10 x prefilter:........................ 24.600000 ms
+avg 10 x remap1 from pre-split coordinates: 201.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 214.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 241.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 239.200000 ms
+avg 10 x remap with functor & external bspl 211.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 8 using Vc
+avg 10 x prefilter:........................ 30.200000 ms
+avg 10 x remap1 from pre-split coordinates: 128.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 124.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 157.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 159.700000 ms
+avg 10 x remap with functor & external bspl 124.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0
+avg 10 x prefilter:........................ 9.200000 ms
+avg 10 x remap1 from pre-split coordinates: 16.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 18.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 32.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 34.300000 ms
+avg 10 x remap with functor & external bspl 19.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0 using Vc
+avg 10 x prefilter:........................ 9.700000 ms
+avg 10 x remap1 from pre-split coordinates: 11.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 25.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 24.900000 ms
+avg 10 x remap with functor & external bspl 12.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1
+avg 10 x prefilter:........................ 8.200000 ms
+avg 10 x remap1 from pre-split coordinates: 19.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 24.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 39.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 40.300000 ms
+avg 10 x remap with functor & external bspl 26.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1 using Vc
+avg 10 x prefilter:........................ 9.200000 ms
+avg 10 x remap1 from pre-split coordinates: 14.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 15.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 30.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.200000 ms
+avg 10 x remap with functor & external bspl 15.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2
+avg 10 x prefilter:........................ 18.000000 ms
+avg 10 x remap1 from pre-split coordinates: 29.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 37.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 63.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 61.900000 ms
+avg 10 x remap with functor & external bspl 39.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2 using Vc
+avg 10 x prefilter:........................ 19.200000 ms
+avg 10 x remap1 from pre-split coordinates: 23.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 47.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 47.700000 ms
+avg 10 x remap with functor & external bspl 23.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 3
+avg 10 x prefilter:........................ 17.300000 ms
+avg 10 x remap1 from pre-split coordinates: 47.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 53.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 77.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.800000 ms
+avg 10 x remap with functor & external bspl 54.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.400000 ms
+avg 10 x remap1 from pre-split coordinates: 35.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 32.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 56.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 56.100000 ms
+avg 10 x remap with functor & external bspl 32.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 4
+avg 10 x prefilter:........................ 18.900000 ms
+avg 10 x remap1 from pre-split coordinates: 68.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 73.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 97.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 103.200000 ms
+avg 10 x remap with functor & external bspl 76.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 4 using Vc
+avg 10 x prefilter:........................ 26.300000 ms
+avg 10 x remap1 from pre-split coordinates: 44.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 46.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 71.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 72.400000 ms
+avg 10 x remap with functor & external bspl 45.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 5
+avg 10 x prefilter:........................ 18.900000 ms
+avg 10 x remap1 from pre-split coordinates: 94.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 101.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 123.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 130.900000 ms
+avg 10 x remap with functor & external bspl 102.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 5 using Vc
+avg 10 x prefilter:........................ 22.000000 ms
+avg 10 x remap1 from pre-split coordinates: 61.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 61.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 87.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 89.400000 ms
+avg 10 x remap with functor & external bspl 61.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 6
+avg 10 x prefilter:........................ 21.300000 ms
+avg 10 x remap1 from pre-split coordinates: 128.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 130.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 155.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 159.300000 ms
+avg 10 x remap with functor & external bspl 135.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 6 using Vc
+avg 10 x prefilter:........................ 25.300000 ms
+avg 10 x remap1 from pre-split coordinates: 85.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 78.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 110.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 109.800000 ms
+avg 10 x remap with functor & external bspl 78.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 7
+avg 10 x prefilter:........................ 20.800000 ms
+avg 10 x remap1 from pre-split coordinates: 163.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 171.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 190.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 195.400000 ms
+avg 10 x remap with functor & external bspl 171.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 7 using Vc
+avg 10 x prefilter:........................ 26.500000 ms
+avg 10 x remap1 from pre-split coordinates: 105.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 99.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 131.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 131.100000 ms
+avg 10 x remap with functor & external bspl 100.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 8
+avg 10 x prefilter:........................ 24.200000 ms
+avg 10 x remap1 from pre-split coordinates: 205.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 204.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 235.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 250.600000 ms
+avg 10 x remap with functor & external bspl 211.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 8 using Vc
+avg 10 x prefilter:........................ 28.000000 ms
+avg 10 x remap1 from pre-split coordinates: 127.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 128.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 162.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 158.100000 ms
+avg 10 x remap with functor & external bspl 124.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing float data, double coordinates
+Image information:
+  file format: JPEG
+  width:       1920
+  height:      1079
+  pixel type:  UINT8
+  color image: yes (number of channels: 3)
+testing bc code MIRROR spline degree 0
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 15.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 21.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 30.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 33.700000 ms
+avg 10 x remap with functor & external bspl 25.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 0 using Vc
+avg 10 x prefilter:........................ 4.800000 ms
+avg 10 x remap1 from pre-split coordinates: 8.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 18.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 19.900000 ms
+avg 10 x remap with functor & external bspl 10.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 29.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 36.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 42.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 50.300000 ms
+avg 10 x remap with functor & external bspl 37.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.200000 ms
+avg 10 x remap1 from pre-split coordinates: 9.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 12.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 21.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 19.400000 ms
+avg 10 x remap with functor & external bspl 11.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2
+avg 10 x prefilter:........................ 14.800000 ms
+avg 10 x remap1 from pre-split coordinates: 47.300000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 55.500000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 69.600000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 74.000000 ms
+avg 10 x remap with functor & external bspl 58.600000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000070
+
+testing bc code MIRROR spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.300000 ms
+avg 10 x remap1 from pre-split coordinates: 14.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 18.600000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 32.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 32.700000 ms
+avg 10 x remap with functor & external bspl 18.800000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000070
+
+testing bc code MIRROR spline degree 3
+avg 10 x prefilter:........................ 14.700000 ms
+avg 10 x remap1 from pre-split coordinates: 78.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap1 from unsplit coordinates:.. 82.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with internal spline:....... 98.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with functor & internal bspl 100.200000 ms
+avg 10 x remap with functor & external bspl 82.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000117
+
+testing bc code MIRROR spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.700000 ms
+avg 10 x remap1 from pre-split coordinates: 21.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap1 from unsplit coordinates:.. 25.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with internal spline:....... 39.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+avg 10 x remap with functor & internal bspl 37.300000 ms
+avg 10 x remap with functor & external bspl 24.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000137
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000117
+
+testing bc code MIRROR spline degree 4
+avg 10 x prefilter:........................ 25.400000 ms
+avg 10 x remap1 from pre-split coordinates: 114.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 115.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 142.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 145.600000 ms
+avg 10 x remap with functor & external bspl 121.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+
+testing bc code MIRROR spline degree 4 using Vc
+avg 10 x prefilter:........................ 11.500000 ms
+avg 10 x remap1 from pre-split coordinates: 31.700000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 34.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 50.300000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 54.400000 ms
+avg 10 x remap with functor & external bspl 34.700000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+
+testing bc code MIRROR spline degree 5
+avg 10 x prefilter:........................ 25.200000 ms
+avg 10 x remap1 from pre-split coordinates: 150.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap1 from unsplit coordinates:.. 156.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with internal spline:....... 186.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with functor & internal bspl 191.100000 ms
+avg 10 x remap with functor & external bspl 158.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000101
+
+testing bc code MIRROR spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.600000 ms
+avg 10 x remap1 from pre-split coordinates: 41.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap1 from unsplit coordinates:.. 45.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with internal spline:....... 58.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+avg 10 x remap with functor & internal bspl 59.100000 ms
+avg 10 x remap with functor & external bspl 47.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000102
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000101
+
+testing bc code MIRROR spline degree 6
+avg 10 x prefilter:........................ 36.700000 ms
+avg 10 x remap1 from pre-split coordinates: 205.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 206.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 243.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 256.000000 ms
+avg 10 x remap with functor & external bspl 208.100000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000111
+
+testing bc code MIRROR spline degree 6 using Vc
+avg 10 x prefilter:........................ 14.100000 ms
+avg 10 x remap1 from pre-split coordinates: 56.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 59.800000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 75.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 80.000000 ms
+avg 10 x remap with functor & external bspl 60.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000111
+
+testing bc code MIRROR spline degree 7
+avg 10 x prefilter:........................ 35.500000 ms
+avg 10 x remap1 from pre-split coordinates: 258.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap1 from unsplit coordinates:.. 259.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap with internal spline:....... 301.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+avg 10 x remap with functor & internal bspl 302.900000 ms
+avg 10 x remap with functor & external bspl 265.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000123
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000112
+
+testing bc code MIRROR spline degree 7 using Vc
+avg 10 x prefilter:........................ 14.100000 ms
+avg 10 x remap1 from pre-split coordinates: 74.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap1 from unsplit coordinates:.. 74.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap with internal spline:....... 90.500000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+avg 10 x remap with functor & internal bspl 89.100000 ms
+avg 10 x remap with functor & external bspl 73.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000127
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000142
+
+testing bc code MIRROR spline degree 8
+avg 10 x prefilter:........................ 47.900000 ms
+avg 10 x remap1 from pre-split coordinates: 322.700000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap1 from unsplit coordinates:.. 323.700000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with internal spline:....... 382.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with functor & internal bspl 377.200000 ms
+avg 10 x remap with functor & external bspl 334.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000135
+
+testing bc code MIRROR spline degree 8 using Vc
+avg 10 x prefilter:........................ 15.500000 ms
+avg 10 x remap1 from pre-split coordinates: 88.700000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap1 from unsplit coordinates:.. 91.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with internal spline:....... 112.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+avg 10 x remap with functor & internal bspl 109.300000 ms
+avg 10 x remap with functor & external bspl 93.600000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000139
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 0
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 15.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 30.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 32.700000 ms
+avg 10 x remap with functor & external bspl 31.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0 using Vc
+avg 10 x prefilter:........................ 4.800000 ms
+avg 10 x remap1 from pre-split coordinates: 7.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 17.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 17.300000 ms
+avg 10 x remap with functor & external bspl 10.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 27.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 35.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 43.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 46.500000 ms
+avg 10 x remap with functor & external bspl 39.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.000000 ms
+avg 10 x remap1 from pre-split coordinates: 9.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 13.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 20.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 21.100000 ms
+avg 10 x remap with functor & external bspl 12.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2
+avg 10 x prefilter:........................ 14.100000 ms
+avg 10 x remap1 from pre-split coordinates: 47.800000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap1 from unsplit coordinates:.. 55.600000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with internal spline:....... 69.800000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with functor & internal bspl 74.700000 ms
+avg 10 x remap with functor & external bspl 63.300000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000078
+
+testing bc code REFLECT spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.200000 ms
+avg 10 x remap1 from pre-split coordinates: 14.200000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap1 from unsplit coordinates:.. 18.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with internal spline:....... 30.800000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+avg 10 x remap with functor & internal bspl 30.500000 ms
+avg 10 x remap with functor & external bspl 17.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000092
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000078
+
+testing bc code REFLECT spline degree 3
+avg 10 x prefilter:........................ 13.800000 ms
+avg 10 x remap1 from pre-split coordinates: 75.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap1 from unsplit coordinates:.. 80.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with internal spline:....... 97.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with functor & internal bspl 101.400000 ms
+avg 10 x remap with functor & external bspl 85.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.300000 ms
+avg 10 x remap1 from pre-split coordinates: 22.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap1 from unsplit coordinates:.. 28.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with internal spline:....... 37.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+avg 10 x remap with functor & internal bspl 38.000000 ms
+avg 10 x remap with functor & external bspl 25.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000144
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000135
+
+testing bc code REFLECT spline degree 4
+avg 10 x prefilter:........................ 24.300000 ms
+avg 10 x remap1 from pre-split coordinates: 108.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 114.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 142.900000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 146.200000 ms
+avg 10 x remap with functor & external bspl 125.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000147
+
+testing bc code REFLECT spline degree 4 using Vc
+avg 10 x prefilter:........................ 12.100000 ms
+avg 10 x remap1 from pre-split coordinates: 31.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 35.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 48.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 48.200000 ms
+avg 10 x remap with functor & external bspl 34.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000147
+
+testing bc code REFLECT spline degree 5
+avg 10 x prefilter:........................ 24.300000 ms
+avg 10 x remap1 from pre-split coordinates: 150.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap1 from unsplit coordinates:.. 158.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with internal spline:....... 188.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with functor & internal bspl 188.700000 ms
+avg 10 x remap with functor & external bspl 160.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000129
+
+testing bc code REFLECT spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.000000 ms
+avg 10 x remap1 from pre-split coordinates: 44.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap1 from unsplit coordinates:.. 46.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with internal spline:....... 59.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+avg 10 x remap with functor & internal bspl 59.000000 ms
+avg 10 x remap with functor & external bspl 48.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000130
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000129
+
+testing bc code REFLECT spline degree 6
+avg 10 x prefilter:........................ 36.600000 ms
+avg 10 x remap1 from pre-split coordinates: 197.800000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 203.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 246.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 247.500000 ms
+avg 10 x remap with functor & external bspl 207.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000163
+
+testing bc code REFLECT spline degree 6 using Vc
+avg 10 x prefilter:........................ 13.900000 ms
+avg 10 x remap1 from pre-split coordinates: 57.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 58.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 73.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 78.600000 ms
+avg 10 x remap with functor & external bspl 60.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000163
+
+testing bc code REFLECT spline degree 7
+avg 10 x prefilter:........................ 36.400000 ms
+avg 10 x remap1 from pre-split coordinates: 253.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap1 from unsplit coordinates:.. 261.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap with internal spline:....... 309.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+avg 10 x remap with functor & internal bspl 303.000000 ms
+avg 10 x remap with functor & external bspl 274.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000154
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000165
+
+testing bc code REFLECT spline degree 7 using Vc
+avg 10 x prefilter:........................ 13.000000 ms
+avg 10 x remap1 from pre-split coordinates: 74.100000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap1 from unsplit coordinates:.. 76.400000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap with internal spline:....... 93.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+avg 10 x remap with functor & internal bspl 93.700000 ms
+avg 10 x remap with functor & external bspl 76.400000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000197
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000201
+
+testing bc code REFLECT spline degree 8
+avg 10 x prefilter:........................ 48.500000 ms
+avg 10 x remap1 from pre-split coordinates: 322.700000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 326.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 379.200000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 378.800000 ms
+avg 10 x remap with functor & external bspl 334.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000179
+
+testing bc code REFLECT spline degree 8 using Vc
+avg 10 x prefilter:........................ 14.600000 ms
+avg 10 x remap1 from pre-split coordinates: 90.100000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 92.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 110.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 109.600000 ms
+avg 10 x remap with functor & external bspl 98.600000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000179
+
+testing bc code PERIODIC spline degree 0
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 15.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 23.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 28.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 33.700000 ms
+avg 10 x remap with functor & external bspl 27.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0 using Vc
+avg 10 x prefilter:........................ 5.000000 ms
+avg 10 x remap1 from pre-split coordinates: 8.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 20.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 18.700000 ms
+avg 10 x remap with functor & external bspl 10.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1
+avg 10 x prefilter:........................ 4.600000 ms
+avg 10 x remap1 from pre-split coordinates: 27.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 36.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 41.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 45.600000 ms
+avg 10 x remap with functor & external bspl 40.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.100000 ms
+avg 10 x remap1 from pre-split coordinates: 9.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 13.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 19.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 20.000000 ms
+avg 10 x remap with functor & external bspl 13.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2
+avg 10 x prefilter:........................ 15.000000 ms
+avg 10 x remap1 from pre-split coordinates: 48.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap1 from unsplit coordinates:.. 56.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with internal spline:....... 71.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with functor & internal bspl 74.200000 ms
+avg 10 x remap with functor & external bspl 57.000000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000076
+
+testing bc code PERIODIC spline degree 2 using Vc
+avg 10 x prefilter:........................ 10.100000 ms
+avg 10 x remap1 from pre-split coordinates: 14.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap1 from unsplit coordinates:.. 20.400000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with internal spline:....... 31.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+avg 10 x remap with functor & internal bspl 33.000000 ms
+avg 10 x remap with functor & external bspl 18.200000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000084
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000076
+
+testing bc code PERIODIC spline degree 3
+avg 10 x prefilter:........................ 15.200000 ms
+avg 10 x remap1 from pre-split coordinates: 73.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap1 from unsplit coordinates:.. 86.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with internal spline:....... 97.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with functor & internal bspl 105.800000 ms
+avg 10 x remap with functor & external bspl 82.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000094
+
+testing bc code PERIODIC spline degree 3 using Vc
+avg 10 x prefilter:........................ 9.800000 ms
+avg 10 x remap1 from pre-split coordinates: 22.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap1 from unsplit coordinates:.. 24.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with internal spline:....... 37.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+avg 10 x remap with functor & internal bspl 38.900000 ms
+avg 10 x remap with functor & external bspl 24.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000098
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000094
+
+testing bc code PERIODIC spline degree 4
+avg 10 x prefilter:........................ 24.900000 ms
+avg 10 x remap1 from pre-split coordinates: 108.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 117.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 146.900000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 146.900000 ms
+avg 10 x remap with functor & external bspl 117.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000143
+
+testing bc code PERIODIC spline degree 4 using Vc
+avg 10 x prefilter:........................ 11.500000 ms
+avg 10 x remap1 from pre-split coordinates: 32.900000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap1 from unsplit coordinates:.. 36.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with internal spline:....... 49.800000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+avg 10 x remap with functor & internal bspl 48.900000 ms
+avg 10 x remap with functor & external bspl 38.900000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000145
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000143
+
+testing bc code PERIODIC spline degree 5
+avg 10 x prefilter:........................ 24.300000 ms
+avg 10 x remap1 from pre-split coordinates: 154.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 155.400000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 181.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 187.100000 ms
+avg 10 x remap with functor & external bspl 158.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000114
+
+testing bc code PERIODIC spline degree 5 using Vc
+avg 10 x prefilter:........................ 14.800000 ms
+avg 10 x remap1 from pre-split coordinates: 43.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap1 from unsplit coordinates:.. 45.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with internal spline:....... 58.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+avg 10 x remap with functor & internal bspl 58.700000 ms
+avg 10 x remap with functor & external bspl 45.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000114
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000114
+
+testing bc code PERIODIC spline degree 6
+avg 10 x prefilter:........................ 37.200000 ms
+avg 10 x remap1 from pre-split coordinates: 199.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap1 from unsplit coordinates:.. 207.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with internal spline:....... 244.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with functor & internal bspl 246.200000 ms
+avg 10 x remap with functor & external bspl 208.800000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000132
+
+testing bc code PERIODIC spline degree 6 using Vc
+avg 10 x prefilter:........................ 15.900000 ms
+avg 10 x remap1 from pre-split coordinates: 55.700000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap1 from unsplit coordinates:.. 59.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with internal spline:....... 76.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+avg 10 x remap with functor & internal bspl 75.000000 ms
+avg 10 x remap with functor & external bspl 60.100000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000131
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000132
+
+testing bc code PERIODIC spline degree 7
+avg 10 x prefilter:........................ 35.300000 ms
+avg 10 x remap1 from pre-split coordinates: 255.100000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap1 from unsplit coordinates:.. 264.800000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap with internal spline:....... 297.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+avg 10 x remap with functor & internal bspl 307.100000 ms
+avg 10 x remap with functor & external bspl 262.100000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000187
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000159
+
+testing bc code PERIODIC spline degree 7 using Vc
+avg 10 x prefilter:........................ 14.900000 ms
+avg 10 x remap1 from pre-split coordinates: 70.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap1 from unsplit coordinates:.. 76.200000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap with internal spline:....... 92.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+avg 10 x remap with functor & internal bspl 96.400000 ms
+avg 10 x remap with functor & external bspl 74.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000153
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000160
+
+testing bc code PERIODIC spline degree 8
+avg 10 x prefilter:........................ 49.700000 ms
+avg 10 x remap1 from pre-split coordinates: 316.600000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap1 from unsplit coordinates:.. 331.600000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with internal spline:....... 375.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with functor & internal bspl 389.500000 ms
+avg 10 x remap with functor & external bspl 327.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000167
+
+testing bc code PERIODIC spline degree 8 using Vc
+avg 10 x prefilter:........................ 15.500000 ms
+avg 10 x remap1 from pre-split coordinates: 90.400000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap1 from unsplit coordinates:.. 94.900000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with internal spline:....... 115.100000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+avg 10 x remap with functor & internal bspl 109.700000 ms
+avg 10 x remap with functor & external bspl 92.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000173
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000167
+
+testing bc code NATURAL spline degree 0
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 16.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 34.800000 ms
+avg 10 x remap with functor & external bspl 25.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0 using Vc
+avg 10 x prefilter:........................ 4.900000 ms
+avg 10 x remap1 from pre-split coordinates: 8.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 10.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 18.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 20.100000 ms
+avg 10 x remap with functor & external bspl 10.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1
+avg 10 x prefilter:........................ 4.500000 ms
+avg 10 x remap1 from pre-split coordinates: 29.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 34.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 41.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 44.000000 ms
+avg 10 x remap with functor & external bspl 42.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1 using Vc
+avg 10 x prefilter:........................ 5.500000 ms
+avg 10 x remap1 from pre-split coordinates: 9.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 13.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 22.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 20.100000 ms
+avg 10 x remap with functor & external bspl 12.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2
+avg 10 x prefilter:........................ 15.900000 ms
+avg 10 x remap1 from pre-split coordinates: 47.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 54.100000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 70.200000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 73.400000 ms
+avg 10 x remap with functor & external bspl 57.300000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000075
+
+testing bc code NATURAL spline degree 2 using Vc
+avg 10 x prefilter:........................ 9.900000 ms
+avg 10 x remap1 from pre-split coordinates: 15.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap1 from unsplit coordinates:.. 20.700000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with internal spline:....... 32.900000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+avg 10 x remap with functor & internal bspl 33.000000 ms
+avg 10 x remap with functor & external bspl 18.800000 ms
+warped image diff Mean: 0.000018
+warped image diff Maximum: 0.000082
+difference original data/restored data:
+warped image diff Mean: 0.000017
+warped image diff Maximum: 0.000075
+
+testing bc code NATURAL spline degree 3
+avg 10 x prefilter:........................ 14.800000 ms
+avg 10 x remap1 from pre-split coordinates: 77.900000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap1 from unsplit coordinates:.. 81.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with internal spline:....... 97.500000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with functor & internal bspl 99.600000 ms
+avg 10 x remap with functor & external bspl 82.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000144
+
+testing bc code NATURAL spline degree 3 using Vc
+avg 10 x prefilter:........................ 10.300000 ms
+avg 10 x remap1 from pre-split coordinates: 21.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap1 from unsplit coordinates:.. 25.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with internal spline:....... 38.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+avg 10 x remap with functor & internal bspl 38.400000 ms
+avg 10 x remap with functor & external bspl 25.300000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000165
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000144
+
+testing bc code NATURAL spline degree 4
+avg 10 x prefilter:........................ 26.100000 ms
+avg 10 x remap1 from pre-split coordinates: 108.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap1 from unsplit coordinates:.. 120.600000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with internal spline:....... 143.000000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with functor & internal bspl 146.200000 ms
+avg 10 x remap with functor & external bspl 121.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000198
+
+testing bc code NATURAL spline degree 4 using Vc
+avg 10 x prefilter:........................ 11.400000 ms
+avg 10 x remap1 from pre-split coordinates: 33.500000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap1 from unsplit coordinates:.. 36.200000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with internal spline:....... 50.100000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+avg 10 x remap with functor & internal bspl 50.500000 ms
+avg 10 x remap with functor & external bspl 37.400000 ms
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000171
+difference original data/restored data:
+warped image diff Mean: 0.000045
+warped image diff Maximum: 0.000198
+
+testing bc code NATURAL spline degree 5
+avg 10 x prefilter:........................ 24.800000 ms
+avg 10 x remap1 from pre-split coordinates: 149.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap1 from unsplit coordinates:.. 155.000000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with internal spline:....... 183.200000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with functor & internal bspl 191.400000 ms
+avg 10 x remap with functor & external bspl 158.100000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000180
+
+testing bc code NATURAL spline degree 5 using Vc
+avg 10 x prefilter:........................ 11.000000 ms
+avg 10 x remap1 from pre-split coordinates: 42.600000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap1 from unsplit coordinates:.. 45.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with internal spline:....... 58.800000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+avg 10 x remap with functor & internal bspl 59.400000 ms
+avg 10 x remap with functor & external bspl 44.700000 ms
+warped image diff Mean: 0.000023
+warped image diff Maximum: 0.000194
+difference original data/restored data:
+warped image diff Mean: 0.000022
+warped image diff Maximum: 0.000180
+
+testing bc code NATURAL spline degree 6
+avg 10 x prefilter:........................ 36.200000 ms
+avg 10 x remap1 from pre-split coordinates: 203.900000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 205.300000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 242.000000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 252.000000 ms
+avg 10 x remap with functor & external bspl 208.500000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000174
+
+testing bc code NATURAL spline degree 6 using Vc
+avg 10 x prefilter:........................ 13.100000 ms
+avg 10 x remap1 from pre-split coordinates: 58.400000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap1 from unsplit coordinates:.. 59.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with internal spline:....... 74.600000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+avg 10 x remap with functor & internal bspl 81.100000 ms
+avg 10 x remap with functor & external bspl 60.200000 ms
+warped image diff Mean: 0.000027
+warped image diff Maximum: 0.000184
+difference original data/restored data:
+warped image diff Mean: 0.000026
+warped image diff Maximum: 0.000174
+
+testing bc code NATURAL spline degree 7
+avg 10 x prefilter:........................ 36.800000 ms
+avg 10 x remap1 from pre-split coordinates: 263.100000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap1 from unsplit coordinates:.. 259.700000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap with internal spline:....... 308.400000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+avg 10 x remap with functor & internal bspl 301.900000 ms
+avg 10 x remap with functor & external bspl 264.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+difference original data/restored data:
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000214
+
+testing bc code NATURAL spline degree 7 using Vc
+avg 10 x prefilter:........................ 14.400000 ms
+avg 10 x remap1 from pre-split coordinates: 70.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap1 from unsplit coordinates:.. 80.900000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap with internal spline:....... 91.300000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+avg 10 x remap with functor & internal bspl 89.800000 ms
+avg 10 x remap with functor & external bspl 73.600000 ms
+warped image diff Mean: 0.000025
+warped image diff Maximum: 0.000307
+difference original data/restored data:
+warped image diff Mean: 0.000024
+warped image diff Maximum: 0.000307
+
+testing bc code NATURAL spline degree 8
+avg 10 x prefilter:........................ 48.500000 ms
+avg 10 x remap1 from pre-split coordinates: 322.400000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap1 from unsplit coordinates:.. 328.000000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with internal spline:....... 379.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with functor & internal bspl 385.500000 ms
+avg 10 x remap with functor & external bspl 327.500000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000258
+
+testing bc code NATURAL spline degree 8 using Vc
+avg 10 x prefilter:........................ 20.700000 ms
+avg 10 x remap1 from pre-split coordinates: 88.400000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap1 from unsplit coordinates:.. 95.800000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with internal spline:....... 112.300000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+avg 10 x remap with functor & internal bspl 110.800000 ms
+avg 10 x remap with functor & external bspl 96.100000 ms
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000243
+difference original data/restored data:
+warped image diff Mean: 0.000031
+warped image diff Maximum: 0.000258
+
+
+testing double data, double coordinates
+Image information:
+  file format: JPEG
+  width:       1920
+  height:      1079
+  pixel type:  UINT8
+  color image: yes (number of channels: 3)
+testing bc code MIRROR spline degree 0
+avg 10 x prefilter:........................ 8.200000 ms
+avg 10 x remap1 from pre-split coordinates: 12.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 20.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 31.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 37.700000 ms
+avg 10 x remap with functor & external bspl 24.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 0 using Vc
+avg 10 x prefilter:........................ 9.900000 ms
+avg 10 x remap1 from pre-split coordinates: 12.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 26.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 26.100000 ms
+avg 10 x remap with functor & external bspl 11.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1
+avg 10 x prefilter:........................ 8.800000 ms
+avg 10 x remap1 from pre-split coordinates: 19.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 26.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 39.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 41.900000 ms
+avg 10 x remap with functor & external bspl 29.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 1 using Vc
+avg 10 x prefilter:........................ 8.600000 ms
+avg 10 x remap1 from pre-split coordinates: 15.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 14.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 31.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 29.600000 ms
+avg 10 x remap with functor & external bspl 17.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2
+avg 10 x prefilter:........................ 17.600000 ms
+avg 10 x remap1 from pre-split coordinates: 31.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 39.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 60.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 63.500000 ms
+avg 10 x remap with functor & external bspl 40.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 2 using Vc
+avg 10 x prefilter:........................ 19.600000 ms
+avg 10 x remap1 from pre-split coordinates: 21.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 46.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 47.900000 ms
+avg 10 x remap with functor & external bspl 22.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 3
+avg 10 x prefilter:........................ 17.300000 ms
+avg 10 x remap1 from pre-split coordinates: 46.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 54.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 77.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.900000 ms
+avg 10 x remap with functor & external bspl 58.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.600000 ms
+avg 10 x remap1 from pre-split coordinates: 31.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 32.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 59.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 58.200000 ms
+avg 10 x remap with functor & external bspl 33.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 4
+avg 10 x prefilter:........................ 18.600000 ms
+avg 10 x remap1 from pre-split coordinates: 69.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 76.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 100.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 103.900000 ms
+avg 10 x remap with functor & external bspl 81.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 4 using Vc
+avg 10 x prefilter:........................ 21.900000 ms
+avg 10 x remap1 from pre-split coordinates: 44.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 46.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 75.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 72.500000 ms
+avg 10 x remap with functor & external bspl 46.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 5
+avg 10 x prefilter:........................ 17.800000 ms
+avg 10 x remap1 from pre-split coordinates: 93.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 101.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 125.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 128.400000 ms
+avg 10 x remap with functor & external bspl 108.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 5 using Vc
+avg 10 x prefilter:........................ 22.700000 ms
+avg 10 x remap1 from pre-split coordinates: 60.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 59.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 88.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 89.700000 ms
+avg 10 x remap with functor & external bspl 62.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 6
+avg 10 x prefilter:........................ 21.700000 ms
+avg 10 x remap1 from pre-split coordinates: 123.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 139.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 159.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 165.800000 ms
+avg 10 x remap with functor & external bspl 138.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 6 using Vc
+avg 10 x prefilter:........................ 24.000000 ms
+avg 10 x remap1 from pre-split coordinates: 79.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 85.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 109.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 110.400000 ms
+avg 10 x remap with functor & external bspl 80.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 7
+avg 10 x prefilter:........................ 20.000000 ms
+avg 10 x remap1 from pre-split coordinates: 164.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 167.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 200.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 196.400000 ms
+avg 10 x remap with functor & external bspl 176.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 7 using Vc
+avg 10 x prefilter:........................ 25.900000 ms
+avg 10 x remap1 from pre-split coordinates: 99.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 107.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 132.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 131.700000 ms
+avg 10 x remap with functor & external bspl 101.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 8
+avg 10 x prefilter:........................ 24.600000 ms
+avg 10 x remap1 from pre-split coordinates: 199.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 210.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 239.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 245.000000 ms
+avg 10 x remap with functor & external bspl 217.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code MIRROR spline degree 8 using Vc
+avg 10 x prefilter:........................ 27.900000 ms
+avg 10 x remap1 from pre-split coordinates: 124.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 123.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 160.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 163.000000 ms
+avg 10 x remap with functor & external bspl 125.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0
+avg 10 x prefilter:........................ 8.200000 ms
+avg 10 x remap1 from pre-split coordinates: 12.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 19.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 32.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 36.000000 ms
+avg 10 x remap with functor & external bspl 24.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 0 using Vc
+avg 10 x prefilter:........................ 8.800000 ms
+avg 10 x remap1 from pre-split coordinates: 11.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 12.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 26.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 26.600000 ms
+avg 10 x remap with functor & external bspl 12.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1
+avg 10 x prefilter:........................ 9.000000 ms
+avg 10 x remap1 from pre-split coordinates: 19.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 27.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 39.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 42.700000 ms
+avg 10 x remap with functor & external bspl 30.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 1 using Vc
+avg 10 x prefilter:........................ 9.400000 ms
+avg 10 x remap1 from pre-split coordinates: 14.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 15.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 32.000000 ms
+avg 10 x remap with functor & external bspl 16.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2
+avg 10 x prefilter:........................ 16.900000 ms
+avg 10 x remap1 from pre-split coordinates: 30.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 38.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 60.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 65.200000 ms
+avg 10 x remap with functor & external bspl 40.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 2 using Vc
+avg 10 x prefilter:........................ 18.500000 ms
+avg 10 x remap1 from pre-split coordinates: 21.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 48.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 47.700000 ms
+avg 10 x remap with functor & external bspl 22.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 3
+avg 10 x prefilter:........................ 17.000000 ms
+avg 10 x remap1 from pre-split coordinates: 46.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 57.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 77.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 79.900000 ms
+avg 10 x remap with functor & external bspl 56.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.800000 ms
+avg 10 x remap1 from pre-split coordinates: 31.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 33.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 58.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 57.100000 ms
+avg 10 x remap with functor & external bspl 34.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 4
+avg 10 x prefilter:........................ 18.600000 ms
+avg 10 x remap1 from pre-split coordinates: 68.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 75.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 104.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 104.000000 ms
+avg 10 x remap with functor & external bspl 78.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 4 using Vc
+avg 10 x prefilter:........................ 20.700000 ms
+avg 10 x remap1 from pre-split coordinates: 43.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 45.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 71.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 74.800000 ms
+avg 10 x remap with functor & external bspl 45.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 5
+avg 10 x prefilter:........................ 19.300000 ms
+avg 10 x remap1 from pre-split coordinates: 99.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 101.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 126.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 130.300000 ms
+avg 10 x remap with functor & external bspl 104.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 5 using Vc
+avg 10 x prefilter:........................ 24.100000 ms
+avg 10 x remap1 from pre-split coordinates: 59.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 61.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 93.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 89.500000 ms
+avg 10 x remap with functor & external bspl 60.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 6
+avg 10 x prefilter:........................ 20.800000 ms
+avg 10 x remap1 from pre-split coordinates: 124.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 132.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 159.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 168.200000 ms
+avg 10 x remap with functor & external bspl 136.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 6 using Vc
+avg 10 x prefilter:........................ 24.500000 ms
+avg 10 x remap1 from pre-split coordinates: 78.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 80.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 111.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 110.600000 ms
+avg 10 x remap with functor & external bspl 85.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 7
+avg 10 x prefilter:........................ 21.900000 ms
+avg 10 x remap1 from pre-split coordinates: 158.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 168.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 195.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 200.500000 ms
+avg 10 x remap with functor & external bspl 171.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 7 using Vc
+avg 10 x prefilter:........................ 26.100000 ms
+avg 10 x remap1 from pre-split coordinates: 98.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 105.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 128.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 135.300000 ms
+avg 10 x remap with functor & external bspl 100.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 8
+avg 10 x prefilter:........................ 24.200000 ms
+avg 10 x remap1 from pre-split coordinates: 198.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 209.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 247.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 242.200000 ms
+avg 10 x remap with functor & external bspl 213.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code REFLECT spline degree 8 using Vc
+avg 10 x prefilter:........................ 28.400000 ms
+avg 10 x remap1 from pre-split coordinates: 131.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 127.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 158.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 160.900000 ms
+avg 10 x remap with functor & external bspl 126.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0
+avg 10 x prefilter:........................ 8.000000 ms
+avg 10 x remap1 from pre-split coordinates: 11.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 33.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 35.300000 ms
+avg 10 x remap with functor & external bspl 24.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 0 using Vc
+avg 10 x prefilter:........................ 8.600000 ms
+avg 10 x remap1 from pre-split coordinates: 11.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 24.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 25.200000 ms
+avg 10 x remap with functor & external bspl 10.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1
+avg 10 x prefilter:........................ 8.700000 ms
+avg 10 x remap1 from pre-split coordinates: 18.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 26.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 38.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 42.400000 ms
+avg 10 x remap with functor & external bspl 29.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 1 using Vc
+avg 10 x prefilter:........................ 8.700000 ms
+avg 10 x remap1 from pre-split coordinates: 15.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 15.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 29.900000 ms
+avg 10 x remap with functor & external bspl 17.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2
+avg 10 x prefilter:........................ 17.100000 ms
+avg 10 x remap1 from pre-split coordinates: 30.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 37.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 59.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 65.400000 ms
+avg 10 x remap with functor & external bspl 41.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 2 using Vc
+avg 10 x prefilter:........................ 19.200000 ms
+avg 10 x remap1 from pre-split coordinates: 20.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 23.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 46.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 48.200000 ms
+avg 10 x remap with functor & external bspl 23.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 3
+avg 10 x prefilter:........................ 17.100000 ms
+avg 10 x remap1 from pre-split coordinates: 46.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 53.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 79.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 76.300000 ms
+avg 10 x remap with functor & external bspl 54.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.700000 ms
+avg 10 x remap1 from pre-split coordinates: 35.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 31.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 57.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 56.000000 ms
+avg 10 x remap with functor & external bspl 31.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 4
+avg 10 x prefilter:........................ 17.100000 ms
+avg 10 x remap1 from pre-split coordinates: 68.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 75.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 99.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 105.700000 ms
+avg 10 x remap with functor & external bspl 79.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 4 using Vc
+avg 10 x prefilter:........................ 21.400000 ms
+avg 10 x remap1 from pre-split coordinates: 48.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 47.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 73.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 74.200000 ms
+avg 10 x remap with functor & external bspl 46.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 5
+avg 10 x prefilter:........................ 18.200000 ms
+avg 10 x remap1 from pre-split coordinates: 94.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 104.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 126.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 131.600000 ms
+avg 10 x remap with functor & external bspl 103.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 5 using Vc
+avg 10 x prefilter:........................ 21.000000 ms
+avg 10 x remap1 from pre-split coordinates: 60.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 60.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 87.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 87.100000 ms
+avg 10 x remap with functor & external bspl 61.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 6
+avg 10 x prefilter:........................ 20.600000 ms
+avg 10 x remap1 from pre-split coordinates: 128.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 133.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 159.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 161.100000 ms
+avg 10 x remap with functor & external bspl 137.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 6 using Vc
+avg 10 x prefilter:........................ 25.800000 ms
+avg 10 x remap1 from pre-split coordinates: 82.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 78.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 110.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 111.800000 ms
+avg 10 x remap with functor & external bspl 79.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 7
+avg 10 x prefilter:........................ 20.100000 ms
+avg 10 x remap1 from pre-split coordinates: 160.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 174.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 193.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 195.700000 ms
+avg 10 x remap with functor & external bspl 172.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 7 using Vc
+avg 10 x prefilter:........................ 26.400000 ms
+avg 10 x remap1 from pre-split coordinates: 103.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 100.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 130.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 129.400000 ms
+avg 10 x remap with functor & external bspl 102.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 8
+avg 10 x prefilter:........................ 23.400000 ms
+avg 10 x remap1 from pre-split coordinates: 206.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 209.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 239.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 249.000000 ms
+avg 10 x remap with functor & external bspl 214.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code PERIODIC spline degree 8 using Vc
+avg 10 x prefilter:........................ 30.400000 ms
+avg 10 x remap1 from pre-split coordinates: 123.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 126.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 163.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 157.400000 ms
+avg 10 x remap with functor & external bspl 124.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0
+avg 10 x prefilter:........................ 7.700000 ms
+avg 10 x remap1 from pre-split coordinates: 13.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 17.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 32.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 36.500000 ms
+avg 10 x remap with functor & external bspl 22.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 0 using Vc
+avg 10 x prefilter:........................ 8.500000 ms
+avg 10 x remap1 from pre-split coordinates: 12.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 11.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 26.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 24.000000 ms
+avg 10 x remap with functor & external bspl 10.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1
+avg 10 x prefilter:........................ 8.300000 ms
+avg 10 x remap1 from pre-split coordinates: 19.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 25.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 39.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 46.100000 ms
+avg 10 x remap with functor & external bspl 28.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 1 using Vc
+avg 10 x prefilter:........................ 9.300000 ms
+avg 10 x remap1 from pre-split coordinates: 14.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 15.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 29.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 30.700000 ms
+avg 10 x remap with functor & external bspl 15.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2
+avg 10 x prefilter:........................ 17.000000 ms
+avg 10 x remap1 from pre-split coordinates: 30.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 37.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 59.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 63.900000 ms
+avg 10 x remap with functor & external bspl 39.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 2 using Vc
+avg 10 x prefilter:........................ 18.300000 ms
+avg 10 x remap1 from pre-split coordinates: 21.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 22.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 46.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 48.000000 ms
+avg 10 x remap with functor & external bspl 23.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 3
+avg 10 x prefilter:........................ 16.600000 ms
+avg 10 x remap1 from pre-split coordinates: 52.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 53.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 76.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 78.500000 ms
+avg 10 x remap with functor & external bspl 56.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 3 using Vc
+avg 10 x prefilter:........................ 18.700000 ms
+avg 10 x remap1 from pre-split coordinates: 32.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 33.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 55.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 57.500000 ms
+avg 10 x remap with functor & external bspl 31.900000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 4
+avg 10 x prefilter:........................ 18.300000 ms
+avg 10 x remap1 from pre-split coordinates: 68.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 78.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 100.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 102.800000 ms
+avg 10 x remap with functor & external bspl 78.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 4 using Vc
+avg 10 x prefilter:........................ 21.800000 ms
+avg 10 x remap1 from pre-split coordinates: 44.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 44.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 74.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 73.100000 ms
+avg 10 x remap with functor & external bspl 45.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 5
+avg 10 x prefilter:........................ 18.400000 ms
+avg 10 x remap1 from pre-split coordinates: 99.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 100.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 124.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 127.200000 ms
+avg 10 x remap with functor & external bspl 105.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 5 using Vc
+avg 10 x prefilter:........................ 21.200000 ms
+avg 10 x remap1 from pre-split coordinates: 59.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 60.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 93.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 87.500000 ms
+avg 10 x remap with functor & external bspl 60.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 6
+avg 10 x prefilter:........................ 19.900000 ms
+avg 10 x remap1 from pre-split coordinates: 123.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 134.100000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 156.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 169.800000 ms
+avg 10 x remap with functor & external bspl 136.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 6 using Vc
+avg 10 x prefilter:........................ 26.000000 ms
+avg 10 x remap1 from pre-split coordinates: 76.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 79.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 110.800000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 110.800000 ms
+avg 10 x remap with functor & external bspl 83.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 7
+avg 10 x prefilter:........................ 20.200000 ms
+avg 10 x remap1 from pre-split coordinates: 157.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 167.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 195.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 202.100000 ms
+avg 10 x remap with functor & external bspl 169.600000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 7 using Vc
+avg 10 x prefilter:........................ 25.800000 ms
+avg 10 x remap1 from pre-split coordinates: 99.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 104.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 130.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 136.900000 ms
+avg 10 x remap with functor & external bspl 100.300000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 8
+avg 10 x prefilter:........................ 24.100000 ms
+avg 10 x remap1 from pre-split coordinates: 198.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 211.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 246.700000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 246.200000 ms
+avg 10 x remap with functor & external bspl 216.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
+testing bc code NATURAL spline degree 8 using Vc
+avg 10 x prefilter:........................ 30.100000 ms
+avg 10 x remap1 from pre-split coordinates: 129.500000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap1 from unsplit coordinates:.. 125.000000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with internal spline:....... 161.200000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+avg 10 x remap with functor & internal bspl 160.900000 ms
+avg 10 x remap with functor & external bspl 123.400000 ms
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+difference original data/restored data:
+warped image diff Mean: 0.000000
+warped image diff Maximum: 0.000000
+
diff --git a/mapping.h b/mapping.h
new file mode 100644
index 0000000..7819af2
--- /dev/null
+++ b/mapping.h
@@ -0,0 +1,1555 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file mapping.h
+
+    \brief code to handle the processing of incoming real coordinates
+
+    One might argue that incoming coordinates should be required to be inside
+    the defined range of the coefficients. While this may or may not be enforced
+    or checked with the code given here, I also provide 'mapping' routines which
+    correspond to some common boundary conditions used for a spline. With these
+    mappings, incoming coordinates are 'folded into' the defined range of the spline
+    by applying the relevant transformation, like mirroring or applying a modulo
+    operation for periodic BCs.
+
+    While the mappings are central to the evaluation process, there isn't usually a
+    need to directly handle them: If a bspline object is used to contain a coefficient
+    array and its metadata, the appropriate mapping can be derived from it, and the
+    mapping doesn't have to be made explicit.
+
+    Note how we use the same enumeration from common.h to codify boundary
+    conditions and bracings, because often they correspond, and, more specifically,
+    those boundary condition codes used to create coefficient arrays (bspline objects)
+    with implicit prefiltering schemes have corresponding mappings.
+
+    for BC codes MIRROR, REFLECT and PERIODIC this means that incoming coordinates will
+    be mapped so that they 'land' at a coordinate inside the defined range. But note that
+    for NATURAL BCs, there is no coordinate inside the defined range that would produce
+    the extrapolated value used for natural boundary conditions, since this value is not
+    to be found anywhere inside the defined range - it's instead the result of a mathematical
+    operation. So the mapping in this case picks the value at the end of the defined range.
+
+    First we set up a few types to handle 'split coordinates'. A split coordinate consists
+    of an integral part and a fractional part. Typically, these coordinates are the result
+    of applying the modf operation to a real-valued coordinate. While commonly the 'splitting'
+    of the real-valued coordinates is performed during the spline evaluation, this can lead
+    to poorer performance, and if the splitting is done beforehand and the split coordinates
+    are reused, there is less arithmetic to perform - at the cost of higher memory use, since
+    split coordinates are bulkier.
+
+    The three classes we use in this context are all intended as n-dimensional objects.
+    The definition of 'split_type' as containing two n-dimensional components instead of
+    a TinyVector of 1D-split types is due to the fact that we need these separate components
+    in the evaluation: the first one determines the origin of the subarray of the braced spline
+    which will be processed into the result, and the second component constitutes a
+    'nd_fraction', which is needed to calculate the evaluator's weights. So we go with
+    the struct of arrays approach instead of an array of structs. A slight disadvantage here is
+    that 1D split coordinates are also formulated as manifolds (TinyVector < ... , 1  >).
+
+    Next we define 'mappings'. Coordinates at which the spline is to be evaluated come in
+    originally as real values. These real values have to be split (therefore the types
+    above), but we also may want to perform the same mirroring or other boundary conditions
+    on input coordinates, since we can only evaluate the spline within its defined range.
+    This is what mappings do. They apply the boundary conditions and then split the coordinate.
+    Our approach to incoming coordinates outside the spline's defined range is this:
+    If the boundary conditions lend themselves to it, such coordinates are mapped onto
+    coordinates inside the defined range ('folded in', as I say). If the boundary conditions
+    don't work this way (like with 'natural' boundary conditions, where the extrapolated signal
+    is the result of an arithmetic operation), we do something arbitrary to the outlying
+    coordinate (like clamp it to the nearest extremal value) in the conviction that a user
+    requesting such boundary conditions is aware of the problem.
+
+    vectorized operation with long ints is not yet available, so for now I'll have to limit
+    the vectorized code to 32bit ints. This will still suffice for most cases, but if the arrays
+    become really large it'll eventually break.
+    
+    I hope that by the time this becomes an issue, 64bit int SIMD operation becomes available,
+    until then, if it has to be used, only the non-vectorized variant can be used for long ints,
+    or (slow) workaround code for vectors of long values has to be provided (which I did out of
+    curiosity: it works, but it makes the vectorized code slower than the scalar)
+    Anyway I'd advocate splitting very large coefficient arrays into tiles. Since the support
+    of the evaluation routine is only as large as the spline's order, duplicated strips of
+    coefficients in the margins would take up a comparatively small part of the tile and the
+    access to memory would be more localized, since access within the same tile would be the
+    likely case. I have some hopes for vigra's chunked arrays in this context.
+
+*/
+
+#ifndef VSPLINE_MAPPING_H
+#define VSPLINE_MAPPING_H
+ 
+#include <vigra/multi_iterator.hxx>
+#include <vigra/multi_math.hxx>
+
+#include "common.h"
+
+namespace vspline {
+
+using namespace std ;
+using namespace vigra ;
+using namespace vigra::multi_math;
+
+typedef int default_ic_type ;
+typedef float default_rc_type ;
+
+// type naming scheme:
+// nd_ : multidimensional
+// mc_ : multichannel
+// ic  : integral coordinate/integral component
+// rc  : real coordinate/real component
+// value : 'payload' type, like pixel
+// ele : elementary type of val
+// _v: vectorized type, single Vc::Vector or SimdArray, or structure thereof
+
+/// nd_ic_type is simply a TinyVector of integral numbers. These are the
+/// integral parts of the incoming real coordinates, which are used to determine the
+/// location of the window into the coefficient array which will be used to produce the
+/// evaluation result for the incoming coordinates.
+
+template < int N = 1 ,
+           typename IT = default_ic_type >
+struct nd_ic_type : public TinyVector < IT , N >
+{
+  // N integral components of type IT, stored in the TinyVector base
+  enum { dimension = N } ;
+  typedef IT value_type ;   // elementary type of the components
+  typedef IT ic_type ;      // the same type, named as 'integral component'
+} ;
+
+/// nd_rc_type is a TinyVector of real numbers in the range of [0.0 - 1.0],
+/// in the case of odd splines, or [-0.5 - 0.5] in the case of even splines. They constitute
+/// the fractional part of a real coordinate left over when the integral part is taken away.
+/// They are the 'delta' which is fed into the weight-generating functors producing the
+/// weights of the weighted summation of the coefficients in the window which is defined
+/// by the integral parts of the coordinates.
+
+template < int N = 1 ,
+           typename FT = default_rc_type >
+struct nd_rc_type : public TinyVector < FT , N >
+{
+  // N real components of type FT, stored in the TinyVector base
+  enum { dimension = N } ;
+  typedef FT value_type ;   // elementary type of the components
+  typedef FT rc_type ;      // the same type, named as 'real component'
+} ;
+
+/// struct split_type contains n-dimensional 'split coordinates', consisting of the
+/// integral and fractional part of the original real coordinates, separated so that
+/// they can be processed by the evaluation routine.
+
+template < int N = 1 ,
+           typename IT = default_ic_type ,
+           typename FT = default_rc_type >
+struct split_type
+{
+  enum { dimension = N } ;
+  typedef FT value_type ;
+  typedef FT rc_type ;
+  typedef IT ic_type ;
+  nd_ic_type < N , IT > select ; ///< integral parts; they select the range of coefficients
+  nd_rc_type < N , FT > tune ;   ///< fractional parts; they are the base for the weights
+} ;
+
+#ifdef USE_VC
+
+/// since Vc doesn't offer a vectorized modf function, we have to code it. We make the effort not
+/// to simply iterate over the vector's components but write 'proper' vector code to make this
+/// operation as efficient as possible.
+
+template <class real_v>
+real_v v_modf ( const real_v& source ,
+                real_v * const iptr )
+{
+  typedef typename real_v::mask_type mask_type ;
+  typedef typename real_v::EntryType single_type ; // not used in this function
+  
+  mask_type negative = Vc::isnegative ( source ) ;
+  real_v help ( source ) ;
+  
+  // The return value holds the fractional parts of source, the integral parts are
+  // stored in *iptr. Components flagged as negative need extra work, which is kept
+  // in a separate branch so that the - hopefully - most common case, where no
+  // component is negative, takes the short route.
+  if ( any_of ( negative ) )
+  {
+    help(negative) = -source ;      // flip the sign of the negative components only
+    (*iptr) = Vc::floor ( help ) ;  // integral part of the sign-flipped values
+    help -= (*iptr) ;               // what is left over is the fraction
+    (*iptr)(negative) = -(*iptr) ;  // hand the sign back to the integral part...
+    help(negative) = -help ;        // ...and to the fraction
+    return help ;
+  }
+  else
+  {
+    // no component is flagged as negative, so the operation is trivial:
+    (*iptr) = Vc::floor ( source ) ;
+    return ( help - *iptr ) ;
+  }
+}
+
+/// for now, I'm taking the easy road to a vectorized fmod by using apply and a lambda expression
+/// TODO: handcoding this might increase performance
+
+template <class real_v>
+real_v v_fmod ( const real_v& lhs ,
+                const typename real_v::EntryType rhs )
+{
+  typedef typename real_v::EntryType lane_type ;
+  // the lambda is handed to lhs.apply(); the scalar fmod does the actual
+  // work, with rhs as the divisor for every value it receives
+  return lhs.apply ( [&rhs] ( lane_type dividend )
+                     { return fmod ( dividend , rhs ) ; } ) ;
+}
+
+#endif // USE_VC
+
+/// to handle the transformation of real-valued coordinates to the integer/real pairs
+/// processed by the solver, we use 'mapping' objects. These handle several tasks needed
+/// in this context:
+///
+/// - they apply boundary conditions, like mirroring
+/// - they split the resulting value into an integral and a real part
+/// - they take into account special requirements for odd and even splines
+///
+/// we derive all mappings from this common base class and later on access mappings via
+/// a pointer to the base class, which has a virtual operator().
+///
+/// It's slightly annoying that the folding of the coordinates into the defined range
+/// and the splitting into real and integral part should be lumped together in one operation,
+/// but decoupling the two would require more arithmetic, and the goal is to be as fast as
+/// possible, after all. For now, I'll consider those mappings which perform 'folding in'
+/// of variables as an added bonus; the same results can be achieved by performing a coordinate
+/// transformation routine on the coordinates first and then applying a minimal mapping
+/// like 'reject' or 'limit'.
+///
+/// TODO: perform runtime measurements to see if the 'lumping together' really has a
+/// measurable performance advantage. If not, all mappings performing arithmetic could
+/// be abandoned, which would be cleaner design-wise.
+///
+/// I have made another design decision concerning the size of the bracing. Initially I was
+/// using a right brace which was as small as possible, but this forced me to check incoming
+/// coordinates to odd splines for v == M-1, because these then needed special treatment -
+/// the split coordinate had to be set to M-2, 1.0 instead of M-1, 0.0 which would have
+/// produced an out-of-bounds access. Now, for odd splines,  I use a right brace which is
+/// one slice larger, and accessing the spline at M-1, 0.0 is now safe, so the test can be omitted.
+/// I feel the sacrifice of one slice's worth of memory is worth the performance gain and increased
+/// code transparency.
+
+template < typename split_type , int vsize = 1 >
+class mapping
+{
+public:
+  
+  typedef typename split_type::ic_type int_t ;
+  typedef typename split_type::rc_type real_t ;
+
+  /// virtual destructor, so that a derived mapping can safely be destroyed
+  /// via a pointer to this base class
+  virtual ~mapping() { }
+
+  /// pure virtual: every specific mapping has to provide it. v comes in, iv and fv go out.
+  
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv ) = 0 ;
+  
+  /// operator() for this parameter signature is delegated to the first one.
+  /// while the first one operates on single values, here we have a split_type
+  /// (which is n-D), and pick out the split parts for a specific dimension
+  
+  void operator() ( real_t v , split_type& s , const int & dim )
+  {
+    operator() ( v , s.select[dim] , s.tune[dim] ) ;
+  }
+  
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+
+  /// this is the operator() for vectorized operation, it is pure virtual as well
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv ) = 0 ;
+
+#endif
+
+  /// produce the spline coordinates expressed as a floating point value.
+  /// this is to test the mappings and make sure they do what they're supposed to do.
+  
+  real_t test ( real_t v )
+  {
+    int_t iv ;
+    real_t fv ;
+    this->operator() ( v , iv , fv ) ;
+    return real_t(iv) + real_t(fv) ;
+  }
+
+} ;
+
+/// the simplest and fastest mapping is the 'raw' mapping. Here, only the split is performed,
+/// come what may. The user is responsible for the suitability of incoming coordinates, it is
+/// silently assumed that 0.0 <= v <= M-1; if this is not true, the results may be false or
+/// the program may crash.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_raw: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+  
+public:
+  
+  // M is accepted, but this mapping makes no use of it
+  odd_mapping_raw ( int M ) { }
+    
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    real_t whole ;             // integral part, still as a real value
+    fv = modf ( v , &whole ) ; // fv receives the remainder
+    iv = int_t ( whole ) ;     // convert the integral part
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant, using v_modf instead of modf
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;
+    iv = whole ;               // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_raw: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+    
+public:
+  
+  // M is accepted, but this mapping makes no use of it
+  even_mapping_raw ( int M ) { }
+  
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    real_t whole ;
+    fv = modf ( v + real_t ( 0.5 ) , &whole ) ; // split the value shifted up by one half
+    fv -= real_t ( 0.5 ) ;                      // and take the shift back from the remainder
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant: same shift - split - shift back sequence, using v_modf
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    const real_t half ( 0.5 ) ;
+    rc_v whole ;
+    v += half ;
+    fv = v_modf ( v , &whole ) ;
+    fv -= half ;
+    iv = whole ;               // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+// All remaining mappings in this file are safe insofar as out-of-bounds incoming coordinates
+// will either result in an exception or be handled in a way specific to the mapping.
+
+/// the next mapping mode is LIMIT. Here any out-of-bounds coordinates are set to the
+/// nearest valid value.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_limit: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+  
+  const real_t _ceiling ;   // M - 1, larger values are limited to this one
+  
+public:
+  
+  odd_mapping_limit ( int M )
+  : _ceiling ( M - 1 )
+    { }
+    
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    // out-of-bounds values don't need to be split: the result is
+    // one of the two ends of the range, with a remainder of zero
+    const bool below = ( v < 0.0 ) ;
+    const bool above = ( ! below ) && ( v > _ceiling ) ;
+    if ( below || above )
+    {
+      iv = below ? int_t ( 0 ) : int_t ( _ceiling ) ;
+      fv = 0.0 ;
+    }
+    else
+    {
+      real_t whole ;
+      fv = modf ( v , &whole ) ;  // remainder is in [0...1[
+      iv = int_t ( whole ) ;
+    }
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant: here the values are limited first and split afterwards
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    // masked assignments, only the out-of-bounds components are modified
+    v ( v > _ceiling ) = _ceiling ;
+    v ( v < real_t ( 0.0 ) ) = real_t ( 0.0 ) ;
+    
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;  // remainder is in [0...1[
+    iv = whole ;                  // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_limit: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+    
+  const real_t _ceiling ;   // M - 1, larger values are limited to this one
+  
+public:
+  
+  even_mapping_limit ( int M )
+  : _ceiling ( M - 1 )
+    { }
+  
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    // limit v to [ 0 : _ceiling ], the upper bound is tested first
+    v = ( v > _ceiling ) ? real_t ( _ceiling )
+      : ( v < 0 )        ? real_t ( 0.0 )
+      :                    v ;
+
+    // split the shifted value, then take the shift back from the remainder
+    real_t whole ;
+    fv = modf ( v + real_t(0.5) , &whole ) - real_t(0.5) ;
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant: limit with masked assignments, then split with v_modf
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    const real_t half ( 0.5 ) ;
+    v ( v > _ceiling ) = _ceiling ;
+    v ( v < real_t ( 0.0 ) ) = real_t ( 0.0 ) ;
+
+    rc_v whole ;
+    v += half ;
+    fv = v_modf ( v , &whole ) ;
+    fv -= half ;
+    iv = whole ;              // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+/// with mapping mode REJECT, out-of-bounds incoming coordinate values result in
+/// an exception (out_of_bounds). The calling code has to catch the exception if it
+/// wants to proceed. If the mapping is on vector types, some of the result may be
+/// valid. The out-of-bounds results can be recognized by the int part being set to
+/// -1, which is a value which can't be valid. Any out-of-bounds element triggers
+/// the exception.
+/// TODO: We might simply throw the exception and not bother with the -1 masking.
+/// The exception object might be made to contain the offending value.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_reject: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+  
+  const real_t _ceiling ;   // M - 1, larger values are rejected
+  
+public:
+  
+  odd_mapping_reject ( int M )
+  : _ceiling ( M - 1 )
+    { }
+    
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    const bool inside = ! ( v < 0.0 || v > _ceiling ) ;
+    if ( ! inside )
+      throw out_of_bounds() ;   // iv and fv are left untouched in this case
+
+    real_t whole ;
+    fv = modf ( v , &whole ) ;  // remainder is in [0...1[
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    // the split is done for all components, no matter
+    // whether they are inside the bounds or not
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;
+    iv = whole ;
+
+    // components below 0.0 or above _ceiling are out of bounds
+    auto rejected =   ( v < real_t ( 0.0 ) )
+                    | ( v > real_t ( _ceiling ) ) ;
+
+    if ( any_of ( rejected ) )
+    {
+      // mark the offending components with -1 in iv, then fail;
+      // iv and fv have been written to at this point
+      iv ( rejected ) = int_t ( -1 ) ;
+      throw out_of_bounds() ;
+    }
+  }
+
+#endif
+} ;
+
+/// I use the same rejection criterion here as in odd splines, which is debatable:
+/// one might argue that coordinate values from -0.5 to 0.0 and M - 1.0 to M - 0.5
+/// can be processed meaningfully and hence should not be rejected.
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_reject: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::ic_type int_t ;
+  typedef typename split_type::rc_type real_t ;
+    
+  const real_t _ceiling ;   // M - 1, larger values are rejected
+
+public:
+  
+  even_mapping_reject ( int M )
+  : _ceiling ( M - 1 )
+    { } ;
+  
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    real_t fl_i ;
+    if ( v < 0.0 || v > _ceiling )  // test range
+      throw out_of_bounds() ;
+
+    // split v into integral and remainder in [-0.5 ... 0.5]
+    fv = modf ( v + real_t(0.5) , &fl_i ) - real_t(0.5) ;
+    iv = int_t ( fl_i ) ;     // set integer part from float representing it
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+
+  /// this is the operator() for vectorized operation
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    // the range test has to look at v before it is shifted
+    auto too_large = ( v > _ceiling ) ;
+    auto too_small = ( v < real_t ( 0.0 ) ) ;
+    auto mask = too_small | too_large ;
+
+    // all components get the split for even splines, with the remainder in
+    // [-0.5 ... 0.5], so the in-bounds components are valid even if we throw
+    rc_v fl_i ;
+    v += real_t(0.5) ;
+    fv = v_modf ( v , &fl_i ) ;
+    fv -= real_t(0.5) ;
+    iv = fl_i  ;              // set integer part from float representing it
+
+    if ( any_of ( mask ) )
+    {
+      // v has some out-of-bounds values: set iv to -1 there and fail
+      iv ( mask ) = int_t ( -1 ) ;
+      throw out_of_bounds() ;
+    }
+  }
+
+#endif
+} ;
+
+/// the next mapping is for coefficients/data mirrored at the ends of the defined
+/// range. This is probably the most commonly used type and also the type which
+/// P. Thevenaz recommends. Like all mappings defined in this body of code, it comes
+/// in two variants: one for odd splines and one for even ones. The handling is
+/// different for both cases: in an odd spline, the delta is in [0:1], in even
+/// splines it's in [-0.5:+0.5].
+///
+/// This is mirroring 'on the bounds':
+///
+/// f ( -x ) == f ( x )
+///
+/// f ( (M-1) + x ) == f ( (M-1) - x )
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_mirror: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::ic_type int_t ;
+  typedef typename split_type::rc_type real_t ;
+  
+  const real_t _ceiling ;   // M - 1, the right mirror point
+  const real_t _ceilx2 ;    // 2 * ( M - 1 ), one full period of the mirrored signal
+  
+public:
+  
+  odd_mapping_mirror ( int M )
+  : _ceiling ( M - 1 ) ,    // initializers in declaration order
+    _ceilx2 ( M + M - 2 )
+    { } ;
+    
+  // the folded coordinate can be exactly M - 1, which results in iv == M - 1
+  // and fv == 0.0. This case is not given any special treatment here.
+    
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    real_t fl_i ;
+    if ( v < 0.0 )              // apply mirror left boundary condition
+      v = -v ;
+    if ( v > _ceiling )
+    {
+      v = fmod ( v , _ceilx2 ) ; // map to first period (which is 2 * ( M - 1 ))
+      if ( v > _ceiling )
+      {
+        v = _ceilx2 - v ;        // right border mirror
+      }
+    }
+    // now we are sure that v is safely inside [ 0 : _ceiling ]
+    fv = modf ( v , &fl_i ) ; // split v into integral and remainder from [0...1[
+    iv = int_t ( fl_i ) ;     // set integer part from float representing it
+  }
+
+#ifdef USE_VC
+
+  // vectorized version of operator()
+  
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+
+  /// this is the operator() for vectorized operation
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    rc_v fl_i ;
+    
+    v = abs ( v ) ;                   // left mirror, v is now >= 0
+
+    // if any component is beyond the right bound, all components go through
+    // the folding; components which were inside [ 0 : _ceiling ] already come
+    // out with the value they had, apart from rounding
+    if ( any_of ( v > _ceiling ) )
+    {
+      v = v_fmod ( v , _ceilx2 ) ;        // map to one full period
+      v -= _ceiling ;                     // center
+      v = abs ( v ) ;                     // map to half period
+      v = _ceiling - v ;                  // flip
+    }
+    // here we are sure that v is safely inside [ 0 : _ceiling ]
+    fv = v_modf ( v , &fl_i ) ; // split v into integral and remainder from [0...1[
+    iv = fl_i  ;      // set integer part from float representing it
+  }
+
+#endif
+} ;
+
+/// even splines give us more 'room to manoeuvre' - we can safely access the
+/// coefficient matrix at either end of the defined range and even up to half the
+/// spline's unit spacing beyond the boundaries. We pay for this by needing a
+/// brace as large as for the next-larger odd spline. This fact - that the support
+/// for even splines is just as large as the one for the next higher odd spline -
+/// makes them less commonly used.
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_mirror: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+    
+  const real_t _ceiling ;   // M - 1, the right mirror point
+  const real_t _ceilx2 ;    // 2 * ( M - 1 ), one full period of the mirrored signal
+
+public:
+  
+  even_mapping_mirror ( int M )
+  : _ceiling ( M - 1 ) ,
+    _ceilx2 ( M + M - 2 )
+    { }
+  
+  // fold v into the range by mirroring on both bounds, then split it
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    // mirror on the left bound
+    v = ( v < 0.0 ) ? -v : v ;
+
+    if ( v > _ceiling )
+    {
+      // reduce to one full period; what is still beyond the right
+      // bound after that is mirrored back in
+      const real_t phase = fmod ( v , _ceilx2 ) ;
+      v = ( phase > _ceiling ) ? _ceilx2 - phase : phase ;
+    }
+
+    // split the shifted value, then take the shift back from the remainder
+    real_t whole ;
+    fv = modf ( v + real_t(0.5) , &whole ) - real_t(0.5) ;
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    const real_t half ( 0.5 ) ;
+    v = abs ( v ) ;                     // mirror on the left bound
+    if ( any_of ( v > _ceiling ) )
+    {
+      v = v_fmod ( v , _ceilx2 ) ;      // reduce to one full period
+      v -= _ceiling ;                   // move the right bound to zero
+      v = abs ( v ) ;                   // fold the two halves onto each other
+      v = _ceiling - v ;                // and back to [ 0 : _ceiling ]
+    }
+
+    rc_v whole ;
+    v += half ;
+    fv = v_modf ( v , &whole ) ;
+    fv -= half ;
+    iv = whole ;              // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+/// Here the periodic mapping is presented so that the first coefficient coincides
+/// with the origin of the coordinate system. The bracing is supposed to have been done with
+/// the periodic bracer (see brace.h), which provides enough coefficients to the right to allow
+/// for one full period to be calculated without bounds checking on the coefficients.
+/// Yet we could also conceive of the coefficients to coincide with coordinates
+/// .5, 1.5, 2.5 ...
+/// Which fits nicely with an even spline, which has a wider defined range due to
+/// the smaller support. In that case we could possibly even save ourselves the last coefficient.
+/// The first period finishes one unit spacing beyond the location of the last knot
+/// point, so if the spline is constructed over N values, the mapping has to be constructed
+/// with parameter M = N + 1. The bracing applied with the periodic bracer makes sure that
+/// coordinates up to M can be processed.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_periodic: public mapping < split_type , vsize >
+{
+  // component types, taken over from the split type
+  
+  typedef typename split_type::rc_type real_t ;
+  typedef typename split_type::ic_type int_t ;
+  
+  const real_t _ceiling ;   // M - 1, used as the length of one period
+  
+public:
+  
+  odd_mapping_periodic ( int M )
+  : _ceiling ( M - 1 ) { }
+  
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    if ( v < 0.0 || v >= _ceiling )
+    {
+      // the remainder has the sign of v, so for negative v one period is added.
+      // note that with negative v, the outcome can be _ceiling itself.
+      v = ( v < 0.0 ) ? _ceiling + fmod ( v , _ceiling )
+                      : fmod ( v , _ceiling ) ;
+    }
+
+    real_t whole ;
+    fv = modf ( v , &whole ) ;  // remainder is in [0...1[
+    iv = int_t ( whole ) ;
+  }
+  
+#ifdef USE_VC
+
+  typedef Vc::SimdArray < real_t , vsize > rc_v ;
+  typedef Vc::SimdArray < int_t , vsize > ic_v ;
+
+  // vectorized variant: if any component is outside, the modulo is applied to all
+
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    if ( any_of ( ( v < real_t(0) ) | ( v >= _ceiling ) ) )
+    {
+      v = v_fmod ( v , _ceiling ) ;
+      v ( v < real_t(0) ) += _ceiling ;   // negative remainders go up one period
+    }
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;          // remainder is in [0...1[
+    iv = whole ;                          // conversion to the integral vector type
+  }
+
+#endif
+} ;
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_periodic: public mapping < split_type , vsize >
+{
+  // integral and real coordinate types, as supplied by split_type
+  using int_t = typename split_type::ic_type ;
+  using real_t = typename split_type::rc_type ;
+
+  // length of one period; the constructor sets it to M - 1
+  const real_t _ceiling ;
+
+public:
+
+  even_mapping_periodic ( int M )
+  : _ceiling ( M - 1 )
+  { }
+
+  /// scalar operation: wrap v into the first period, then split v + 0.5
+  /// into its integral part, which goes to iv, and the remainder, which
+  /// is shifted back by 0.5 and goes to fv, so fv is in [-0.5 ... 0.5]
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    const real_t half ( 0.5 ) ;
+    if ( v < 0.0 )
+      v = _ceiling + fmod ( v , _ceiling ) ;  // remainder is <= 0 here, shift it up one period
+    else if ( v >= _ceiling )
+      v = fmod ( v , _ceiling ) ;             // force to first period
+
+    real_t whole ;
+    fv = modf ( v + half , &whole ) - half ;
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  using ic_v = Vc::SimdArray < int_t , vsize > ;
+  using rc_v = Vc::SimdArray < real_t , vsize > ;
+
+  /// vectorized operation: the same mapping, applied to all lanes of v
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    const real_t half ( 0.5 ) ;
+    if ( any_of ( v < real_t(0) ) || any_of ( v >= _ceiling ) )
+    {
+      v = v_fmod ( v , _ceiling ) ;       // apply modulo to all lanes
+      v ( v < real_t(0) ) += _ceiling ;   // lanes still below zero move up one period
+    }
+    v += half ;
+
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) - half ;   // remainder, shifted back to [-0.5 ... 0.5]
+    iv = whole ;
+  }
+
+#endif
+} ;
+
+/// now the mapping for natural boundary conditions. Note that I use a generalization
+/// here: classically natural boundary conditions mean that all derivatives of the spline
+/// above the first are zero. Since the DSP approach to b-splines does not consider
+/// derivatives at the ends of the spline, there must be a different method to obtain the
+/// same result. I use point symmetry at the ends of the spline, which has the desired
+/// effect without using the derivatives explicitly.
+/// here we cannot obtain coordinates to data inside the defined range which would yield us
+/// values for the extrapolated signal (at least not in the adjoining area), since the
+/// values in the extrapolated signal are defined by
+/// f(x) - f(0) == f(0) - f(-x); f(x+n-1) - f(n-1) == f(n-1) - f (n-1-x)
+/// so there is an arithmetic operation involved which we can't represent by indexing.
+/// In other words, we can't 'fold' in the coordinates from outside the defined range
+/// as we could with periodic or mirror bcs.
+/// nevertheless we want to perform the usual transformations to real indices which
+/// correspond to data *inside* the defined range. What we do outside the defined range
+/// is arbitrary, so here I repeat the value at the bounds ad infinitum.
+/// TODO: this is silly, since it's not really specific to natural BCs.
+/// Might call it 'clamp' instead.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_constant: public mapping < split_type , vsize >
+{
+  using int_t = typename split_type::ic_type ;
+  using real_t = typename split_type::rc_type ;
+
+  // upper limit for incoming coordinates; the constructor sets it to M - 1
+  real_t _ceiling ;
+
+public:
+  odd_mapping_constant ( int M )
+  : _ceiling ( M - 1 ) { }
+
+  /// scalar operation: coordinates outside [ 0 , _ceiling ] yield the nearer
+  /// limit with a zero remainder, all others are split by modf
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    if ( v < 0.0 )
+    {
+      iv = 0 ;                    // below the range: same result as for v == 0.0
+      fv = 0.0 ;
+    }
+    else if ( v > _ceiling )
+    {
+      iv = int_t ( _ceiling ) ;   // above the range: pass the upper limit
+      fv = 0.0 ;
+    }
+    else
+    {
+      real_t whole ;
+      fv = modf ( v , &whole ) ;  // remainder from [0...1[
+      iv = int_t ( whole ) ;
+    }
+  }
+
+#ifdef USE_VC
+
+  using ic_v = Vc::SimdArray < int_t , vsize > ;
+  using rc_v = Vc::SimdArray < real_t , vsize > ;
+
+  /// vectorized operation: clamp all lanes of v to the range, then split
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    v ( v < real_t ( 0 ) ) = real_t ( 0 ) ;
+    v ( v > _ceiling ) = _ceiling ;
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;
+    iv = whole ;
+  }
+
+#endif
+} ;
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_constant: public mapping < split_type , vsize >
+{
+  // integral and real coordinate types, as supplied by split_type
+  using int_t = typename split_type::ic_type ;
+  using real_t = typename split_type::rc_type ;
+
+  // upper limit for incoming coordinates; the constructor sets it to M - 1
+  real_t _ceiling ;
+
+public:
+
+  even_mapping_constant ( int M )
+  : _ceiling ( M - 1 ) { }
+
+  /// scalar operation: coordinates outside [ 0 , _ceiling ] yield the nearer
+  /// limit with a zero remainder, all others are split around v + 0.5
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    if ( v < 0.0 )
+    {
+      iv = 0 ;                    // below the range: same result as for v == 0.0
+      fv = 0.0 ;
+    }
+    else if ( v > _ceiling )
+    {
+      iv = int_t ( _ceiling ) ;   // above the range: pass the upper limit
+      fv = 0.0 ;
+    }
+    else
+    {
+      real_t whole ;              // remainder ends up in [-0.5 ... 0.5]
+      fv = modf ( v + real_t(0.5) , &whole ) - real_t(0.5) ;
+      iv = int_t ( whole ) ;
+    }
+  }
+
+#ifdef USE_VC
+
+  using ic_v = Vc::SimdArray < int_t , vsize > ;
+  using rc_v = Vc::SimdArray < real_t , vsize > ;
+
+  /// vectorized operation: clamp all lanes of v to the range, then split
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    v ( v < real_t(0) ) = real_t(0) ;
+    v ( v > _ceiling ) = _ceiling ;
+    v += real_t(0.5) ;
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) - real_t(0.5) ;   // remainder in [-0.5 ... 0.5]
+    iv = whole ;
+  }
+
+#endif
+} ;
+
+/// Mapping for REFLECT boundary conditions for odd splines
+///
+/// This mapping will map coordinates to [ -0.5 , M - 0.5 ] and needs a spline with
+/// a wider bracing. If the spline is created via a bspline object with REFLECT BCs,
+/// this will be done automatically. The widened range is due to the point of reflection,
+/// which is half a unit spacing 'further out' than for MIRROR BCs.
+/// In my initial implementation I was using a coordinate shift by 0.5 to put the
+/// origin of the unmapped coordinates to the point of reflection, but I have now changed
+/// the behaviour to use the same origin as all the other mappings, for reasons of
+/// consistency and easier automatic testing - this way, the restoration with grid
+/// coordinates, which is used in the roundtrip test, will produce the expected result.
+/// So, the left point of reflection is at -0.5, the right point of reflection at M - 0.5.
+
+template < typename split_type , int vsize = 1 >
+class odd_mapping_reflect: public mapping < split_type , vsize >
+{
+  // integral and real coordinate types, as supplied by split_type
+  using int_t = typename split_type::ic_type ;
+  using real_t = typename split_type::rc_type ;
+
+  // after shifting incoming coordinates by 0.5, reflection happens at 0 and
+  // at _ceiling, so the folded coordinate repeats with a period of _ceilx2
+  const real_t _ceiling ;
+  const real_t _ceilx2 ;
+
+public:
+  odd_mapping_reflect ( int M )
+  : _ceiling ( M - 2 ) ,
+    _ceilx2 ( M + M - 4 )
+  { }
+
+  /// scalar operation: fold v into the range between the two points of
+  /// reflection, then store the integral part of the folded coordinate in iv
+  /// and the remainder from [0...1[ in fv
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    v += real_t ( 0.5 ) ;          // delete this to change back to origin at reflection
+    if ( v < real_t ( 0.0 ) )
+      v = -v ;                     // reflect at the left boundary
+    if ( v > _ceiling )
+    {
+      v = fmod ( v , _ceilx2 ) ;   // reduce to one full period
+      if ( v > _ceiling )
+        v = _ceilx2 - v ;          // reflect at the right boundary
+    }
+    // v is in [ 0 | _ceiling ] now. Adding 0.5 once more results in the
+    // coordinate which is split, so that one is in [ 0.5 | _ceiling + 0.5 ]
+    v += 0.5 ;
+    real_t whole ;
+    fv = modf ( v , &whole ) ;
+    iv = int_t ( whole ) ;
+  }
+
+#ifdef USE_VC
+
+  using ic_v = Vc::SimdArray < int_t , vsize > ;
+  using rc_v = Vc::SimdArray < real_t , vsize > ;
+
+  /// vectorized operation: the same mapping, applied to all lanes of v
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    v += real_t ( 0.5 ) ;
+    v = abs ( v ) ;                        // reflect at the left boundary
+    if ( any_of ( v > _ceiling ) )
+    {
+      v = v_fmod ( v , _ceilx2 ) ;         // reduce to one full period
+      v ( v > _ceiling ) = _ceilx2 - v ;   // reflect at the right boundary
+    }
+    v += real_t(0.5) ;
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) ;
+    iv = whole ;
+  }
+
+#endif
+} ;
+
+/// mapping for REFLECT boundary conditions for even splines
+/// this mapping will map coordinates to [ -0.5 , M - 0.5 ].
+/// for the even case, we don't need the widened brace, since it has smaller support,
+/// but we have to guard against the special case that v == M - 0.5, since this might
+/// be mapped to M, -0.5 which is outside the defined range. In this special case
+/// we use M-1, 0.5 instead which is equivalent and inside the range.
+
+template < typename split_type , int vsize = 1 >
+class even_mapping_reflect: public mapping < split_type , vsize >
+{
+  // integral and real coordinate types, as supplied by split_type
+  using int_t = typename split_type::ic_type ;
+  using real_t = typename split_type::rc_type ;
+
+  // after the shift by 0.5, reflection happens at 0 and at _ceiling; period is _ceilx2
+  const real_t _ceiling ;
+  const real_t _ceilx2 ;
+
+public:
+  even_mapping_reflect ( int M )
+  : _ceiling ( M ) ,
+    _ceilx2 ( M + M )
+  { }
+
+  /// scalar operation: fold v into the range between the two points of
+  /// reflection, then split it into the integral part iv and a remainder fv
+  /// in [-0.5 ... 0.5]. A folded value of _ceiling yields ( _ceiling - 1 , 0.5 ).
+  virtual void operator() ( real_t v , int_t& iv , real_t& fv )
+  {
+    v += real_t ( 0.5 ) ;          // measure from the left point of reflection
+    if ( v < 0.0 )
+      v = -v ;                     // reflect at the left boundary
+    if ( v > _ceiling )
+    {
+      v = fmod ( v , _ceilx2 ) ;   // reduce to one full period
+      if ( v > _ceiling )
+        v = _ceilx2 - v ;          // reflect at the right boundary
+    }
+    if ( v >= _ceiling )           // special == case; defensively use >= instead of ==
+    {
+      iv = _ceiling - 1 ;
+      fv = real_t ( 0.5 ) ;
+    }
+    else
+    {
+      // v is in [ 0 | _ceiling [ here; 0.5 is subtracted from the remainder
+      real_t whole ;
+      fv = modf ( v , &whole ) - real_t(0.5) ;
+      iv = int_t ( whole ) ;
+    }
+  }
+
+#ifdef USE_VC
+
+  using ic_v = Vc::SimdArray < int_t , vsize > ;
+  using rc_v = Vc::SimdArray < real_t , vsize > ;
+
+  /// vectorized operation: the same mapping, applied to all lanes of v
+  virtual void operator() ( rc_v v , ic_v & iv , rc_v & fv )
+  {
+    v += real_t ( 0.5 ) ;
+    v = abs ( v ) ;                        // reflect at the left boundary
+    if ( any_of ( v > _ceiling ) )
+    {
+      v = v_fmod ( v , _ceilx2 ) ;         // reduce to one full period
+      v ( v > _ceiling ) = _ceilx2 - v ;   // reflect at the right boundary
+    }
+    rc_v whole ;
+    fv = v_modf ( v , &whole ) - real_t(0.5) ;
+    iv = whole ;
+    // lanes folded to _ceiling are overridden; >= is used defensively instead of ==
+    auto at_limit = ( v >= _ceiling ) ;
+    if ( any_of ( at_limit ) )
+    {
+      iv ( at_limit ) = _ceiling - 1 ;
+      fv ( at_limit ) = real_t ( 0.5 ) ;
+    }
+  }
+
+#endif
+} ;
+
+/// create a mapping for an odd-degree spline given a BC code, the spline degree,
+/// and the extent along the axis in question
+
+template < typename split_type , int vsize = 1 >
+mapping < split_type , vsize > * create_odd_mapping ( bc_code bc , int spline_degree , int M )
+{
+  // the mapping object is allocated with new and handed to the caller;
+  // spline_degree is accepted, but not evaluated in this function
+  mapping < split_type , vsize > * result ;
+
+  switch ( bc )
+  {
+    case RAW :
+      result = new odd_mapping_raw < split_type , vsize > ( M ) ;
+      break ;
+
+    case LIMIT :
+      result = new odd_mapping_limit < split_type , vsize > ( M ) ;
+      break ;
+
+    case REJECT :
+      result = new odd_mapping_reject < split_type , vsize > ( M ) ;
+      break ;
+
+    case CONSTANT :
+    case NATURAL :
+      result = new odd_mapping_constant < split_type , vsize > ( M ) ;
+      break ;
+
+    case MIRROR :
+      result = new odd_mapping_mirror < split_type , vsize > ( M ) ;
+      break ;
+
+    case REFLECT :
+    case SPHERICAL :
+      // note the M + 2: odd_mapping_reflect subtracts 2 from its argument
+      result = new odd_mapping_reflect < split_type , vsize > ( M + 2 ) ;
+      break ;
+
+    case PERIODIC :
+      // note the M + 1: odd_mapping_periodic subtracts 1 from its argument
+      result = new odd_mapping_periodic < split_type , vsize > ( M + 1 ) ;
+      break ;
+
+    default :
+      // TODO: throw exception instead?
+      // for now, report the problem and fall back to the reject mapping
+      cerr << "mapping for BC code " << bc_name[bc] << " is not supported" << endl ;
+      result = new odd_mapping_reject < split_type , vsize > ( M ) ;
+      break ;
+  }
+
+  return result ;
+}
+
+/// create a mapping for an even-degree spline given a BC code, the spline degree,
+/// and the extent along the axis in question
+
+template < typename split_type , int vsize = 1 >
+mapping < split_type , vsize > * create_even_mapping ( bc_code bc , int spline_degree , int M )
+{
+  // the mapping object is allocated with new and handed to the caller;
+  // spline_degree is accepted, but not evaluated in this function
+  mapping < split_type , vsize > * result ;
+
+  switch ( bc )
+  {
+    case RAW :
+      result = new even_mapping_raw < split_type , vsize > ( M ) ;
+      break ;
+
+    case LIMIT :
+      result = new even_mapping_limit < split_type , vsize > ( M ) ;
+      break ;
+
+    case REJECT :
+      result = new even_mapping_reject < split_type , vsize > ( M ) ;
+      break ;
+
+    case NATURAL :
+    case CONSTANT :
+      result = new even_mapping_constant < split_type , vsize > ( M ) ;
+      break ;
+
+    case MIRROR :
+      result = new even_mapping_mirror < split_type , vsize > ( M ) ;
+      break ;
+
+    case REFLECT :
+    case SPHERICAL :
+      // even_mapping_reflect takes M as it is
+      result = new even_mapping_reflect < split_type , vsize > ( M ) ;
+      break ;
+
+    case PERIODIC :
+      // note the M + 1: even_mapping_periodic subtracts 1 from its argument
+      result = new even_mapping_periodic < split_type , vsize > ( M + 1 ) ;
+      break ;
+
+    default :
+      // TODO: throw exception instead?
+      // for now, report the problem and fall back to the reject mapping
+      cerr << "mapping for BC code " << bc_name[bc] << " is not supported" << endl ;
+      result = new even_mapping_reject < split_type , vsize > ( M ) ;
+      break ;
+  }
+
+  return result ;
+}
+
+/// class nd_mapping handles a set of mappings, one per axis.
+/// It provides the same operations as the 1D mapping class; for 1D use the axis
+/// to which the mapping is applied is determined by an additional parameter,
+/// axis. Internally, the mappings are accessed via a pointer to the base class.
+/// The additional routines (without axis parameter) iterate over all axes.
+/// Since in this object we keep base class pointers to the actual mappings,
+/// we need additional code to assure proper deletion of the mapping objects,
+/// which are created by new. Hence the copy constructor and assignment operator.
+
+template < typename split_type , int dimension , int vsize = 1 >
+class nd_mapping
+{
+public:
+
+  typedef mapping < split_type , vsize > mapping_type ;
+
+  typedef typename mapping_type::int_t int_t ;
+  typedef typename mapping_type::real_t real_t ;
+  typedef TinyVector < bc_code , dimension > bcv_type ;
+  typedef TinyVector < MultiArrayIndex , dimension > shape_type ;
+  typedef TinyVector < real_t , dimension > nd_real_t ;
+  typedef TinyVector < int_t , dimension > nd_int_t ;
+
+#ifdef USE_VC
+
+  typedef typename mapping_type::rc_v real_v ;
+  typedef typename mapping_type::ic_v int_v ;
+  typedef TinyVector < real_v , dimension > nd_real_v ;
+  typedef TinyVector < int_v , dimension > nd_int_v ;
+
+#endif
+
+private:
+
+  TinyVector < mapping_type* , dimension > map ; // one mapping per axis, owned by this object
+  bcv_type bcv ;                                 // boundary condition codes, one per axis
+  int spline_degree ;                            // odd or even degree selects the mapping family
+  shape_type shape ;                             // extent per axis, passed to the mappings as M
+
+public:
+
+  /// 'standard' constructor for a nd_mapping. This constructor takes the
+  /// values which a nd_mapping requires for its operation directly, and the
+  /// other constructors delegate to it.
+
+  nd_mapping ( const bcv_type&   _bcv ,
+               const int&        _spline_degree ,
+               const shape_type& _shape  )
+  : bcv ( _bcv ) ,
+    spline_degree ( _spline_degree ) ,
+    shape ( _shape )
+  {
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      if ( spline_degree & 1 )
+        map[d] = create_odd_mapping < split_type , vsize > ( bcv[d] , spline_degree , shape[d] ) ;
+      else
+        map[d] = create_even_mapping < split_type , vsize > ( bcv[d] , spline_degree , shape[d] ) ;
+    }
+  }
+
+  /// convenience variant taking a single boundary condition code, which is used for all axes.
+  /// Note that the parameter _spline_degree is passed on; the member is not yet set here.
+
+  nd_mapping ( const bc_code&    bc ,
+               const int&        _spline_degree ,
+               const shape_type& _shape  )
+  : nd_mapping ( bcv_type ( bc ) , _spline_degree , _shape )
+  { } ;
+
+  /// convenience variant constructing a nd_mapping from a bspline object
+  /// A bspline object has all components needed to construct a nd_mapping:
+  /// It has a set of boundary condition codes (those the spline was constructed with),
+  /// the spline's degree and the core shape of its coefficient array, giving us the limits
+  /// for the mapping. Creating the mapping like this keeps us from using mapping modes
+  /// RAW, LIMIT and REJECT, since these are only codes for mappings, not for boundary
+  /// conditions a spline can be constructed with. But all the BC codes used for spline
+  /// construction, like MIRROR, REFLECT, NATURAL and PERIODIC have corresponding
+  /// mappings.
+
+  template < class bspline >
+  nd_mapping ( const bspline & bspl )
+  : nd_mapping ( bspl.bcv ,
+                 bspl.spline_degree ,
+                 bspl.core_shape )
+  { } ;
+
+  /// since we have members which need delete, we need a copy constructor
+
+  nd_mapping ( const nd_mapping& other )
+  : nd_mapping ( other.bcv , other.spline_degree , other.shape )
+  { } ;
+
+  /// assignment operator, for the same reason. The mapping objects held so far
+  /// are deleted and replaced by new ones matching 'other'; returns *this.
+  nd_mapping& operator= ( const nd_mapping& other )
+  {
+    bcv = other.bcv ;
+    shape = other.shape ;
+    spline_degree = other.spline_degree ;
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      delete map[d] ;   // deleting a null pointer is a no-op, so no check is needed
+      if ( spline_degree & 1 )
+        map[d] = create_odd_mapping < split_type , vsize > ( bcv[d] , spline_degree , shape[d] ) ;
+      else
+        map[d] = create_even_mapping < split_type , vsize > ( bcv[d] , spline_degree , shape[d] ) ;
+    }
+    return *this ;
+  }
+
+  /// the destructor deletes the mapping objects. Calling delete on a null
+  /// pointer is a no-op, so entries which are 0 need no special treatment.
+  /// It would be nice if we could provide a default constructor, but there is no
+  /// sensible default shape or spline degree we could provide, so every
+  /// constructor ends up creating one mapping per axis.
+
+  ~nd_mapping()
+  {
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      delete map[d] ;
+    }
+  }
+
+  /// apply the mapping along axis 'axis' to coordinate v, resulting in the setting
+  /// of the integral part iv and the fractional part fv
+
+  void operator() ( real_t v , int_t& iv , real_t& fv , const int & axis )
+  {
+    ( * ( map [ axis ] ) ) ( v , iv , fv ) ;
+  }
+
+  /// same operation, but along all axes, taking a multi-coordinate and setting
+  /// the corresponding n-dimensional objects for the integral and fractional parts
+
+  void operator() ( nd_real_t v , nd_int_t& iv , nd_real_t& fv )
+  {
+    for ( int axis = 0 ; axis < dimension ; axis++ )
+      ( * ( map [ axis ] ) ) ( v[axis] , iv[axis] , fv[axis] ) ;
+  }
+
+  /// different signature, now we handle a split_type object as the operation's target
+
+  void operator() ( real_t v , split_type& s , const int & axis )
+  {
+    ( * ( map [ axis ] ) ) ( v , s.select[axis] , s.tune[axis] ) ;
+  }
+
+  /// the same in nD
+
+  void operator() ( nd_real_t v , split_type& s )
+  {
+    for ( int axis = 0 ; axis < dimension ; axis++ )
+      ( * ( map [ axis ] ) ) ( v[axis] , s.select[axis] , s.tune[axis] ) ;
+  }
+
+#ifdef USE_VC
+
+  /// finally, operation on Vc vectors of incoming coordinates. Again, first the
+  /// 1D version with the 'axis' parameter
+
+  void operator() ( Vc::Vector < real_t > v ,
+                    Vc::SimdArray < int_t , Vc::Vector<real_t>::size() >& iv ,
+                    Vc::Vector < real_t >& fv ,
+                    const int & axis )
+  {
+    ( * ( map [ axis ] ) ) ( v , iv , fv ) ;
+  }
+
+  /// and the nD version, operating on vector aggregates
+
+  void operator() ( nd_real_v v ,
+                    nd_int_v & iv ,
+                    nd_real_v & fv )
+  {
+    for ( int axis = 0 ; axis < dimension ; axis++ )
+      ( * ( map [ axis ] ) ) ( v[axis] , iv[axis] , fv[axis] ) ;
+  }
+
+#endif // USE_VC
+} ;
+
+/*
+void mapping_test()
+{
+  float in[] = { -1.0 , -0.5 , 0.0 , 0.5 , 8.5 , 9.0 , 9.4999 , 9.5 , 9.501 , 10.0 , 10.5 , 11.0 , 11.5 , 12.0 } ;
+  const int innum = sizeof(in) / sizeof(float) ;
+  
+  typedef split_type<> st ;
+  
+  mapping<st,innum> * pmap[] = { new odd_mapping_mirror<st,innum>(10) ,
+                             new even_mapping_mirror<st,innum>(10) ,
+                             new odd_mapping_periodic<st,innum>(10) ,
+                             new even_mapping_periodic<st,innum>(10) ,
+                             new odd_mapping_reflect<st,innum>(12) ,
+                             new even_mapping_reflect<st,innum>(10) ,
+                             new odd_mapping_constant<st,innum>(10) ,
+                             new even_mapping_constant<st,innum>(10) } ;
+  char * nmap[] = { "odd_mapping_mirror(10)" ,
+                    "even_mapping_mirror(10)" ,
+                    "odd_mapping_periodic(10)" ,
+                    "even_mapping_periodic(10)" ,
+                    "odd_mapping_reflect(12)" ,
+                    "even_mapping_reflect(10)" ,
+                    "odd_mapping_constant(10)" ,
+                    "even_mapping_constant(10)" } ;
+ mapping<st,innum>::int_t oi ;
+ mapping<st,innum>::real_t of ;
+ mapping<st,innum>::ic_v v_oi ;
+ mapping<st,innum>::rc_v v_of ;
+ mapping<st,innum>::rc_v v_in ;
+ 
+ for ( int i = 0 ; i < sizeof ( pmap ) / sizeof ( mapping<st,innum>* ) ; i++ )
+ {
+   cout << "testing " << nmap[i] << endl ;
+   for ( int k = 0 ; k < sizeof ( in ) / sizeof ( float ) ; k++ )
+   {
+     (*(pmap[i])) ( in[k] , oi , of ) ;
+     cout << in[k] << " -> " << oi << ", " << of << endl ;
+   }
+ }
+ for ( int i = 0 ; i < sizeof ( pmap ) / sizeof ( mapping<st,innum>* ) ; i++ )
+ {
+   cout << "testing " << nmap[i] << endl ;
+//    for ( int k = 0 ; k < sizeof ( in ) / sizeof ( float ) ; k++ )
+//    {
+     v_in = mapping<st,innum>::rc_v ( in ) ;
+     (*(pmap[i])) ( v_in , v_oi , v_of ) ;
+     cout << v_in << " -> " << v_oi << ", " << v_of << endl ;
+//    }
+ }
+}
+*/
+} ; // end of namespace vspline
+
+#endif // VSPLINE_MAPPING_H
diff --git a/poles.cc b/poles.cc
new file mode 100644
index 0000000..f91715b
--- /dev/null
+++ b/poles.cc
@@ -0,0 +1,660 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file poles.cc
+
+    \brief precalculated prefilter poles and basis function values
+
+    The contents of this file below the comments can be generated using prefilter_poles.cc.
+    While the precalculated basis function values can be generated in long double
+    precision (with code in basis.h), the filter poles are calculated using
+    gsl and BLAS, which provide only double precision.
+*/
+
+long double K0[] = {
+ 1L ,   // basis(0)
+ } ; 
+long double K1[] = {
+ 1L ,   // basis(0)
+ 0.5L ,   // basis(0.5)
+ } ; 
+long double K2[] = {
+ 0.75L ,   // basis(0)
+ 0.5L ,   // basis(0.5)
+ 0.125L ,   // basis(1)
+ } ; 
+double Poles_2[] = {
+-0.17157287525381015314 ,
+} ;
+long double K3[] = {
+ 0.66666666666666666668L ,   // basis(0)
+ 0.47916666666666666666L ,   // basis(0.5)
+ 0.16666666666666666667L ,   // basis(1)
+ 0.020833333333333333334L ,   // basis(1.5)
+ } ; 
+double Poles_3[] = {
+-0.26794919243112280682 ,
+} ;
+long double K4[] = {
+ 0.59895833333333333332L ,   // basis(0)
+ 0.45833333333333333334L ,   // basis(0.5)
+ 0.19791666666666666667L ,   // basis(1)
+ 0.041666666666666666668L ,   // basis(1.5)
+ 0.0026041666666666666667L ,   // basis(2)
+ } ; 
+double Poles_4[] = {
+-0.36134122590021944266 ,
+-0.013725429297339164503 ,
+} ;
+long double K5[] = {
+ 0.55000000000000000001L ,   // basis(0)
+ 0.4380208333333333333L ,   // basis(0.5)
+ 0.21666666666666666667L ,   // basis(1)
+ 0.061718750000000000001L ,   // basis(1.5)
+ 0.0083333333333333333337L ,   // basis(2)
+ 0.00026041666666666666668L ,   // basis(2.5)
+ } ; 
+double Poles_5[] = {
+-0.43057534709997430378 ,
+-0.043096288203264443428 ,
+} ;
+long double K6[] = {
+ 0.51102430555555555553L ,   // basis(0)
+ 0.41944444444444444449L ,   // basis(0.5)
+ 0.22879774305555555554L ,   // basis(1)
+ 0.079166666666666666666L ,   // basis(1.5)
+ 0.015668402777777777778L ,   // basis(2)
+ 0.001388888888888888889L ,   // basis(2.5)
+ 2.170138888888888889e-05L ,   // basis(3)
+ } ; 
+double Poles_6[] = {
+-0.48829458930304570075 ,
+-0.081679271076237264237 ,
+-0.0014141518083258114435 ,
+} ;
+long double K7[] = {
+ 0.47936507936507936512L ,   // basis(0)
+ 0.4025964161706349206L ,   // basis(0.5)
+ 0.23630952380952380952L ,   // basis(1)
+ 0.094024367559523809525L ,   // basis(1.5)
+ 0.023809523809523809525L ,   // basis(2)
+ 0.0033776661706349206347L ,   // basis(2.5)
+ 0.00019841269841269841271L ,   // basis(3)
+ 1.5500992063492063493e-06L ,   // basis(3.5)
+ } ; 
+double Poles_7[] = {
+-0.5352804307964414976 ,
+-0.12255461519232610512 ,
+-0.0091486948096082820747 ,
+} ;
+long double K8[] = {
+ 0.45292096819196428568L ,   // basis(0)
+ 0.38737599206349206352L ,   // basis(0.5)
+ 0.24077768477182539682L ,   // basis(1)
+ 0.10647321428571428571L ,   // basis(1.5)
+ 0.032126968625992063494L ,   // basis(2)
+ 0.0061259920634920634923L ,   // basis(2.5)
+ 0.00063476562499999999998L ,   // basis(3)
+ 2.4801587301587301589e-05L ,   // basis(3.5)
+ 9.6881200396825396832e-08L ,   // basis(4)
+ } ; 
+double Poles_8[] = {
+-0.57468690924881216109 ,
+-0.16303526929727354955 ,
+-0.023632294694844447475 ,
+-0.00015382131064169135559 ,
+} ;
+long double K9[] = {
+ 0.43041776895943562614L ,   // basis(0)
+ 0.3736024025676532187L ,   // basis(0.5)
+ 0.24314925044091710759L ,   // basis(1)
+ 0.1168385769744819224L ,   // basis(1.5)
+ 0.040255731922398589063L ,   // basis(2)
+ 0.0094531293058311287482L ,   // basis(2.5)
+ 0.0013833774250440917108L ,   // basis(3)
+ 0.00010588576974481922398L ,   // basis(3.5)
+ 2.7557319223985890654e-06L ,   // basis(4)
+ 5.3822889109347442683e-09L ,   // basis(4.5)
+ } ; 
+double Poles_9[] = {
+-0.60799738916862233751 ,
+-0.20175052019315406482 ,
+-0.04322260854048156492 ,
+-0.0021213069031808251541 ,
+} ;
+long double K10[] = {
+ 0.41096264282441854056L ,   // basis(0)
+ 0.36109843474426807762L ,   // basis(0.5)
+ 0.24406615618885719797L ,   // basis(1)
+ 0.12543871252204585538L ,   // basis(1.5)
+ 0.047983348920442019401L ,   // basis(2)
+ 0.013183421516754850087L ,   // basis(2.5)
+ 0.0024532852307408785274L ,   // basis(3)
+ 0.00027915564373897707232L ,   // basis(3.5)
+ 1.5887978636188271604e-05L ,   // basis(4)
+ 2.7557319223985890654e-07L ,   // basis(4.5)
+ 2.6911444554673721342e-10L ,   // basis(5)
+ } ; 
+double Poles_10[] = {
+-0.63655066396958059904 ,
+-0.23818279837754796624 ,
+-0.065727033228304657109 ,
+-0.0075281946755491966489 ,
+-1.6982762823274620556e-05 ,
+} ;
+long double K11[] = {
+ 0.39392556517556517558L ,   // basis(0)
+ 0.34970223188744306906L ,   // basis(0.5)
+ 0.24396028739778739779L ,   // basis(1)
+ 0.13256116543210659421L ,   // basis(1.5)
+ 0.055202020202020202023L ,   // basis(2)
+ 0.017163149607531321399L ,   // basis(2.5)
+ 0.0038238786676286676285L ,   // basis(3)
+ 0.00057128626126327135446L ,   // basis(3.5)
+ 5.1006092672759339424e-05L ,   // basis(4)
+ 2.1667994232691498315e-06L ,   // basis(4.5)
+ 2.5052108385441718776e-08L ,   // basis(5)
+ 1.2232474797578964246e-11L ,   // basis(5.5)
+ } ; 
+double Poles_11[] = {
+-0.66126606890063921451 ,
+-0.27218034929481393913 ,
+-0.089759599793708844118 ,
+-0.016669627366234951449 ,
+-0.00051055753444649576434 ,
+} ;
+long double K12[] = {
+ 0.37884408454472999147L ,   // basis(0)
+ 0.33927295023649190319L ,   // basis(0.5)
+ 0.24313091801014469471L ,   // basis(1)
+ 0.13845146655042488376L ,   // basis(1.5)
+ 0.061867668009041325489L ,   // basis(2)
+ 0.021268582401394901395L ,   // basis(2.5)
+ 0.0054581869256967252307L ,   // basis(3)
+ 0.00099847474413446635658L ,   // basis(3.5)
+ 0.00012091392059187537162L ,   // basis(4)
+ 8.5239798781465448132e-06L ,   // basis(4.5)
+ 2.7086165069699140876e-07L ,   // basis(5)
+ 2.0876756987868098981e-09L ,   // basis(5.5)
+ 5.0968644989912351028e-13L ,   // basis(6)
+ } ; 
+double Poles_12[] = {
+-0.68286488419809487915 ,
+-0.30378079328817336746 ,
+-0.11435052002714780894 ,
+-0.028836190198661435652 ,
+-0.0025161662172618224839 ,
+-1.883305645063344802e-06 ,
+} ;
+long double K13[] = {
+ 0.36537086948545281884L ,   // basis(0)
+ 0.32968987958591001189L ,   // basis(0.5)
+ 0.24178841798633465302L ,   // basis(1)
+ 0.14331501747174208366L ,   // basis(1.5)
+ 0.067974967258821425491L ,   // basis(2)
+ 0.0254044062949849888L ,   // basis(2.5)
+ 0.0073122366959172514726L ,   // basis(3)
+ 0.0015671731081656330546L ,   // basis(3.5)
+ 0.00023762984700484700481L ,   // basis(4)
+ 2.3492285420207986942e-05L ,   // basis(4.5)
+ 1.3133086049752716419e-06L ,   // basis(5)
+ 3.125375747123929632e-08L ,   // basis(5.5)
+ 1.6059043836821614601e-10L ,   // basis(6)
+ 1.960332499612013501e-14L ,   // basis(6.5)
+ } ; 
+double Poles_13[] = {
+-0.701894251817016257 ,
+-0.33310723293052579841 ,
+-0.13890111319434489401 ,
+-0.043213866740361948915 ,
+-0.0067380314152448743045 ,
+-0.00012510011321441739246 ,
+} ;
+long double K14[] = {
+ 0.35323915669918929845L ,   // basis(0)
+ 0.32085024502063192543L ,   // basis(0.5)
+ 0.24008299041558734203L ,   // basis(1)
+ 0.14732180094624291054L ,   // basis(1.5)
+ 0.073541032564067060978L ,   // basis(2)
+ 0.029499800232377117299L ,   // basis(2.5)
+ 0.009341081854512256905L ,   // basis(3)
+ 0.002275919650051594496L ,   // basis(3.5)
+ 0.00041109051149372196722L ,   // basis(4)
+ 5.2046374591017448155e-05L ,   // basis(4.5)
+ 4.2229561084936041826e-06L ,   // basis(5)
+ 1.8776463468923786384e-07L ,   // basis(5.5)
+ 3.3486357751247422931e-09L ,   // basis(6)
+ 1.1470745597729724715e-11L ,   // basis(6.5)
+ 7.0011874986143339324e-16L ,   // basis(7)
+ } ; 
+double Poles_14[] = {
+-0.71878378723766189751 ,
+-0.36031907191881451524 ,
+-0.16303351479903732679 ,
+-0.059089482194828991946 ,
+-0.013246756734847169382 ,
+-0.00086402404095337124838 ,
+-2.0913096775274000322e-07 ,
+} ;
+long double K15[] = {
+ 0.34224026135534072046L ,   // basis(0)
+ 0.31266660625176080971L ,   // basis(0.5)
+ 0.23812319491070731152L ,   // basis(1)
+ 0.15061194980399698684L ,   // basis(1.5)
+ 0.078595253866748575742L ,   // basis(2)
+ 0.033503802571649835525L ,   // basis(2.5)
+ 0.011502274487496875064L ,   // basis(3)
+ 0.003117493948498863913L ,   // basis(3.5)
+ 0.00064854900635323915745L ,   // basis(4)
+ 9.9440249438946462506e-05L ,   // basis(4.5)
+ 1.057200426826749578e-05L ,   // basis(5)
+ 7.0683979027987963181e-07L ,   // basis(5.5)
+ 2.5045990654456262921e-08L ,   // basis(6)
+ 3.348642542939324287e-10L ,   // basis(6.5)
+ 7.6471637318198164765e-13L ,   // basis(7)
+ 2.3337291662047779775e-17L ,   // basis(7.5)
+ } ; 
+double Poles_15[] = {
+-0.73387257168597164192 ,
+-0.3855857342780184549 ,
+-0.18652010845105168602 ,
+-0.075907592047656735623 ,
+-0.021752065796541687759 ,
+-0.0028011514820764091618 ,
+-3.0935680451474410063e-05 ,
+} ;
+long double K16[] = {
+ 0.33220826914249586032L ,   // basis(0)
+ 0.30506442781494322298L ,   // basis(0.5)
+ 0.23598831687663609049L ,   // basis(1)
+ 0.15330093144015230863L ,   // basis(1.5)
+ 0.083172975045518980468L ,   // basis(2)
+ 0.03738103391018481751L ,   // basis(2.5)
+ 0.013757630909488189399L ,   // basis(3)
+ 0.0040808725321077028254L ,   // basis(3.5)
+ 0.00095448286788948239937L ,   // basis(4)
+ 0.00017072700505627712969L ,   // basis(4.5)
+ 2.2348950637818187112e-05L ,   // basis(5)
+ 2.0041660421228046889e-06L ,   // basis(5.5)
+ 1.1074718796168506873e-07L ,   // basis(6)
+ 3.131465753406890973e-09L ,   // basis(6.5)
+ 3.1393546448057462799e-11L ,   // basis(7)
+ 4.7794773323873852978e-14L ,   // basis(7.5)
+ 7.2929036443899311795e-19L ,   // basis(8)
+ } ; 
+double Poles_16[] = {
+-0.74743238775188380885 ,
+-0.40907360475830745195 ,
+-0.2092287193405746315 ,
+-0.093254718980160661301 ,
+-0.031867706120390963676 ,
+-0.0062584067851372366872 ,
+-0.00030156536330664312833 ,
+-2.3232486364235544612e-08 ,
+} ;
+long double K17[] = {
+ 0.32300939415699870668L ,   // basis(0)
+ 0.29797995870819162778L ,   // basis(0.5)
+ 0.23373674923065111L ,   // basis(1)
+ 0.15548403615015999844L ,   // basis(1.5)
+ 0.087311640770182303119L ,   // basis(2)
+ 0.041108064309116914771L ,   // basis(2.5)
+ 0.016073921990964784645L ,   // basis(3)
+ 0.0051528238735766806875L ,   // basis(3.5)
+ 0.0013308125721335362832L ,   // basis(4)
+ 0.00027040492583018919549L ,   // basis(4.5)
+ 4.1821549694989869669e-05L ,   // basis(5)
+ 4.695715379871064023e-06L ,   // basis(5.5)
+ 3.5643941839232455477e-07L ,   // basis(6)
+ 1.6314974698479856617e-08L ,   // basis(6.5)
+ 3.6845271901099787809e-10L ,   // basis(7)
+ 2.7700195120810122023e-12L ,   // basis(7.5)
+ 2.8114572543455207634e-15L ,   // basis(8)
+ 2.144971660114685641e-20L ,   // basis(8.5)
+ } ; 
+double Poles_17[] = {
+-0.75968322407197097501 ,
+-0.43093965318021570932 ,
+-0.23108984359938430919 ,
+-0.11082899331622909911 ,
+-0.043213911456682692347 ,
+-0.011258183689472329655 ,
+-0.0011859331251521279364 ,
+-7.6875625812547303262e-06 ,
+} ;
+long double K18[] = {
+ 0.31453440085864671822L ,   // basis(0)
+ 0.29135844665108330336L ,   // basis(0.5)
+ 0.2314117793664616011L ,   // basis(1)
+ 0.15724011346206745634L ,   // basis(1.5)
+ 0.091048500593391361557L ,   // basis(2)
+ 0.044670474960158529868L ,   // basis(2.5)
+ 0.018422928690498247476L ,   // basis(3)
+ 0.0063191164101958155308L ,   // basis(3.5)
+ 0.0017772776557432943289L ,   // basis(4)
+ 0.00040219803091097442175L ,   // basis(4.5)
+ 7.1383891069110100448e-05L ,   // basis(5)
+ 9.5907105586580192779e-06L ,   // basis(5.5)
+ 9.271047742986201033e-07L ,   // basis(6)
+ 5.9734083260063868359e-08L ,   // basis(6.5)
+ 2.2685078926749432357e-09L ,   // basis(7)
+ 4.0941846266406646114e-11L ,   // basis(7.5)
+ 2.308349801939754902e-13L ,   // basis(8)
+ 1.5619206968586226463e-16L ,   // basis(8.5)
+ 5.958254611429682336e-22L ,   // basis(9)
+ } ; 
+double Poles_18[] = {
+-0.77080505126463716437 ,
+-0.45132873338515144823 ,
+-0.25207457469899424707 ,
+-0.12841283679297030296 ,
+-0.055462967138511676257 ,
+-0.017662377684794876992 ,
+-0.0030119307290000858941 ,
+-0.00010633735588702059982 ,
+-2.5812403962584360567e-09 ,
+} ;
+long double K19[] = {
+ 0.3066931017379824246L ,   // basis(0)
+ 0.28515265744763108603L ,   // basis(0.5)
+ 0.22904564568118377632L ,   // basis(1)
+ 0.1586346253388907509L ,   // basis(1.5)
+ 0.094419295116760105743L ,   // basis(2)
+ 0.048060545425350700269L ,   // basis(2.5)
+ 0.020781149371245016366L ,   // basis(3)
+ 0.0075653834126722674764L ,   // basis(3.5)
+ 0.0022918668891541334257L ,   // basis(4)
+ 0.00056895229089948501399L ,   // basis(4.5)
+ 0.00011341320068077591568L ,   // basis(5)
+ 1.7663033358559161242e-05L ,   // basis(5.5)
+ 2.0693993456206894214e-06L ,   // basis(6)
+ 1.7275247843548983816e-07L ,   // basis(6.5)
+ 9.4683295350905535817e-09L ,   // basis(7)
+ 2.987004917810922453e-10L ,   // basis(7.5)
+ 4.3098159994772440922e-12L ,   // basis(8)
+ 1.8223814805986014024e-14L ,   // basis(8.5)
+ 8.2206352466243297175e-18L ,   // basis(9)
+ 1.5679617398499164042e-23L ,   // basis(9.5)
+ } ; 
+double Poles_19[] = {
+-0.78094644484628727987 ,
+-0.47037281947078746214 ,
+-0.27218037628176311449 ,
+-0.14585089375766777109 ,
+-0.068345906124943789361 ,
+-0.025265073344845085518 ,
+-0.0059366595910830613492 ,
+-0.00050841019468083302468 ,
+-1.9154786562122251559e-06 ,
+} ;
+long double K20[] = {
+ 0.29941029032001264032L ,   // basis(0)
+ 0.27932165599364228926L ,   // basis(0.5)
+ 0.22666242185748694763L ,   // basis(1)
+ 0.15972211762658876278L ,   // basis(1.5)
+ 0.0974575566598727568L ,   // basis(2)
+ 0.051275465138013302938L ,   // basis(2.5)
+ 0.023129338338060293147L ,   // basis(3)
+ 0.0088777091023436491261L ,   // basis(3.5)
+ 0.0028712400200206135652L ,   // basis(4)
+ 0.00077261996725682196449L ,   // basis(4.5)
+ 0.00017015073085024172881L ,   // basis(5)
+ 3.0008819646690530456e-05L ,   // basis(5.5)
+ 4.1167033003850903957e-06L ,   // basis(6)
+ 4.2192794922896485481e-07L ,   // basis(6.5)
+ 3.0493046656519177393e-08L ,   // basis(7)
+ 1.4241282646631125569e-09L ,   // basis(7.5)
+ 3.7354418501332067724e-11L ,   // basis(8)
+ 4.3098940955120870235e-13L ,   // basis(8.5)
+ 1.3667861257365780153e-15L ,   // basis(9)
+ 4.1103176233121648586e-19L ,   // basis(9.5)
+ 3.9199043496247910105e-25L ,   // basis(10)
+ } ; 
+double Poles_20[] = {
+-0.79023111767977516351 ,
+-0.48819126033675236398 ,
+-0.29142160165551617146 ,
+-0.16303353479638585388 ,
+-0.081648115630934034459 ,
+-0.033849479552361630419 ,
+-0.0099730290200507193399 ,
+-0.0014683217571042010263 ,
+-3.7746573197331790075e-05 ,
+-2.8679944881725126467e-10 ,
+} ;
+long double K21[] = {
+ 0.29262268723143477922L ,   // basis(0)
+ 0.2738298047486301248L ,   // basis(0.5)
+ 0.22428009387883276411L ,   // basis(1)
+ 0.16054821266164454585L ,   // basis(1.5)
+ 0.10019429073492722872L ,   // basis(2)
+ 0.054315966627272970968L ,   // basis(2.5)
+ 0.025451983263662738633L ,   // basis(3)
+ 0.010243000848845290252L ,   // basis(3.5)
+ 0.0035111077726313273026L ,   // basis(4)
+ 0.0010143045932529873796L ,   // basis(4.5)
+ 0.000243612424661332394L ,   // basis(5)
+ 4.7797839244413500002e-05L ,   // basis(5.5)
+ 7.4865177795402407054e-06L ,   // basis(6)
+ 9.0756157943914249454e-07L ,   // basis(6.5)
+ 8.1587909794275973583e-08L ,   // basis(7)
+ 5.1150819066710363876e-09L ,   // basis(7.5)
+ 2.0383683775099098269e-10L ,   // basis(8)
+ 4.4482237420372396468e-12L ,   // basis(8.5)
+ 4.104700189226971567e-14L ,   // basis(9)
+ 9.7627580792412901893e-17L ,   // basis(9.5)
+ 1.9572941063391261232e-20L ,   // basis(10)
+ 9.3331055943447405012e-27L ,   // basis(10.5)
+ } ; 
+double Poles_21[] = {
+-0.79876288565466957436 ,
+-0.50489153745536197171 ,
+-0.30982319641503575092 ,
+-0.17988466679726275443 ,
+-0.095200812461283090826 ,
+-0.043213918440668783183 ,
+-0.01504549998728420962 ,
+-0.0031720039638856827036 ,
+-0.00021990295763158517806 ,
+-4.7797646894259869337e-07 ,
+} ;
+long double K22[] = {
+ 0.28627661405538603955L ,   // basis(0)
+ 0.26864594027689889732L ,   // basis(0.5)
+ 0.22191207309687150606L ,   // basis(1)
+ 0.1611512144701082552L ,   // basis(1.5)
+ 0.10265788953426401334L ,   // basis(2)
+ 0.057185290104801063607L ,   // basis(2.5)
+ 0.027736783120003498267L ,   // basis(3)
+ 0.011649203759035082664L ,   // basis(3.5)
+ 0.0042065557982618627852L ,   // basis(4)
+ 0.0012943433274091186101L ,   // basis(4.5)
+ 0.00033552928198532912351L ,   // basis(5)
+ 7.2224788646371748003e-05L ,   // basis(5.5)
+ 1.2671383794748147439e-05L ,   // basis(6)
+ 1.7682350579090077749e-06L ,   // basis(6.5)
+ 1.8993891467043433632e-07L ,   // basis(7)
+ 1.5010206322471487409e-08L ,   // basis(7.5)
+ 8.1770577437810697864e-10L ,   // basis(8)
+ 2.7833247876855379198e-11L ,   // basis(8.5)
+ 5.0557094184087925374e-13L ,   // basis(9)
+ 3.7315643098318982977e-15L ,   // basis(9.5)
+ 6.6564259722400510509e-18L ,   // basis(10)
+ 8.8967913924505732872e-22L ,   // basis(10.5)
+ 2.1211603623510773867e-28L ,   // basis(11)
+ } ; 
+double Poles_22[] = {
+-0.80662949916286152963 ,
+-0.52057023687190062677 ,
+-0.3274164733138280603 ,
+-0.19635282650762261869 ,
+-0.10887245188483440916 ,
+-0.053181604599218119944 ,
+-0.021035660929842874001 ,
+-0.0057066136460001649564 ,
+-0.00072254796507928529137 ,
+-1.3458154983225084633e-05 ,
+-3.186643260432269507e-11 ,
+} ;
+long double K23[] = {
+ 0.28032619854980754502L ,   // basis(0)
+ 0.26374269458034057742L ,   // basis(0.5)
+ 0.21956831005031718209L ,   // basis(1)
+ 0.16156340331433543452L ,   // basis(1.5)
+ 0.1048741828768824975L ,   // basis(2)
+ 0.059888404600676471811L ,   // basis(2.5)
+ 0.029974159449075470104L ,   // basis(3)
+ 0.013085403104047330802L ,   // basis(3.5)
+ 0.0049523097091663721335L ,   // basis(4)
+ 0.0016124087669444304968L ,   // basis(4.5)
+ 0.00044731411734139782554L ,   // basis(5)
+ 0.0001044647630135970384L ,   // basis(5.5)
+ 2.0225085344373592521e-05L ,   // basis(6)
+ 3.1828904692399063536e-06L ,   // basis(6.5)
+ 3.9679866129008683199e-07L ,   // basis(7)
+ 3.7855233852927286935e-08L ,   // basis(7.5)
+ 2.6346734890183937921e-09L ,   // basis(8)
+ 1.2488410498396141086e-10L ,   // basis(8.5)
+ 3.6338307165683742375e-12L ,   // basis(9)
+ 5.4959585554808751978e-14L ,   // basis(9.5)
+ 3.2448470402629826029e-16L ,   // basis(10)
+ 4.3411473752750814749e-19L ,   // basis(10.5)
+ 3.868170170630684038e-23L ,   // basis(11)
+ 4.6112181790240812755e-30L ,   // basis(11.5)
+ } ; 
+double Poles_23[] = {
+-0.81390562354320794558 ,
+-0.53531408371104993726 ,
+-0.34423627688965990901 ,
+-0.21240466055269885404 ,
+-0.12256116098899572098 ,
+-0.063602480154273194346 ,
+-0.027811662038017159748 ,
+-0.0090795953352833073946 ,
+-0.0017112714467820973156 ,
+-9.5733943500721317005e-05 ,
+-1.1936918816067781773e-07 ,
+} ;
+long double K24[] = {
+ 0.27473197352118810147L ,   // basis(0)
+ 0.25909593388549224613L ,   // basis(0.5)
+ 0.21725612218406020861L ,   // basis(1)
+ 0.16181208211791016533L ,   // basis(1.5)
+ 0.10686656672959712099L ,   // basis(2)
+ 0.062431425854373209442L ,   // basis(2.5)
+ 0.032156816325798337903L ,   // basis(3)
+ 0.014541849599514216045L ,   // basis(3.5)
+ 0.0057429446266243922923L ,   // basis(4)
+ 0.0019676174028389475043L ,   // basis(4.5)
+ 0.00058004996270088237076L ,   // basis(5)
+ 0.00014563543156618789351L ,   // basis(5.5)
+ 3.0746018052888292381e-05L ,   // basis(6)
+ 5.3704036096147168721e-06L ,   // basis(6.5)
+ 7.6016977670631529332e-07L ,   // basis(7)
+ 8.4861949009616751487e-08L ,   // basis(7.5)
+ 7.2045281870976666726e-09L ,   // basis(8)
+ 4.4229185004672962615e-10L ,   // basis(8.5)
+ 1.8261499938887221924e-11L ,   // basis(9)
+ 4.5452628388307088642e-13L ,   // basis(9.5)
+ 5.7253638111923437033e-15L ,   // basis(10)
+ 2.70404290721556569e-17L ,   // basis(10.5)
+ 2.7132171099984410352e-20L ,   // basis(11)
+ 1.6117375710961183492e-24L ,   // basis(11.5)
+ 9.6067045396335026575e-32L ,   // basis(12)
+ } ; 
+double Poles_24[] = {
+-0.82065517417952760226 ,
+-0.54920097364808984075 ,
+-0.3603190653178175995 ,
+-0.22802014939914075353 ,
+-0.13618849963046011919 ,
+-0.074351497302516889043 ,
+-0.035244126673212937406 ,
+-0.013246375325256078484 ,
+-0.0032976826232791502103 ,
+-0.00035807154412069458092 ,
+-4.8126755630580574097e-06 ,
+-3.5407088073360672255e-12 ,
+} ;
+// Lookup table over the precomputed pole arrays: entry n points to the
+// array Poles_n. Entries 0 and 1 are null pointers; the table lists no
+// pole array for them.
+// NOTE(review): n is presumably the spline degree (degrees 0 and 1 needing
+// no prefilter poles) - confirm against the code consuming this table.
+const double* precomputed_poles[] = {
+  0, 
+  0, 
+  Poles_2, 
+  Poles_3, 
+  Poles_4, 
+  Poles_5, 
+  Poles_6, 
+  Poles_7, 
+  Poles_8, 
+  Poles_9, 
+  Poles_10, 
+  Poles_11, 
+  Poles_12, 
+  Poles_13, 
+  Poles_14, 
+  Poles_15, 
+  Poles_16, 
+  Poles_17, 
+  Poles_18, 
+  Poles_19, 
+  Poles_20, 
+  Poles_21, 
+  Poles_22, 
+  Poles_23, 
+  Poles_24, 
+} ;
+// Lookup table over the precomputed basis function value arrays: entry n
+// points to the array Kn. Going by the per-element comments in the K arrays,
+// each array holds the values basis(0), basis(0.5), basis(1), ... in steps
+// of one half.
+// NOTE(review): n is presumably the spline degree - confirm against the
+// code consuming this table.
+const long double* precomputed_basis_function_values[] = {
+  K0, 
+  K1, 
+  K2, 
+  K3, 
+  K4, 
+  K5, 
+  K6, 
+  K7, 
+  K8, 
+  K9, 
+  K10, 
+  K11, 
+  K12, 
+  K13, 
+  K14, 
+  K15, 
+  K16, 
+  K17, 
+  K18, 
+  K19, 
+  K20, 
+  K21, 
+  K22, 
+  K23, 
+  K24, 
+} ;
diff --git a/prefilter.h b/prefilter.h
new file mode 100644
index 0000000..738fe71
--- /dev/null
+++ b/prefilter.h
@@ -0,0 +1,1915 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file prefilter.h
+
+    \brief Code to create the coefficient array for a b-spline.
+    
+    The coefficients can be generated in two ways (that I know of): the first
+    is by solving a set of equations which encode the constraints of the spline.
+    A good example of how this is done can be found in libeinspline. I term it
+    the 'linear algebra approach'. In this implementation, I have chosen what I
+    call the 'DSP approach'. In a nutshell, the DSP approach looks at the b-spline's
+    reconstruction by convolving the coefficients with a specific kernel. This
+    kernel acts as a low-pass filter. To counteract the effect of this filter and
+    obtain the input signal from the convolution of the coefficients, a high-pass
+    filter with the inverse transfer function to the low-pass is used. This high-pass
+    has infinite support, but can still be calculated precisely within the bounds of
+    the arithmetic precision the CPU offers, due to the properties it has.
+    
+    I recommend [CIT2000] for a formal explanation. At the core of my prefiltering
+    routines there is code from Philippe Thevenaz' accompanying code to this paper,
+    with slight modifications translating it to C++ and making it generic.
+    The greater part of this file deals with 'generifying' the process and with
+    employing multithreading and the CPU's vector units to gain speed.
+    
+    This code makes heavy use of vigra, which provides handling of multidimensional
+    arrays and efficient handling of aggregate types - to mention only two of its
+    many qualities. The vectorization is done with Vc, which allowed me to code
+    the horizontal vectorization I use in a generic fashion.
+    
+    For now, this file offers two implementations in one: the unvectorized version,
+    available as solve_vigra(), and the vectorized version, solve_vc(). Note that
+    the vectorized code is more constrained in what numeric types it can process
+    (namely, only float, double and their aggregates).
+    Unit testing code for these two versions is in prefilter_test...
+    
+    In another version of this code I used vigra's BSPlineBase class to obtain prefilter
+    poles. This required passing the spline degree/order as a template parameter. Doing it
+    like this allows to make the Poles static members of the solver, but at the cost of
+    type proliferation. Here I chose not to follow this path and pass the spline order as a
+    parameter to the spline's constructor, thus reducing the number of solver specializations
+    and allowing automated testing with loops over the degree. This variant is slightly slower.
+
+    In addition to the code following the 'implicit scheme' proposed by Thevenaz, I provide
+    code to use an 'explicit scheme' to obtain the b-spline coefficients. The implicit scheme
+    makes assumptions about the continuation of the signal outside of the window of data which
+    is accessible: that the data continue mirrored, reflected, etc. - but it proceeds to
+    capture these assumptions in formulae deriving suitable initial causal/anticausal coefficients
+    from them. Usually this is done with a certain 'horizon' which takes into account the limited
+    arithmetic precision of the calculations and abbreviates the initial coefficient calculation
+    to a certain chosen degree of precision. The same effect can be achieved by simply embedding
+    the knot point data into a frame containing extrapolated knot point data. If the frame is
+    chosen so wide that margin effects don't 'disturb' the core data, we end up with an equally
+    (im)precise result with an explicit scheme. The width of the frame now takes the role of the
+    horizon used in the implicit scheme and has the same effect. While the explicit scheme needs
+    more memory, it has several advantages:
+
+    - there is no need to code specific routines for initial coefficient generation
+    - nor any need to explicitly run this code
+    - the iteration over the input becomes more straightforward
+    - any extrapolation scheme can be used easily
+
+    A disadvantage, apart from the higher memory consumption, is that one cannot give a
+    'precise' solution, which the implicit scheme can do for the cases it can handle. But what
+    is 'precise'? Certainly there is no precision beyond the arithmetic precision offered by
+    the underlying system. So if the horizon is chosen wide enough, the resulting coefficients
+    become the same with all schemes. They are interchangeable.
+
+    In an image-processing context, the extra memory needed would typically be a small
+    single-digit percentage - not really a bother. In my trials, I found the runtime differences
+    between the two approaches negligible and the simplification of the code so attractive that
+    I was tempted to choose the explicit scheme over the implicit. Yet since the code for the
+    implicit scheme is there already and some of it is even used in the explicit scheme I keep
+    both methods in the code base for now.
+*/
+
+#ifndef VSPLINE_PREFILTER_H
+#define VSPLINE_PREFILTER_H
+
+#include <thread>
+#include <math.h>
+#include <complex>
+#include <cmath>
+#include <iostream>
+#include <array>
+#include <assert.h>
+
+#include <vigra/multi_array.hxx>
+#include <vigra/multi_iterator.hxx>
+#include <vigra/multi_math.hxx>
+#include <vigra/navigator.hxx>
+#include <vigra/bordertreatment.hxx>
+#include <vigra/multi_convolution.hxx>
+
+#include "common.h"
+#include "basis.h"
+
+namespace vspline {
+
+using namespace std ;
+using namespace vigra ;
+using namespace vigra::multi_math;
+
+/// class solver performs the conversion of a 1D string of data to spline coefficients with
+/// the 'DSP approach', which performs coefficient generation by means of an IIR filter.
+/// The DSP approach is extremely versatile and the code is elegant, so I abandoned even my
+/// optimized cubic spline code, which used linear algebra methods, in its favour.
+/// The 1D solving is, later on, used repeatedly along the axes of multidimensional
+/// data, but the formulation of the solving process is unaware of the dimensionality
+/// of the data, it follows the 1D iterators it receives, no matter what strides these
+/// iterators use to pick out data along an axis.
+///
+/// With large data sets, and with higher dimensionality, processing separately along each
+/// axis consumes a lot of memory bandwidth. There are ways out of this dilemma by interleaving
+/// the code. Disregarding the calculation of initial causal and anticausal coefficients, the code
+/// to do this would perform the forward filtering step for all axes at the same time and then, later,
+/// the backward filtering step for all axes at the same time. This is possible, since the order
+/// of the filter steps is irrelevant, and the traversal of the data can be arranged so that
+/// values needed for context of the filter are always present (the filters are recursive and only
+/// 'look' one way). I have investigated these variants, but especially the need to calculate
+/// initial causal/anticausal coefficients, and the additional complications arising from
+/// vectorization, have kept me from choosing this path for the current body of code. With the
+/// inclusion of the explicit scheme for prefiltering, dimension-interleaved prefiltering becomes
+/// more feasible, and I anticipate revisiting it.
+///
+/// Here I am using a scheme where I make access to 1D subsets of the data very efficient (if necessary
+/// by buffering lines/stripes of data) and rely on the fact that such simple, fast access plays
+/// well with the compiler's optimizer and pipelining in the CPU. From the trials on my own system
+/// I conclude that this approach does not perform significantly worse than interleaving schemes
+/// and is much easier to formulate and understand. And with fast access to 1D subsets, higher order
+/// splines become less of an issue; the extra arithmetic to prefilter for, say, quintic splines is
+/// done very quickly, since no additional memory access is needed beyond a buffer's worth of data
+/// already present in core memory.
+///
+/// class solver needs two template arguments, one for the type of iterator over the incoming
+/// data, and one for the type of iterator to the resultant coefficients. These will usually be
+/// the same, but formulating the code with two separate types makes it more versatile.
+
+template < typename in_iter ,   // iterator over the knot point values
+           typename out_iter >  // iterator over the coefficient array
+class solver
+{
+  // both iterators must define value_type and have the same value_type
+
+  typedef typename in_iter::value_type value_type ;
+  
+  static_assert ( std::is_same < typename out_iter::value_type , value_type > :: value ,
+                  "prefilter input and output iterator must have the same value_type" ) ;
+  
+  // while the iterators may refer to aggregates (pixels etc.), we also need access
+  // to the fundamental type of the aggregates. Further down, in the vectorized code,
+  // a specialization for Vc::Vector objects is given, so that the element type of
+  // Vc::Vectors can be inferred as well.
+
+  typedef typename ExpandElementResult < value_type > :: type mattype ;
+
+//   // both iterators should be random access iterators.
+//   // currently not enforced, too lazy to code the traits for vector stripe iterators...
+//   typedef typename std::iterator_traits < in_iter > :: iterator_category in_cat ;
+//   static_assert ( std::is_same < in_cat , std::random_access_iterator_tag > :: value ,
+//                   "prefilter input iterator must be random access iterator"  ) ;
+//                   
+//   typedef typename std::iterator_traits < out_iter > :: iterator_category out_cat ;
+//   static_assert ( std::is_same < out_cat , std::random_access_iterator_tag > :: value ,
+//                   "prefilter output iterator must be random access iterator" ) ;
+                  
+  
+  /// typedef the fully qualified type for brevity
+  typedef solver<in_iter,out_iter> solver_type ;
+
+public:
+  
+  ArrayVector<mattype> Pole ;        ///< Poles of the IIR filter
+  ArrayVector<int> Horizon ;         ///< as many 'Horizons' as Poles
+  mattype  Lambda ;                  ///< (potentiated) overall gain.  
+  const int NbPoles ;                ///< Number of filter poles
+  const int M ;                      ///< length of the data
+  const int SplineDegree ;           ///< degree of the spline
+  const int SplineOrder ;            ///< order of the spline (== degree + 1)
+
+  /// the solving routine and initial coefficient finding routines are called via method pointers.
+  /// these pointers are typedefed for better legibility:
+  
+  typedef int       ( solver_type::*p_solve )  ( in_iter  input , out_iter output ) ;
+  typedef value_type ( solver_type::*p_icc1 )  ( in_iter  input , int k ) ;
+  typedef value_type ( solver_type::*p_icc2 )  ( out_iter input , int k ) ;
+  typedef value_type ( solver_type::*p_iacc )  ( out_iter input , int k ) ;
+
+  
+  // these are the method pointers used:
+  
+  p_solve _p_solve ; ///< pointer to the solve method
+  p_icc1  _p_icc1 ;  ///< pointer to calculation of initial causal coefficient with different
+  p_icc2  _p_icc2 ;  ///< and equal data types of input and output
+  p_iacc  _p_iacc ;  ///< pointer to calculation of initial anticausal coefficient
+  
+ /// solve() takes two iterators, one to the input data and one to the output space.
+ /// The containers must have the same size. It's safe to use solve() in-place.
+
+ int solve ( in_iter  input , out_iter output )
+ {
+   // Dispatch to the solve routine selected via _p_solve and hand its int
+   // result back to the caller. The function is declared to return int, so
+   // leaving it without a return statement would be undefined behaviour.
+   return (this->*_p_solve) ( input , output ) ;
+ }
+ 
+ /// for in-place operation we use the same solver routine.
+ /// I checked: a handcoded in-place routine using only a single
+ /// iterator is not noticeably faster than using one with two separate iterators.
+ 
+ int solve ( out_iter data )
+ {
+   // In-place variant: the same iterator serves as source and target of the
+   // routine selected via _p_solve. Its int result is passed on to the
+   // caller; falling off the end of an int function would be undefined
+   // behaviour.
+   return (this->*_p_solve) ( data , data ) ;
+ }
+ 
+// I now use code for the 'DSP approach' to calculating the spline coefficients, or,
+// to use DSP terminology, to perform the 'prefiltering'. I use adapted versions of
+// Thevenaz' code to calculate the initial causal and anticausal coefficients. The
+// code is changed just a little to work with an iterator instead of a C vector.
+
+private:
+
+/// the next section holds routines to calculate the initial causal/anticausal coefficient
+/// for the solver routine. Since I now use the DSP approach to coefficient generation, this
+/// is where the boundary conditions manifest in code, while the solve routines are the same
+/// for all cases.
+///
+/// The code for mirrored BCs is adapted from P. Thevenaz' code, the other routines are my
+/// own doing, with aid from a digest of spline formulae I received from P. Thevenaz and which
+/// were helpful to verify the code against a trusted source. Anyway, it's all unit tested now
+/// and runs just fine.
+///
+/// note how, in the routines to find the initial causal coefficient, there are two different
+/// cases: first the 'accelerated loop', which is used when the theoretically infinite sum of
+/// terms has reached sufficient precision, and the 'full loop', which implements the mathematically
+/// precise representation of the limit of the infinite sum towards an infinite number of terms,
+/// which happens to be calculable due to the fact that the absolute value of all poles is < 1 and
+///
+///  lim     n                a
+///         sum a * q ^ k =  ---
+/// n->inf  k=0              1-q
+///
+///
+/// first are mirror BCs. This is mirroring 'on bounds',
+/// f(-x) == f(x) and f(n-1 - x) == f(n-1 + x)
+///
+/// note how mirror BCs are equivalent to requiring the first derivative to be zero in the
+/// linear algebra approach. Obviously with mirrored data this has to be the case; the location
+/// where mirroring occurs is always an extremum. So this case covers 'FLAT' BCs as well
+///
+/// the initial causal coefficient routines are templated by iterator type, because depending
+/// on the circumstances, they may be used either on the input or the output iterator.
+  
+template < class IT >
+value_type icc_mirror ( IT c , int k )
+{
+  // initial causal coefficient for mirror boundary conditions, computed
+  // for the k-th pole from the data reachable through c.
+
+  mattype pole = Pole[k] ;
+  value_type acc ;
+
+  if ( Horizon[k] < M )
+  {
+    // accelerated loop: the horizon lies inside the data, so only the first
+    // Horizon[k] terms of the sum are accumulated.
+    mattype weight = pole ;
+    acc = c[0] ;
+    for ( int i = 1 ; i < Horizon[k] ; ++i )
+    {
+      acc += weight * c[i] ;
+      weight *= pole ;
+    }
+    return acc ;
+  }
+
+  // full loop: every sample contributes with a rising and a falling power
+  // of the pole; the total is normalized at the end.
+  mattype rising = pole ;
+  mattype inv_pole = 1.0 / pole ;
+  mattype falling = pow ( pole , (double)(M - 1) ) ;       // pole^(M-1)
+  acc = c[0] + falling * c[M - 1] ;
+  falling *= falling * inv_pole ;                          // pole^(2M-3)
+  for ( int i = 1 ; i <= M - 2 ; ++i )
+  {
+    acc += ( rising + falling ) * c[i] ;
+    rising *= pole ;
+    falling *= inv_pole ;
+  }
+  acc /= mattype ( 1.0 - rising * rising ) ;
+  return acc ;
+}
+
+/// the initial anticausal coefficient routines are always called with the output iterator,
+/// so they needn't be templated like the icc routines.
+///
+/// I still haven't understood the 'magic' which allows to calculate the initial anticausal
+/// coefficient from just two results of the causal filter, but I assume it's some exploitation
+/// of the symmetry of the data. This code is adapted from P. Thevenaz'.
+
+/// iacc_mirror yields the initial anticausal coefficient for pole k with
+/// mirror boundary conditions. It reads the last two values of c, which is
+/// expected to hold the result of the causal pass.
+value_type iacc_mirror ( out_iter c , int k )
+{
+  const mattype pole = Pole[k] ;
+  const mattype factor ( pole / ( pole * pole - 1.0 ) ) ;
+
+  return factor * ( c [ M - 1 ] + pole * c [ M - 2 ] ) ;
+}
+
+/// next are 'antimirrored' BCs. This is the same as 'natural' BCs: the signal is
+/// extrapolated via point mirroring at the ends, resulting in point-symmetry at the ends,
+/// which is equivalent to the second derivative being zero, the constraint used in
+/// the linear algebra approach to calculate 'natural' BCs:
+///
+/// f(x) - f(0) == f(0) - f(-x); f(x+n-1) - f(n-1) == f(n-1) - f (n-1-x)
+
+/// icc_natural yields the initial causal coefficient for pole k when the data
+/// are extrapolated by point mirroring at the ends ('natural' BCs):
+/// f(-x) == 2 * f(0) - f(x)
+template < class IT >
+value_type icc_natural ( IT c , int k )
+{
+  const mattype pole = Pole[k] ;
+  mattype pole_pow = pole ;   // running power of the pole, starts at pole^1
+  value_type acc ;
+
+  if ( Horizon[k] < M )
+  {
+    // accelerated loop: truncated sum over the point-mirrored values
+    // 2 * c[0] - c[i]
+    value_type twice_first ;
+    twice_first = c[0] + c[0] ;
+    acc = c[0] ;
+    for ( int i = 1 ; i < Horizon[k] ; ++i )
+    {
+      acc += pole_pow * ( twice_first - c[i] ) ;
+      pole_pow *= pole ;
+    }
+    return acc ;
+  }
+
+  // full loop over all data values
+  const mattype inv_pole = 1.0 / pole ;
+  mattype far_pow = pow ( pole , (double) ( M - 1 ) ) ;        // pole^(M-1)
+
+  acc =   mattype ( ( 1.0 + pole ) / ( 1.0 - pole ) )
+        * ( c[0] - far_pow * c[M - 1] ) ;
+  far_pow *= far_pow * inv_pole ;                              // pole^(2M-3)
+
+  for ( int i = 1 ; i <= M - 2 ; ++i )
+  {
+    acc -= ( pole_pow - far_pow ) * c[i] ;
+    pole_pow *= pole ;
+    far_pow *= inv_pole ;
+  }
+
+  return acc / mattype ( 1.0 - pole_pow * pole_pow ) ;
+}
+
+/// I still haven't understood the 'magic' which allows to calculate the initial anticausal
+/// coefficient from just two results of the causal filter, but I assume it's some exploitation
+/// of the symmetry of the data. This code is adapted from P. Thevenaz' formula.
+
+/// iacc_natural yields the initial anticausal coefficient for pole k with
+/// 'natural' boundary conditions, computed from the last two values of c.
+value_type iacc_natural ( out_iter c , int k )
+{
+  const mattype pole = Pole[k] ;
+  const mattype factor = - mattype ( pole / ( ( 1.0 - pole ) * ( 1.0 - pole ) ) ) ;
+
+  return factor * ( c [ M - 1 ] - pole * c [ M - 2 ] ) ;
+}
+
+/// next are reflective BCs. This is mirroring 'between bounds':
+///
+/// f ( -1 - x ) == f ( x ) and f ( n + x ) == f ( n-1 - x )
+///
+/// I took Thevenaz' routine for mirrored data as a template and adapted it.
+/// 'reflective' BCs have some nice properties which make them more suited than mirror BCs in
+/// some situations:
+/// - the artificial discontinuity is 'pushed out' half a unit spacing
+/// - the extrapolated data are just as long as the source data
+/// - they play well with even splines
+
+/// icc_reflect yields the initial causal coefficient for pole k when the data
+/// are mirrored 'between bounds': f ( -1 - x ) == f ( x ).
+/// Because c[0] is its own first mirror image, it contributes once with
+/// weight 1 and once more inside the sum with weight pole^1.
+template < class IT >
+value_type icc_reflect ( IT c , int k )
+{
+  const mattype pole = Pole[k] ;
+  mattype pole_pow = pole ;   // running power of the pole, starts at pole^1
+  value_type acc ;
+  int i ;
+
+  if ( Horizon[k] < M )
+  {
+    // accelerated loop: truncated sum, note that it starts at index 0
+    acc = c[0] ;
+    for ( i = 0 ; i < Horizon[k] ; ++i )
+    {
+      acc += pole_pow * c[i] ;
+      pole_pow *= pole ;
+    }
+    return acc ;
+  }
+
+  // full loop: every data value is paired with its reflected counterpart
+  const mattype inv_pole = 1.0 / pole ;
+  mattype reflected_pow = pow ( pole , (double) ( 2 * M ) ) ;  // pole^(2M)
+
+  acc = 0 ;
+  for ( i = 0 ; i < M - 1 ; ++i )
+  {
+    acc += ( pole_pow + reflected_pow ) * c[i] ;
+    pole_pow *= pole ;
+    reflected_pow *= inv_pole ;
+  }
+
+  // the last term is added without advancing the powers, so that pole_pow
+  // still holds the value needed for the closing division
+  acc += ( pole_pow + reflected_pow ) * c[i] ;
+
+  return c[0] + acc / mattype ( 1.0 - pole_pow * pole_pow ) ;
+}
+
+/// I still haven't understood the 'magic' which allows to calculate the initial anticausal
+/// coefficient from just one result of the causal filter, but I assume it's some exploitation
+/// of the symmetry of the data. I have to thank P. Thevenaz for his formula which let me code:
+
+/// iacc_reflect yields the initial anticausal coefficient for pole k with
+/// reflective boundary conditions, computed from the last value of c only.
+value_type iacc_reflect ( out_iter c , int k )
+{
+  const mattype pole = Pole[k] ;
+  const mattype divisor ( 1.0 - 1.0 / pole ) ;
+
+  return c[M - 1] / divisor ;
+}
+
+/// next is periodic BCs. so, f(x) = f(x+N)
+///
+/// Implementing this is more straightforward than implementing the various mirrored types.
+/// The mirrored types are, in fact, also periodic, but with a period twice as large, since they
+/// repeat only after the first reflection. So especially the code for the full loop is more complex
+/// for mirrored types. The down side here is the lack of symmetry to exploit, which made me code
+/// a loop for the initial anticausal coefficient as well.
+
+/// icc_periodic yields the initial causal coefficient for pole k with
+/// periodic boundary conditions. The values preceding c[0] are taken from
+/// the end of the data, walking backwards from c[M-1].
+template < class IT >
+value_type icc_periodic ( IT c , int k )
+{
+  const mattype pole = Pole[k] ;
+  mattype pole_pow = pole ;   // running power of the pole, starts at pole^1
+  value_type acc ;
+
+  acc = c[0] ;
+
+  if ( Horizon[k] < M )
+  {
+    // accelerated loop: only the values within the horizon contribute
+    const int stop = M - Horizon[k] ;
+    for ( int i = M - 1 ; i > stop ; --i )
+    {
+      acc += pole_pow * c[i] ;
+      pole_pow *= pole ;
+    }
+    return acc ;
+  }
+
+  // full loop: one complete period, then close the geometric series
+  for ( int i = M - 1 ; i > 0 ; --i )
+  {
+    acc += pole_pow * c[i] ;
+    pole_pow *= pole ;
+  }
+  acc /= mattype ( 1.0 - pole_pow ) ;
+
+  return acc ;
+}
+
+/// iacc_periodic yields the initial anticausal coefficient for pole k with
+/// periodic boundary conditions. The values following c[M-1] are taken from
+/// the start of the data, walking forwards from c[0].
+value_type iacc_periodic ( out_iter c , int k )
+{
+  const mattype pole = Pole[k] ;
+  mattype pole_pow = pole ;   // running power of the pole, starts at pole^1
+  value_type acc ;
+
+  if ( Horizon[k] < M )
+  {
+    // accelerated loop: the power is advanced before each term is added,
+    // so c[0] is weighted with pole^2
+    acc = c[M-1] * pole ;
+    for ( int i = 0 ; i < Horizon[k] ; ++i )
+    {
+      pole_pow *= pole ;
+      acc += pole_pow * c[i] ;
+    }
+    acc = -acc ;
+    return acc ;
+  }
+
+  // full loop: one complete period, then close the geometric series
+  acc = c[M-1] ;
+  for ( int i = 0 ; i < M - 1 ; ++i )
+  {
+    acc += pole_pow * c[i] ;
+    pole_pow *= pole ;
+  }
+  acc = pole * acc / mattype ( pole_pow - 1.0 ) ;
+
+  return acc ;
+}
+
+/// icc_identity passes the first data value through unchanged as the
+/// initial causal coefficient. The pole index k is accepted only to match
+/// the signature of the other icc routines and is not used.
+template < class IT >
+value_type icc_identity ( IT c , int k )
+{
+  return c[0] ;
+}
+
+/// iacc_identity passes the last data value through unchanged as the
+/// initial anticausal coefficient. The pole index k is accepted only to match
+/// the signature of the other iacc routines and is not used.
+value_type iacc_identity ( out_iter c , int k )
+{
+  return c[M-1] ;
+}
+
+/// now we come to the solving, or prefiltering code itself.
+/// there are some variants - a bit of code bloat due to the explicit handling of a few
+/// distinct cases; since this is core code I have opted to suffer some code duplication
+/// in exchange for maximum efficiency.
+/// The code itself is adapted from P. Thevenaz' code.
+///
+/// the first solve routine is to be used for the first dimension.
+/// here Lambda, the overall gain, is applied to the elements of the input as they
+/// are processed, saving the separate loop to preapply the gain. Subsequent poles
+/// and further dimensions then use the next routine. The gain which is applied here
+/// may be a power of the 'orthodox' gain, to avoid having to reapply the 'orthodox'
+/// Lambda with every dimension which is processed. See the constructor.
+
+/// Filters the M values read from c into x, applying the gain Lambda while
+/// the first pole is processed. Returns 0; the return value carries no
+/// information and exists only because the routine is declared int.
+int solve_gain_inlined ( in_iter c , out_iter x )
+{
+  assert ( M > 1 ) ;
+  
+  value_type X ;
+  
+  // process first pole, applying overall gain in the process
+  // of consuming the input. This gain may be a power of the 'orthodox'
+  // Lambda from Thevenaz' code. This is done when the input is multidimensional,
+  // in which case it's wasteful to apply Lambda in each dimension. In this situation
+  // it makes more sense to apply pow(Lambda,dimensions) when solving along the
+  // first axis and apply no gain when solving along the other axes.
+  // Also note that the application of the gain is performed during the processing
+  // of the first (maybe the only) pole of the filter, instead of running a separate
+  // loop over the input to apply it before processing starts.
+  
+  // note how the gain is applied to the initial causal coefficient. This is
+  // equivalent to first applying the gain to the input and then calculating
+  // the initial causal coefficient from the amplified input.
+  
+  // note the seemingly strange = X clause in the assignment. By performing this
+  // assignment, we buffer the result of the current filter step to be used in the
+  // next iteration instead of fetching it again from memory. In my trials, this
+  // performed better, especially on SIMD data.
+  
+  x[0] = X = Lambda * (this->*_p_icc1) (c, 0);
+
+  /* causal recursion */
+  // the gain is applied to each input value as it is consumed
+  
+  for (int n = 1; n < M; n++)
+  {
+    x[n] = X = Lambda * c[n] + Pole[0] * X ;
+  }
+  
+  // now the input is used up and won't be looked at any more; all subsequent
+  // processing operates on the output.
+  
+  /* anticausal initialization */
+  
+  x[M - 1] = X = (this->*_p_iacc)(x, 0);
+
+  /* anticausal recursion */
+  for (int n = M - 2; 0 <= n; n--)
+  {
+    x[n] = X = Pole[0] * ( X - x[n]);
+  }
+  
+  // for the remaining poles, if any, don't apply the gain
+  // and process the result from applying the first pole
+  
+  for (int k = 1; k < NbPoles; k++)
+  {
+    /* causal initialization */
+    x[0] = X = (this->*_p_icc2)(x, k);
+    
+    /* causal recursion */
+    for (int n = 1; n < M; n++)
+    {
+      x[n] = X = x[n] + Pole[k] * X ;
+    }
+    
+    /* anticausal initialization */
+    x[M - 1] = X = (this->*_p_iacc)(x, k);
+    
+    /* anticausal recursion */
+    for (int n = M - 2; 0 <= n; n--)
+    {
+      x[n] = X = Pole[k] * ( X - x[n] );
+    }
+  }
+
+  // the routine is declared int, so it must return a value: flowing off the
+  // end of a non-void function is undefined behaviour
+  return 0 ;
+}
+
+/// solve routine without application of any gain, it is assumed that this has been
+/// done already during an initial run with the routine above, or in some other way.
+
+/// Filters the M values read from c into x without applying any gain.
+/// The first pole consumes the input; all further poles operate on x.
+/// Returns 0; the return value carries no information and exists only
+/// because the routine is declared int.
+int solve_no_gain ( in_iter c , out_iter x )
+{
+  assert ( M > 1 ) ;
+
+  // X buffers the result of the current filter step so that the next step
+  // can use it without reading it back from x
+  value_type X ;
+  
+  // process first pole, consuming the input
+  
+  /* causal initialization */
+  x[0] = X = (this->*_p_icc1)(c, 0);
+  
+  /* causal recursion */
+  for ( int n = 1; n < M; n++)
+  {
+    x[n] = X = c[n] + Pole[0] * X ;
+  }
+  
+  /* anticausal initialization */
+  x[M - 1] = X = (this->*_p_iacc)(x, 0);
+  
+  /* anticausal recursion */
+  for ( int n = M - 2; 0 <= n; n--)
+  {
+    x[n] = X = Pole[0] * ( X - x[n]);
+  }
+  
+  // for the remaining poles, if any, work on the result
+  // of processing the first pole
+  
+  for ( int k = 1 ; k < NbPoles; k++)
+  {
+    /* causal initialization */
+    x[0] = X = (this->*_p_icc2)(x, k);
+    
+    /* causal recursion */
+    for (int n = 1; n < M; n++)
+    {
+      x[n] = X = x[n] + Pole[k] * X ;
+    }
+    
+    /* anticausal initialization */
+    x[M - 1] = X = (this->*_p_iacc)(x, k);
+    
+    /* anticausal recursion */
+    for (int n = M - 2; 0 <= n; n--)
+    {
+      x[n] = X = Pole[k] * ( X - x[n] );
+    }
+  }
+
+  // the routine is declared int, so it must return a value: flowing off the
+  // end of a non-void function is undefined behaviour
+  return 0 ;
+}
+
+/// shortcircuit routine, copies input to output
+///
+/// this routine can also be used for splines of degree 0 and 1, for simplicity's sake
+
+/// Copies the M values read from c to x. If both iterators compare equal
+/// the operation is in-place and nothing is copied. Returns 0 on both paths.
+int solve_identity ( in_iter c , out_iter x )
+{
+  if ( x == c )
+    return 0 ;
+  for ( int n = 0 ; n < M ; n++ )
+    x[n] = c[n] ;
+  // previously control fell off the end here, which is undefined behaviour
+  // in a function declared to return int
+  return 0 ;
+}
+
+/// The last bit of work left in the solver is the constructor.
+/// The number of input/output values is passed into the constructor, limiting the
+/// solver to operate on data precisely of this length. apply_gain isn't immediately
+/// obvious: it's not a mere flag, but contains the exponent which should be applied
+/// to the gain. If, for example, a 2D spline is built, one might pass in 2 here for
+/// the first dimension, and 0 for the second. This way, one set of multiplications is
+/// saved, at the cost of slightly reduced accuracy for large spline degrees. For high
+/// spline degrees and higher dimensions, it's advisable to not use this mechanism and
+/// pass in apply_gain = 1 for all dimensions.
+///
+/// Next is the boundary condition to use for the current axis. This is one of
+/// MIRROR, REFLECT, NATURAL, and PERIODIC. Note that different axes can use
+/// different boundary conditions.
+///
+/// The last parameter determines the spline order, which is one larger than the
+/// spline degree TODO: change code to always use degree
+
+public:
+  
+solver ( int _M ,           ///< number of input/output elements (DataLength)
+         int apply_gain ,   ///< power of Lambda to apply while processing the first pole of the filter
+         bc_code bc ,       ///< boundary conditions for this solver
+         int spline_order ) ///< desired spline order (4 for cubic)
+: M ( _M ) ,
+  SplineOrder ( spline_order ) ,
+  SplineDegree ( spline_order - 1 ) ,
+  NbPoles ( ( spline_order - 1 ) / 2 )
+{
+  // TODO: make tolerance a parameter
+
+  // Tolerance determines the horizon, the number of terms after which the
+  // initial coefficient sums are truncated. It is only set for float and
+  // double; for any other mattype it stays 0, and the horizon is then set
+  // to M below, so that the 'full loop' is always used.
+
+  double Tolerance = 0.0 ; // any type - no tolerance
+
+  if ( std::is_same < mattype , float > :: value == true )
+    Tolerance = .000000001 ;
+
+  else if ( std::is_same < mattype , double > :: value == true )
+    Tolerance = .000000000000001 ;
+
+  // fetch the precomputed filter poles:
+
+  assert ( SplineDegree >= 0 && SplineDegree < 25 ) ;
+  
+  if ( SplineDegree < 2 )
+  {
+    // this is the easy way to deal with low degree splines:
+    // copy the input to the output.
+    // note that on this path the constructor returns before Pole, Horizon,
+    // Lambda and the icc/iacc method pointers are set up.
+    _p_solve = & solver_type::solve_identity ;
+    return ;
+  }
+  
+  // for every pole, store the pole itself and its horizon: the smallest
+  // integer n for which |pole|^n does not exceed Tolerance
+
+  for ( int i = 0 ; i < NbPoles ; i++ )
+  {
+    double pole = precomputed_poles [ SplineDegree ] [ i ] ;
+    Pole.push_back ( pole );
+    if ( Tolerance )
+      Horizon.push_back ( ceil ( log ( Tolerance ) / log ( fabs ( pole ) ) ) ) ;
+    else
+      Horizon.push_back ( M ) ;
+  }
+
+//   for ( int i = 0 ; i < NbPoles ; i++ )
+//     cout << "Pole " << i << ": " << Pole[i] << " Hor " << Horizon[i] << endl ;
+
+  /* compute the overall gain */
+
+  Lambda = 1.0 ; // if apply_gain is 0, Lambda won't be applied at all
+
+  if ( apply_gain ) // if apply_gain is set, it will be used as an *exponent* on Lambda
+                    // as to apply a power of Lambda when processing the first dimension
+  {
+    // the gain for a single pass is the product over all poles of
+    // ( 1 - pole ) * ( 1 - 1 / pole )
+    for (int k = 0; k < NbPoles; k++)
+      Lambda = Lambda * (1.0 - Pole[k]) * (1.0 - 1.0 / Pole[k]);
+    
+    Lambda = pow ( Lambda , apply_gain ) ;
+
+    _p_solve = & solver_type::solve_gain_inlined ; // multiply input with pow(Lambda,apply_gain)
+  }
+  else
+  {
+    _p_solve = & solver_type::solve_no_gain ;      // the gain has already been applied
+  }
+
+//   cout << "Lambda: " << Lambda << endl ;
+
+  // while the forward/backward IIR filter in the solve_... routines is the same for all
+  // boundary conditions, the calculation of the initial causal and anticausal coefficients
+  // depends on the boundary conditions and is handled by a call through a method pointer
+  // in the solve_... routines.
+  // _p_icc1 is instantiated for the input iterator and used for the first pole,
+  // _p_icc2 is instantiated for the output iterator and used for all further poles.
+  
+  if ( bc == MIRROR )
+  {     
+    _p_icc1 = & solver_type::icc_mirror<in_iter> ;
+    _p_icc2 = & solver_type::icc_mirror<out_iter> ;
+    _p_iacc = & solver_type::iacc_mirror ;
+  }
+  else if ( bc == NATURAL )
+  {     
+    _p_icc1 = & solver_type::icc_natural<in_iter> ;
+    _p_icc2 = & solver_type::icc_natural<out_iter> ;
+    _p_iacc = & solver_type::iacc_natural ;
+  }
+  else if ( bc == PERIODIC )
+  {
+    _p_icc1 = & solver_type::icc_periodic<in_iter> ;
+    _p_icc2 = & solver_type::icc_periodic<out_iter> ;
+    _p_iacc = & solver_type::iacc_periodic ;
+  }
+  else if ( bc == REFLECT )
+  {
+    _p_icc1 = & solver_type::icc_reflect<in_iter> ;
+    _p_icc2 = & solver_type::icc_reflect<out_iter> ;
+    _p_iacc = & solver_type::iacc_reflect ;
+  }
+  else if ( bc == ZEROPAD || bc == IGNORE )
+  {
+    _p_icc1 = & solver_type::icc_identity<in_iter> ;
+    _p_icc2 = & solver_type::icc_identity<out_iter> ;
+    _p_iacc = & solver_type::iacc_identity ;
+  }
+  else if ( bc == IDENTITY )
+  {
+    // overrides the solve routine chosen above: the data are merely copied
+    _p_solve = & solver_type::solve_identity ;
+  }
+  else
+    // NOTE(review): this branch only prints a message. None of _p_icc1,
+    // _p_icc2 and _p_iacc is assigned here, while _p_solve has already been
+    // set to one of the filtering routines above - confirm that callers
+    // never pass any other bc code.
+    cerr << "bc code " << bc << " not supported" << endl ;
+}
+
+} ; // end of class solver
+
+// previous implementation, not using divide_and_conquer:
+/*
+/// helper routine to run the solving process with several threads
+
+template < typename in_nav_type , typename out_nav_type , typename solver_type >
+int process ( in_nav_type ni , out_nav_type no , solver_type * s )
+{
+  while ( ni.hasMore() )
+  {
+    s->solve ( ni.begin() , no.begin() ) ;
+    ++ni ;
+    ++no ;
+  }
+}
+
+/// The code for multithreaded operation processes only one axis per call. This way we can easily
+/// mix the initial, potentially more complex call where the knot point data are filtered for
+/// the first time, and the subsequent in-place operations over the coefficient array along the
+/// remaining axes.
+/// Here we formulate the version of the 1D solve routine which doesn't work in-place.
+/// There is a certain amount of code bloat because we allow for different
+/// input and output types and don't restrict the code to perform the calculation in-place, though
+/// this works just as well. The code bloat consists in a few more types to accomodate the potentially
+/// different input and output types. d, the axis to process, is the axis we use when we 'pull in'
+/// the input from the knot point data to the coefficient array. This 'pulling in' has to happen
+/// before the subsequent axes are processed in-place in the coefficient array.
+/// Initially I coded an in-place version of this routine but threw it out, since the
+/// performance gain of the explicit in-place version is if at all negligible, and the extra
+/// template parameters aren't too much to handle.
+///
+/// solve_vigra is the newer version of this routine, it internally constructs the solver,
+/// which makes the call easier to comprehend.
+
+template < typename input_array_type ,      ///< type of array with knot point data
+           typename output_array_type >     ///< type of array for coefficients (may be the same)
+void solve_vigra ( input_array_type &input ,    ///< knot point data. the routine can also operate in-place
+                   output_array_type &output ,  ///< where input == output.
+                   bc_code bc ,                 ///< boundary treatment for this solver
+                   int degree ,                 ///< degree of the spline
+                   int d ,                      ///< axis to process
+                   int nslices = ncores )  ///< number of threads to use
+{
+  const int dim = input_array_type::actual_dimension ;
+  typedef typename input_array_type::difference_type diff_t ;
+  diff_t shape = input.shape() ;
+  
+  // we use vigra::MultiArrayNavigators which provide precisely the functionality we need,
+  // which is to provide iterators to all 1D subarrays along a given axis. Again we have to
+  // account for the possible difference between the incoming data and the output.
+  
+  typedef vigra::MultiArrayNavigator<typename input_array_type::traverser, dim> input_Navigator;
+  typedef vigra::MultiArrayNavigator<typename output_array_type::traverser, dim> output_Navigator;
+  typedef typename input_Navigator::iterator input_nav_iter ;
+  typedef typename output_Navigator::iterator output_nav_iter ;
+
+  int lambda_exponent = 1 ;
+
+// deactivating the code below may produce slightly more precise results
+
+  if ( pow ( degree , dim ) < 64 ) // heuristic. for high degrees, below optimization reduces precision too much
+  {
+    lambda_exponent = 0 ;
+    if ( d == 0 )
+      lambda_exponent = dim ;
+  }
+  
+  int count = input.shape ( d ) ;
+  
+  typedef solver < input_nav_iter , output_nav_iter > solver_type ;
+                          
+  solver_type s ( count , lambda_exponent , bc , degree + 1 ) ;
+  solver_type * sp = &s ;
+  
+  diff_t shp = shape ;       // start with 'complete' shape
+  shp[d] = 1 ;               // bind current dimension to 1
+  
+  // find the outermost dimension that can be split nslices ways, and it's extent
+  // This is the same process as in split_array_..., but since we work with shapes here
+  // to set up the range for the Navigators, we don't use these routines.
+
+  int maxd = -1 ;
+  int max_extent = -1 ;
+  for ( int md = dim - 1 ; md >= 0 ; md-- )
+  {
+    if ( shp[md] > max_extent && shp[md] >= nslices )
+    {
+      max_extent = shp[md] ;
+      maxd = md ;
+      break ;
+    }
+  }
+  
+  if ( max_extent == -1 )
+  {
+    // repeat process with relaxed conditions
+    for ( int md = dim - 1 ; md >= 0 ; md-- )
+    {
+      if ( shp[md] > max_extent )
+      {
+        max_extent = shp[md] ;
+        maxd = md ;
+        break ;
+      }
+    }
+  }
+  
+  nslices = min ( max_extent , nslices ) ;
+  if ( nslices <= 1 )
+  {
+    // process in this thread
+    input_Navigator nav_in(input.traverser_begin(), input.shape() , d);
+    output_Navigator nav_out(output.traverser_begin(), input.shape() , d);
+    process<input_Navigator,output_Navigator,solver_type > ( nav_in , nav_out , sp ) ;
+  }
+  else
+  {
+    thread * t[nslices] ;
+
+    for ( int s = 0 ; s < nslices ; s++ )
+    {
+      diff_t s0 ;                                    // origin of the view
+      s0[maxd] = ( s * max_extent ) / nslices ;      // set start position in largest dimension to cut position
+      diff_t s1 = shape ;                            // end of the view
+      s1[maxd] = ( (s+1) * max_extent ) / nslices ;  // end position is the next cut position (one beyond)
+      input_Navigator nav_in(input.traverser_begin(), s0 , s1 , d);
+      output_Navigator nav_out(output.traverser_begin(), s0 , s1 , d);
+      t[s] = new thread ( process<input_Navigator,output_Navigator,solver_type > ,
+                          nav_in , nav_out , sp ) ;
+    }
+    for ( int s = 0 ; s < nslices ; s++ )
+    {
+        t[s]->join() ;
+        delete t[s] ;
+    }
+  }
+}
+*/
+
+/// process() prefilters a chunk of data along one axis. The solver is repeatedly
+/// called for 1D subarrays collinear to the processing axis.
+
+/// Runs the solver over every 1D subarray of input which is collinear to
+/// 'axis', writing the results to the corresponding subarray of output.
+/// Returns 0; the return value carries no information and exists only
+/// because the routine is declared int.
+template < typename input_array_type ,      ///< type of array with knot point data
+           typename output_array_type ,     ///< type of array for coefficients (may be the same)
+           typename solver_type >           ///< type of solver to use
+int process ( input_array_type &input ,     ///< knot point data. the routine can also operate in-place
+              output_array_type &output ,   ///< where input == output.)
+              solver_type &solver ,         ///< solver to use
+              int axis                      ///< axis to process
+            )
+{
+  const int dim = input_array_type::actual_dimension ;
+
+  typedef vigra::MultiArrayNavigator<typename input_array_type::traverser, dim> input_Navigator;
+  typedef vigra::MultiArrayNavigator<typename output_array_type::traverser, dim> output_Navigator;
+  
+  input_Navigator  nav_in  ( input.traverser_begin() ,  input.shape() ,  axis ) ;
+  output_Navigator nav_out ( output.traverser_begin() , output.shape() , axis ) ;
+  
+  // both navigators are advanced in step; only nav_in is checked for the end
+  while ( nav_in.hasMore() )
+  {
+    solver.solve ( nav_in.begin() , nav_out.begin() ) ;
+    ++nav_in ;
+    ++nav_out ;
+  }
+
+  // previously control fell off the end here, which is undefined behaviour
+  // in a function declared to return int
+  return 0 ;
+}
+
+/// solve_vigra() prefilters an array along a specific axis. This routine splits the
+/// input and output array into equal-sized chunks and processes each chunk in a separate
+/// thread. This is done by using 'divide_and_conquer'.
+
+template < typename input_array_type ,      ///< type of array with knot point data
+           typename output_array_type >     ///< type of array for coefficients (may be the same)
+void solve_vigra ( input_array_type &input ,    ///< knot point data. the routine can also operate in-place
+                   output_array_type &output ,  ///< where input == output.
+                   bc_code bc ,                 ///< boundary treatment for this solver
+                   int degree ,                 ///< degree of the spline
+                   int d ,                      ///< axis to process
+                   int nslices = ncores )  ///< number of threads to use
+{
+  const int dim = input_array_type::actual_dimension ;
+
+  // the solver is parametrized with the iterator types which vigra's
+  // navigators hand out for the 1D subarrays of input and output
+
+  typedef vigra::MultiArrayNavigator<typename input_array_type::traverser, dim> input_Navigator;
+  typedef vigra::MultiArrayNavigator<typename output_array_type::traverser, dim> output_Navigator;
+  typedef solver < typename input_Navigator::iterator ,
+                   typename output_Navigator::iterator > solver_type ;
+
+  // by default the gain is applied once for every axis. For small values of
+  // pow ( degree , dim ) the gain for all axes is instead applied in one go
+  // while axis 0 is processed, and no gain is applied for the other axes.
+  // This is a heuristic: for high degrees it reduces precision too much.
+
+  int lambda_exponent = 1 ;
+
+  if ( pow ( degree , dim ) < 64 )
+    lambda_exponent = ( d == 0 ) ? dim : 0 ;
+
+  solver_type axis_solver ( input.shape(d) , lambda_exponent , bc , degree + 1 ) ;
+
+  // bind process() to the solver and the axis, leaving two open slots for
+  // the pair of data chunks which divide_and_conquer_2 will pass in
+
+  auto apply_solver
+  = std::bind ( process < input_array_type , output_array_type , solver_type > ,
+                std::placeholders::_1 ,
+                std::placeholders::_2 ,
+                std::ref(axis_solver) ,
+                d ) ;
+
+  // divide_and_conquer_2 performs the array splitting and multithreading;
+  // passing d forbids splitting along the processing axis
+
+  divide_and_conquer_2 < input_array_type , output_array_type >
+  :: run ( input , output , apply_solver , d , nslices ) ;
+}
+
+/// This routine calls the 1D prefiltering routine for all axes in turn.
+
+template < typename input_array_type ,      // type of array with knot point data
+           typename output_array_type >     // type of array for coefficients (may be the same)
+void solve_vigra ( input_array_type & input ,
+                   output_array_type & output ,
+                   TinyVector<bc_code,input_array_type::actual_dimension> bc ,
+                   int degree ,
+                   int nslices = ncores )
+{
+  // the operation is taken to be in-place if both arrays start at the
+  // same address
+
+  const bool in_place = ( (void*)(input.data()) == (void*)(output.data()) ) ;
+
+  // in-place with degree <= 1: the data already are the coefficients
+
+  if ( in_place && degree <= 1 )
+    return ;
+
+  // input and output have to agree in dimension and shape
+
+  const int dim = input_array_type::actual_dimension ;
+
+  if ( output_array_type::actual_dimension != dim )
+    throw dimension_mismatch ( "input and output array must have the same dimension" ) ;
+
+  typedef typename input_array_type::difference_type diff_t ;
+  diff_t expected_shape = input.shape() ;
+
+  if ( output.shape() != expected_shape )
+    throw shape_mismatch ( "input and output array must have the same shape" ) ;
+
+  // these two types are not referred to further down
+
+  typedef typename input_array_type::value_type compound_type ;
+  typedef typename ExpandElementResult<compound_type>::type mattype ;
+
+  // axis 0 is processed from input to output. With degree <= 1 this merely
+  // copies the data, which is still needed since input != output here.
+
+  solve_vigra<input_array_type,output_array_type>
+             ( input , output , bc[0] , degree , 0 , nslices ) ;
+
+  // with degree <= 1 the work is done, copying the data again is futile
+
+  if ( degree <= 1 )
+    return ;
+
+  // all remaining axes are processed in-place in the output array
+
+  for ( int axis = 1 ; axis < dim ; ++axis )
+    solve_vigra<output_array_type,output_array_type>
+              ( output , output , bc[axis] , degree , axis , nslices ) ;
+}
+
+/// An interlude: restoration of the original knot point data from the spline coefficients.
+/// This is easily done by a simple convolution with the values of the basis function
+/// taken at discrete points inside the defined range.
+/// The function can take arbitrary spline degrees as template argument.
+/// there are two functions to restore the original data from a spline: the first one takes a
+/// braced spline and convolves it with BORDER_TREATMENT_AVOID. This way the explicit
+/// border treatment manifest in the brace is used, and splines with arbitrary border conditions
+/// can be verified.
+/// TODO: bit rough and ready - restoration from braced should really only produce the inner
+/// part, not the area covered by the brace.
+
+/// convolves 'spline' with the b-spline basis function sampled at integer
+/// positions, using BORDER_TREATMENT_AVOID, and writes the result to 'target'.
+template < class array >
+void restore_from_braced ( array &spline , array& target , int SplineDegree )
+{
+  typedef typename array::value_type value_type ;
+
+  // the kernel covers the integer positions -reach ... reach
+  const int reach = SplineDegree / 2 ;
+
+  vigra::Kernel1D<value_type> kernel ;
+  kernel.initExplicitly ( -reach , reach ) ;
+
+  for ( int tap = -reach ; tap <= reach ; ++tap )
+  {
+    kernel[tap] = bspline_basis<double> ( tap , SplineDegree ) ;
+  }
+
+  kernel.setBorderTreatment ( BORDER_TREATMENT_AVOID ) ;
+
+  separableConvolveMultiArray ( spline , target , kernel ) ;
+}
+
+/// the second function takes a border treatment mode from vigra's collection and works
+/// on an unbraced spline. Some border treatment modes implemented here aren't available
+/// from vigra or aren't applicable
+/// - vigra                     vspline
+/// - BORDER_TREATMENT_AVOID    used with braced splines
+/// - BORDER_TREATMENT_CLIP     not sure
+/// - BORDER_TREATMENT_REPEAT   nearest is flat or REFLECT, same effect in cubic splines
+/// - BORDER_TREATMENT_REFLECT  MIRROR
+/// - BORDER_TREATMENT_WRAP     PERIODIC
+/// - BORDER_TREATMENT_ZEROPAD  -
+
+/// convolves 'spline' with the b-spline basis function sampled at integer
+/// positions, using the border treatment passed in as 'btm', and writes the
+/// result to 'target'.
+template < class array >
+void restore_from_unbraced ( array &spline , array& target , BorderTreatmentMode btm , int SplineDegree )
+{
+  typedef typename array::value_type value_type ;
+
+  // the kernel covers the integer positions -reach ... reach
+  const int reach = SplineDegree / 2 ;
+
+  vigra::Kernel1D<value_type> kernel ;
+  kernel.initExplicitly ( -reach , reach ) ;
+
+  for ( int tap = -reach ; tap <= reach ; ++tap )
+  {
+    kernel[tap] = bspline_basis<double> ( tap , SplineDegree ) ;
+  }
+
+  kernel.setBorderTreatment ( btm ) ;
+
+  separableConvolveMultiArray ( spline , target , kernel ) ;
+}
+
+} ; // end of namespace vspline
+
+// the use of Vc has to be switched on with the flag USE_VC.
+// The remainder of the code in this file provides vectorized prefiltering
+// using Vc.
+
+#ifdef USE_VC
+
+namespace vigra
+{
+
+/// specialization of ExpandElementResult for Vc::Vector. It reports the
+/// type of the vector's elements as 'type' and their number as 'size', so
+/// that solver objects operating on Vc::Vectors can infer the element type.
+
+template < class T >
+struct ExpandElementResult < Vc::Vector < T > >
+{
+  using type = T ;
+
+  enum { size = Vc::Vector < T > :: Size } ;
+} ;
+
+} ;
+
+namespace vspline
+{
+/// Here's my shot at using SIMD code for prefiltering.
+/// The complicated bit is the aggregation, since we're not doing a point operation, but
+/// have to keep context. So the aggregation has to happen perpendicular to the processing
+/// axis in order to maintain the same context relationship as without aggregation.
+/// We need some acrobatics to interface the image data with the SIMD data types.
+/// The following class implements a proxy object, which can be made into a vector type
+/// (on read access) and will store a vector type into a vigra array (on write access).
+/// Since the iterators we pass into the solver make use of expressions which use
+/// these proxy objects, we also need to define some arithmetic operators to be able to
+/// use the same code that we use without vectorization.
+///
+/// I have coded for explicit aligned load/stores when accessing aligned memory, but on
+/// my system I haven't seen real improvements from this. Anyway, it won't do harm.
+/// On my system, I have measured significant speedups for float data, while prefiltering
+/// double data performed slightly worse than the unvectorized version. For spline degrees
+/// 0 and 1, where the code currently copies the input to the output, vectorized prefiltering
+/// is generally slower, most likely due to the buffering taking place. So if performance
+/// is a consideration, it's best to check execution times rather than assuming that vectorized
+/// prefiltering is always faster. There's roundtrip.cc in the examples to do just that.
+
+template < class single_type , bool do_gather , bool do_mask , bool aligned = false >
+class vector_proxy
+{
+  static_assert ( ! (  do_gather == false && do_mask == true ) ,
+                  "if vector_proxy uses masked operations, indexes must be provided" ) ;
+
+  typedef Vc::Vector<single_type> vector_type ;
+  typedef typename vector_type::IndexType index_type ;
+  typedef typename vector_type::Mask mask_type ;
+
+  single_type* base ;   // address the load/store or gather/scatter is relative to
+  index_type indexes ;  // per-lane offsets from base, only read if do_gather is true
+  mask_type mask ;      // participating lanes, only read if do_gather and do_mask are true
+
+public:
+
+  /// a set of constructors, reflecting the three modes of data transfer
+  /// between memory and SIMD register:
+  /// - plain load/store: only the base address is needed
+  /// - gather/scatter: base address and indexes
+  /// - masked gather/scatter: base address, indexes and mask
+  /// Members a mode does not need are left uninitialized; they are never read
+  /// in that mode. Masked operation is only implemented as gather/scatter
+  /// (enforced by the static_assert above).
+  /// TODO: Vc does have masked load/stores, though not via the vector type's
+  /// constructor. See if these can be supported as well.
+
+  vector_proxy ( single_type* _base )
+  : base ( _base )
+    { }
+
+  vector_proxy ( single_type* _base ,
+                 index_type _indexes )
+  : base ( _base ) ,
+    indexes ( _indexes )
+    { }
+
+  vector_proxy ( single_type* _base ,
+                 index_type _indexes ,
+                 mask_type _mask )
+  : base ( _base ) ,
+    indexes ( _indexes ) ,
+    mask ( _mask )
+    { }
+
+  /// construction of an SIMD vector from a proxy object performs the appropriate
+  /// memory access. The conditions only involve template arguments, so they are
+  /// compile-time constants and the optimizer can remove all but the needed branch.
+
+  operator vector_type() const
+  {
+    if ( do_gather && do_mask )
+      return vector_type ( base , indexes , mask ) ;
+    else if ( do_gather )
+      return vector_type ( base , indexes ) ;
+    else if ( aligned )
+      return vector_type ( base , Vc::Aligned ) ;
+    else
+      return vector_type ( base ) ;
+  }
+
+  /// assignment of a vector writes the data in rhs to memory, using the transfer
+  /// mode selected by the template arguments. None of the proxy's members are
+  /// affected by the operation, so it's a const function. Returns rhs.
+
+  const vector_type& operator= ( const vector_type& rhs ) const
+  {
+    if ( do_gather && do_mask )
+      rhs.scatter ( base , indexes , mask ) ;
+    else if ( do_gather )
+      rhs.scatter ( base , indexes ) ;
+    else if ( aligned )
+      rhs.store ( base , Vc::Aligned ) ;
+    else
+      rhs.store ( base ) ;
+    return rhs ;
+  }
+
+  /// next we define those operators we need for the solver. Since the code is the
+  /// same for all operators, we use a macro. For every operator 'op' it defines:
+  /// - the member 'op=' taking a vector: load, combine with rhs, store the result
+  ///   back to memory and return it
+  /// - friend binary operators for (vector, proxy), (proxy, vector), (proxy, proxy),
+  ///   (scalar, proxy) and (proxy, scalar): load the proxy operand(s) and return
+  ///   the result as a vector.
+  /// The binary operators never write to memory. The (proxy, vector) form used to
+  /// be implemented via 'op=', which stored the result into the memory behind the
+  /// left operand as a side effect of evaluating a plain binary expression.
+
+#define opfuncs(op)                                                                    \
+  vector_type operator op##= ( const vector_type& rhs ) const                          \
+  {                                                                                    \
+    return ( *this = vector_type ( *this ) op rhs ) ;                                  \
+  }                                                                                    \
+  friend vector_type operator op ( vector_type lhs, const vector_proxy& rhs)           \
+  {                                                                                    \
+    return ( lhs op vector_type(rhs) ) ;                                               \
+  }                                                                                    \
+  friend vector_type operator op ( const vector_proxy& lhs, vector_type rhs)           \
+  {                                                                                    \
+    return ( vector_type(lhs) op rhs ) ;                                               \
+  }                                                                                    \
+  friend vector_type operator op ( const vector_proxy& lhs, const vector_proxy& rhs)   \
+  {                                                                                    \
+    return vector_type(lhs) op vector_type(rhs) ;                                      \
+  }                                                                                    \
+  friend vector_type operator op ( const single_type& lhs, const vector_proxy& rhs)    \
+  {                                                                                    \
+    return lhs op vector_type(rhs) ;                                                   \
+  }                                                                                    \
+  friend vector_type operator op ( const vector_proxy& lhs, const single_type& rhs)    \
+  {                                                                                    \
+    return vector_type(lhs) op rhs ;                                                   \
+  }
+
+  // we only support the basic arithmetic operators, that's all we need in the solver:
+
+  opfuncs(*)
+  opfuncs(/)
+  opfuncs(+)
+  opfuncs(-)
+
+  // the macro is only meant for use inside this class, don't leak it to includers
+
+#undef opfuncs
+} ;
+
+/// class vector_iterator defines an iterator over a stripe of memory, producing
+/// vector proxy objects on access. These act as interface to obtain Vc::Vectors
+/// from the memory or write Vc::Vectors to the memory. Using this iterator, we can
+/// implement vectorized operation of the solver.
+
+template < class single_type , bool do_gather , bool do_mask , bool aligned = false >
+struct vector_iterator: public Vc::VectorAlignedBase
+{
+  static_assert ( ! (  do_gather == false && do_mask == true ) ,
+                  "if vector_iterator uses masked operations, indexes must be provided" ) ;
+
+  typedef Vc::Vector<single_type> vector_type ;
+  typedef Vc::Memory<vector_type> buffer_type ;
+  typedef vector_type value_type ;
+  typedef typename vector_type::IndexType index_type ;
+  typedef typename vector_type::Mask mask_type ;
+  typedef vector_proxy<single_type, do_gather, do_mask, aligned> proxy_type ;
+
+  single_type * base ;   // address the current position's memory access is relative to
+  ptrdiff_t stride ;     // distance, in single_type units, from one position to the next
+  index_type indexes ;   // per-lane offsets from base, only used if do_gather is true
+  mask_type mask ;       // participating lanes, only used if do_gather and do_mask are true
+
+  /// default constructor. base and stride are zeroed so that a default-constructed
+  /// iterator has a defined state; indexes and mask are left as Vc default-constructs them.
+
+  vector_iterator()
+  : base ( nullptr ) ,
+    stride ( 0 )
+    { }
+
+  /// constructor initializing all members (masked gather/scatter)
+
+  vector_iterator ( single_type * _base ,
+                    ptrdiff_t _stride ,
+                    index_type _indexes ,
+                    mask_type _mask )
+  : base ( _base ) ,
+    stride ( _stride ) ,
+    indexes ( _indexes ) ,
+    mask ( _mask )
+    { }
+
+  /// constructor for gather/scatter.
+  /// watch out! internally, Vc uses the same data type for masks and indexes.
+  /// A call to this constructor with a mask_type argument will compile, but it
+  /// will interpret the mask as indexes and result in the data being
+  /// gathered/scattered, which is a hard-to-track bug.
+  /// TODO: Vc has masked load/stores, but not via the vector type's constructor.
+  /// See if we can support these as well.
+
+  vector_iterator ( single_type * _base ,
+                    ptrdiff_t _stride ,
+                    index_type _indexes )
+  : base ( _base ) ,
+    stride ( _stride ) ,
+    indexes ( _indexes )
+    { }
+
+  /// constructor for plain load/store
+
+  vector_iterator ( single_type * _base ,
+                    ptrdiff_t _stride )
+  : base ( _base ) ,
+    stride ( _stride )
+    { }
+
+  /// helper: create the proxy matching this iterator's transfer mode for the
+  /// memory at 'where'. The conditions are compile-time constants.
+
+  proxy_type proxy_at ( single_type * where ) const
+  {
+    if ( do_mask && do_gather )
+      return proxy_type ( where , indexes , mask ) ;
+    else if ( do_gather )
+      return proxy_type ( where , indexes ) ;
+    else
+      return proxy_type ( where ) ;
+  }
+
+  /// two iterators compare equal if base, stride and all indexes agree.
+  /// Note that the mask does not take part in the comparison.
+
+  bool operator== ( const vector_iterator& other ) const
+  {
+    return    ( base == other.base )
+           && ( stride == other.stride )
+           && Vc::all_of ( indexes == other.indexes ) ;
+  }
+
+  /// indexed access on a non-const iterator yields a proxy, which can be read
+  /// from and written to. The index is converted to a signed type before it is
+  /// multiplied with the stride, so that negative strides work as expected.
+
+  proxy_type operator[] ( const size_t& i )
+  {
+    return proxy_at ( base + ptrdiff_t ( i ) * stride ) ;
+  }
+
+  /// indexed access on a const iterator performs the read immediately and
+  /// yields the vector (via the proxy's conversion to vector_type).
+
+  vector_type operator[] ( const size_t& i ) const
+  {
+    return proxy_at ( base + ptrdiff_t ( i ) * stride ) ;
+  }
+
+  /// dereferencing yields a proxy for the current position
+
+  proxy_type operator*() const
+  {
+    return proxy_at ( base ) ;
+  }
+
+  /// only preincrement and predecrement are defined for now.
+  /// Note that this implementation changes the base pointer! Both return
+  /// a proxy for the new position rather than a reference to the iterator.
+
+  proxy_type operator++() // preincrement
+  {
+    base += stride ;
+    return proxy_at ( base ) ;
+  }
+
+  proxy_type operator--() // predecrement
+  {
+    base -= stride ;
+    return proxy_at ( base ) ;
+  }
+
+  /// copy out count vectors, starting at the current position, to a Vc::Memory
+  /// object. The iterator's own position is not modified.
+
+  void buffer ( buffer_type& target , int count )
+  {
+    single_type * where = base ;
+
+    for ( int i = 0 ; i < count ; i++ )
+    {
+      target.vector(i) = proxy_at ( where ) ;
+      where += stride ;
+    }
+  }
+
+  /// store count vectors from a Vc::Memory object to memory, starting at the
+  /// current position. The iterator's own position is not modified.
+
+  void unbuffer ( buffer_type& source , int count )
+  {
+    single_type * where = base ;
+
+    for ( int i = 0 ; i < count ; i++ )
+    {
+      proxy_at ( where ) = source.vector(i) ;
+      where += stride ;
+    }
+  }
+} ;
+
+/// class aggregator handles the reinterpretation of the incoming/outgoing data
+/// to SIMD-processable types. The data come in as a vigra MultiArrayView of either
+/// a fundamental type or a compound type, like RGB pixel or TinyVector. The array
+/// is expanded into a view to its fundamental type and the data are grouped in
+/// SIMD vectors perpendicular to the processing axis.
+///
+/// class aggregator functions like an iterator over 'stripes' of data: stripe number v
+/// is accessed by get_stripe(), which returns an iterator over the SIMD vectors in
+/// that stripe. This iterator is passed to the solver. So it's quite similar to a Navigator.
+///
+/// Initially I coded for direct access to the array data, but then I switched to
+/// copying out the array data into a buffer and pass out an iterator to the buffer.
+/// So now, once processing is done, a call to flush() stores the data back to the
+/// array. For simple cases (degree 3) this is as fast, for higher degrees it is faster.
+/// And the code is more straightforward.
+///
+/// I had code running where I did not buffer if the data are contiguous, but
+/// something was wrong with it and it failed the unit tests. TODO investigate
+
+template < class base_view_type >
+class aggregator: public Vc::VectorAlignedBase
+{
+public:
+
+enum { actual_dimension = base_view_type::actual_dimension ,
+       expanded_dimension = base_view_type::actual_dimension + 1 } ;
+
+typedef typename base_view_type::value_type compound_type ;
+typedef typename base_view_type::difference_type shape_type ;
+typedef typename ExpandElementResult<compound_type>::type single_type ;
+typedef MultiArrayView < expanded_dimension , single_type > expanded_view_type ;
+typedef MultiArrayView < actual_dimension , single_type > slice_type ;
+
+typedef Vc::Vector<single_type> vector_type ;
+typedef typename vector_type::IndexType index_type ;
+typedef typename vector_type::Mask mask_type ;
+typedef Vc::Memory<index_type> gather_map_type ;
+typedef Vc::Memory<vector_type> buffer_type ;
+
+typedef vector_iterator < single_type , false , false > plain_vector_iterator ;
+
+// currently I always buffer to an aligned buffer and use aligned ops on it:
+
+typedef vector_iterator < single_type , false , false , true > aligned_plain_vector_iterator ;
+
+typedef vector_iterator < single_type , true , false > gathering_vector_iterator ;
+typedef vector_iterator < single_type , true , true > gathering_masked_vector_iterator ;
+
+bool consult_gather_map ;     // true if the slice is not unstrided, so offsets come from gather_map
+index_type default_indexes ;  // 0, 1, 2, ... - used for the masked access to unstrided data
+
+size_t count ;                // extent of the array along the processing axis
+size_t full_vectors ;         // number of complete SIMD vectors in the slice
+size_t total_vectors ;        // full_vectors, plus one if there is a remainder
+mask_type mask ;              // selects the first 'remainder' lanes; only set if remainder != 0
+single_type * base ;          // address of the first element of the slice
+ptrdiff_t stride ;            // stride of the expanded view along the processing axis
+size_t index_count ;          // number of single_type elements in the slice
+size_t remainder ;            // elements left over after the full vectors
+
+gather_map_type gather_map ;  // offset from base for every element of the slice
+buffer_type buffer ;          // holds one stripe: count vectors
+
+aligned_plain_vector_iterator current_stripe_iterator ; // iterator over buffer
+
+/// test an index_type for sequentiality: true if every index is its
+/// predecessor plus one
+
+static bool sequential ( const index_type & indexes )
+{
+  return Vc::all_of ( indexes - indexes[0] == index_type::IndexesFromZero() ) ;
+}
+
+/// set up the aggregator for base_view, to be processed along 'axis'.
+/// This analyzes the slice perpendicular to the processing axis, allocates
+/// the stripe buffer and, if the slice is strided, fills the gather map.
+/// The members are initialized in their order of declaration.
+
+aggregator ( base_view_type base_view , int axis )
+: default_indexes ( index_type::IndexesFromZero() ) ,
+  gather_map(1) ,
+  buffer(1)
+{
+  // we grab the length of the processing axis here
+
+  count = base_view.shape ( axis ) ;
+
+  // next we create a view to the slice perpendicular to the processing axis.
+  // this is inexpensive, since the view doesn't hold any data. Note that the
+  // slice is taken from an element-expanded array view, since we want to form
+  // SIMD vectors from the data, and there aren't any vectors of, say, pixels.
+
+  expanded_view_type expanded_view = base_view.expandElements ( 0 ) ;
+  slice_type slice_view = expanded_view.bindAt ( axis + 1 , 0 ) ;
+
+  // we only need a gather map if the slice is strided
+
+  consult_gather_map = ! slice_view.isUnstrided() ;
+
+  // we analyze the slice and extract a few metrics:
+
+  assert ( slice_view.data() == &(slice_view[shape_type()]) ) ;
+  base = slice_view.data() ;
+
+  stride = expanded_view.stride()[axis+1] ;
+  index_count = slice_view.size() ;
+  full_vectors = slice_view.size() / vector_type::Size ;
+
+  if ( index_count == full_vectors * vector_type::Size )
+  {
+    total_vectors = full_vectors ;
+    remainder = 0 ;
+  }
+  else
+  {
+    // if the number of single_type data in the slice isn't a multiple of the
+    // SIMD vector size, we need masking for the last SIMD vector:
+    total_vectors = full_vectors + 1 ;
+    remainder = index_count - full_vectors * vector_type::Size ;
+    index_type help = index_type::IndexesFromZero() ;
+    mask = ( help < remainder ) ;
+  }
+
+  // looks a bit funny, but as we want the buffer to be a class member, we have
+  // to create it with (1) initially and then swap in the real buffer later.
+  buffer_type help_buffer ( count * vector_type::Size ) ;
+  buffer.swap ( help_buffer ) ;
+  current_stripe_iterator = aligned_plain_vector_iterator ( buffer , vector_type::Size ) ;
+
+  // if, above, we have determined that we should use and consult a gather map,
+  // we construct it now in the easiest way possible: instead of doing arithmetics
+  // with strides and shapes, we simply take the difference of the address of each
+  // element in the slice from the address of the first element:
+  if ( consult_gather_map )
+  {
+    // our gather map is a class member and has been initialized to some default
+    // value - now we fill it with real data.
+    gather_map_type gm ( index_count ) ;
+    gather_map.swap ( gm ) ;
+
+    auto sliter = slice_view.begin() ;
+
+    for ( size_t i = 0 ; i < index_count ; i++ )
+    {
+      gather_map [ i ] = &(*sliter) - base ;
+      ++sliter ;
+    }
+  }
+}
+
+/// copy stripe number v from the array into the aggregator's buffer and
+/// return an iterator over the buffer, which can be passed to the solver.
+/// Throws std::out_of_range if v is not in [ 0 , total_vectors ).
+
+aligned_plain_vector_iterator& get_stripe ( int v )
+{
+  transfer_stripe ( v , buffer , false ) ;
+  return current_stripe_iterator ;
+}
+
+/// flush writes a buffer to stripe number v of the array. Per default the data
+/// are taken from the aggregator's own buffer, but this can be overridden by
+/// passing in a pointer to another buffer.
+/// Throws std::out_of_range if v is not in [ 0 , total_vectors ).
+
+void flush ( int v , buffer_type * bp = 0 )
+{
+  transfer_stripe ( v , bp ? *bp : buffer , true ) ;
+}
+
+private:
+
+/// move count vectors between the memory vi refers to and buf: from buf to
+/// memory if store is true, from memory to buf otherwise.
+
+template < class iterator_type >
+void run_transfer ( iterator_type & vi , buffer_type & buf , bool store )
+{
+  if ( store )
+    vi.unbuffer ( buf , count ) ;
+  else
+    vi.buffer ( buf , count ) ;
+}
+
+/// common code for get_stripe() and flush(): pick the memory access mode for
+/// stripe v and move the stripe between the array and buf. Loading and storing
+/// share this routine so that both always pick the same mode for a given stripe;
+/// previously flush() decided on a plain store by looking at the first and the
+/// last index only, while get_stripe() used sequential().
+
+void transfer_stripe ( int v , buffer_type & buf , bool store )
+{
+  if ( v < 0 || size_t ( v ) >= total_vectors )
+    throw std::out_of_range ( "cannot access a stripe with this index" ) ;
+
+  index_type indexes ;
+
+  if ( size_t ( v ) < full_vectors )
+  {
+    if ( consult_gather_map )
+    {
+      indexes = gather_map.vector(v) ;
+      if ( sequential ( indexes ) )
+      {
+        // all indexes are in sequence, so we can do a plain load/store here as
+        // well. At first this case looks like the unstrided case below, but here
+        // we might have jumps in the gather map (from 3D upwards), so we have to
+        // take the start address from the gather map.
+        plain_vector_iterator vi ( base + indexes[0] , stride ) ;
+        run_transfer ( vi , buf , store ) ;
+      }
+      else
+      {
+        // we really need to gather/scatter
+        gathering_vector_iterator vi ( base , stride , indexes ) ;
+        run_transfer ( vi , buf , store ) ;
+      }
+    }
+    else
+    {
+      // unstrided slice: definitely no need to gather/scatter
+      plain_vector_iterator vi ( base + v * vector_type::Size , stride ) ;
+      run_transfer ( vi , buf , store ) ;
+    }
+  }
+  else
+  {
+    // one last incomplete vector, needs masked operation. For an unstrided
+    // slice, the default indexes are used relative to the start of the vector.
+    single_type * stripe_base = base ;
+
+    if ( consult_gather_map )
+    {
+      indexes = gather_map.vector(v) ;
+    }
+    else
+    {
+      indexes = default_indexes ;
+      stripe_base = base + v * vector_type::Size ;
+    }
+
+    gathering_masked_vector_iterator vi ( stripe_base , stride , indexes , mask ) ;
+    run_transfer ( vi , buf , store ) ;
+  }
+}
+
+} ;
+
+/// aggregating_filter handles the presentation of the data so that they can
+/// be processed vectorized by the solver.
+
+/// Runs the solver over all stripes of base_view along 'axis' and stores the
+/// result in target. If base_view and target are the same object, each stripe
+/// is written back through the source aggregator; otherwise a second aggregator
+/// on target is used to store the processed source buffer. Always returns 0.
+
+template < class base_view_type ,
+           class output_array_type >
+int aggregating_filter ( base_view_type &base_view ,
+                         output_array_type &target ,
+                         bc_code bc ,
+                         int degree ,
+                         int axis )
+{
+  typedef aggregator<base_view_type> source_aggregator_type ;
+  typedef aggregator<output_array_type> target_aggregator_type ;
+  typedef typename source_aggregator_type::aligned_plain_vector_iterator source_iterator ;
+  typedef typename target_aggregator_type::aligned_plain_vector_iterator target_iterator ;
+
+  // heuristic choosing the lambda exponent passed to the solver. For high degrees,
+  // the optimization in the first branch would reduce precision too much, so it
+  // is only used while degree ^ dimension stays below 64 (was: degree < 7).
+
+  int lambda_exponent ;
+
+  if ( pow ( degree , int(target.actual_dimension) ) < 64 )
+    lambda_exponent = ( axis == 0 ) ? int ( base_view_type::actual_dimension ) : 0 ;
+  else
+    lambda_exponent = 1 ;
+
+  source_aggregator_type source ( base_view , axis ) ;
+
+  if ( &base_view == &target )
+  {
+    // same object: solve every stripe in the buffer and write it back in place
+
+    solver < source_iterator , source_iterator >
+      stripe_solver ( source.count , lambda_exponent , bc , degree + 1 ) ;
+
+    for ( int v = 0 ; v < source.total_vectors ; ++v )
+    {
+      stripe_solver.solve ( source.get_stripe ( v ) ) ;
+      source.flush ( v ) ;
+    }
+  }
+  else
+  {
+    // different objects: solve in the source's buffer, then store that buffer
+    // to the target instead of back to the source
+
+    target_aggregator_type sink ( target , axis ) ;
+
+    solver < source_iterator , target_iterator >
+      stripe_solver ( source.count , lambda_exponent , bc , degree + 1 ) ;
+
+    for ( int v = 0 ; v < source.total_vectors ; ++v )
+    {
+      stripe_solver.solve ( source.get_stripe ( v ) ) ;
+      sink.flush ( v , &(source.buffer) ) ;
+    }
+  }
+
+  return 0 ;
+}
+
+// previous implementation, not using divide_and_conquer
+
+/*
+template < typename input_array_type , 
+           typename output_array_type >     // type of array for coefficients (may be the same)
+void solve_vc ( input_array_type &input ,
+                output_array_type &output ,
+                bc_code bc ,                 // boundary treatment for this solver
+                int degree ,
+                int d ,                      // axis to process
+                int nslices = ncores )  // number of threads to use
+{
+  vector < input_array_type > iv ;
+  vector < output_array_type > ov ;
+  
+  nslices = split_array_to_chunks<input_array_type> ( input , iv , nslices , d ) ;
+  
+  thread * t[nslices] ;
+
+  if ( &input == &output )
+  {
+    for ( int s = 0 ; s < nslices ; s++ )
+    {
+      t[s] = new thread ( aggregating_filter<input_array_type,input_array_type> ,
+                          std::ref(iv[s]) , std::ref(iv[s]) , bc , degree , d ) ;
+    }
+  }
+  else
+  {
+    split_array_to_chunks<output_array_type> ( output , ov , nslices , d ) ;
+    for ( int s = 0 ; s < nslices ; s++ )
+    {
+      t[s] = new thread ( aggregating_filter<input_array_type,output_array_type> ,
+                          std::ref(iv[s]) , std::ref(ov[s]) , bc , degree , d ) ;
+    }
+  }
+  
+  for ( int s = 0 ; s < nslices ; s++ )
+  {
+    t[s]->join() ;
+    delete t[s] ;
+  }
+}
+*/
+
+/// solve_vc is the routine to prefilter using vectorization with Vc.
+/// This is the single-axis version
+
+template < typename input_array_type ,
+           typename output_array_type >     // type of array for coefficients (may be the same)
+void solve_vc ( input_array_type &input ,
+                output_array_type &output ,
+                bc_code bc ,                 // boundary treatment for this solver
+                int degree ,
+                int d ,                      // axis to process
+                int nslices = ncores )  // number of threads to use
+{
+  // fix the per-call arguments of aggregating_filter with bind; the two
+  // placeholders stand for the input chunk and the output chunk, which
+  // divide_and_conquer_2 supplies
+
+  auto per_chunk_filter
+  = std::bind ( aggregating_filter < input_array_type , output_array_type > ,
+                std::placeholders::_1 ,
+                std::placeholders::_2 ,
+                bc ,
+                degree ,
+                d ) ;
+
+  // divide_and_conquer_2 splits input and output into chunks and applies the
+  // functor to each corresponding pair of chunks in a separate thread
+
+  typedef divide_and_conquer_2 < input_array_type , output_array_type > splitter_type ;
+
+  splitter_type::run ( input , output , per_chunk_filter , d , nslices ) ;
+}
+
+/// multi-axis version of solve_vc. This is what I use now per default, it performs
+/// the prefiltering with multiple threads and vectorization, which is the fastest
+/// way I could come up with.
+
+template < typename input_array_type ,      ///< type of array with knot point data
+           typename output_array_type >     ///< type of array for coefficients (may be the same)
+void solve_vc ( input_array_type& input ,
+                output_array_type& output ,
+                TinyVector<bc_code,input_array_type::actual_dimension> bc ,
+                int degree ,
+                int nslices = ncores )
+{
+  // Prefilters 'input' into 'output' along all axes: axis 0 is processed from
+  // input to output, the remaining axes in-place on output. bc holds one
+  // boundary condition code per axis, nslices is the number of threads to use.
+  // Throws dimension_mismatch if input and output differ in dimension or shape.
+
+  // check if operation is in-place. I assume that comparing the data pointers
+  // is sufficient to determine if the operation is in-place.
+
+  const bool in_place = ( (void*)(input.data()) == (void*)(output.data()) ) ;
+
+  // if input == output, with degree <= 1 we needn't do anything at all.
+
+  if ( in_place && degree <= 1 )
+    return ;
+
+  // do a bit of compatibility checking. The diagnostics name this function;
+  // they used to be prefixed 'solve_vigra', which pointed to the wrong routine.
+
+  const int dim = input_array_type::actual_dimension ;
+
+  if ( output_array_type::actual_dimension != dim )
+  {
+    cerr << "solve_vc: dimensions don't match" << endl ;
+    throw dimension_mismatch ( "input and output array must have the same dimension" ) ;
+  }
+
+  typedef typename input_array_type::difference_type diff_t ;
+  diff_t shape = input.shape() ;
+  if ( output.shape() != shape )
+  {
+    cerr << "solve_vc: shapes don't match" << endl ;
+    throw dimension_mismatch ( "input and output array must have the same shape" ) ;
+  }
+
+  // even if degree <= 1, we'll only arrive here if input != output.
+  // So we still have to copy the input data to the output, which
+  // the first pass takes care of
+
+  solve_vc<input_array_type,output_array_type>
+             ( input , output , bc[0] , degree , 0 , nslices ) ;
+
+  // but if degree <= 1 we're done already, since copying the data again
+  // in dimensions 1... is futile
+
+  if ( degree > 1 )
+  {
+    for ( int d = 1 ; d < dim ; d++ )
+      solve_vc<output_array_type,output_array_type>
+                ( output , output , bc[d] , degree , d , nslices ) ;
+  }
+}
+
+} ; // namespace vspline
+
+#endif // USE_VC
+
+#endif // VSPLINE_PREFILTER_H
diff --git a/prefilter_poles.cc b/prefilter_poles.cc
new file mode 100644
index 0000000..d30ec5b
--- /dev/null
+++ b/prefilter_poles.cc
@@ -0,0 +1,174 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform rational b-splines                           */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file prefilter_poles.cc
+
+    \brief calculates the poles of the b-spline prefilter using gsl and BLAS
+
+    this doesn't have to be done for installing vspline if poles.cc is
+    already present. Providing degrees up to 24 is just about what gsl
+    can handle, with such high degrees the evaluation becomes quite imprecise
+    as well, especially for floats.
+
+    compile with:
+    g++ -std=c++11 prefilter_poles.cc -oprefilter_poles -lgsl -lblas
+    run
+    ./prefilter_poles > poles.cc
+
+    TODO: could do with some TLC...
+*/
+
+#include <iostream>
+#include <iomanip>
+
+#include <vigra/array_vector.hxx>
+#include <vigra/splines.hxx>
+#include <gsl/gsl_poly.h>
+#include <vspline/basis.h>
+
+using namespace std ;
+using namespace vigra ;
+
+// template < class real_type >
+// real_type bspline_basis ( real_type x , int degree , int derivative = 0 )
+// {
+//   if ( degree == 0 )
+//   {
+//     if ( derivative == 0 )
+//         return x < real_type(0.5) && real_type(-0.5) <= x ?
+//                   real_type(1.0)
+//                 : real_type(0.0);
+//     else
+//         return real_type(0.0);
+//   }
+//   if ( derivative == 0 )
+//   {
+//     real_type n12 = real_type((degree + 1.0) / 2.0);
+//     return (     ( n12 + x )
+//                 * bspline_basis ( x + real_type(0.5) , degree - 1 , 0 )
+//               +   ( n12 - x )
+//                 * bspline_basis ( x - real_type(0.5) , degree - 1 , 0 )
+//             )
+//             / degree;
+//   }
+//   else
+//   {
+//     --derivative;
+//     return   bspline_basis ( x + real_type(0.5) , degree - 1 , derivative )
+//            - bspline_basis ( x - real_type(0.5) , degree - 1 , derivative ) ;
+//   }
+// }
+
+/// Prints the values of the b-spline basis function of the given degree at
+/// half-unit steps as a C array definition 'K<DEGREE>' to cout and, for
+/// DEGREE > 1, calculates the poles of the prefilter with gsl's polynomial
+/// root finder.
+/// Returns the poles found (empty for DEGREE <= 1). The template argument T
+/// is not used by the implementation.
+
+template < class T >
+ArrayVector<double>
+calculatePrefilterCoefficients(int DEGREE)
+{
+    ArrayVector<double> res;
+    const int r = DEGREE / 2;
+
+    // polynomial coefficients and root storage. These used to be variable-length
+    // arrays, which are not standard C++; ArrayVector provides the same
+    // contiguous storage.
+    ArrayVector<double> a ( 2 * r + 1 ) ;
+    ArrayVector<double> z ( 4 * r + 2 ) ;
+
+    cout << "long double K" << DEGREE << "[] = {" << endl ;
+    // we calculate the basis function values at 0.5 intervals
+    int imax = 2 * r ;
+    if ( DEGREE & 1 )
+      imax++ ;
+    for(int i = 0; i <= imax ; ++i)
+    {
+      long double half_i = i / (long double) 2.0 ;
+      long double v = vspline::gen_bspline_basis<long double> ( half_i , DEGREE , 0 ) ;
+      cout << " " << v << "L ,   // basis(" << half_i << ")" << endl ;
+      if ( ! ( i & 1 ) )
+      {
+        // for even i, we put the value in a[] as well - only even i
+        // correspond to the value of the basis function at integral values
+        // which we need for the poles. The polynomial is symmetric, so the
+        // value goes to both sides of the center coefficient a[r].
+        int ih = i / 2 ;
+        a [ r - ih ] = a [ r + ih ] = v ;
+      }
+    }
+    cout << " } ; " << endl ;
+
+    if(DEGREE > 1)
+    {
+        // we set up the environment gsl needs to find the roots
+        gsl_poly_complex_workspace * w
+          = gsl_poly_complex_workspace_alloc (2*r+1);
+        // now we call gsl's root finder
+        gsl_poly_complex_solve (a.data(), 2*r+1, w, z.data());
+        // and release its workspace
+        gsl_poly_complex_workspace_free (w);
+
+        // we only look at the real parts of the values, which are stored
+        // interleaved real/imag. And we take them back to front, even though
+        // it doesn't matter to Thevenaz' code which end we start with - but
+        // conventionally Pole[0] is the root with the largest absolute, so I
+        // stick with that.
+        // NOTE(review): only the first r of the 2*r roots are inspected, which
+        // presumably relies on the order in which gsl delivers the roots -
+        // confirm against gsl's behaviour before changing the root finder.
+        for(int i = 2 * r - 2 ; i >= 0; i-=2)
+            if(VIGRA_CSTD::fabs(z[i]) < 1.0)
+                res.push_back(z[i]);
+    }
+    return res;
+}
+
+// TODO ugly mishmash of prints and calculations...
+
+/// Calculates the prefilter poles for the given degree (which also prints the
+/// basis function values) and, for degree > 1, prints the poles to cout as a
+/// C array definition 'Poles_<degree>'.
+
+void print_poles ( int degree )
+{
+  const ArrayVector<double> poles = calculatePrefilterCoefficients<double> ( degree ) ;
+
+  // degrees 0 and 1 have no poles, so there is no array to print
+  if ( degree <= 1 )
+    return ;
+
+  cout << "double Poles_" << degree << "[] = {" << endl ;
+
+  for ( auto it = poles.begin() ; it != poles.end() ; ++it )
+    cout << *it << " ," << endl ;
+
+  cout << "} ;" << endl ;
+}
+
+/// Writes the basis function values and poles for degrees 0 to 24 to cout,
+/// followed by two tables of pointers to the per-degree arrays.
+
+int main ( int argc , char * argv[] )
+{
+  // one past the highest degree the tables are generated for
+  const int degree_end = 25 ;
+
+  cout << setprecision(20) ;
+
+  int degree = 0 ;
+  while ( degree < degree_end )
+  {
+    print_poles ( degree ) ;
+    ++degree ;
+  }
+
+  // table of poles: degrees 0 and 1 have none, so they get a null pointer
+  cout << noshowpos
+       << "const double* precomputed_poles[] = {" << endl
+       << "  0, " << endl
+       << "  0, " << endl ;
+
+  for ( int d = 2 ; d < degree_end ; d++ )
+    cout << "  Poles_" << d << ", " << endl ;
+
+  cout << "} ;" << endl ;
+
+  // table of basis function values, one entry per degree
+  cout << "const long double* precomputed_basis_function_values[] = {" << endl ;
+
+  for ( int d = 0 ; d < degree_end ; d++ )
+    cout << "  K" << d << ", " << endl ;
+
+  cout << "} ;" << endl ;
+}
diff --git a/remap.h b/remap.h
new file mode 100644
index 0000000..61e0622
--- /dev/null
+++ b/remap.h
@@ -0,0 +1,883 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// \file remap.h
+///
+/// \brief set of generic remap functions
+///
+/// My foremost reason to have efficient B-spline processing is the formulation of a generic
+/// remap function. This is a function which takes an array of real-valued coordinates and
+/// an interpolator over a source array. Now each of the real-valued coordinates is fed into
+/// the interpolator in turn, yielding a value, which is placed in the output array at the same
+/// place the coordinate occupies in the coordinate array. To put it concisely, if we have
+///
+/// - c, the coordinate array (or 'warp' array)
+/// - a, the source array
+/// - i, the interpolator over a
+/// - j, the coordinates in c
+/// - and t, the target
+///
+/// remap defines the operation
+///
+/// t[j] = i(a,c[j]) for all j
+///
+/// st_remap is the single-threaded implementation; remap itself partitions its work
+/// and creates several threads, each running one instance of st_remap.
+///
+/// all remap routines take several template arguments:
+///
+/// - value_type:      like, float, double, complex<>, pixels, TinyVectors etc.
+/// - coordinate_type: TinyVector of float or double for coordinates, or split type
+/// - dim_in:          number of dimensions of input array (and of 'warp' coordinates)
+/// - dim_out:         number of dimensions of output array
+///
+/// You can see from the two dimension parameters that remaps to other-dimensional
+/// objects are supported. This makes it possible to, for example, remap from a
+/// volume to a 2D image, using a 2D warp array containing 3D coordinates.
+///
+/// In these routines, we can switch the use of vectorization on or off. When using
+/// vectorization, this is done in a straightforward fashion by aggregating the input.
+/// If the source/target arrays lend themselves to it, we can pass their memory directly
+/// to the vectorized eval code. To keep the complexity down,  if the memory is not
+/// suited, I 'manually' aggregate to a small buffer and pass the buffer to and fro.
+///
+/// There is also a second set of remap functions in this file, which don't take a
+/// 'warp' array but a functor instead. This functor receives, for every location in the
+/// output, the corresponding (discrete) coordinates, and returns (real) coordinates pointing
+/// into the input array. This variant exists, because a common situation in image transformations
+/// is that the geometric transformation from one image to another is defined by precisely
+/// such a functor, and the creation of the warp array only makes sense if the transformation
+/// is performed several times. With 'cheap' transformations the difference isn't large,
+/// but if the transformation is very complex, reusing it can save a lot of processing.
+/// What I have in mind here is image transformations as used in panorama stitching,
+/// lens correction, etc.
+///
+/// In the 'example' folder, there is a program called pano_extract.cc, which demonstrates
+/// the use of a transformation-based remap.
+
+#ifndef VSPLINE_REMAP_H
+#define VSPLINE_REMAP_H
+
+#include "eval.h"
+
+namespace vspline {
+
+using namespace std ;
+using namespace vigra ;
+using namespace vigra::multi_math;
+
+/// single-threaded remap: every coordinate in 'warp' is fed to an evaluator
+/// made from 'bspl', and the resulting value is stored in 'output' at the
+/// position the coordinate occupies in 'warp'.
+///
+/// - bspl:   the b-spline to evaluate
+/// - warp:   array of coordinates at which the spline is evaluated
+/// - output: array receiving the results; must have the same shape as warp
+/// - use_vc: if USE_VC is defined, selects vectorized evaluation for all
+///           full groups of vsize coordinates; without USE_VC it has no effect
+///
+/// throws shape_mismatch if warp and output differ in shape, otherwise returns 0.
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class coordinate_type , // usually TinyVector of real for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int st_remap ( const bspline < value_type , dim_in > & bspl ,
+               MultiArrayView < dim_out , coordinate_type > warp ,
+               MultiArrayView < dim_out , value_type > output ,
+               bool use_vc = true
+           )
+{
+  typedef typename coordinate_type::value_type rc_type ;
+  typedef evaluator < dim_in , value_type , rc_type , int > evaluator_type ;
+  typedef typename evaluator_type::ele_type ele_type ;
+
+  // do a bit of compatibility checking. This happens before anything is
+  // allocated, so throwing here does not leak.
+
+  if ( output.shape() != warp.shape() )
+  {
+    throw shape_mismatch ( "the shapes of the warp array and the output array do not match" ) ;
+  }
+
+  evaluator_type ev ( bspl ) ;
+
+  ele_type * p_workspace = new ele_type [ ev.workspace_size() ] ;
+
+  // TODO: we blindly assume that we can coiterate in scan order here:
+
+  auto source_it = warp.begin() ;
+  auto target_it = output.begin() ;
+
+  // since coding for all variations of strided and unstrided memory in warp and output
+  // is a bit much, we only differentiate between the ideal case where all memory is unstrided
+  // and all other cases, where we use buffering of vsize elements. In both cases, we pass
+  // consecutive memory to the evaluator, which can deinterleave consistently.
+  // TODO: this pattern of coiteration/cotraversal with suitable iteration methods is
+  // reoccuring and might be worth factoring out, also because these traversals are
+  // often needed in collateral code, like in apply or transform functions.
+
+  int leftover = warp.elementCount() ;
+
+#ifdef USE_VC
+
+  if ( use_vc )
+  {
+    typedef typename evaluator_type::ele_v ele_v ;
+    const int vsize = evaluator_type::vsize ;
+
+    // the vectorized workspace is allocated and released inside this branch,
+    // so it is neither allocated needlessly nor leaked when use_vc is false
+
+    ele_v * p_workspace_v = new ele_v [ ev.workspace_size() ] ;
+
+    int aggregates = warp.elementCount() / vsize ;            // number of full vectors
+    leftover = warp.elementCount() - aggregates * vsize ;     // any leftover single values
+    coordinate_type * source = warp.data() ;                  // access to memory
+    value_type * destination = output.data() ;
+
+    if ( warp.isUnstrided() )
+    {
+      // best case: warp array has consecutive memory
+      if ( output.isUnstrided() )
+      {
+        // best case: output array has consecutive memory, no need to buffer
+        for ( int a = 0 ; a < aggregates ; a++ , source += vsize , destination += vsize )
+        {
+          ev ( source , destination , p_workspace_v ) ;
+        }
+        // the iterator was not used above, bring it up to date for the leftovers
+        target_it += aggregates * vsize ;
+      }
+      else
+      {
+        // we fall back to buffering and storing individual result values
+        // TODO while this is a straightforward implementation, it should be more efficient
+        // to (de)interleave here and call the fully vectorized evaluation code.
+        value_type target_buffer [ vsize ] ;
+        for ( int a = 0 ; a < aggregates ; a++ , source += vsize )
+        {
+          ev ( source , target_buffer , p_workspace_v ) ;
+          for ( int e = 0 ; e < vsize ; e++ )
+          {
+            *target_it = target_buffer[e] ;
+            ++target_it ;
+          }
+        }
+      }
+      // the iterator was not used above, bring it up to date for the leftovers
+      source_it += aggregates * vsize ;
+    }
+    else
+    {
+      // we fall back to loading and buffering individual warp values
+      coordinate_type source_buffer [ vsize ] ;
+      if ( output.isUnstrided() )
+      {
+        // best case: output array has consecutive memory
+        for ( int a = 0 ; a < aggregates ; a++ , destination += vsize )
+        {
+          for ( int e = 0 ; e < vsize ; e++ )
+          {
+            source_buffer[e] = *source_it ;
+            ++source_it ;
+          }
+          ev ( source_buffer , destination , p_workspace_v ) ;
+        }
+        target_it += aggregates * vsize ;
+      }
+      else
+      {
+        // we also fall back to buffering and storing individual result values
+        value_type target_buffer [ vsize ] ;
+        for ( int a = 0 ; a < aggregates ; a++ )
+        {
+          for ( int e = 0 ; e < vsize ; e++ )
+          {
+            source_buffer[e] = *source_it ;
+            ++source_it ;
+          }
+          ev ( source_buffer , target_buffer , p_workspace_v ) ;
+          for ( int e = 0 ; e < vsize ; e++ )
+          {
+            *target_it = target_buffer[e] ;
+            ++target_it ;
+          }
+        }
+      }
+    }
+
+    delete[] p_workspace_v ;
+  }
+
+#endif // USE_VC
+
+  // process leftovers, if any - if vc isn't used, this loop does all the processing
+  while ( leftover-- )
+  {
+    // process leftovers with single-value evaluation
+    *target_it = ev ( *source_it , p_workspace ) ;
+    ++source_it ;
+    ++target_it ;
+  }
+
+  delete[] p_workspace ;
+  return 0 ;
+}
+
+/// this remap variant performs the remap with several threads. The routine above is single-threaded,
+/// but all individual evaluations are independent of each other. So making it multi-threaded
+/// is trivial: just split the warp array and target array n-ways and call the single-threaded
+/// version on these subarrays. We have utility code to perform the splitting of the arrays
+/// and processing the chunks in separate threads, namely divide_and_conquer_2.
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class coordinate_type , // usually float for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int remap ( const bspline < value_type , dim_in > & bspl ,
+            MultiArrayView < dim_out , coordinate_type > warp ,
+            MultiArrayView < dim_out , value_type > output ,
+            bool use_vc = true ,
+            int nthreads = ncores
+           )
+{
+  // parameters:
+  // - bspl:     the b-spline to evaluate
+  // - warp:     array of coordinates at which the spline is evaluated
+  // - output:   array receiving the results
+  // - use_vc:   passed through to st_remap
+  // - nthreads: passed through to divide_and_conquer_2
+  // the function always returns 0.
+
+  using namespace std::placeholders ;
+
+  // we use bind to bind the first and last argument of the single-threaded
+  // remap function, leaving two free arguments which will accept the two arrays
+  // of data to coiterate over (being the chunks of warp and output which are created
+  // by divide_and_conquer_2)
+
+  auto chunk_func_2
+  = std::bind ( st_remap < value_type , coordinate_type , dim_in , dim_out > , // single-threaded remap
+                std::ref(bspl) ,     // bspline passed in above
+                _1 ,       // placeholders to accept data chunks
+                _2 ,       // from divide_and_conquer
+                use_vc ) ; // use_vc flag as passed in above
+
+  divide_and_conquer_2 < decltype ( warp ) ,    // run divide_and_conquer_2
+                         decltype ( output ) >
+  :: run ( warp ,                          // on the warp array
+           output ,                        // and the output array
+           chunk_func_2 ,                  // applying the functor we've made with bind
+           -1 ,                            // allowing array splitting along any axis
+           nthreads ) ;                    // using nthreads threads
+
+  // the function is declared to return int, so it must not flow off its end
+  return 0 ;
+}
+
+/// This is a variant of remap, which directly takes an array of values and remaps it,
+/// internally creating a b-spline of given order just for the purpose. This is used for
+/// one-shot remaps where the spline isn't reused.
+
+// bcv_type is a TinyVector holding 'dimension' boundary condition codes
+// (bc_code) - presumably one per axis of the array the spline is built over.
+template < int dimension >
+using bcv_type = TinyVector < bc_code , dimension > ;
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class coordinate_type , // usually TinyVector of float for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+
+int remap ( MultiArrayView < dim_in , value_type > input ,
+            MultiArrayView < dim_out , coordinate_type > warp ,
+            MultiArrayView < dim_out , value_type > output ,
+            bcv_type < dim_in > bcv = bcv_type < dim_in > ( MIRROR ) ,
+            int degree = 3 ,
+            bool use_vc = true ,
+            int nthreads = ncores
+          )
+{
+  typedef bspline < value_type , dim_in > spline_type ;
+
+  // warp and output are traversed together, so they have to agree in shape.
+  // We test this up front, before any work is put into building the spline.
+
+  const bool shapes_agree = ! ( output.shape() != warp.shape() ) ;
+
+  if ( ! shapes_agree )
+    throw shape_mismatch ( "the shapes of the warp array and the output array do not match" ) ;
+
+  // set up a spline of the requested degree and boundary conditions over
+  // the input's shape, fill in the input data and run the prefilter
+
+  spline_type bsp ( input.shape() , degree , bcv ) ;
+  bsp.core = input ;
+  bsp.prefilter ( use_vc , nthreads ) ;
+
+  // the actual remap is delegated to the variant taking a bspline object
+
+  remap < value_type , coordinate_type , dim_in , dim_out >
+        ( bsp , warp , output , use_vc , nthreads ) ;
+
+  return 0 ;
+}
+
+// Next we have some collateral code to get ready for the transformation-based remap
+
+/// type alias for a coordinate transformation functor, to (hopefully) make the signature
+/// more legible. In words: 'transform' is a standard function transforming an n-dimensional
+/// incoming coordinate to an m-dimensional outgoing coordinate. This function takes both
+/// coordinates as references.
+
+// Note the order of the dimensions: the first (const) argument of the functor
+// has dim_out components, the second (non-const) argument has dim_in components.
+template < class rc_type ,       // elementary type for coordinates, usually float
+           int dim_in ,          // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >         // number of dimensions of output array
+using transform
+= std::function < void ( const TinyVector < rc_type , dim_out > & ,
+                               TinyVector < rc_type , dim_in > & ) > ;
+
+#ifdef USE_VC
+
+/// We use this type alias, since the type is rather unwieldy
+/// what we need to define is a SimdArray of rc_type, which has just as
+/// many elements as a Vc::Vector of value_type's elementary type:
+
+// rc_v < rc_type , value_type > is a Vc::SimdArray with element type rc_type.
+// Its element count is the Size of a Vc::Vector over
+// ExpandElementResult < value_type > :: type.
+template < class rc_type , class value_type >
+using rc_v
+= Vc::SimdArray < rc_type ,
+                  Vc::Vector < typename ExpandElementResult < value_type >::type > ::Size > ;
+
+/// This type alias defines a vectorized coordinate transformation.
+/// it is the equivalent of the unvectorized type above, taking TinyVectors
+/// of the appropriate SimdArray objects instead of singular values.
+
+// As with 'transform', the first (const) argument of the functor has dim_out
+// components and the second (non-const) argument has dim_in components; here
+// each component is an rc_v rather than a single value.
+template < class rc_v ,            // SimdArray of vsize real coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+using vtransform
+= std::function < void
+                   ( const TinyVector < rc_v , dim_out > & ,
+                     TinyVector < rc_v , dim_in > & ) > ;
+
+#endif
+
+/// class transformation is a (multi-) functor, handling coordinate transformations
+/// this class simplifies pulling in transformations by automatically providing a broadcasting
+/// method to apply a single-value transformation to every element of an incoming SimdArray,
+/// if there is no vectorized code at hand. This is less efficient, but often just enough,
+/// especially if the transformation is simple.
+///
+/// class transformation is constructed either with a single coordinate transformation
+/// functor, or, if USE_VC is defined, optionally with a vectorized coordinate transformation
+/// functor as second constructor argument. The functors have to satisfy the two type aliases
+/// transform and vtransform (see above). For illustration, consider this:
+///
+/// template < class rc_type >
+/// void tf_identity ( const TinyVector < rc_type , 2 > & c_in ,
+///                          TinyVector < rc_type , 2 > & c_out )
+/// {
+///   c_out = c_in ;
+/// }
+/// 
+/// template < class rc_v >
+/// void vtf_identity ( const TinyVector < rc_v , 2 > & c_in ,
+///                           TinyVector < rc_v , 2 > & c_out )
+/// {
+///   c_out = c_in ;
+/// }
+/// 
+///   vspline::transformation < pixel_type , rc_type , 2 , 2 >
+///      tf ( tf_identity<rc_type> , vtf_identity<rc_v>  ) ;
+
+// TODO: it's not really necessary to pass in value_type as a template arg, it's only
+// used to determine the correct vsize.
+// TODO: did I get muddled with dim_in and dim_out here? doublecheck!
+// TODO: naming with 'in' and 'out'creates confusion. use other naming scheme
+
+template < class value_type , /// coefficient/result type, only used to determine vsize
+           class rc_type ,    /// elementary coordinate type (float, double)
+           int dim_in ,       /// dimension of outgoing coordinates (== dim. of coefficient array)
+           int dim_out >      /// dimension of incoming coordinates (== dim. of target array)
+class transformation
+{
+  // transform < rc_type , dim_in , dim_out > is a functor reading a coordinate
+  // with dim_out components and writing a coordinate with dim_in components.
+  // The coordinate typedefs below follow that convention, so operator() of this
+  // class has the same argument types as the functor it wraps, also if dim_in
+  // and dim_out differ.
+
+  typedef transform < rc_type , dim_in , dim_out > transform_type ;
+
+  transform_type tf ;   /// functor for single-value coordinate transformation
+
+  typedef TinyVector < rc_type , dim_out > rc_in_type ;  /// incoming coordinate
+  typedef TinyVector < rc_type , dim_in > rc_out_type ;  /// outgoing coordinate
+
+#ifdef USE_VC
+
+  typedef rc_v < rc_type , value_type > rc_v_type ;
+
+  // compile-time constant rather than a per-object data member
+  enum { vsize = rc_v_type::Size } ;
+
+  typedef TinyVector < rc_v_type , dim_out > rc_v_in_type ;  /// incoming vectorized coordinate
+  typedef TinyVector < rc_v_type , dim_in > rc_v_out_type ;  /// outgoing vectorized coordinate
+
+  typedef vtransform < rc_v_type , dim_in , dim_out > vtransform_type ;
+
+  vtransform_type vtf ; /// functor for vectorized operation, may be empty
+
+  /// broadcast calls the single-element transform repeatedly to emulate vectorized operation:
+  /// for each of the vsize lanes, the lane's incoming coordinate is extracted, transformed
+  /// with tf, and the result is stored to the same lane of c_out.
+
+  void broadcast ( const rc_v_in_type & c_in , rc_v_out_type & c_out )
+  {
+    rc_in_type cs_in ;
+    rc_out_type cs_out ;
+
+    for ( int e = 0 ; e < vsize ; e++ )
+    {
+      for ( int d = 0 ; d < dim_out ; d++ )
+        cs_in[d] = c_in[d][e] ;
+      tf ( cs_in , cs_out ) ;
+      for ( int d = 0 ; d < dim_in ; d++ )
+        c_out[d][e] = cs_out[d] ;
+    }
+  }
+
+public:
+
+  /// this constructor takes transformation functors for both single-value and
+  /// vectorized coordinate transforms
+
+  transformation ( transform_type _tf , vtransform_type _vtf )
+  : tf ( _tf ) ,
+    vtf ( _vtf )
+  { }
+
+  /// this constructor takes only the single-value transformation functor.
+  /// vtf is left empty, so that broadcast (see above) is used to emulate
+  /// vectorized transformation.
+  /// If the performance penalty for not using a vectorized transformation is irrelevant,
+  /// or to try out a transformation, this mode of operation is just as good.
+
+  transformation ( transform_type _tf )
+  : tf ( _tf )
+  { }
+
+#else
+
+public:
+
+  /// if USE_VC is not defined, we only have this single constructor:
+
+  transformation ( transform_type _tf )
+  : tf ( _tf )
+  { }
+
+#endif
+
+public :
+
+  /// class transformation's operator() delegates to the functor passed in at construction
+
+  void operator() ( const rc_in_type& c_in , rc_out_type& c_out )
+  {
+    tf ( c_in , c_out ) ;
+  }
+
+#ifdef USE_VC
+
+  /// if USE_VC is defined, we overload operator() for vectorized arguments.
+  /// If a vectorized functor was passed in at construction, it is used,
+  /// otherwise the single-value functor is applied lane by lane.
+
+  void operator() ( const rc_v_in_type& c_in , rc_v_out_type& c_out )
+  {
+    if ( vtf )
+      vtf ( c_in , c_out ) ;
+    else
+      broadcast ( c_in , c_out ) ;
+  }
+
+#endif
+} ;
+
+/// struct coordinate_iterator_v provides the vectorized equivalent of vigra's
+/// MultiCoordinateIterator. The iterator itself is not very elaborate, it only
+/// supports dereferencing and preincrement, but for the intended purpose that's
+/// enough. The iterator is cyclical, so the number of times it's used has to be
+/// controlled by the calling code.
+/// dereferencing this object will yield a vectorized coordinate which is guaranteed
+/// to deinterleave to vsize valid ordinary coordinates, but it is only guaranteed that
+/// these ordinary coordinates are unique if several preconditions are met:
+/// - the shape (end - start) must have more than vsize entries
+/// - the iteration doesn't proceed beyond the number of full vectors
+/// The vectorized coordinate delivered at the 'wrap-around-point', so one iteration
+/// after the last full set has been taken, will be partly wrapped around to the start
+/// of the iteration. This makes sure the contained ordinary coordinates are valid.
+/// but accessing data through it will again touch elements which have already been
+/// seen. As long as the indices aren't used for an in-place operation, recalculating
+/// a few values does no harm.
+// TODO this is utility code and might be placed into another header, probably next to
+// the routine producing successive gather indices for vectorized array traversal
+
+template < class _rc_v , int _dimension >
+struct coordinate_iterator_v
+{
+  enum { dimension = _dimension } ;
+  typedef vigra::TinyVector < int , dimension > shape_type ;
+
+  typedef _rc_v rc_v ;
+  enum { vsize = rc_v::Size } ;
+  typedef vigra::TinyVector < rc_v , dimension > nd_rc_v ; // vectorized n-dimensional coordinate
+
+  shape_type start ;  // lower bound of the coordinates (inclusive)
+  shape_type end ;    // upper bound of the coordinates (exclusive)
+  shape_type shape ;  // end - start
+  nd_rc_v c ;         // the current vectorized coordinate
+
+public:
+
+  /// the constructor sets the current vectorized coordinate to the first vsize
+  /// coordinates of a scan over 'shape', each offset by 'start'. If the shape has
+  /// fewer than vsize entries, the scan restarts from its beginning.
+
+  coordinate_iterator_v ( shape_type _start , shape_type _end )
+  : start ( _start ) ,
+    end ( _end ) ,
+    shape ( _end - _start )
+  {
+    vigra::MultiCoordinateIterator<dimension> mci_start ( shape ) ;
+    auto mci_end = mci_start.getEndIterator() ;
+    auto mci = mci_start ;
+
+    for ( int i = 0 ; i < vsize ; i++ )
+    {
+      shape_type index = *mci + start ;
+      ++mci ;
+      if ( mci >= mci_end )
+        mci = mci_start ;
+      for ( int d = 0 ; d < dimension ; d++ )
+        c[d][i] = index[d] ;
+    }
+  }
+
+  /// dereferencing yields the current vectorized coordinate
+
+  const nd_rc_v& operator*() const
+  {
+    return c ;
+  }
+
+  /// preincrement advances all contained coordinates by vsize positions.
+  /// It returns a reference to the iterator itself, so incrementing does
+  /// not produce a copy of the iterator.
+
+  coordinate_iterator_v& operator++()
+  {
+    c[0] += vsize ;        // increase level 0 values
+    auto mask = ( c[0] >= end[0] ) ; // mask overflow
+    while ( any_of ( mask ) )
+    {
+      // we need to do more only if c[0] overflows
+      c[0] ( mask ) -= shape[0] ; // fold back to range
+      for ( int d = 1 ; d < dimension ; d++ )
+      {
+        c[d] ( mask ) ++ ; // increase next-higher dimension's index
+        mask = ( c[d] >= end[d] ) ; // check for range overflow
+        // resultant mask is either empty or the same as before
+        if ( none_of ( mask ) )         // if no overflow, we're done
+          break ;
+        c[d] ( mask ) = start[d] ; // set back to lower bound
+        // with the next loop iteration, the next dimension's index is increased
+      }
+      // having increased c[0] by vsize can require several iterations
+      // if shape[0] < vsize, so we need to mask, test, etc. again until
+      // all of c[0] are back in range
+      mask = ( c[0] >= end[0] ) ; // mask overflow
+    }
+    // otherwise, increasing c[0] landed inside the range,
+    return *this ;
+  }
+} ;
+
+/// this function provides a generalized geometric transformation
+/// of a source image
+///
+/// In image transformations, often the source location at which interpolation
+/// of the source image is required is defined by a transformation of the location
+/// of the target coordinate. There are (at least) two approaches to handle this.
+/// The first is what is implemented in the previous implementation of remap: we
+/// calculate a 'warp' array full of transformed coordinates and process it en bloc.
+/// Alternatively, with the coordinate transformation at hand, we can write a remap
+/// variant which performs the coordinate transformation to each target coordinate
+/// as it goes along, saving the construction of the warp array.
+///
+/// The transformation is passed in by using a 'transformation' object, which is a
+/// wrapper around the actual transformation routine(s).
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type ,         // float or double, for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int st_tf_remap ( const bspline < value_type , dim_in > & bspl ,
+                  transformation < value_type , rc_type , dim_in , dim_out > tf ,
+                  MultiArrayView < dim_out , value_type > output ,
+                  typename MultiArrayView < dim_out , value_type > :: difference_type offset ,
+                  bool use_vc = true
+               )
+{
+  // parameters:
+  // - bspl:   the b-spline to evaluate
+  // - tf:     transformation applied to each (offset) coordinate into output,
+  //           yielding the coordinate at which the spline is evaluated
+  // - output: array receiving the results
+  // - offset: added to the coordinates into output before they are transformed
+  // - use_vc: if USE_VC is defined, selects vectorized evaluation for all
+  //           full groups of vsize values; without USE_VC it has no effect
+  // the function always returns 0.
+
+  typedef evaluator < dim_in , value_type , rc_type , int > evaluator_type ;
+  typedef typename evaluator_type::ele_type ele_type ;
+  evaluator_type ev ( bspl ) ;
+  ele_type * p_workspace = new ele_type [ ev.workspace_size() ] ;
+
+  typedef typename CoupledIteratorType < dim_out , value_type > :: type Iterator ;
+  Iterator target_it = createCoupledIterator ( output ) ;
+  int leftover = output.elementCount() ;
+
+#ifdef USE_VC
+
+  if ( use_vc )
+  {
+    typedef typename evaluator_type::ele_v ele_v ;
+    typedef typename evaluator_type::rc_v rc_v ;
+    const int vsize = evaluator_type::vsize ;
+
+    // the vectorized workspace is allocated and released inside this branch,
+    // so it is neither allocated needlessly nor leaked when use_vc is false
+
+    ele_v * p_workspace_v = new ele_v [ ev.workspace_size() ] ;
+
+    TinyVector < rc_v , dim_in > c_out ; // transformed coordinates (into input, which has dim_in dims)
+
+    int aggregates = output.elementCount() / vsize ;            // number of full vectors
+    leftover = output.elementCount() - aggregates * vsize ;     // any leftover single values
+    const bool unstrided = output.isUnstrided() ;               // does not change inside the loop
+    value_type * destination = output.data() ;
+    value_type target_buffer [ vsize ] ;
+    // use utility class coordinate_iterator_v to produce vectorized coordinates:
+    coordinate_iterator_v < rc_v , dim_out > civ ( offset , offset + output.shape() ) ;
+
+    for ( int a = 0 ; a < aggregates ; a++ )
+    {
+      tf ( *civ , c_out ) ;       // perform the coordinate transformation
+      ++civ ;
+      if ( unstrided )
+      {
+        // finally, here we evaluate the spline
+        ev ( c_out , destination , p_workspace_v ) ;
+        destination += vsize ;
+      }
+      else
+      {
+        // alternative evaluation if we can't write straight to destination
+        ev ( c_out , target_buffer , p_workspace_v ) ;
+        for ( int e = 0 ; e < vsize ; e++ )
+        {
+          target_it.get<1>() = target_buffer[e] ;
+          ++target_it ;
+        }
+      }
+    }
+
+    // in the unstrided case the iterator was not used above,
+    // so we bring it up to date for the leftovers
+    if ( unstrided )
+      target_it += aggregates * vsize ;
+
+    delete[] p_workspace_v ;
+  }
+
+#endif // USE_VC
+
+  TinyVector < rc_type , dim_out > cs_in ;
+  TinyVector < rc_type , dim_in > cs_out ;
+
+  // process leftovers, if any - if vc isn't used, this loop does all the processing
+  while ( leftover-- )
+  {
+    // process leftovers with single-value evaluation of transformed coordinate
+    cs_in = target_it.get<0>() + offset ;
+    tf ( cs_in , cs_out ) ;
+    target_it.get<1>() = ev ( cs_out , p_workspace ) ;
+    ++target_it ;
+  }
+
+  delete[] p_workspace ;
+  return 0 ;
+}
+
+/// multithreaded tf_remap routine, splitting the target array to chunks.
+/// This is just as for the warp-array-based remap. The target array is split into
+/// chunks, which are in turn processed in a thread each by the single-threaded routine.
+/// Note the parameter 'offset'. Since we operate on the coordinates of the target array,
+/// we have to keep track of the coordinate the origin of each chunk had in the unsplit
+/// target. This is what 'offset' does, it's effectively added to the coordinates in the
+/// chunks when they are transformed.
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type ,         // float or double for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int tf_remap ( const bspline < value_type , dim_in > & bspl ,
+               transformation < value_type , rc_type , dim_in , dim_out > tf ,
+               MultiArrayView < dim_out , value_type > output ,
+               bool use_vc = true ,
+               int nthreads = ncores            
+             )
+{
+  // parameters:
+  // - bspl:     the b-spline to evaluate
+  // - tf:       the coordinate transformation, passed on to st_tf_remap
+  // - output:   array receiving the results
+  // - use_vc:   passed through to st_tf_remap
+  // - nthreads: passed through to divide_and_conquer
+  // the function always returns 0.
+
+  using namespace std::placeholders ;
+
+  // we use bind to bind the first two and last argument of the single-threaded
+  // tf_remap function, leaving two free arguments which will accept the chunk
+  // of 'output' to iterate over and the chunk's offset, both provided
+  // by divide_and_conquer
+
+  auto chunk_func
+  = std::bind ( st_tf_remap < value_type , rc_type , dim_in , dim_out > , // single-threaded tf_remap
+                std::ref(bspl) ,     // bspline passed in above
+                tf ,       // transformation passed in above
+                _1 ,       // placeholders to accept data chunk
+                _2 ,       // and offset from divide_and_conquer
+                use_vc ) ; // use_vc flag as passed in above
+
+  divide_and_conquer < decltype ( output ) > // run divide_and_conquer
+  :: run ( output ,                          // on the output array
+           chunk_func ,                      // applying the functor we've made with bind
+           -1 ,                              // allowing array splitting along any axis
+           nthreads ) ;                      // using nthreads threads
+
+  // the function is declared to return int, so it must not flow off its end
+  return 0 ;
+}
+
+/// this highest-level transform-based remap takes an input array and creates
+/// a b-spline over it internally. Then it calls the previous routine, which
+/// takes a bspline as its first parameter. Like with warp-array-based remap,
+/// this is for on-the-fly remapping where the b-spline won't be reused.
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type , // usually float for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int tf_remap ( MultiArrayView < dim_in , value_type > input ,
+               transformation < value_type , rc_type , dim_in , dim_out > tf ,
+               MultiArrayView < dim_out , value_type > output ,
+               bcv_type<dim_in> bcv = bcv_type<dim_in> ( MIRROR ) ,
+               int degree = 3 ,
+               bool use_vc = true ,
+               int nthreads = ncores            
+             )
+{
+  // parameters:
+  // - input:    array of data the b-spline is created over
+  // - tf:       the coordinate transformation
+  // - output:   array receiving the results
+  // - bcv:      boundary condition codes used for the b-spline
+  // - degree:   degree of the b-spline
+  // - use_vc:   passed through to prefiltering and to the remap
+  // - nthreads: passed through to prefiltering and to the remap
+  // the function always returns 0.
+
+  // create the bspline object
+  bspline < value_type , dim_in > bsp ( input.shape() , degree , bcv ) ;
+  // copy in the data
+  bsp.core = input ;
+  // prefilter
+  bsp.prefilter ( use_vc , nthreads ) ;
+
+  // delegate to the variant taking a bspline object
+  tf_remap<value_type,rc_type,dim_in,dim_out>
+           ( bsp , tf , output , use_vc , nthreads ) ;
+
+  // the function is declared to return int, so it must not flow off its end
+  return 0 ;
+}
+
+/// st_make_warp_array fills a warp array with transformed coordinates instead of
+/// evaluating a spline: for every position in 'warp_array', the position plus 'offset'
+/// is passed through 'tf' and the result is stored. The warp array can then be fed to
+/// a warp-array-based remap, so an expensive transformation is computed only once and
+/// its results can be reused. This is the single-threaded routine; make_warp_array
+/// applies it to several chunks in parallel via divide_and_conquer. Always returns 0.
+
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type ,         // float or double, for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int st_make_warp_array ( transformation < value_type , rc_type , dim_in , dim_out > tf ,
+                         MultiArrayView < dim_out ,
+                                          TinyVector < rc_type , dim_in > > & warp_array ,
+                         typename MultiArrayView < dim_out , value_type > :: difference_type offset ,
+                         bool use_vc = true
+                       )
+{
+  /// incoming coordinates (into output, which has dim_out dims)
+  /// generated by vigra's CoupledIterator or, in the vectorized code,
+  /// by coordinate_iterator_v
+
+  typedef TinyVector < rc_type , dim_out > nd_rc_in ;
+
+  /// transformed coordinates (into input, which has dim_in dims)
+  /// populating the resulting warp array
+
+  typedef TinyVector < rc_type , dim_in > nd_rc_out ;
+
+  nd_rc_in cs_in ;
+  nd_rc_out cs_out ;
+  // target_it's type depends on the template arguments, hence '.template get' below
+  auto target_it = createCoupledIterator ( warp_array ) ;
+  auto leftover = warp_array.elementCount() ; // keep the count's own type, don't narrow to int
+
+#ifdef USE_VC
+
+  typedef typename ExpandElementResult<value_type>::type ele_type ;
+  typedef Vc::Vector < ele_type > ele_v ;
+  const int vsize = ele_v::Size ;
+  typedef Vc::SimdArray < rc_type , vsize > rc_v ;
+
+  /// vectorized incoming coordinates (into output, which has dim_out dims)
+  typedef TinyVector < rc_v , dim_out > nd_rc_in_v ;
+
+  /// vectorized transformed coordinates (into input, which has dim_in dims)
+  typedef TinyVector < rc_v , dim_in > nd_rc_out_v ;
+
+  if ( use_vc )
+  {
+    auto aggregates = warp_array.elementCount() / vsize ;       // number of full vectors
+    leftover = warp_array.elementCount() - aggregates * vsize ; // any leftover single values
+    nd_rc_out_v target_buffer ;                                 // vectorized warp coordinates
+
+    // use utility class coordinate_iterator_v to produce vectorized target coordinates:
+    coordinate_iterator_v < rc_v , dim_out > civ ( offset , offset + warp_array.shape() ) ;
+
+    for ( decltype ( aggregates ) a = 0 ; a < aggregates ; a++ )
+    {
+      tf ( *civ , target_buffer ) ;       // perform the coordinate transformation
+      ++civ ;
+      // interleave target_buffer into warp array (TODO: maybe use scatter?)
+      for ( int e = 0 ; e < vsize ; e++ )
+      {
+        for ( int d = 0 ; d < dim_in ; d++ )
+          target_it.template get<1>()[d] = target_buffer[d][e] ;
+        ++target_it ;
+      }
+    }
+  }
+
+#endif // USE_VC
+
+  // process leftovers, if any - if vc isn't used, this loop does all the processing
+  while ( leftover-- )
+  {
+    // single-value path: coordinate within the chunk plus offset, transformed by tf
+    cs_in = target_it.template get<0>() + offset ;
+    tf ( cs_in , cs_out ) ;
+    target_it.template get<1>() = cs_out ;
+    ++target_it ;
+  }
+
+  return 0 ;
+}
+
+/// multithreaded code to make a warp array using a coordinate transformation: runs
+/// st_make_warp_array on chunks of 'warp_array' via divide_and_conquer. Returns 0.
+template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type ,         // float or double for coordinates
+           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >           // number of dimensions of output array
+int make_warp_array ( transformation < value_type , rc_type , dim_in , dim_out > tf ,
+                      MultiArrayView < dim_out ,
+                                       TinyVector < rc_type , dim_in > > warp_array ,
+                      bool use_vc = true ,
+                      int nthreads = ncores
+                    )
+{
+  using namespace std::placeholders ;
+
+  // we use bind to fix the first and the last argument (tf and use_vc) of the
+  // single-threaded function, leaving two free arguments: the chunk of
+  // 'warp_array' to fill and that chunk's offset, both of which are supplied
+  // by divide_and_conquer
+  auto chunk_func
+  = std::bind ( st_make_warp_array < value_type , rc_type , dim_in , dim_out > , // single-threaded make_warp_array
+                tf ,       // transformation passed in above
+                _1 ,       // placeholders to accept data chunk
+                _2 ,       // and offset from divide_and_conquer
+                use_vc ) ; // use_vc flag as passed in above
+
+  divide_and_conquer < decltype ( warp_array ) > // run divide_and_conquer
+  :: run ( warp_array ,                    // on the warp array to be filled
+           chunk_func ,                    // applying the functor we've made with bind
+           -1 ,                            // allowing array splitting along any axis
+           nthreads ) ;                    // using nthreads threads
+  return 0 ; // declared int: flowing off the end without a return is undefined behaviour
+}
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_REMAP_H
diff --git a/vspline.doxy b/vspline.doxy
new file mode 100644
index 0000000..5f4edf5
--- /dev/null
+++ b/vspline.doxy
@@ -0,0 +1,2303 @@
+# Doxyfile 1.8.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "vspline"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = 16
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = "Generic C++ Code for Uniform B-Splines"
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is included in
+# the documentation. The maximum height of the logo should not exceed 55 pixels
+# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
+# to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = YES
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
+# new page for each member. If set to NO, the documentation of a member will be
+# part of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES                =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C.
+#
+# Note For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match function declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespaces
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO these classes will be included in the various overviews. This option has
+# no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
+# todo list. This list is created by putting \todo commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the
+# test list. This list is created by putting \test commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES the list
+# will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output is used
+# as the file version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. Do not use file names with spaces, bibtex cannot handle them. See
+# also \cite for info on how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO doxygen will only warn about wrong or incomplete parameter
+# documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = . example
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank the
+# following patterns are tested: *.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
+# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
+# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
+# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
+# *.qsf, *.as and *.js.
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
+# defined cascading style sheet that is included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet file to the output directory. For an example
+# see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the stylesheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls whether a separate .chi index file is generated
+# (YES) or whether it is included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using prerendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavours of web server based searching depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools. See
+# the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. To get the times font for
+# instance you can specify
+# EXTRA_PACKAGES=times
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
+# replace them by respectively the title of the page, the current date and time,
+# only the current date, the version number of doxygen, the project name (see
+# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify a XML DTD, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
+# Definitions (see http://autogen.sf.net) file that captures the structure of
+# the code including all documentation. Note that this feature is still
+# experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
+# in the source code. If set to NO only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             = USE_VC
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have an
+# all uppercase name, and do not end with a semicolon. Such function macros are
+# typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the
+# class index. If set to NO only the inherited external classes will be listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
+# the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES, the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO.
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a different looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct group dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot.
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif and svg.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lie
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = YES
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES
diff --git a/vspline.h b/vspline.h
new file mode 100644
index 0000000..60c3541
--- /dev/null
+++ b/vspline.h
@@ -0,0 +1,38 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file vspline.h
+
+    \brief includes all headers from vspline (most of them indirectly)
+
+*/
+
+#include "remap.h"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git