[vspline] 39/72: small changes due to problems showing up with example code these are mainly type changes in filter.h and eval.h. also added a few more examples with simple tasks (slice.cc, complex.cc)

Kay F. Jahnke kfj-guest at moszumanska.debian.org
Sun Jul 2 09:02:41 UTC 2017


This is an automated email from the git hooks/post-receive script.

kfj-guest pushed a commit to branch master
in repository vspline.

commit 9262c143b16fb5639602de0fa73c25a32e6f412a
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date:   Thu Mar 23 12:13:30 2017 +0100

    small changes due to problems showing up with example code
    these are mainly type changes in filter.h and eval.h.
    also added a few more examples with simple tasks (slice.cc, complex.cc)
---
 brace.h                     |   8 +-
 eval.h                      | 268 +++++++++++++++++++++++++++++++++++---------
 example/complex.cc          |  81 +++++++++++++
 example/impulse_response.cc |  20 +---
 example/roundtrip.cc        |   8 +-
 example/slice.cc            | 127 +++++++++++++++++++++
 example/slice2.cc           | 245 ++++++++++++++++++++++++++++++++++++++++
 example/slice3.cc           | 147 ++++++++++++++++++++++++
 example/splinus.cc          |  96 ++++++++++++++++
 filter.h                    |  85 +++++++-------
 remap.h                     |  91 +--------------
 unary_functor.h             |  49 +++++++-
 12 files changed, 1013 insertions(+), 212 deletions(-)

diff --git a/brace.h b/brace.h
index 683903e..27d5d18 100644
--- a/brace.h
+++ b/brace.h
@@ -423,7 +423,7 @@ struct bracer
             // but this fails in 1D TODO: why?
             auto target = a.bindAt ( axis , lt ) ; // get a view to left target slice
             target = a.bindAt ( axis , lp ) ;      // assign value of left pivot slice
-            target *= ele_type(2) ;                // double that
+            target *= value_type(2) ;                // double that
             target -= a.bindAt ( axis , ls ) ;     // subtract left source slice
             break ;
           }
@@ -436,7 +436,7 @@ struct bracer
           case ZEROPAD :
           {
             // fill with 0
-            a.bindAt ( axis , lt ) = 0 ;
+            a.bindAt ( axis , lt ) = value_type() ;
             break ;
           }
           case SPHERICAL : // needs special treatment
@@ -475,7 +475,7 @@ struct bracer
             // but this fails in 1D TODO: why?
             auto target = a.bindAt ( axis , rt ) ; // get a view to right targte slice
             target = a.bindAt ( axis , rp ) ;      // assign value of pivot slice
-            target *= ele_type(2) ;                // double that
+            target *= value_type(2) ;                // double that
             target -= a.bindAt ( axis , rs ) ;     // subtract source slice
             break ;
           }
@@ -488,7 +488,7 @@ struct bracer
           case ZEROPAD :
           {
             // fill with 0
-            a.bindAt ( axis , rt ) = 0 ;
+            a.bindAt ( axis , rt ) = value_type() ;
             break ;
           }
           case SPHERICAL : // needs special treatment
diff --git a/eval.h b/eval.h
index fe44ce9..54d4e60 100644
--- a/eval.h
+++ b/eval.h
@@ -176,7 +176,8 @@ MultiArray < 2 , target_type > calculate_weight_matrix ( int degree , int deriva
 /// We access the weight functors via a pointer to this base class in the code below.
 
 template < typename ele_type , // elementary type of value_type
-           typename rc_type >  // type of real-valued coordinate
+           typename rc_type ,  // type of real-valued coordinate
+           int vsize = 1 >
 struct weight_functor_base
 {
   // we define two pure virtual overloads for operator(), one for unvectorized
@@ -187,8 +188,8 @@ struct weight_functor_base
   
 #ifdef USE_VC
 
-  typedef typename vector_traits < ele_type > :: ele_v ele_v ;
-  typedef typename vector_traits < rc_type , ele_v::Size > :: ele_v rc_v ;
+  typedef typename vector_traits < ele_type , vsize > :: ele_v ele_v ;
+  typedef typename vector_traits < rc_type , vsize > :: ele_v rc_v ;
 
   virtual void operator() ( ele_v* result , const rc_v& delta ) = 0 ;
 
@@ -265,11 +266,11 @@ struct bspline_derivative_weights
 /// we derive from the weight functor base class to obtain a (multi-) functor
 /// specifically for (derivatives of) a b-spline :
 
-template < typename ele_type , typename rc_type >
+template < typename ele_type , typename rc_type , int vsize = 1 >
 struct bspline_derivative_weight_functor
-: public weight_functor_base < ele_type , rc_type >
+: public weight_functor_base < ele_type , rc_type , vsize >
 {
-  typedef weight_functor_base < ele_type , rc_type > base_class ;
+  typedef weight_functor_base < ele_type , rc_type , vsize > base_class ;
 
   // set up the fully specialized functors to which operator() delegates:
 
@@ -392,7 +393,8 @@ struct average_weight_functor
 /// neighbour interpolation, which has the same effect as simply running with degree 0 but avoids
 /// code which isn't needed for nearest neighbour interpolation (like the application of weights,
 /// which is futile under the circumstances, the weight always being 1.0).
-/// specialize can also be set to 1 for explicit n-linear interpolation. Any value but 0 or 1 will
+/// specialize can also be set to 1 for explicit n-linear interpolation, and (new) to 2 to use
+/// 'softened linear' interpolation (see _eval_soft for an explanation). Any other value will
 /// result in the general-purpose code being used.
 ///
 /// - template argument 'evenness' can be passed in as 1 to enforce raw-mode coordinate splitting
@@ -403,8 +405,10 @@ struct average_weight_functor
 /// with the routines defined in mapping.h.
 ///
 /// class evaluator inherits from class unary_functor, which implements the broader concept.
-/// class evaluator has more methods to help with special cases, but apart from that it is
-/// a standard vspline::unary_functor.
+/// class evaluator has some methods to help with special cases, but apart from that it is
+/// a standard vspline::unary_functor. This inheritance gives us the full set of class
+/// unary_functor's methods, among them the convenient overloads of operator() which
+/// allow us to invoke class evaluator's evaluation with function call syntax.
 ///
 /// Note how the number of vector elements is fixed here by picking the number of ele_type
 /// which vspline::vector_traits considers appropriate. There should rarely be a need to
@@ -412,12 +416,10 @@ struct average_weight_functor
 /// computationally intensive part of a processing chain, and therefore this choice is
 /// sensible. But it's not mandatory.
 
-template < int _dimension ,         // dimension of the spline
-           class _value_type ,      // type of spline coefficients/result (pixels etc.)
-           class _rc_type ,         // singular real coordinate, float or double
-           class _ic_type ,         // singular integral coordinate, currently only int
-           int specialize = -1 ,    // specialize for degree 0 or 1
-           int evenness = -1 ,      // specialize for raw mapping, even or odd spline
+template < typename _coordinate_type , // nD real coordinate
+           typename _value_type ,      // type of coefficient/result
+           int specialize = -1 ,       // specialize for degree 0 or 1
+           int evenness = -1 ,         // specialize for raw mapping, even or odd spline
 #ifdef USE_VC
            int _vsize = simdized_ele_type < _value_type > :: Size
 #else
@@ -425,39 +427,41 @@ template < int _dimension ,         // dimension of the spline
 #endif
          > // nr. of vector elements
 class evaluator
-: public unary_functor
-         < vigra::TinyVector < _rc_type , _dimension > ,
-           _value_type ,
-           _vsize
-         >
+: public unary_functor < _coordinate_type , _value_type , _vsize >
 {
 public:
 
- // we want to access facilites of the base class (vspline::unary_functor<...>)
- // so we use a typedef for the base class.
+  typedef _value_type value_type ; // == base_type::out_type
 
- typedef unary_functor
-         < vigra::TinyVector < _rc_type , _dimension > ,
-           _value_type ,
-           _vsize
-         >
-         base_type ;
+  // we want to access facilites of the base class (vspline::unary_functor<...>)
+  // so we use a typedef for the base class.
+
+  typedef unary_functor < _coordinate_type , _value_type , _vsize > base_type ;
 
   // pull in standard vspline::unary_functor type system with this macro:
 
-  using_unary_functor_types ( base_type )
+  using_unary_functor_types ( base_type ) ;
   
-  typedef _ic_type ic_type ;
-  typedef _rc_type rc_type ;
-  typedef _value_type value_type ;
+  // relying on base type to provide these:
   
-  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
-  enum { dimension = _dimension }  ;
-  enum { level = _dimension - 1 }  ;
-  enum { channels = vigra::ExpandElementResult < value_type > :: size } ;
+  typedef in_ele_type rc_type ;    // elementary type of a coordinate
+  typedef out_ele_type ele_type ;  // elementary type of value_type
+  enum { dimension = dim_in }  ;
+  enum { level = dim_in - 1 }  ;
+  enum { channels = dim_out } ;
 
-  typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
+  // types for nD integral indices, these are not in base_type
+  
+  typedef int ic_type ;            // we're only using int for indices
+  
   typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;
+  
+  // we don't use:  typedef in_type nd_rc_type ;
+  // because if the spline is 1D, in_type comes out as, say, a plain float,
+  // while the code in class evaluator expects TinyVector<float,1>. This produces
+  // no inconvenience, since there are specializations for 1D splines.
+
+  typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
 
   /// view_type is used for a view to the coefficient array
 
@@ -485,12 +489,13 @@ public:
 
   typedef nd_mapping < ic_type , rc_type , dimension , vsize > nd_mapping_type ;
 
-  typedef weight_functor_base < ele_type , rc_type > weight_functor_base_type ;
+  typedef weight_functor_base < ele_type , rc_type , vsize > weight_functor_base_type ;
 
   /// in the context of b-spline calculation, this is the weight generating
   /// functor which will be used:
 
-  typedef bspline_derivative_weight_functor < ele_type , rc_type > bspline_weight_functor_type ;
+  typedef bspline_derivative_weight_functor < ele_type , rc_type , vsize >
+    bspline_weight_functor_type ;
   
   // to try out gaussian weights, one might instead use
   // typedef gaussian_weight_functor < ele_type > bspline_weight_functor_type ;
@@ -758,6 +763,10 @@ public:
     }
   } ;
 
+  /// _eval_linear implements the specialization for n-linear (degree 1) b-splines.
+  /// here, there is no gain to be had from working with precomputed per-axis weights,
+  /// the weight generation is trivial. So the specialization given here is faster:
+  
   template < int level , class dtype >
   struct _eval_linear
   {
@@ -766,7 +775,7 @@ public:
                        const nd_rc_type& tune
                      ) const
     {
-      dtype result = ( 1.0 - tune[level] )
+      dtype result = dtype ( 1.0 - tune[level] )
                      * _eval_linear < level - 1 , dtype >() ( pdata , ofs , tune ) ;
       result += tune[level]
                 * _eval_linear < level - 1 , dtype >() ( pdata , ofs , tune ) ;
@@ -774,6 +783,8 @@ public:
     }
   } ;
 
+  /// again, level 0 terminates the recursion
+  
   template < class dtype >
   struct _eval_linear < 0 , dtype >
   {
@@ -782,7 +793,7 @@ public:
                        const nd_rc_type& tune
                      ) const
     {
-      dtype result = ( 1.0 - tune[0] ) * pdata [ *ofs ] ;
+      dtype result = dtype ( 1.0 - tune[0] ) * pdata [ *ofs ] ;
       ++ofs ;
       result += tune[0] * pdata [ *ofs ] ;
       ++ofs ;
@@ -790,6 +801,56 @@ public:
     }
   } ;
 
+  /// while plain linear interpolation is fast, the discontinuity in the first
+  /// derivative of the reconstruction filter (it's A-shaped) produces visible
+  /// artifacts ('star-shaped artifacts' for bilinear interpolation). To lessen
+  /// this effect and yet keep the support as small as the linear interpolation's,
+  /// I introduce 'softened linear' interpolation, which preprocesses the weights
+  /// of the linear interpolation with a simple 3d degree polynome, which is
+  /// symmetric around .5 and has extrema at 0 and 1. This method can be activated
+  /// by passing 'specialize' as 2.
+  
+  template < typename T >
+  static T wfunc ( T x )
+  {
+    x -= 0.5f ;
+    return 0.5f + ( 1.5f * x - 2 * x * x * x ) ;
+  }
+  
+  template < int level , class dtype >
+  struct _eval_soft
+  {
+    dtype operator() ( const dtype* & pdata ,
+                       offset_iterator & ofs ,
+                       const nd_rc_type& tune
+                     ) const
+    {
+      dtype result = dtype ( 1.0 - wfunc ( tune[level] ) )
+                     * _eval_soft < level - 1 , dtype >() ( pdata , ofs , tune ) ;
+      result += wfunc ( tune[level] )
+                * _eval_soft < level - 1 , dtype >() ( pdata , ofs , tune ) ;
+      return result ;
+    }
+  } ;
+
+  /// again, level 0 terminates the recursion
+  
+  template < class dtype >
+  struct _eval_soft < 0 , dtype >
+  {
+    dtype operator() ( const dtype* & pdata ,
+                       offset_iterator & ofs ,
+                       const nd_rc_type& tune
+                     ) const
+    {
+      dtype result = dtype ( 1.0 - wfunc ( tune[0] ) ) * pdata [ *ofs ] ;
+      ++ofs ;
+      result += wfunc ( tune[0] ) * pdata [ *ofs ] ;
+      ++ofs ;
+      return result ;
+    }
+  } ;
+
   // next are evaluation routines. there are quite a few, since I've coded for operation
   // from different starting points and both for vectorized and nonvectorized operation.
   
@@ -826,9 +887,12 @@ public:
   /// representing the origin of the coefficient window to process, second the fractional parts
   /// of the coordinate which are needed to calculate the weights to apply to the coefficient
   /// window. Note that the weights aren't as many values as the window has coefficients, but
-  /// only ORDER weights per dimension; the 'final' weights would be the tensor product of the
+  /// only ORDER weights per dimension; the 'final' weights would be the outer product of the
   /// sets of weights for each dimension, which we don't explicitly use here. Also note that
   /// we limit the code here to have the same number of weights for each axis.
+  /// Here we have the code for the specializations affected by the template argument
+  /// 'specialize' (TODO silly name) which provide specialized code for degree 0 and 1
+  /// b-splines (aka nearest-neighbour and n-linear interpolation)
   
   void eval ( const nd_ic_type& select ,
               const nd_rc_type& tune ,
@@ -847,6 +911,14 @@ public:
       offset_iterator ofs = offsets.begin() ;  // offsets reflects the positions inside the subarray
       result = _eval_linear<level,value_type>() ( base , ofs , tune ) ;
     }
+    else if ( specialize == 2 )
+    {
+      // linear interpolation. use specialized _eval_linear object
+      const value_type * base =   coefficients.data()
+                                + sum ( select * coefficients.stride() ) ;
+      offset_iterator ofs = offsets.begin() ;  // offsets reflects the positions inside the subarray
+      result = _eval_soft<level,value_type>() ( base , ofs , tune ) ;
+    }
     else
     {
       // general case. this works for degree 0 and 1 as well, but is less efficient
@@ -869,9 +941,15 @@ public:
     }
   }
 
-  /// this variant take a coordinate which hasn't been mapped yet, so it uses
+  /// this eval variant take a coordinate which hasn't been mapped yet, so it uses
   /// the nd_mapping, mmap, and applies it. It's the most convenient form but
   /// also the slowest, due to the mapping, which is quite expensive computationally.
+  /// Here we have the code for the specializations affected by template argument
+  /// 'evenness' (TODO silly name), which provides 'shortcut' code splitting the
+  /// incoming coordinate with a 'raw' mapping: no questions asked, no 'folding-in'
+  /// applied, may crash with out-of-bounds coordinates, but the fastest method
+  /// possible. With 'evenness' neither 0 or 1, mapping is done via the mmap
+  /// object, which will usually be something else apart from raw mapping.
   
   void eval ( const nd_rc_type& c , // unsplit coordinate
               value_type & result ) const
@@ -902,6 +980,9 @@ public:
     eval ( select , tune , result ) ;
   }
   
+  /// throughout I use a calling scheme which passes argument and result per
+  /// reference. But here is the 'normal' function call signature:
+
   value_type eval ( const nd_rc_type& c ) const // unsplit coordinate
   {
     value_type result ;
@@ -914,7 +995,8 @@ public:
   /// This is to avoid hard-to-track errors which might ensue from allowing
   /// broadcasting of single rc_types to nd_rc_types for D>1. We convert the
   /// single coordinate (of rc_type) to an aggregate of 1 to fit the signature
-  /// of the nD code above
+  /// of the nD code above, which will then be optimized away again by the
+  /// compiler.
   
   void eval ( const rc_type& c , // single 1D coordinate
               value_type & result ) const
@@ -939,10 +1021,10 @@ public:
   }
 
   /// alternative implementation of the last part of evaluation. Here, we calculate the
-  /// tensor product of the component vectors in 'weight' and use flat_eval to obtain the
+  /// outer product of the component vectors in 'weight' and use flat_eval to obtain the
   /// weighted sum over the coefficient window.
 
-  /// evaluation using the tensor product of the weight component vectors. This is simple,
+  /// evaluation using the outer product of the weight component vectors. This is simple,
   /// no need for recursion here, it's simply the sum of all values in the window, weighted
   /// with their corresponding weights. note that there are no checks; if there aren't enough
   /// weights the code may crash. Note that this code is formulated as a template and can be
@@ -958,10 +1040,10 @@ public:
       result += pweight[i] * pdata[offsets[i]] ;
   }
 
-  /// calculation of the tensor product of the component vectors of the weights:
+  /// calculation of the outer product of the component vectors of the weights:
 
   template < int _level , class dtype >
-  struct tensor_product
+  struct outer_product
   {
     void operator() ( dtype * & target ,
                       const MultiArrayView < 2 , dtype > & weight ,
@@ -974,7 +1056,7 @@ public:
         // 1.0. hence we can omit it:
         for ( int i = 0 ; i < weight.shape(0) ; i++ )
         {
-          tensor_product < _level - 1 , dtype >() ( target ,
+          outer_product < _level - 1 , dtype >() ( target ,
                                                     weight ,
                                                     weight [ Shape2 ( i , level ) ] ) ;
         }
@@ -984,7 +1066,7 @@ public:
         // otherwise we need it.
         for ( int i = 0 ; i < weight.shape(0) ; i++ )
         {
-          tensor_product < _level - 1 , dtype >() ( target ,
+          outer_product < _level - 1 , dtype >() ( target ,
                                                     weight ,
                                                     weight [ Shape2 ( i , level ) ] * factor ) ;
         }
@@ -993,7 +1075,7 @@ public:
   } ;
   
   template < class dtype >
-  struct tensor_product < 0 , dtype >
+  struct outer_product < 0 , dtype >
   {
     void operator() ( dtype * & target ,
                       const MultiArrayView < 2 , dtype > & weight ,
@@ -1037,8 +1119,8 @@ public:
     // we need an instance of a pointer to these weights here, passing it in
     // per reference to be manipulated by the code it's passed to
     ele_type * iter = flat_weight ;
-    // now we multiply out the weights using tensor_product()
-    tensor_product < level , ele_type >() ( iter , weight , 1.0 ) ;
+    // now we multiply out the weights using outer_product()
+    outer_product < level , ele_type >() ( iter , weight , 1.0 ) ;
     // finally we delegate to flat_eval above
     flat_eval ( base , flat_weight , result ) ;
   }
@@ -1112,7 +1194,7 @@ public:
   } ;
 
   // for linear (degree=1) interpolation we use a specialized routine
-  // which is slightly faster, bcause it directly uses the 'tune' values
+  // which is slightly faster, because it directly uses the 'tune' values
   // instead of building an array of weights first and passing that in.
 
   template < class dtype , int level >
@@ -1172,6 +1254,66 @@ public:
     }  
   } ;
 
+  // like linear interpolation, but using an additional function on the weight
+  // to avoid star-shaped artifacts
+
+  template < class dtype , int level >
+  struct _v_eval_soft
+  {
+    dtype operator() ( const component_base_type& base , // base adresses of components
+                       const ic_v& origin ,        // offsets to evaluation window origins
+                       offset_iterator & ofs ,     // offsets to coefficients inside this window
+                       const in_v& tune ) const       // weights to apply
+    {
+      dtype sum ;    ///< to accumulate the result
+      dtype subsum ;
+
+      sum = _v_eval_soft < dtype , level - 1 >() ( base , origin , ofs , tune ) ;
+      for ( int ch = 0 ; ch < channels ; ch++ )
+      {
+        sum[ch] *= ( rc_type ( 1.0 ) - wfunc ( tune [ level ] ) ) ;
+      }
+      
+      subsum = _v_eval_soft < dtype , level - 1 >() ( base , origin , ofs , tune );
+      for ( int ch = 0 ; ch < channels ; ch++ )
+      {
+        sum[ch] += ( wfunc ( tune [ level ] ) ) * subsum[ch] ;
+      }
+      
+      return sum ;
+    }  
+  } ;
+
+  /// the level 0 routine terminates the recursion
+  
+  template < class dtype >
+  struct _v_eval_soft < dtype , 0 >
+  {
+    dtype operator() ( const component_base_type& base , // base adresses of components
+                       const ic_v& origin ,              // offsets to evaluation window origins
+                       offset_iterator & ofs ,           // offsets to coefficients in this window
+                       const in_v& tune ) const       // weights to apply
+    {
+      dtype sum ;
+      auto o1 = *ofs ;
+      ++ofs ;
+      auto o2 = *ofs ;
+      ++ofs ;
+      
+      for ( int ch = 0 ; ch < channels ; ch++ )
+      {
+        sum[ch] = ( rc_type ( 1.0 ) - wfunc ( tune [ 0 ] ) )
+                  * ele_v ( base[ch] , origin + o1 ) ;
+                  
+        sum[ch] += wfunc ( tune [ 0 ] )
+                   * ele_v ( base[ch] , origin + o2 ) ; 
+      }
+      
+      
+      return sum ;
+    }  
+  } ;
+
   // vectorized variants of the evaluation routines:
   
   /// this is the vectorized version of the final delegate, calling _v_eval. The first
@@ -1229,10 +1371,24 @@ public:
       result = _v_eval_linear < out_v , level >()
                ( component_base , select , ofs , tune ) ;
     }
+    else if ( specialize == 2 )
+    {
+      // linear interpolation. this uses a specialized _v_eval object which
+      // directly processes 'tune' instead of creating an array of weights first.
+
+      offset_iterator ofs = component_offsets.begin() ;
+    
+      result = _v_eval_soft < out_v , level >()
+               ( component_base , select , ofs , tune ) ;
+    }
     else
     {
       // like in the unvectorized code, the 2D MultiArrayView to the weights is created
-      // as a view to data on the stack (weight_data) which is lightweight and fast
+      // as a view to data on the stack (weight_data) which is lightweight and fast.
+      // It is of course tempting to have this array handy as a member of class evaluator,
+      // but this would produce mutable state and make class evaluator thread-unsafe
+      // and no longer a 'pure' functor in a functional programming sense, so we use
+      // this bit of artistry here.
       
       // ... but clang++ does not accept this (variable sized array of non-POD type)
       // When the second variant is used, the code runs here compiled with clang++,
@@ -1313,7 +1469,7 @@ public:
   /// specialization can be used to avoid the (slightly less efficient) use of the mmap object.
   
   void eval ( const in_v & input ,    // number of dimensions * coordinate vectors
-                out_v & result )  const // number of channels * value vectors
+              out_v & result )  const // number of channels * value vectors
   {
     nd_ic_v select ;
     in_v tune ;
diff --git a/example/complex.cc b/example/complex.cc
new file mode 100644
index 0000000..cc0270b
--- /dev/null
+++ b/example/complex.cc
@@ -0,0 +1,81 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// \file complex.cc
+///
+/// \brief demonstrate use of b-spline over std::complex data
+///
+/// vspline handles std::complex data like pairs of the complex
+/// type's value_type, and uses a vigra::TinyVector of two
+/// simdized value_types as the vectorized type.
+
+#include <iomanip>
+#include <assert.h>
+#include <complex>
+#include <vspline/multithread.h>
+#include <vspline/vspline.h>
+
+int main ( int argc , char * argv[] )
+{
+// using the highest-level access to prefiltering, we code:
+
+  vspline::bspline < std::complex < float > , 1 > bsp ( 100000 , 3 , vspline::MIRROR ) ;
+  auto v1 = bsp.core ;
+  v1 [ 50000 ] = std::complex<float> ( 1.0 + 1i ) ;
+  bsp.prefilter() ;
+
+//   for ( int k = 4990 ; k < 5010 ; k++ )
+//   {
+//     std::cout << v1[k] << std::endl ;
+//   }
+
+  typedef vspline::evaluator < float ,
+                               std::complex<float> ,
+                               -1 , -1 , 2 > ev_type ;
+  
+  ev_type ev ( bsp ) ;
+  for ( float k = 49999.0 ; k < 50001.0 ; k += .1 )
+  {
+    std::cout << "ev(" << k << ") = " << ev(k) << std::endl ;
+  }
+  
+#ifdef USE_VC
+
+  for ( float k = 49999.0 ; k < 50001.0 ; k += .1 )
+  {
+    // feed the evaluator with vectors, just to show off. Note how the
+    // result appears as a vigra::TinyVector of two ev_type::out_v.
+    typename ev_type::in_v vk ( k ) ;
+    std::cout << "ev(" << vk << ") = " << ev(vk) << std::endl ;
+  }
+  
+#endif
+}
diff --git a/example/impulse_response.cc b/example/impulse_response.cc
index 8174dfa..12d5e5a 100644
--- a/example/impulse_response.cc
+++ b/example/impulse_response.cc
@@ -69,27 +69,10 @@
 /// note how three different ways of getting the result are given, the variants
 /// using lower-level access to the filter are commented out.
 ///
-/// The array sized used here may seem overly large, but this program also serves as
+/// The array size used here may seem overly large, but this program also serves as
 /// a test for prefiltering 1D arrays with 'fake 2D processing' which only occurs
 /// with large 1D arrays, see filter.h for more on the topic.
 
-// when playing with the filter a bit, I noticed that feeding the filter with the
-// absolute values of the poles produces an impulse response which looks remotely like
-// a gaussian and also represents a partition of unity (like the b-spline reconstruction
-// filter). Using this filter instead of the prefilter produces a blurred image, as one
-// might expect from a filter with an impulse response like this - the precise maths
-// aren't clear to me yet. 
-// the impulse resonse produced by passing in the absolute values of the poles is the
-// absolute of the 'normal' impulse response, scaled by a factor.
-// to keep the filter stable, the poles must be less than 1.0. positive poles approaching
-// 1 produce more and more blurring when coming closer to 1. Since the impulse response
-// with larger poles becomes very broad, smoothing is pronounced then. Within the context of
-// my spline prefiltering code, such far-reaching effects aren't dealt with properly, since
-// the calculation of the 'horizon' looks at the absolute value of the pole, so when smoothing
-// the signal instead of sharpening it, if the absolute value of the pole is large, the margins
-// won't be right because the default horizon is too small. If the horizon is widened, eventually
-// it'll be good enough to accomodate even strong smoothing.
-
 #include <iomanip>
 #include <assert.h>
 #include <vspline/multithread.h>
@@ -147,4 +130,5 @@ int main ( int argc , char * argv[] )
       std::cout << v1[k] << " ," << std::endl ;
     }
   }
+  std::cout << "} ;" << std::endl ;
 }
diff --git a/example/roundtrip.cc b/example/roundtrip.cc
index a7ca2a7..dbf27e4 100644
--- a/example/roundtrip.cc
+++ b/example/roundtrip.cc
@@ -144,15 +144,15 @@ void run_test ( view_type & data ,
   // get a view to the core coefficients (those which aren't part of the brace)
   view_type cfview = bsp.core ;
 
+  // get the coordinate type from the evaluator
+  typedef vigra::TinyVector < rc_type , 2 > coordinate_type ;
+  
   // get evaluator type
-  typedef vspline::evaluator<2,pixel_type,rc_type,int_type> eval_type ;
+  typedef vspline::evaluator<coordinate_type,pixel_type> eval_type ;
 
   // create the evaluator
   eval_type ev ( bsp ) ;
 
-  // get the coordinate type from the evaluator
-  typedef typename eval_type::nd_rc_type coordinate_type ;
-  
   // get type for coordinate array
   typedef vigra::MultiArray<2, coordinate_type> coordinate_array ;
   
diff --git a/example/slice.cc b/example/slice.cc
new file mode 100644
index 0000000..6047db0
--- /dev/null
+++ b/example/slice.cc
@@ -0,0 +1,127 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// slice.cc
+///
+/// build a 3D volume from samples of the RGB colour space
+/// build a spline over it and extract a 2D slice
+///
+/// compile with:
+/// clang++ -std=c++11 -march=native -o slice -O3 -pthread -DUSE_VC=1 slice.cc -lvigraimpex -lVc
+/// g++ also works.
+
+#include <vspline/vspline.h>
+
+#include <vigra/stdimage.hxx>
+#include <vigra/imageinfo.hxx>
+#include <vigra/impex.hxx>
+
+int main ( int argc , char * argv[] )
+{
+  // pixel_type is the result type, an RGB float pixel
+  typedef vigra::TinyVector < float , 3 > pixel_type ;
+  
+  // voxel_type is the source data type
+  typedef vigra::TinyVector < float , 3 > voxel_type ;
+  
+  // coordinate_type has a 3D coordinate
+  typedef vigra::TinyVector < float , 3 > coordinate_type ;
+  
+  // warp_type is a 2D array of coordinates
+  typedef vigra::MultiArray < 2 , coordinate_type > warp_type ;
+  
+  // target_type is a 2D array of pixels  
+  typedef vigra::MultiArray < 2 , pixel_type > target_type ;
+  
+  // we want a b-spline with natural boundary conditions
+  vigra::TinyVector < vspline::bc_code , 3 > bcv ( vspline::NATURAL ) ;
+  
+  // create quintic 3D b-spline object containing voxels
+  vspline::bspline < voxel_type , 3 >
+    space ( vigra::Shape3 ( 10 , 10 , 10 ) , 5 , bcv ) ;
+  
+  // fill the b-spline's core with a three-way gradient
+  for ( int z = 0 ; z < 10 ; z++ )
+  {
+    for ( int y = 0 ; y < 10 ; y++ )
+    {
+      for ( int x = 0 ; x < 10 ; x++ )
+      {
+        voxel_type & c ( space.core [ vigra::Shape3 ( x , y , z ) ] ) ;
+        c[0] = 25.5 * x ;
+        c[1] = 25.5 * y ;
+        c[2] = 25.5 * z ;
+      }
+    }
+  }
+  
+  // prefilter the b-spline
+  space.prefilter ( true ) ;
+  
+  // get an evaluator for the b-spline
+  typedef vspline::evaluator < coordinate_type , voxel_type > ev_type ;
+  ev_type ev ( space ) ;
+  
+  // now make a warp array with 1920X1080 3D coordinates
+  warp_type warp ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+  
+  // we want the coordinates to follow this scheme:
+  // warp(x,y) = (x,1-x,y)
+  // scaled appropriately
+  
+  for ( int y = 0 ; y < 1080 ; y++ )
+  {
+    for ( int x = 0 ; x < 1920 ; x++ )
+    {
+      coordinate_type & c ( warp [ vigra::Shape2 ( x , y ) ] ) ;
+      c[0] = float ( x ) / 192.0 ;
+      c[1] = 10.0 - c[0] ;
+      c[2] = float ( y ) / 108.0 ;
+    }
+  }
+  
+  // this is where the result should go:
+  target_type target ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+
+  // now we perform the remap, yielding the result
+  vspline::remap < ev_type , 2 > ( ev , warp , target ) ;
+
+  // store the result with vigra impex
+  vigra::ImageExportInfo imageInfo ( "slice.tif" );
+  
+  vigra::exportImage ( target ,
+                      imageInfo
+                      .setPixelType("UINT8")
+                      .setCompression("100")
+                      .setForcedRangeMapping ( 0 , 255 , 0 , 255 ) ) ;
+  
+  exit ( 0 ) ;
+}
diff --git a/example/slice2.cc b/example/slice2.cc
new file mode 100644
index 0000000..a089881
--- /dev/null
+++ b/example/slice2.cc
@@ -0,0 +1,245 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// slice.cc
+///
+/// build a 3D volume from samples of the RGB colour space
+/// build a spline over it and extract a 2D slice
+///
+/// while the result is just about the same as the one we get from slice.cc,
+/// here our volume contains double-precision voxels. Since the evaluator over
+/// the b-spline representing the volume of voxels produces double precision
+/// voxels as output (same as the source data type), but our target takes
+/// float pixels, we have to wrap the evaluator in an outer class which handles
+/// the conversion from doule voxels to float pixels. This is done with a
+/// class derived from vspline::unary_functor. This is quite a mouthful for
+/// a simple double-to-float conversion, but it shows the wrapping mechanism
+/// clearly. Using wrapping like this is verbose, but produces a resulting
+/// functor which is maximally efficient.
+///
+/// compile with:
+/// clang++ -std=c++11 -march=native -o slice2 -O3 -pthread -DUSE_VC=1 slice2.cc -lvigraimpex -lVc
+/// g++ also works.
+
+#include <vspline/vspline.h>
+
+#include <vigra/stdimage.hxx>
+#include <vigra/imageinfo.hxx>
+#include <vigra/impex.hxx>
+
+// pixel_type is the result type, an RGB float pixel
+typedef vigra::TinyVector < float , 3 > pixel_type ;
+
+// voxel_type is the source data type
+typedef vigra::TinyVector < double , 3 > voxel_type ;
+
+// coordinate_type has a 3D coordinate
+typedef vigra::TinyVector < float , 3 > coordinate_type ;
+
+// warp_type is a 2D array of coordinates
+typedef vigra::MultiArray < 2 , coordinate_type > warp_type ;
+
+// target_type is a 2D array of pixels  
+typedef vigra::MultiArray < 2 , pixel_type > target_type ;
+
+// b-spline evaluator producing double precision voxels
+typedef vspline::evaluator < coordinate_type , voxel_type > ev_type ;
+
+// to move from the b-splines result type (double precision voxel) to the
+// target type (float pixel) we wrap the b-spline evaluator with a class
+// derived from vspline::unary_functor:
+
+struct downcast_type
+: public vspline::unary_functor < coordinate_type , // incoming coordinate
+                                  pixel_type ,      // desired result type
+                                  ev_type::vsize >  // use same vector size as ev_type
+{
+  // pull in vspline::unary_functor's type system
+  typedef vspline::unary_functor < coordinate_type ,
+                                   pixel_type ,
+                                   ev_type::vsize > base_type ;
+                                   
+  using_unary_functor_types(base_type) ;
+  
+  // we keep a const reference to the wrappee
+  const ev_type & inner ;
+  
+  // which is initialized in the constructor
+  downcast_type ( const ev_type & _inner )
+  : inner ( _inner )
+  { } ;
+  
+  // single-value evaluation:
+  void eval ( const in_type & c ,
+                    out_type & result ) const
+  {
+    typename ev_type::out_type intermediate ; // space for a double precision voxel
+    inner.eval ( c , intermediate ) ;         // produce the voxel by a call to 'inner'
+    result = out_type ( intermediate ) ;      // assign to result, casting down in the process
+  } ;
+
+#ifdef USE_VC
+  
+  // vectorized evaluation
+  void eval ( const in_v & c ,
+                    out_v & result ) const
+  {
+    typename ev_type::out_v intermediate ; // space for a vectorized voxel
+    inner.eval ( c , intermediate ) ;      // produce a vectorized voxel
+    result = out_v ( intermediate ) ;      // downcast and assign
+  } ;
+
+#endif
+} ;
+
+// slightly different implementation of the wrapper functor, using a templated
+// method for the evaluation:
+
+struct downcast_type_2
+: public vspline::unary_functor < coordinate_type , // incoming coordinate
+                                  pixel_type ,      // desired result type
+                                  ev_type::vsize >  // use same vector size as ev_type
+{
+  // pull in vspline::unary_functor's type system
+  typedef vspline::unary_functor < coordinate_type ,
+                                   pixel_type ,
+                                   ev_type::vsize > base_type ;
+                                   
+  using_unary_functor_types(base_type) ;
+  
+  // we keep a const reference to the wrappee
+  const ev_type & inner ;
+  
+  // which is initialized in the constructor
+  downcast_type_2 ( const ev_type & _inner )
+  : inner ( _inner )
+  { } ;
+  
+  // since the code is the same for vectorized and unvectorized
+  // operation, we can write a template:
+  
+  template < class IN , class OUT = base_type::out_type_of<IN> >
+  void _eval ( const IN & c ,
+                     OUT & result ) const
+  {
+    auto intermediate = inner ( c ) ;
+    result = OUT ( intermediate ) ;
+  } ;
+  
+  // and implement the pure virtual method(s) like this:
+  void eval ( const in_type & c ,
+                    out_type & result ) const
+  {
+    _eval ( c , result ) ;
+  } ;
+
+#ifdef USE_VC
+  
+  void eval ( const in_v & c ,
+                    out_v & result ) const
+  {
+    _eval ( c , result ) ;
+  } ;
+
+#endif
+} ;
+
+int main ( int argc , char * argv[] )
+{
+  // we want a b-spline with natural boundary conditions
+  vigra::TinyVector < vspline::bc_code , 3 > bcv ( vspline::NATURAL ) ;
+  
+  // create quintic 3D b-spline object containing voxels
+  vspline::bspline < voxel_type , 3 >
+    space ( vigra::Shape3 ( 10 , 10 , 10 ) , 5 , bcv ) ;
+  
+  // fill the b-spline's core with a three-way gradient
+  for ( int z = 0 ; z < 10 ; z++ )
+  {
+    for ( int y = 0 ; y < 10 ; y++ )
+    {
+      for ( int x = 0 ; x < 10 ; x++ )
+      {
+        voxel_type & c ( space.core [ vigra::Shape3 ( x , y , z ) ] ) ;
+        c[0] = 25.5 * x ;
+        c[1] = 25.5 * y ;
+        c[2] = 25.5 * z ;
+      }
+    }
+  }
+  
+  // prefilter the b-spline (using Vc)
+  space.prefilter ( true ) ;
+  
+  // get an evaluator for the b-spline
+  ev_type ev ( space ) ;
+  
+  // wrap it in a downcast_type
+  downcast_type dc ( ev ) ;
+  
+  // same effect:
+  // downcast_type_2 dc ( ev ) ;
+  
+  // now make a warp array with 1920X1080 3D coordinates
+  warp_type warp ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+  
+  // we want the coordinates to follow this scheme:
+  // warp(x,y) = (x,1-x,y)
+  // scaled appropriately
+  
+  for ( int y = 0 ; y < 1080 ; y++ )
+  {
+    for ( int x = 0 ; x < 1920 ; x++ )
+    {
+      coordinate_type & c ( warp [ vigra::Shape2 ( x , y ) ] ) ;
+      c[0] = float ( x ) / 192.0 ;
+      c[1] = 10.0 - c[0] ;
+      c[2] = float ( y ) / 108.0 ;
+    }
+  }
+  
+  // this is where the result should go:
+  target_type target ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+
+  // now we perform the remap, yielding the result
+  vspline::remap < downcast_type , 2 > ( dc , warp , target ) ;
+
+  // store the result with vigra impex
+  vigra::ImageExportInfo imageInfo ( "slice.tif" );
+  
+  vigra::exportImage ( target ,
+                      imageInfo
+                      .setPixelType("UINT8")
+                      .setCompression("100")
+                      .setForcedRangeMapping ( 0 , 255 , 0 , 255 ) ) ;
+  
+  exit ( 0 ) ;
+}
diff --git a/example/slice3.cc b/example/slice3.cc
new file mode 100644
index 0000000..3c2528e
--- /dev/null
+++ b/example/slice3.cc
@@ -0,0 +1,147 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// slice.cc
+///
+/// build a 3D volume from samples of the RGB colour space
+/// build a spline over it and extract a 2D slice
+///
+/// Here we use a quick shot without elaborate, possibly vectorized, wrapper
+/// classes. Oftentimes all it takes is a single run of an interpolation with
+/// as little programming effort as possible, never mind the performance.
+/// Again we use a b-spline with double-precision voxels as value_type,
+/// but instead of using vspline::remap, which requires a suitable functor
+/// yielding pixel_type, we simply use the evaluator's operator() and
+/// implicitly cast the result to pixel_type.
+///
+/// compile with:
+/// clang++ -std=c++11 -o slice3 -O3 -pthread slice3.cc -lvigraimpex
+/// g++ also works.
+
+#include <vspline/vspline.h>
+
+#include <vigra/stdimage.hxx>
+#include <vigra/imageinfo.hxx>
+#include <vigra/impex.hxx>
+
+// pixel_type is the result type, here we use a vigra::RGBValue for a change
+typedef vigra::RGBValue < unsigned char , 0 , 1 , 2 > pixel_type ;
+
+// voxel_type is the source data type
+typedef vigra::TinyVector < double , 3 > voxel_type ;
+
+// coordinate_type has a 3D coordinate
+typedef vigra::TinyVector < float , 3 > coordinate_type ;
+
+// warp_type is a 2D array of coordinates
+typedef vigra::MultiArray < 2 , coordinate_type > warp_type ;
+
+// target_type is a 2D array of pixels  
+typedef vigra::MultiArray < 2 , pixel_type > target_type ;
+
+// b-spline evaluator producing double precision voxels
+typedef vspline::evaluator < coordinate_type , voxel_type > ev_type ;
+
+int main ( int argc , char * argv[] )
+{
+  // we want a b-spline with natural boundary conditions
+  vigra::TinyVector < vspline::bc_code , 3 > bcv ( vspline::NATURAL ) ;
+  
+  // create quintic 3D b-spline object containing voxels
+  vspline::bspline < voxel_type , 3 >
+    space ( vigra::Shape3 ( 10 , 10 , 10 ) , 5 , bcv ) ;
+  
+  // fill the b-spline's core with a three-way gradient
+  for ( int z = 0 ; z < 10 ; z++ )
+  {
+    for ( int y = 0 ; y < 10 ; y++ )
+    {
+      for ( int x = 0 ; x < 10 ; x++ )
+      {
+        voxel_type & c ( space.core [ vigra::Shape3 ( x , y , z ) ] ) ;
+        c[0] = 25.5 * x ;
+        c[1] = 25.5 * y ;
+        c[2] = 25.5 * z ;
+      }
+    }
+  }
+  
+  // prefilter the b-spline (using Vc)
+  space.prefilter ( true ) ;
+  
+  // get an evaluator for the b-spline
+  ev_type ev ( space ) ;
+  
+  // now make a warp array with 1920X1080 3D coordinates
+  warp_type warp ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+  
+  // we want the coordinates to follow this scheme:
+  // warp(x,y) = (x,1-x,y)
+  // scaled appropriately
+  
+  for ( int y = 0 ; y < 1080 ; y++ )
+  {
+    for ( int x = 0 ; x < 1920 ; x++ )
+    {
+      coordinate_type & c ( warp [ vigra::Shape2 ( x , y ) ] ) ;
+      c[0] = float ( x ) / 192.0 ;
+      c[1] = 10.0 - c[0] ;
+      c[2] = float ( y ) / 108.0 ;
+    }
+  }
+  
+  // this is where the result should go:
+  target_type target ( vigra::Shape2 ( 1920 , 1080 ) ) ;
+
+  // we are sure warp and target have the same shape. We use iterators
+  // over warp and target and perform the remap 'manually' by iterating over
+  // warp and target synchronously:
+
+  auto coordinate_iter = warp.begin() ;
+  
+  for ( auto & trg : target )
+  {
+    // here we implicitly cast the result of the evaluator down to pixel_type:
+    trg = ev ( *coordinate_iter ) ;
+    ++coordinate_iter ;
+  }
+
+  // store the result with vigra impex
+  vigra::ImageExportInfo imageInfo ( "slice.tif" );
+  
+  vigra::exportImage ( target ,
+                      imageInfo
+                      .setPixelType("UINT8")
+                      .setCompression("100")
+                      .setForcedRangeMapping ( 0 , 255 , 0 , 255 ) ) ;
+  
+  exit ( 0 ) ;
+}
diff --git a/example/splinus.cc b/example/splinus.cc
new file mode 100644
index 0000000..360dd6e
--- /dev/null
+++ b/example/splinus.cc
@@ -0,0 +1,96 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// \file splinus.cc
+///
+/// \brief compare a periodic b-spline with a sine
+/// 
+/// This is a deliberately trivial example using a periodic b-spline
+/// over just two values: 1 and -1. This spline is used to approximate
+/// a sine function. You pass the spline's desired degree on the command
+/// line. Next you enter a number (interpreted as degrees) and the program
+/// will output the sine and the 'splinus' of the given angle.
+/// As you can see when playing with higher degrees, the higher the spline's
+/// degree, the closer the match with the sine. So apart from serving as a
+/// very simple demonstration of using a 1D periodic b-spline, it teaches us
+/// that a periodic b-spline can approximate a sine.
+///
+/// compile with: clang++ -pthread -O3 -std=c++11 splinus.cc -o splinus
+
+#include <assert.h>
+#include <vspline/vspline.h>
+
+int main ( int argc , char * argv[] )
+{
+  int degree = std::atoi ( argv[1] ) ;
+  
+  assert ( degree >= 0 && degree < 25 ) ;
+  
+  // create the bspline object
+  
+  vspline::bspline < double ,   // spline's data type
+                     1 >        // one dimension
+    bsp ( 2 ,                   // two values
+          degree ,              // degree as per command line
+          vspline::PERIODIC ) ; // periodic boundary conditions
+          
+  // the bspline object's 'core' is a MultiArrayView to the knot point
+  // data, which we set one by one for this simple example:
+  
+  bsp.core[0] = 1.0 ;
+  bsp.core[1] = -1.0 ;
+  
+  // now we prefilter the data
+  
+  bsp.prefilter() ;
+  
+  // the spline is now ready for use, we create an evaluator
+  // to obtain interpolated values
+  
+  vspline::evaluator < double ,       // evaluator taking double coordinates
+                       double         // and producing double results
+                       > ev ( bsp ) ; // from the bspline object we just made
+
+  while ( true )
+  {
+    double x ;
+    std::cin >> x ;                // get an angle
+    double xs = x * M_PI / 180.0 ; // sin() uses radians 
+    double xx = x / 180.0 - .5 ;   // 'splinus' has period 1 and is shifted .5
+    
+    // finally we can produce both results. Note how we can use ev, the evaluator,
+    // like an ordinary function.
+
+    std::cout << "sin(" << x << ") = " << sin(xs)
+              << " splin(" << x << ") = " << ev(xx)
+              << " difference: " << sin(xs) - ev(xx) << std::endl ;
+  }
+}
diff --git a/filter.h b/filter.h
index 51a8375..2737370 100644
--- a/filter.h
+++ b/filter.h
@@ -217,8 +217,8 @@ private:
 template < class IT >
 value_type icc_mirror ( IT c , int k )
 {
-  real_type z = pole[k] ;
-  real_type zn, z2n, iz;
+  value_type z = value_type ( pole[k] ) ;
+  value_type zn, z2n, iz;
   value_type Sum ;
   int  n ;
 
@@ -235,8 +235,8 @@ value_type icc_mirror ( IT c , int k )
   else {
     /* full loop */
     zn = z;
-    iz = 1.0 / z;
-    z2n = pow(z, (double)(M - 1));
+    iz = value_type(1.0) / z;
+    z2n = value_type ( pow(double(pole[k]), double(M - 1)) );
     Sum = c[0] + z2n * c[M - 1];
     z2n *= z2n * iz;
     for (n = 1; n <= M - 2; n++)
@@ -245,7 +245,7 @@ value_type icc_mirror ( IT c , int k )
       zn *= z;
       z2n *= iz;
     }
-    Sum /= real_type(1.0 - zn * zn);
+    Sum /= (value_type(1.0) - zn * zn);
   } 
 //  cout << "icc_mirror: " << Sum << endl ;
  return(Sum);
@@ -260,9 +260,9 @@ value_type icc_mirror ( IT c , int k )
 
 value_type iacc_mirror ( out_iter c , int k )
 {
-  real_type z = pole[k] ;
+  value_type z = value_type ( pole[k] ) ;
 
-  return( real_type( z / ( z * z - 1.0 ) ) * ( c [ M - 1 ] + z * c [ M - 2 ] ) );
+  return( value_type( z / ( z * z - value_type(1.0) ) ) * ( c [ M - 1 ] + z * c [ M - 2 ] ) );
 }
 
 /// next are 'antimirrored' BCs. This is the same as 'natural' BCs: the signal is
@@ -275,8 +275,8 @@ value_type iacc_mirror ( out_iter c , int k )
 template < class IT >
 value_type icc_natural ( IT c , int k )
 {
-  real_type z = pole[k] ;
-  real_type zn, z2n, iz;
+  value_type z = value_type ( pole[k] ) ;
+  value_type zn, z2n, iz;
   value_type Sum , c02 ;
   int  n ;
 
@@ -297,9 +297,10 @@ value_type icc_natural ( IT c , int k )
   }
   else {
     zn = z;
-    iz = 1.0 / z;
-    z2n = pow(z, (double)(M - 1));                                     // z2n == z^M-1
-    Sum = real_type( ( 1.0 + z ) / ( 1.0 - z ) ) * ( c[0] - z2n * c[M - 1] );
+    iz = value_type(1.0) / z;
+    z2n = value_type ( pow(double(pole[k]), double(M - 1)) );
+    Sum = value_type( ( value_type(1.0) + z ) / ( value_type(1.0) - z ) )
+          * ( c[0] - z2n * c[M - 1] );
     z2n *= z2n * iz;                                                   // z2n == z^2M-3
     for (n = 1; n <= M - 2; n++)
     {
@@ -307,7 +308,7 @@ value_type icc_natural ( IT c , int k )
       zn *= z;
       z2n *= iz;
     }
-    return(Sum / real_type(1.0 - zn * zn));
+    return(Sum / (value_type(1.0) - zn * zn));
   } 
 }
 
@@ -317,9 +318,9 @@ value_type icc_natural ( IT c , int k )
 
 value_type iacc_natural ( out_iter c , int k )
 {
-  real_type z = pole[k] ;
+  value_type z = real_type ( pole[k] ) ;
 
-  return - real_type( z / ( ( 1.0 - z ) * ( 1.0 - z ) ) ) * ( c [ M - 1 ] - z * c [ M - 2 ] ) ;
+  return - value_type( z / ( ( value_type(1.0) - z ) * ( value_type(1.0) - z ) ) ) * ( c [ M - 1 ] - z * c [ M - 2 ] ) ;
 }
 
 /// next are reflective BCs. This is mirroring 'between bounds':
@@ -336,8 +337,8 @@ value_type iacc_natural ( out_iter c , int k )
 template < class IT >
 value_type icc_reflect ( IT c , int k )
 {
-  real_type z = pole[k] ;
-  real_type zn, z2n, iz;
+  value_type z = value_type ( pole[k] ) ;
+  value_type zn, z2n, iz;
   value_type Sum ;
   int  n ;
 
@@ -354,8 +355,8 @@ value_type icc_reflect ( IT c , int k )
   }
   else {
     zn = z;
-    iz = 1.0 / z;
-    z2n = pow(z, (double)(2 * M));
+    iz = value_type(1.0) / z;
+    z2n = value_type ( pow(double(pole[k]), double(M - 1)) );
     Sum = 0 ;
     for (n = 0; n < M - 1 ; n++)
     {
@@ -364,7 +365,7 @@ value_type icc_reflect ( IT c , int k )
       z2n *= iz;
     }
     Sum += (zn + z2n) * c[n];
-    return c[0] + Sum / real_type(1.0 - zn * zn) ;
+    return c[0] + Sum / (value_type(1.0) - zn * zn) ;
   } 
 }
 
@@ -374,9 +375,9 @@ value_type icc_reflect ( IT c , int k )
 
 value_type iacc_reflect ( out_iter c , int k )
 {
-  real_type z = pole[k] ;
+  value_type z = value_type ( pole[k] ) ;
 
-  return c[M - 1] / real_type( 1.0 - 1.0 / z ) ;
+  return c[M - 1] / ( value_type(1.0) - value_type(1.0) / z ) ;
 }
 
 /// next is periodic BCs. so, f(x) = f(x+N)
@@ -390,8 +391,8 @@ value_type iacc_reflect ( out_iter c , int k )
 template < class IT >
 value_type icc_periodic ( IT c , int k )
 {
-  real_type z = pole[k] ;
-  real_type zn ;
+  value_type z = value_type ( pole[k] ) ;
+  value_type zn ;
   value_type Sum ;
   int  n ;
 
@@ -414,7 +415,7 @@ value_type icc_periodic ( IT c , int k )
       Sum += zn * c[n];
       zn *= z;
     }
-    Sum /= real_type( 1.0 - zn ) ;
+    Sum /= ( value_type(1.0) - zn ) ;
   }
  return Sum ;
 }
@@ -422,8 +423,8 @@ value_type icc_periodic ( IT c , int k )
 /// TODO doublecheck this routine!
 value_type iacc_periodic ( out_iter c , int k )
 {
-  real_type z = pole[k] ;
-  real_type zn ;
+  value_type z = value_type ( pole[k] ) ;
+  value_type zn ;
   value_type Sum ;
 
   if (horizon[k] < M)
@@ -446,7 +447,7 @@ value_type iacc_periodic ( out_iter c , int k )
       Sum += zn * c[n];
       zn *= z;
     }
-    Sum = z * Sum / real_type( zn - 1.0 );
+    Sum = z * Sum / ( zn - value_type(1.0) );
   }
   return Sum ;
 }
@@ -459,7 +460,7 @@ value_type iacc_periodic ( out_iter c , int k )
 template < class IT >
 value_type icc_safe_beyond ( IT c , int k )
 {
-  real_type p = pole[k] ;
+  value_type p = value_type ( pole[k] ) ;
   int z = horizon[k] ;
   value_type icc = c [ - z ] ;
   
@@ -473,7 +474,7 @@ value_type icc_safe_beyond ( IT c , int k )
 
 value_type iacc_safe_beyond ( out_iter c , int k )
 {
-  real_type p = pole[k] ;
+  value_type p = value_type ( pole[k] ) ;
   int z = horizon[k] ;
   value_type iacc = c [ M - 1 + z ] ;
 
@@ -492,7 +493,7 @@ value_type iacc_safe_beyond ( out_iter c , int k )
 template < class IT >
 value_type icc_guess ( IT c , int k )
 {
-  return c[0] * real_type ( 1.0 / ( 1.0 - pole[k] ) ) ;
+  return c[0] * value_type ( 1.0 / ( 1.0 - pole[k] ) ) ;
 }
 
 // for the backward filter, we assume mirror BC, which is also cheap to compute:
@@ -530,7 +531,7 @@ void solve_gain_inlined ( in_iter c , out_iter x )
   // use a buffer of one value_type for the recursion (see below)
 
   value_type X ;
-  real_type p = pole[0] ;
+  real_type p = real_type ( pole[0] ) ;
   
   // process first pole, applying overall gain in the process
   // of consuming the input. This gain may be a power of the 'orthodox'
@@ -551,14 +552,14 @@ void solve_gain_inlined ( in_iter c , out_iter x )
   // next iteration instead of fetching it again from memory. In my trials, this
   // performed better, especially on SIMD data.
   
-  x[0] = X = lambda * (this->*_p_icc1) (c, 0);
+  x[0] = X = value_type ( lambda ) * (this->*_p_icc1) (c, 0);
 
   /* causal recursion */
   // the gain is applied to each input value as it is consumed
   
   for (int n = 1; n < M; n++)
   {
-    x[n] = X = lambda * c[n] + p * X ;
+    x[n] = X = value_type ( lambda ) * c[n] + value_type ( p ) * X ;
   }
   
   // now the input is used up and won't be looked at any more; all subsequent
@@ -571,7 +572,7 @@ void solve_gain_inlined ( in_iter c , out_iter x )
   /* anticausal recursion */
   for (int n = M - 2; 0 <= n; n--)
   {
-    x[n] = X = real_type ( p ) * ( X - x[n]);
+    x[n] = X = value_type ( p ) * ( X - x[n]);
   }
   
   // for the remaining poles, if any, don't apply the gain
@@ -586,7 +587,7 @@ void solve_gain_inlined ( in_iter c , out_iter x )
     /* causal recursion */
     for (int n = 1; n < M; n++)
     {
-      x[n] = X = x[n] + p * X ;
+      x[n] = X = x[n] + value_type ( p ) * X ;
     }
     
     /* anticausal initialization */
@@ -595,7 +596,7 @@ void solve_gain_inlined ( in_iter c , out_iter x )
     /* anticausal recursion */
     for (int n = M - 2; 0 <= n; n--)
     {
-      x[n] = X = p * ( X - x[n] );
+      x[n] = X = value_type ( p ) * ( X - x[n] );
     }
   }
 }
@@ -608,7 +609,7 @@ void solve_no_gain ( in_iter c , out_iter x )
   assert ( M > 1 ) ;
 
   value_type X ;
-  real_type p = pole[0] ;
+  real_type p = real_type ( pole[0] ) ;
   
   // process first pole, consuming the input
   
@@ -618,7 +619,7 @@ void solve_no_gain ( in_iter c , out_iter x )
   /* causal recursion */
   for ( int n = 1; n < M; n++)
   {
-    x[n] = X = c[n] + p * X ;
+    x[n] = X = c[n] + value_type ( p ) * X ;
   }
   
   /* anticausal initialization */
@@ -627,7 +628,7 @@ void solve_no_gain ( in_iter c , out_iter x )
   /* anticausal recursion */
   for ( int n = M - 2; 0 <= n; n--)
   {
-    x[n] = X = p * ( X - x[n]);
+    x[n] = X = value_type ( p ) * ( X - x[n]);
   }
   
   // for the remaining poles, if any, work on the result
@@ -642,7 +643,7 @@ void solve_no_gain ( in_iter c , out_iter x )
     /* causal recursion */
     for (int n = 1; n < M; n++)
     {
-      x[n] = X = x[n] + p * X ;
+      x[n] = X = x[n] + value_type ( p ) * X ;
     }
     
     /* anticausal initialization */
@@ -651,7 +652,7 @@ void solve_no_gain ( in_iter c , out_iter x )
     /* anticausal recursion */
     for (int n = M - 2; 0 <= n; n--)
     {
-      x[n] = X = p * ( X - x[n] );
+      x[n] = X = value_type ( p ) * ( X - x[n] );
     }
   }
 }
diff --git a/remap.h b/remap.h
index d31e1b6..3045eb6 100644
--- a/remap.h
+++ b/remap.h
@@ -543,9 +543,10 @@ void fill ( generator_type & gen ,
   shape_range_type < dim_target > range ( shape_type < dim_target > () ,
                                           output.shape() ) ;
 
-  // heuristic. desired number of partitions. When working on images,
-  // we try setting up the jobs to do 4 lines each: 
+  // heuristic. desired number of partitions.
+
   int njobs = 64 ; // = output.shape ( dim_target - 1 ) / 4 ;
+
 //   if ( njobs < 16 ) // small thing, try at least 16
 //     njobs = 16 ;
 
@@ -751,7 +752,8 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
 
   // create an evaluator over the bspline
 
-  typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
+//   typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
+  typedef evaluator < coordinate_type , value_type > evaluator_type ;
   evaluator_type ev ( bsp ) ;
   
   // and call the other remap variant,
@@ -846,89 +848,6 @@ struct index_generator
   }  
 } ;
 
-// template < int dim_target , typename unary_functor_type >
-// struct index_generator
-// {
-//   typedef typename unary_functor_type::value_type value_type ;
-// 
-//   enum { dim_source = unary_functor_type::dimension } ;
-//   
-// #ifdef USE_VC
-// 
-//   enum { vsize = unary_functor_type :: vsize } ;
-// 
-// #else
-//   
-//   enum { vsize = 1 } ;
-// 
-// #endif
-//   
-//   typedef typename unary_functor_type::ic_type ic_type ;
-//   typedef typename unary_functor_type::rc_type rc_type ;
-//   typedef typename unary_functor_type::nd_ic_type nd_ic_type ;
-//   typedef typename unary_functor_type::nd_rc_type nd_rc_type ;
-// 
-//   const unary_functor_type & itp ;
-//   const shape_range_type < dim_target > range ;
-//   
-//   vigra::MultiCoordinateIterator < dim_source > mci ;
-//   
-// #ifdef USE_VC
-//   
-//   coordinate_iterator_v < rc_type , ic_type , dim_source , vsize > civ ;
-// 
-// #endif
-//   
-//   index_generator
-//     ( const unary_functor_type & _itp ,
-//       const shape_range_type < dim_target > _range
-//     )
-//   : itp ( _itp ) ,
-//     range ( _range ) ,
-//     mci ( _range[1] - _range[0] )
-// #ifdef USE_VC
-//     , civ ( _range[0] , _range[1] ) 
-// #endif
-//   {
-//   } ;
-// 
-//   void operator() ( value_type & target )
-//   {
-//     itp.eval ( * ( mci + range[0] ) , target ) ;
-//     ++mci ;
-//   }
-// 
-// #ifdef USE_VC
-//   
-//   void operator() ( value_type * target )
-//   {
-//     itp.eval ( *civ , target ) ;
-//     ++civ ;
-//     mci += vsize ; // this sucks...
-//   }
-// 
-// #endif
-// 
-//   index_generator < dim_target , unary_functor_type >
-//     subrange ( const shape_range_type < dim_target > range ) const
-//   {
-//     return index_generator < dim_target , unary_functor_type >
-//              ( itp , range ) ;
-//   }
-//   
-//   index_generator < dim_target , unary_functor_type >
-//     bindOuter ( const int & c ) const
-//   {
-//     nd_ic_type slice_start = range[0] , slice_end = range[1] ;
-// 
-//     slice_start [ dim_target - 1 ] += c ;
-//     slice_end [ dim_target - 1 ] = slice_start [ dim_target - 1 ] + 1 ;
-//     
-//     return index_generator < dim_target , unary_functor_type >
-//              ( itp , shape_range_type < dim_target > ( slice_start , slice_end ) ) ;
-//   }  
-// } ;
-
 /// index_remap() is very similar to remap(), but while remap() picks coordinates from
 /// an array (the 'warp' array), index_remap feeds the discrete coordinates to the
 /// successive places data should be rendered to to the unary_functor_type object.
diff --git a/unary_functor.h b/unary_functor.h
index f858567..5ec29ef 100644
--- a/unary_functor.h
+++ b/unary_functor.h
@@ -120,6 +120,11 @@ struct unary_functor
   
   typedef typename vector_traits < int , vsize > :: ele_v ic_v ;
   
+  template < class in_type >
+  using out_type_of = typename std::conditional < std::is_same < in_type , in_v > :: value ,
+                                                  out_v ,
+                                                  out_type > :: type ;
+
   /// simdized evaluation routine. This routine takes the simdized coordinate
   /// per const reference and deposits the result via a reference. This
   /// method is pure virtual and must be implemented by derived classes.
@@ -177,7 +182,7 @@ struct unary_functor
   /// method to load/gather interleaved memory, T being IN or OUT
   
   template < typename T >
-  void load ( const T * const pc ,
+  void load ( const T * const & pc ,
               typename vector_traits < T , vsize > :: type & cv ) const
   {
     typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
@@ -197,11 +202,28 @@ struct unary_functor
     }
   }
   
+//   /// variant of load() taking a stride, currently unused
+//   
+//   template < typename T >
+//   void load ( const T * const & pc ,
+//               const int & stride ,
+//               typename vector_traits < T , vsize > :: type & cv ) const
+//   {
+//     typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
+//     enum { dimension = vigra::ExpandElementResult < T > :: size } ;
+// 
+//     for ( int d = 0 ; d < dimension ; d++ )
+//     {
+//       cv[d].gather ( (ele_type*) pc ,
+//                       ic_v::IndexesFromZero() * dimension * stride + d ) ;
+//     }
+//   }
+  
   /// method to store/scatter a simdized value to interleaved memory
 
   template < typename T >
   void store ( const typename vector_traits < T , vsize > :: type & ev ,
-               T * result ) const
+               T * const & result ) const
   {
     typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
     enum { dimension = vigra::ExpandElementResult < T > :: size } ;
@@ -220,6 +242,23 @@ struct unary_functor
     }
   } ;
 
+//   /// variant of store taking a stride, currently unused
+//   
+//   template < typename T >
+//   void store ( const typename vector_traits < T , vsize > :: type & ev ,
+//                T * const & result ,
+//                const int & stride ) const
+//   {
+//     typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
+//     enum { dimension = vigra::ExpandElementResult < T > :: size } ;
+// 
+//     for ( int d = 0 ; d < dimension ; d++ )
+//     {
+//       ev[d].scatter ( (ele_type*) result ,
+//                       ic_v::IndexesFromZero() * dimension * stride + d ) ;
+//     }
+//   } ;
+
   void eval ( const in_type * const pmc ,
               out_type * result ) const
   {
@@ -256,6 +295,12 @@ struct unary_functor
     eval ( cv , result ) ;
   } ;
 
+#else
+
+  template < class in_type >
+  using out_type_of = out_type ;
+
+
 #endif
 
 } ;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git



More information about the debian-science-commits mailing list