Sun Jul 2 09:02:38 UTC 2017

This is an automated email from the git hooks/post-receive script.

kfj-guest pushed a commit to branch master
in repository vspline.

commit 90457edbc855e048f8aa070e7192ec1b76ae529d
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date:   Thu Dec 8 11:29:24 2016 +0100

    added interpolator.h
    interpolator.h has an interface definition for a class 'interpolator'
    with those methods defined pure virtual which are essential for remapping
    from unsplit coordinates. class evaluator fulfills this interface, but
    since the interface only requires a few methods to be present, it's quite
    easy to write alternative interpolators, as 'nearest_neighbour' in the
    same file, which provides a possibly minimally faster implementation
    of nearest-neighbour interpolation than the one resulting from evaluating
    a b-spline with degree 0. The idea is to open the remapping code, where it
    isn't specific to b-splines (like, when using split coordinates), to use
    with different interpolators, since it isn't specific to b-splines. This
    more general remapping code (taking an interpolator as a template argument)
    performs just as well.
    I also did a bit of work on pv to work better with partial sphericals.
---
 eval.h                  | 291 +++++++++++++++-------------
 example/pano_extract.cc |  10 +-
 example/roundtrip.cc    |   8 +-
 interpolator.h          | 499 ++++++++++++++++++++++++++++++++++++++++++++++++
 mapping.h               |  20 +-
 remap.h                 | 194 +++++++++----------
 6 files changed, 763 insertions(+), 259 deletions(-)

diff --git a/eval.h b/eval.h
index 6295651..f97cb8c 100644
--- a/eval.h
+++ b/eval.h
@@ -460,53 +460,49 @@ struct alt_bspline_derivative_weight_functor
 /// steps can be bound into a single functor, and the calling code can be reduced to polling
 /// this functor until it has obtained the desired number of output values.
 
-template < int dimension ,         ///< dimension of the spline
-           class value_type ,      ///< type of spline coefficients/result (pixels etc.)
-           class rc_type = float , ///< singular real coordinate, float or double
-           class ic_type = int >   ///< singular integral coordinate, currently only int
-
+template < int _dimension ,         ///< dimension of the spline
+           class _value_type ,      ///< type of spline coefficients/result (pixels etc.)
+           class _rc_type ,         ///< singular real coordinate, float or double
+           class _ic_type ,         ///< singular integral coordinate, currently only int
+           bool use_tag = false >   ///< flag switching tag checking on/off
 class evaluator
-: public interpolator < dimension , value_type , rc_type , ic_type >
+: public interpolator < _dimension , _value_type , _rc_type , _ic_type >
 {
 public:
   
-  enum { level = dimension - 1 } ;
+  // fix the types we're dealing with
+  typedef _ic_type ic_type ;
+  typedef _rc_type rc_type ;
 
-  /// value_type is the data type of the coefficients the evaluator processes,
-  /// and also the data type for the results it produces. These values may be
-  /// aggregates like pixels or TinyVectors. For certain operations these values
-  /// are taken apart into their components, using vigra's ExpandElement mechanism.
-  /// This mechanism can be used to introduce aggregate types that aren't already
-  /// known to vigra. ele_type is the type of an individual element of such an
-  /// aggregate; if value_type isn't an aggregate, it's the same as value_type.
-  /// If a value_type is used which isn't known to vigra's ExpandElement
-  /// mechanism already, the expansion has to be defined in order to make this
-  /// value_type work with this code.
+  enum { dimension = _dimension } ;
+  enum { level = dimension - 1 } ;
 
-  enum { channels = ExpandElementResult < value_type > :: size } ;
+  typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
+  typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;
+  
+  typedef _value_type value_type ;  
+  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
 
-  typedef typename ExpandElementResult < value_type > :: type      ele_type ;
+  enum { channels = vigra::ExpandElementResult < value_type > :: size } ;
 
   /// array_type is used for the coefficient array TODO change to 'view_type'
 
   typedef MultiArrayView < dimension , value_type >                array_type ;
 
-  /// type used for nD integral coordinates, array shapes
+  /// type used for nD array coordinates, array shapes
 
   typedef typename array_type::difference_type                     shape_type ;
 
-  /// types for a multidimensional real/integral coordinate
-  // TODO this is duplicating the typedef in mapping.h
-
-  typedef vigra::TinyVector < rc_type , dimension >   nd_rc_type ;
-  typedef vigra::TinyVector < ic_type , dimension >   nd_ic_type ;
-
-  typedef vigra::TinyVector < int , dimension >                       derivative_spec_type ;
+  typedef vigra::TinyVector < int , dimension >                    derivative_spec_type ;
 
   /// type for a 'split' coordinate, mainly used internally in the evaluator, but
   /// also in 'warp arrays' used for remapping
 
-  typedef split_type < dimension , ic_type , rc_type >             split_coordinate_type ;
+  typedef split_type < dimension , ic_type , rc_type > 
+    split_coordinate_type ;
+
+  typedef compact_split_type < dimension , ic_type , rc_type >
+    compact_split_coordinate_type ;
 
   /// the evaluator can handle raw coordinates, but it needs a mapping to do so.
  
@@ -606,11 +602,6 @@ private:
 
 public:
 
-#ifdef USE_VC
-  nd_ic_v nd_interleave ;            ///< gather/scatter indexes for interleaving nD
-  mc_ic_v mc_interleave ;            ///< and mc vectors
-#endif
-
   /// this constructor is the most flexible variant and will ultimately be called by all other
   /// constructor overloads. class evaluator is coded to be (thread)safe as a functor: all state
   /// (in terms of member data) is fixed at construction time and remains constant afterwards.
@@ -630,7 +621,7 @@ public:
     mmap ( _mmap ) ,             // I enforce the passing in of a mapping, even though it
                                  // may not be used at all. TODO: can I do better?
     workspace_size ( ( _spline_degree + 1 ) * dimension ) ,
-    window_size ( pow ( ORDER , dimension ) )
+    window_size ( std::pow ( ORDER , int(dimension) ) )
   {
     // initalize the weight functors. In this constructor, we use only bspline weight
     // functors, even though the evaluator can operate with all weight functors
@@ -660,10 +651,6 @@ public:
     // window by means of stride/shape arithmetic. Coding the window in this fashion
     // also makes it easy to vectorize the code.
     
-//     int noffsets = ORDER ;
-//     for ( int exp = 1 ; exp < dimension ; exp++ )
-//       noffsets *= ORDER ;
-    
     offsets = MultiArray < 1 , ptrdiff_t > ( window_size ) ;
     component_offsets = MultiArray < 1 , ptrdiff_t > ( window_size ) ;
     
@@ -694,32 +681,6 @@ public:
       eshp[0] = i ;
       component_base[i] = &(component_view[eshp]) ;
     }
-    
-#ifdef USE_VC
-
-    // fill the gather/scatter information for vectorized operation
-    // we only need to do this once on evaluator creation. For now this
-    // code is limited to use ints to scatter/gather. This limits the
-    // array size which can be processed, but in real applications this
-    // should rarely become a problem. Still a thing to keep in mind.
-    // TODO this should be factored out and be static
-
-    for ( int d = 0 ; d < dimension ; d++ )
-    {
-      nd_interleave[d] = ic_v::IndexesFromZero() ;
-      nd_interleave[d] *= ic_type ( dimension ) ;
-      nd_interleave[d] += ic_type ( d ) ;
-    }
-    
-    for ( int ch = 0 ; ch < channels ; ch++ )
-    {
-      mc_interleave[ch] = ic_v::IndexesFromZero() ;
-      mc_interleave[ch] *= ic_type ( channels ) ;
-      mc_interleave[ch] += ic_type ( ch ) ;
-    }
-    
-#endif
-
   } ;
 
   /// simplified constructor from a bspline object
@@ -743,6 +704,22 @@ public:
     return mmap ;
   }
   
+#ifdef USE_VC
+
+  // provide interleave indices for nD and multichannel types
+
+  static ic_v nd_interleave ( int d )
+  {
+    return ic_v::IndexesFromZero() * ic_type ( dimension ) + ic_type ( d ) ; 
+  }
+
+  static ic_v mc_interleave ( int ch )
+  {
+    return ic_v::IndexesFromZero() * ic_type ( channels ) + ic_type ( ch ) ; 
+  }
+
+#endif
+
   /// obtain_weights calculates the weights to be applied to a section of the coefficients from
   /// the fractional parts of the split coordinates. What is calculated here is the evaluation
   /// of the spline's basis function at dx, dx+1 , dx+2..., but doing it naively is computationally
@@ -900,8 +877,13 @@ public:
     eval ( select , weight , result ) ;   // delegate
   }
 
-  /// this variant of eval() takes a split coordinate:
-  
+  /// this variant of eval() takes a split coordinate. This method isn't used
+  /// by other methods in class evaluator but serves as an alternative entry
+  /// point into the evaluation sequence if the calling code already has split
+  /// coordinates. It takes split_coordinate_type as a template argument, taking
+  /// split_type and compact_split_type.
+
+  template < class split_coordinate_type >
   void eval ( const split_coordinate_type & sc , // presplit coordinate
               value_type & result ) const
   {
@@ -915,9 +897,10 @@ public:
   void eval ( const nd_rc_type& c , // unsplit coordinate
               value_type & result ) const
   {
-    split_coordinate_type sc ;
-    mmap ( c , sc ) ;  /// apply the mappings
-    eval ( sc.select , sc.tune , result ) ;
+    nd_ic_type select ;
+    nd_rc_type tune ;
+    mmap ( c , select , tune ) ;  /// apply the mappings
+    eval ( select , tune , result ) ;
   }
   
   /// variant taking a plain rc_type for a coordinate. only for 1D splines!
@@ -1129,31 +1112,13 @@ public:
     result = _v_eval < mc_ele_v , level >() ( component_base , select , ofs , weight ) ;
   }
 
-  /// in the penultimate delegate, we move from nD coordinates to offsets
-
-  void v_eval ( const nd_ic_v& select ,  // lower corners of the subarrays
-                const MultiArrayView < 2 , ele_v > & weight , // vectorized weights
-                mc_ele_v & result ) const
-  {
-    // here we transform the incoming nD coordinates into offsets into the coefficient
-    // array's memory.
-    // note that we use both the strides and offsets appropriate for an expanded array,
-    // and component_base has pointers to the element type.
-    
-    ic_v origin = select[0] * ic_type ( expanded_stride [ 0 ] ) ;
-    for ( int d = 1 ; d < dimension ; d++ )
-      origin += select[d] * ic_type ( expanded_stride [ d ] ) ;
-    
-    // now we delegate to the final delegate
-    
-    v_eval ( origin , weight , result ) ;
-  }
-
-  // again, pretty much like the unvectorized version, now using simdized data:
+  /// in the penultimate delegate, we calculate the weights. We also use this
+  /// method 'further down' when we operate on 'compact split coordinates',
+  /// which contain offsets and fractional parts.
 
-  void v_eval ( const nd_ic_v& select ,  // lower corners of the subarrays
-                const nd_rc_v& tune ,    // fractional parts of the incoming coordinates
-                mc_ele_v & result ) const
+  void v_eval ( const ic_v& select ,       // offsets to coefficient windows
+                const nd_rc_v& tune ,      // fractional parts of the coordinates
+                mc_ele_v & result ) const  // target
   {
     // like in the unvectorized code, the 2D MultiArrayView to the weights is created
     // as a view to data on the stack (weight_data) which is lightweight and fast
@@ -1172,37 +1137,48 @@ public:
     MultiArrayView < 2 , ele_v > weight ( Shape2 ( ORDER , dimension ) , (ele_v*)weight_data ) ;
 #endif
 
-    // obtain_weights is the same code for vectorized operation, the arguments
-    // suffice to pick the right template argiments
+    // obtain_weights is the same code as for unvectorized operation, the arguments
+    // suffice to pick the right template arguments
   
     obtain_weights ( weight , tune ) ;
     
-    // delegate
-
-    v_eval ( select , weight , result ) ;
-
-#ifdef TEST_FOR_TAGGED
+    // having obtained the weights, we delegate to the final delegate.
     
-    // this bit of code is optional, it's only needed if mapping mode TAG is used.
-    // If TEST_FOR_TAGGED is not defined and mapping mode TAG is used, out-of-range
-    // coordinates produce undefined results but the program will not crash.
-    // TODO: this isn't good style, there should be a way of parametrizing this
-    // behaviour. This would best be done by introducing a 'strategy' template parameter
-    // to class evaluator and changing the remap code to accept an evaluator instead of
-    // a spline, which is desirable anyway.
+    v_eval ( select , weight , result ) ;
 
-    for ( int d = 0 ; d < dimension ; d++ )
+    if ( use_tag )
     {
-      auto mask = ( tune[d] == rc_type ( rc_out_of_bounds ) ) ; // test for oob value
-      if ( any_of ( mask ) )
+      // this bit of code is optional, it's only needed if mapping mode TAG is used.
+      // If use_tag is false and mapping mode TAG is used, out-of-range
+      // coordinates produce undefined results but the program will not crash.
+
+      for ( int d = 0 ; d < dimension ; d++ )
       {
-        for ( int ch = 0 ; ch < channels ; ch++ ) // coordinate is oob, set result to 0
-          result[ch] ( mask ) = ele_type ( rv_out_of_bounds ) ;
+        auto mask = ( tune[d] == rc_type ( rc_out_of_bounds ) ) ; // test for oob value
+        if ( any_of ( mask ) )
+        {
+          for ( int ch = 0 ; ch < channels ; ch++ ) // coordinate is oob, set to 0
+            result[ch] ( mask ) = ele_type ( rv_out_of_bounds ) ;
+        }
       }
     }
 
-#endif
+  }
+
+  /// here we transform incoming nD coordinates into offsets into the coefficient
+  /// array's memory.
+  /// note that we use both the strides and offsets appropriate for an expanded array,
+  /// and component_base has pointers to the element type.
 
+  void v_eval ( const nd_ic_v& select ,  // nD coordinates to coefficient windows
+                const nd_rc_v& tune ,    // fractional parts of the coordinates
+                mc_ele_v & result ) const
+  {
+    ic_v origin = select[0] * ic_type ( expanded_stride [ 0 ] ) ;
+    for ( int d = 1 ; d < dimension ; d++ )
+      origin += select[d] * ic_type ( expanded_stride [ d ] ) ;
+    
+    v_eval ( origin , tune , result ) ;
   }
 
   /// here we take the approach to require the calling function to present pointers to
@@ -1212,8 +1188,8 @@ public:
   /// in contiguous memory. But this is not always the case, for example when the
   /// data are strided. Anyway, it doesn't hurt to have the option.
   
-  void v_eval ( const split_coordinate_type* const psc , // pointer to vsize presplit coordinates
-                value_type * result )  const                   // pointer to vsize result values
+  void v_eval ( const split_coordinate_type * const psc , // pointer to vsize split coordinates
+                value_type * result )  const             // pointer to vsize result values
   {
     nd_ic_v select ;
     nd_rc_v tune ;
@@ -1237,7 +1213,37 @@ public:
 
     // and deposit it in the memory the caller provides
     for ( int ch = 0 ; ch < channels ; ch++ )
-      v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
+  }
+
+  /// same for 'compact' split coordinates.
+
+  void v_eval ( const compact_split_coordinate_type* const psc ,
+                value_type * result )  const
+  {
+    ic_v select ;
+    nd_rc_v tune ;
+
+    // this is awkward, but if we get split coordinates interleaved like this,
+    // we have to manually deinterleave them unless we make potentially unsafe
+    // assumptions about the size of the components (if they are the same and packed,
+    // we might gather instead...)
+    
+    for ( int vi = 0 ; vi < vsize ; vi++ )
+    {
+      select[vi] = int ( psc[vi].select ) ;
+      for ( int d = 0 ; d < dimension ; d++ )
+      {
+        tune[d][vi] = rc_type ( psc[vi].tune[d] ) ;
+      }
+    }
+
+    mc_ele_v v_result ;
+    v_eval ( select , tune , v_result ) ;
+
+    // and deposit it in the memory the caller provides
+    for ( int ch = 0 ; ch < channels ; ch++ )
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
   }
 
   /// this variant is roughly the same as the one above, but it receives separate
@@ -1245,9 +1251,9 @@ public:
   /// case where these aspects are stored in different containers. This type of access
   /// is more efficient, since we can use gather operations
   
-  void v_eval ( const ic_type * const pi ,  // pointer to integral parts of coordinates
-                const rc_type * const pr ,  // pointer to real parts fo coordinates
-                value_type * result ) const  // pointer to vsize result values
+  void v_eval ( const nd_ic_type * const pi ,  // pointer to integral parts of coordinates
+                const nd_rc_type * const pr ,  // pointer to real parts fo coordinates
+                value_type * result ) const    // pointer to vsize result values
   {
     nd_ic_v select ;
     nd_rc_v tune ;
@@ -1256,8 +1262,35 @@ public:
     
     for ( int d = 0 ; d < dimension ; d++ )
     {
-      select[d].gather ( pi , nd_interleave[d] )  ;
-      tune[d].gather ( pr , nd_interleave[d] )  ;
+      select[d].gather ( (ic_type*)pi , nd_interleave(d) )  ;
+      tune[d].gather ( (rc_type*)pr , nd_interleave(d) )  ;
+    }
+
+    mc_ele_v v_result ;
+    v_eval ( select , tune , v_result ) ;
+
+    // and deposit it in the memory the caller provides
+    
+    for ( int ch = 0 ; ch < channels ; ch++ )
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
+  }
+
+  /// same for 'compact' split coordinates
+  
+  void v_eval_c ( const ic_type * const pi ,    // pointer to offsets
+                  const nd_rc_type * const pr , // pointer to real parts fo coordinates
+                  value_type * result ) const   // pointer to vsize result values
+  {
+    ic_v select ;
+    nd_rc_v tune ;
+
+    select.load ( pi )  ;
+      
+    // gather the real coordinate parts from the pointer passed in
+    
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      tune[d].gather ( pr , nd_interleave(d) )  ;
     }
 
     mc_ele_v v_result ;
@@ -1266,7 +1299,7 @@ public:
     // and deposit it in the memory the caller provides
     
     for ( int ch = 0 ; ch < channels ; ch++ )
-      v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+      v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
   }
 
   /// This variant of v_eval() works directly on vector data (of unsplit coordinates)
@@ -1275,8 +1308,8 @@ public:
   /// to perform the (de)interleaving e.g. by a gather/scatter operation, or already receives
   /// the data in simdized form.
   
-  void v_eval ( const nd_rc_v & input ,  // number of dimensions * coordinate vectors
-                mc_ele_v & result )  const     // number of channels * value vectors
+  void v_eval ( const nd_rc_v & input ,    // number of dimensions * coordinate vectors
+                mc_ele_v & result )  const // number of channels * value vectors
   {
     nd_ic_v select ;
     nd_rc_v tune ;
@@ -1299,7 +1332,7 @@ public:
   // relying on the optimizer to shed the unnecessary TinyVector<T,1> container
   
   void v_eval ( const nd_rc_type * const pmc ,  // pointer to vsize muli_coordinates
-                value_type * result ) const           // pointer to vsize result values
+                value_type * result ) const     // pointer to vsize result values
   {
     nd_rc_v input ;
     mc_ele_v v_result ;
@@ -1313,7 +1346,7 @@ public:
     else
     {
       for ( int d = 0 ; d < dimension ; d++ )
-        input[d].gather ( (const rc_type* const)pmc , nd_interleave[d] ) ;
+        input[d].gather ( (const rc_type* const)pmc , nd_interleave(d) ) ;
     }
 
     // call v_eval() for vectorized data
@@ -1329,7 +1362,7 @@ public:
     else
     {
       for ( int ch = 0 ; ch < channels ; ch++ )
-        v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+        v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
     }
   }
 
@@ -1359,7 +1392,7 @@ public:
     else
     {
       for ( int ch = 0 ; ch < channels ; ch++ )
-        v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+        v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
     }
   }
 
@@ -1384,7 +1417,7 @@ public:
     else
     {
       for ( int ch = 0 ; ch < channels ; ch++ )
-        v_result[ch].scatter ( (ele_type*)result , mc_interleave[ch] ) ;
+        v_result[ch].scatter ( (ele_type*)result , mc_interleave(ch) ) ;
     }
   }
 
diff --git a/example/pano_extract.cc b/example/pano_extract.cc
index 56e0881..ec02cb1 100644
--- a/example/pano_extract.cc
+++ b/example/pano_extract.cc
@@ -694,11 +694,12 @@ void process_image ( char * name ,
   start = std::chrono::system_clock::now();
 #endif
   
-  evaluator < 2 , pixel_type , rc_type , int > ev ( bspl ) ;
+  typedef evaluator < 2 , pixel_type , rc_type , int > eval_type ;
+  eval_type ev ( bspl ) ;
   
   for ( int times = 0 ; times < 1 ; times++ )
   {
-    vspline::tf_remap < pixel_type , rc_type , 2 , 2 >
+    vspline::tf_remap < pixel_type , rc_type , eval_type , 2 , 2 >
       ( ev , tf , result ) ;
   }
 
@@ -729,10 +730,11 @@ void process_image ( char * name ,
 #endif
 
     // create an evaluator for bspl_alpha
-    evaluator < 2 , float , rc_type , int > ev_alpha ( bspl_alpha ) ;
+    typedef evaluator < 2 , float , rc_type , int > alpha_ev_type ;
+    alpha_ev_type ev_alpha ( bspl_alpha ) ;
 
     // now we do a transformation-based remap of the alpha channel
-    vspline::tf_remap < float , rc_type , 2 , 2 >
+    vspline::tf_remap < float , rc_type , alpha_ev_type , 2 , 2 >
       ( ev_alpha , tf_alpha , alpha_result ) ;
   }
 
diff --git a/example/roundtrip.cc b/example/roundtrip.cc
index 445548a..5c88ce2 100644
--- a/example/roundtrip.cc
+++ b/example/roundtrip.cc
@@ -227,7 +227,7 @@ void roundtrip ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    vspline::remap<pixel_type,split_type,2,2>
+    vspline::remap<split_type,pixel_type,eval_type,2>
       ( ev , warp , target , use_vc ) ;
 
   
@@ -263,7 +263,7 @@ void roundtrip ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    vspline::remap<pixel_type,coordinate_type,2,2>
+    vspline::remap<coordinate_type,pixel_type,eval_type,2>
       ( ev , fwarp , target , use_vc ) ;
 
   
@@ -281,7 +281,7 @@ void roundtrip ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    vspline::remap<pixel_type,coordinate_type,2,2>
+    vspline::remap<coordinate_type,pixel_type,2>
       ( data , fwarp , target , bcv , DEGREE , use_vc ) ;
 
  
@@ -333,7 +333,7 @@ void roundtrip ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    vspline::tf_remap < pixel_type , rc_type , 2 , 2 >
+    vspline::tf_remap < pixel_type , rc_type , eval_type , 2 , 2 >
       ( ev , tf , target , use_vc ) ;
 
   // note:: with REFLECT this doesn't come out right, because of the .5 difference!
diff --git a/interpolator.h b/interpolator.h
new file mode 100644
index 0000000..caefe41
--- /dev/null
+++ b/interpolator.h
@@ -0,0 +1,499 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/*! \file interpolator.h
+
+    \brief interface definition for interpolators
+
+    The code in this file formalizes the services of an 'interpolator'.
+    In the context of vspline, this is used to present b-spline evaluators
+    to the remap routines, but the interface can accomodate other
+    interpolators as well.
+    
+    THIS Is A CONSTRUCTION SITE
+*/
+
+#ifndef VSPLINE_INTERPOLATOR_H
+#define VSPLINE_INTERPOLATOR_H
+
+#include <vspline/common.h>
+#include <vspline/coordinate.h>
+
+namespace vspline {
+
+template < int _dimension ,    ///< dimension of the spline
+           class _value_type , ///< type of spline coefficients/result (pixels etc.)
+           class _rc_type ,    ///< singular real coordinate, like float or double
+           class _ic_type >    ///< singular integral coordinate, like int
+struct interpolator
+{
+  // fix the types we're dealing with
+  typedef _ic_type ic_type ;
+  typedef _rc_type rc_type ;
+
+  enum { dimension = _dimension } ;
+  typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
+  typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;
+  
+  typedef _value_type value_type ;  
+  typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+
+  enum { channels = vigra::ExpandElementResult < value_type > :: size } ;
+
+  // for single coordinates we require an operator() taking a const& to a
+  // single coordinate and a reference to the place where the result should
+  // be deposited.
+
+  virtual void eval ( const nd_rc_type & c ,
+                      value_type & result ) const = 0 ;
+
+#ifdef USE_VC
+  
+  // for vectorized operation, we need a few extra typedefs
+  // I use a _v suffix to indicate vectorized types and the prefixes
+  // mc_ for multichannel and nd_ for multidimensional
+
+  /// a simdized type of the elementary type of value_type,
+  /// which is used for coefficients and results. this is fixed via
+  /// the traits class vector_traits (in common.h)
+  
+  typedef typename vector_traits < ele_type > :: type ele_v ;
+  
+  /// element count of Simd data types.
+  
+  enum { vsize = ele_v::Size } ;
+
+  /// compatible-sized simdized type of vsize ic_type
+
+  typedef typename vector_traits < ic_type , vsize > :: type ic_v ;
+
+  /// compatible-sized simdized type of vsize rc_type
+
+  typedef typename vector_traits < rc_type , vsize > :: type rc_v ;
+
+  /// multichannel value as SoA, for pixels etc.
+
+  typedef vigra::TinyVector < ele_v , channels > mc_ele_v ;
+
+  /// SoA for nD coordinates/components
+
+  typedef vigra::TinyVector < rc_v , dimension > nd_rc_v ;
+
+  /// SoA for nD shapes (or multidimensional indices)
+
+  typedef vigra::TinyVector < ic_v , dimension > nd_ic_v ;
+
+  /// require overload of operator() for simdized coordinates and values
+  /// this operator() is used in cases where the calling code already has the
+  /// simdized types handy:
+
+  virtual void v_eval ( const nd_rc_v & input ,         // simdized nD coordinate
+                        mc_ele_v & result ) const = 0 ; // simdized value_type
+
+  /// require overload of v_eval taking pointers to vsize coordinates and
+  /// vsize values, expecting these data to be contiguous in memory. With this
+  /// variant, the implementation of this v_eval takes responsibility for
+  /// (de)interleaving the data.
+
+  virtual void v_eval ( const nd_rc_type * const pmc ,    // -> vsize coordinates
+                        value_type * result ) const = 0 ; // -> vsize result values
+  
+#endif
+} ;
+
+/// probably the simplest interpolation technique is 'nearest neighbour'
+/// interpolation. This serves as a model implementation of a class satisfying
+/// the interpolator interface:
+// TODO we're missing the treatment of values for out-of-range coordinates
+
+template < int _dimension ,
+           class _value_type ,
+           typename _rc_type = float ,
+           typename _ic_type = int >
+struct nearest_neighbour
+: public interpolator < _dimension , _value_type , _rc_type , _ic_type >
+{
+  typedef interpolator < _dimension , _value_type , _rc_type , _ic_type > base_type ;
+
+  // import unvectorized types
+  // TODO this sucks. isn't there a way to just take the lot in?
+  
+  typedef typename base_type::value_type value_type ;
+  typedef typename base_type::ele_type ele_type ;
+  typedef typename base_type::ic_type ic_type ;
+  typedef typename base_type::rc_type rc_type ;
+  typedef typename base_type::nd_ic_type nd_ic_type ;
+  typedef typename base_type::nd_rc_type nd_rc_type ;
+
+  // number of channels and dimensions
+
+  const int channels = base_type::channels ;
+  const int dimension = base_type::dimension ;
+
+  typedef vigra::MultiArrayView < _dimension , value_type > substrate_type ;
+  
+  // data to interpolate over
+
+  const substrate_type & substrate ;
+  
+#ifdef USE_VC
+
+  // import vectorized types
+
+  typedef typename base_type::ic_v ic_v ;
+  typedef typename base_type::rc_v rc_v ;
+  typedef typename base_type::ele_v ele_v ;
+  typedef typename base_type::nd_ic_v nd_ic_v ;
+  typedef typename base_type::nd_rc_v nd_rc_v ;
+  typedef typename base_type::mc_ele_v mc_ele_v ;
+  
+  enum { vsize = base_type::vsize } ;
+  
+  const ele_type * const p_data ;
+
+#endif
+  
+  nearest_neighbour ( const substrate_type & _substrate )
+  : substrate ( _substrate )
+#ifdef USE_VC
+  , p_data ( (ele_type*) ( substrate.data() ) )
+#endif
+  {
+  } ;
+
+  // round the real-valued coordinate components to the nearest integral
+  // value and pick the value from the substrate at position 'c'. then
+  // deposit this value in 'result'
+
+  void eval ( const nd_rc_type& c ,
+              value_type & result ) const
+  {
+    nd_ic_type where ;
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      where[d] = ic_type ( c[d] + rc_type ( 0.5 ) ) ;
+      if ( where[d] < 0 || where[d] >= substrate.stride(d) )
+      {
+        result = 0 ;
+        return ;
+      }
+    }
+    result = substrate [ where ] ;
+  } ;
+  
+#ifdef USE_VC
+
+  /// overload of v_eval for simdized coordinates and values
+
+  void v_eval ( const nd_rc_v & c ,       // simdized nD coordinate
+                mc_ele_v & result ) const // simdized value_type
+  {
+    ic_v offset ( 0 ) ;
+    typename ic_v::mask_type mask ( false ) ;
+
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      ic_v select ( c[d] + rc_type ( 0.5 ) ) ;
+      mask |= ( select < 0 ) ;
+      mask |= ( select >= substrate.shape(d) ) ;
+      select ( mask ) = 0 ;
+      offset += select * ic_type ( substrate.stride(d) ) ;
+    }
+    for ( int ch = 0 ; ch < channels ; ch++ )
+    {
+      result[ch].gather ( p_data , offset * channels + ch ) ;
+      result[ch] ( mask ) = 0 ;
+    }
+  } ;
+
+  /// overload of v_eval taking pointers to vsize coordinates and
+  /// vsize values, expecting these data to be contiguous in memory
+
+  void v_eval ( const nd_rc_type * const pmc ,  // pointer to vsize muli_coordinates
+                value_type * result ) const     // pointer to vsize result values
+  {
+    nd_rc_v cv ;
+    mc_ele_v ev ;
+
+    for ( int d = 0 ; d < dimension ; d++ )
+    {
+      cv[d].gather ( (rc_type*) pmc ,
+                     ic_v::IndexesFromZero() * dimension + d ) ;
+    }
+    v_eval ( cv , ev ) ;
+    for ( int ch = 0 ; ch < channels ; ch++ )
+    {
+      ev[ch].scatter ( (ele_type*) result , ic_v::IndexesFromZero() * channels + ch ) ;
+    }
+  } ;
+  
+#endif
+} ;
+
+// /// struct diversify takes a few simple functors as template arguments
+// /// and provides a wider range of functors with different, but compatible
+// /// signatures (like, pointers instead of references)
+// 
+// template < int _dimension ,    ///< dimension of coordinates
+//            class _value_type , ///< type of value which the functor produces
+//            class _rc_type ,    ///< singular real coordinate, like float or double
+//            class _ic_type ,    ///< singular integral coordinate, like int
+//            class multi_functor_type >
+// struct diversify
+// {  
+//   // fix the types we're dealing with
+//   typedef _ic_type ic_type ;
+//   typedef _rc_type rc_type ;
+// 
+//   enum { dimension = _dimension } ;
+//   typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
+//   typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;
+//   
+//   typedef _value_type value_type ;  
+//   typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+// 
+//   enum { channels = vigra::ExpandElementResult < value_type > :: size } ;
+// 
+//   // constructor taking a suitable object which can produce the desired behaviour
+// 
+//   multi_functor_type multi_functor ;
+//   
+//   diversify ( multi_functor_type _mf )
+//   : multi_functor ( _mf )
+//   { } ;
+//   
+//   // from the multi_functor passed to  the constructor, we can derive a host of
+//   // different functors which all ultimately do the same thing, but access the
+//   // arguments in different ways. We start out with signatures for eval which
+//   // precisely match the std::functions passed in:
+//   
+//   void eval ( const nd_rc_type & c , value_type & result ) const
+//   {
+//     multi_functor.eval ( c , result ) ;
+//   }
+//   
+//   void eval ( const nd_ic_type & ic , const nd_rc_type & rc , value_type & result ) const
+//   {
+//     multi_functor.eval ( ic , rc , result ) ;
+//   }
+//   
+//   // operation on split coordinates (nD int coordinate+fractional part)
+//   // we can define this using scref2rref:
+//   
+//   typedef split_type < dimension , ic_type , rc_type > split_t ;
+// 
+//   void eval ( const split_t & c , value_type & result ) const
+//   {
+//     multi_functor.eval ( c.select , c.tune , result ) ;
+//   }
+//   
+//   // operation on compact split coordinates (offset+fractional part)
+//   // to define this we need cscref2rref.
+//   
+//   typedef compact_split_type < dimension , ic_type , rc_type > csplit_t ;
+// 
+//   void eval ( const csplit_t & c , value_type & result ) const
+//   {
+//     multi_functor.eval ( c.select , c.tune , result ) ;
+//   }
+//   
+//   // in diversify we refrain from any notion of progression from unsplit
+//   // to split coordinates (via a mapping, as in mapping.h) and further
+//   // to the more compact offset/fractionals representation (which requires
+//   // the strides of the coefficient array, a metric outside the scope
+//   // of this object) - the sole purpose of this class is providing
+//   // additional signature types.
+//   
+//   // now we define the operation with other parameter signatures by
+//   // resorting to the stored functions we have. First using pointers:
+// 
+//   void eval ( const nd_rc_type * const p_c , value_type * p_result ) const
+//   {
+//     eval ( *p_c , *p_result ) ;
+//   }
+//   
+//   void eval ( const nd_ic_type * const p_ic ,
+//               const nd_rc_type * const p_rc ,
+//               value_type * p_result ) const
+//   {
+//     eval ( *p_ic , *p_rc , *p_result ) ;
+//   }
+//   
+//   void eval ( const split_t * const p_sc ,
+//               value_type * p_result ) const
+//   {
+//     eval ( p_sc->select , p_sc->tune , *p_result ) ;
+//   }
+//   
+//   void eval ( const csplit_t * const p_sc ,
+//               value_type * p_result ) const
+//   {
+//     eval ( p_sc->select , p_sc->tune , *p_result ) ;
+//   }
+//   
+//   // now variants taking only the coordinate and returning value_type&&
+//   
+//   value_type && eval ( const nd_rc_type & c ) const
+//   {
+//     value_type v ;
+//     eval ( c , v ) ;
+//     return v ;
+//   }
+//   
+//   value_type && eval ( const nd_ic_type & ic ,
+//                        const nd_rc_type & rc ) const
+//   {
+//     value_type v ;
+//     eval ( ic , rc , v ) ;
+//     return v ;
+//   }
+//   
+//   value_type && eval ( const split_t & sc ,
+//                        const nd_rc_type & rc ) const
+//   {
+//     value_type v ;
+//     eval ( sc.select , sc.tune , v ) ;
+//     return v ;
+//   }
+//   
+//   value_type && eval ( const csplit_t & sc ,
+//                        const nd_rc_type & rc ) const
+//   {
+//     value_type v ;
+//     eval ( sc.select , sc.tune , v ) ;
+//     return v ;
+//   }
+//   
+//   // just being playful :)
+// 
+//   void eval ( const nd_rc_type * const p_c ,
+//               int i_c ,
+//               value_type * p_result ,
+//               int i_result
+//             ) const
+//   {
+//     eval ( p_c[i_c] , p_result[i_result] ) ;
+//   }
+//   
+// #ifdef USE_VC
+// 
+//   // for vectorized operation, we need a few extra typedefs
+//   // I use a _v suffix to indicate vectorized types and the prefixes
+//   // mc_ for multichannel and nd_ for multidimensional
+// 
+//   /// a simdized type of the elementary type of value_type,
+//   /// which is used for coefficients and results. this is fixed via
+//   /// the traits class vector_traits (in common.h)
+//   
+//   typedef typename vector_traits < ele_type > :: type ele_v ;
+//   
+//   /// element count of Simd data types.
+//   
+//   enum { vsize = ele_v::Size } ;
+// 
+//   /// compatible-sized simdized type of vsize ic_type
+// 
+//   typedef typename vector_traits < ic_type , vsize > :: type ic_v ;
+// 
+//   /// compatible-sized simdized type of vsize rc_type
+// 
+//   typedef typename vector_traits < rc_type , vsize > :: type rc_v ;
+// 
+//   /// multichannel value as SoA, for pixels etc.
+// 
+//   typedef vigra::TinyVector < ele_v , channels > mc_ele_v ;
+// 
+//   /// SoA for nD coordinates/components
+// 
+//   typedef vigra::TinyVector < rc_v , dimension > nd_rc_v ;
+// 
+//   /// SoA for nD shapes (or multidimensional indices)
+// 
+//   typedef vigra::TinyVector < ic_v , dimension > nd_ic_v ;
+// 
+//   /// SoA for multichannel indices (used for gather/scatter operations)
+// 
+//   typedef vigra::TinyVector < ic_v , channels >  mc_ic_v ;
+// 
+//   // we start out with vectorized routines directly matching v_eval
+//   // methods of the multi_functor, first the variant taking references:
+// 
+//   void v_eval ( const nd_rc_v & c ,       // simdized nD coordinate
+//                 mc_ele_v & result ) const // simdized value_type
+//   {
+//     multi_functor.v_eval ( c , result ) ;
+//   } ;
+// 
+//   /// overload of v_eval taking pointers to vsize coordinates and
+//   /// vsize values, expecting these data to be contiguous in memory
+// 
+//   void v_eval ( const nd_rc_type * const pmc ,  // pointer to vsize muli_coordinates
+//                 value_type * p_result ) const     // pointer to vsize result values
+//   {
+//     multi_functor.v_eval ( pmc , p_result ) ;
+//   } ;
+// 
+//   // TODO: now here we can go on defining signatures:
+//   
+// #endif
+// } ;
+
+// // now we can define nearest_neighbour as a class derived from diversify,
+// // which gives a host of different eval() and v_eval() methods which are
+// // all implemented in terms of a few basic eval() and v_eval() methods
+// // provided by the 'core' type.
+// // The idea is to do the same with the b-spline evaluator, concentrating in
+// // class evaluator on the 'core' methods and leaving the diversification to
+// // the diversify inheritance.
+// 
+// template < int _dimension ,
+//            class _value_type ,
+//            typename _rc_type = float ,
+//            typename _ic_type = int >
+// struct nearest_neighbour_d
+// : public diversify < _dimension , _value_type , _rc_type , _ic_type ,
+//                      nearest_neighbour < _dimension , _value_type , _rc_type , _ic_type >
+//                    >
+// {
+//   typedef nearest_neighbour < _dimension , _value_type , _rc_type , _ic_type > core_t ;
+//   typedef diversify < _dimension , _value_type , _rc_type , _ic_type , core_t > div_t ;
+//   typedef vigra::MultiArrayView < _dimension , _value_type > substrate_type ;
+// 
+//   nearest_neighbour_d ( const substrate_type & substrate )
+//   : div_t ( core_t ( substrate ) )
+//   { } ;
+//   
+// } ;
+
+} ; // end of namespace vspline
+
+#endif // VSPLINE_INTERPOLATOR_H
+
diff --git a/mapping.h b/mapping.h
index 439f9e4..f5c931a 100644
--- a/mapping.h
+++ b/mapping.h
@@ -110,31 +110,13 @@
 #include <vigra/multi_iterator.hxx>
 
 #include "common.h"
+#include "coordinate.h"
 
 namespace vspline {
 
 using namespace std ;
 using namespace vigra ;
 
-/// class split_type contains n-dimensional 'split coordinates', consisting of the
-/// integral and fracional part of the original real coordinates, separated so that
-/// they can be processed by the evaluation routine.
-
-template < int N ,
-           typename IT ,
-           typename FT >
-struct split_type
-{
-  typedef IT ic_type ;
-  typedef FT rc_type ;
-  typedef FT value_type ;
-  enum { dimension = N } ;
-  typedef vigra::TinyVector < IT , N > select_type ;
-  select_type select ; ///< named select because it selects the range of coefficients
-  typedef vigra::TinyVector < FT , N > tune_type ;
-  tune_type tune ; ///< named tune because it is the base for the weights 
-} ;
-
 #ifdef USE_VC
 
 /// since Vc doesn't offer a vectorized modf function, we have to code it. We make the effort not
diff --git a/remap.h b/remap.h
index 87fad06..d0ec6b8 100644
--- a/remap.h
+++ b/remap.h
@@ -105,34 +105,6 @@ using namespace vigra ;
 /// per default, whatever the coordinate_type may be, take it to be the rc_type as well.
 /// This works for simple 1D coordinates like float or double.
 
-template < class coordinate_type >
-struct coordinate_traits
-{
-  typedef coordinate_type rc_type ;
-  typedef int ic_type ;
-} ;
-
-/// here we have a partial specialization of class coordinate_traits for coordinates
-/// coming as vigra TinyVectors, which is used throughout for nD coordinates. Here we
-/// pick the TinyVector's value_type as our rc_type
-
-template < typename T , int N >
-struct coordinate_traits < vigra::TinyVector < T , N > >
-{
-  typedef typename vigra::TinyVector < T , N >::value_type rc_type ;
-  typedef int ic_type ;
-} ;
-
-/// since remap can also operate with pre-split coordinates, we need another partial
-/// specialization of coordinate_traits for split_type, which also defines a value_type
-
-template < int N , typename IT , typename FT >
-struct coordinate_traits < split_type < N , IT , FT > >
-{
-  typedef typename split_type < N , IT , FT >::value_type rc_type ;
-  typedef int ic_type ;
-} ;
-
 /// declaring an appropriate evaluator is quite a mouthful, so we use
 /// this 'using' expresssion to mak it more handy:
 
@@ -144,40 +116,42 @@ using evaluator_type
               typename coordinate_traits < coordinate_type > :: ic_type > ;
 
 /// st_remap is the single-thread routine to perform a remap with a given warp array
-/// _warp (containing coordinates) into a target array _output, which takes the interpolated
-/// values at the locations the coordinates indicate, using a bspline as interpolator.
+/// at p_warp (containing coordinates) into a target array _output, which takes the
+/// interpolated values at the locations the coordinates indicate.
 /// While calling this function directly is entirely possible, code wanting to perform
 /// a remap would rather call the next function which partitions the task at hand and
 /// uses a separate thread for each partial job.
 /// The first parameter, range, specifies a subarray of _warp and _output which this
 /// routine is meant to process. this may be the full range, but normally it will cover
 /// only equally sized parts of these arrays, where the precise extent is determined
-/// by multithred() or it's caller in the next routine.
+/// by multithred() or it's caller in the next routine, see there.
 /// Note how I pass in several pointers. Normally I'd pass such data by reference,
 /// but I can't do this here (or haven't found a way yet) because of my parameter-handling
 /// in multithread() which I couldn't get to work with references. The effect is the same.
-// TODO instead of passing in a b-spline, one might pass in an evaluator. If this is
-// coded so that the evaluator can be any object capable of the methods called for class
-// evaluator in this routine, the remap becomes more general, since it can use any old
-// 'evaluator' with this interface. For the unvectorized code, supplying this interface
-// is trivial, there is only one method used. For the vectorized version, only one more is
-// needed (taking pointers to vsize values/coordinates).
-
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class coordinate_type , // usually TinyVector of real for coordinates
-           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-           int dim_out >           // number of dimensions of output array
+/// Note how this routine takes 'interpolator_type' as a template argument. Since the code
+/// in this routine is not specific to b-splines, it accepts not only b-spline evaluators
+/// but any object satisfying the interface defined in class interpolator. This opens the
+/// remapping code for other interpolation methods as well.
+/// The interface defined by class interpolator is minimal and does not contain pure
+/// virtual methods for many of the methods defined in class evaluator, but then many
+/// of class evaluator's methods only are meaningful for b-spline evaluation, like
+/// treatment of 'split coordinates'. The remap code in this method and it's companion
+/// below only use methods from the interpolator interface.
+              
+template < typename coordinate_type , // type of coordinates in the warp array
+           typename value_type ,      // type of values to produce
+           typename interpolator_type  , // functor object yielding values for coordinates
+           int dim_out >              // number of dimensions of output array
 void st_remap ( shape_range_type < dim_out >                 range ,
-                const evaluator_type < dim_in , value_type , coordinate_type > * const p_ev ,
-                MultiArrayView < dim_out , coordinate_type > *p_warp ,
+                const interpolator_type * const               p_ev ,
+                const MultiArrayView < dim_out , coordinate_type > * const p_warp ,
                 MultiArrayView < dim_out , value_type >      *p_output ,
                 bool use_vc = true
               )
 {
-  typedef bspline < value_type , dim_in > spline_type ;
+  enum { dim_in = coordinate_traits < coordinate_type > :: dimension } ;
   typedef typename coordinate_traits < coordinate_type > :: rc_type rc_type ;
-  typedef evaluator < dim_in , value_type , rc_type , int > evaluator_type ;
-  typedef typename evaluator_type::ele_type ele_type ;
+  typedef typename interpolator_type::ele_type ele_type ;
   
   // pick out the subarrays specified by 'range'
   // this idiom reoccurs throughout, since the multithreading code I use does not
@@ -190,14 +164,10 @@ void st_remap ( shape_range_type < dim_out >                 range ,
   auto warp = p_warp->subarray ( range[0] , range[1] ) ;
   auto output = p_output->subarray ( range[0] , range[1] ) ;
   
-//   // create an evaluator from the b-spline
-// 
-//   evaluator_type ev ( *p_bspl ) ;
-  
 #ifdef USE_VC
 
-  typedef typename evaluator_type::ele_v ele_v ;
-  const int vsize = evaluator_type::vsize ;
+  typedef typename interpolator_type::ele_v ele_v ;
+  const int vsize = interpolator_type::vsize ;
 
 #endif
   
@@ -312,19 +282,21 @@ void st_remap ( shape_range_type < dim_out >                 range ,
 /// directly, but later I switched to multithreading code which doesn't actually touch
 /// the containers, but instead only passes partitioning information to the individual
 /// threads, leaving it to them to split the containers accordingly, which results in
-/// simple and transparent code with only few limitations.
-
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class coordinate_type , // usually float for coordinates
-           int dim_in ,            // dimensions of input array and of 'warp' coordinates
-           int dim_out >           // dimensions of output array
-void remap ( const evaluator_type < dim_in , value_type , coordinate_type > & ev ,
-             MultiArrayView < dim_out , coordinate_type > & warp ,
+/// simple and transparent code with only few limitations, see multithread.h
+
+template < typename coordinate_type , // type of coordinates in the warp array
+           typename value_type ,      // type of values to produce
+           typename interpolator_type  , // functor object yielding values for coordinates
+           int dim_out >     // number of dimensions of output array
+void remap ( const interpolator_type & ev ,
+             const MultiArrayView < dim_out , coordinate_type > & warp ,
              MultiArrayView < dim_out , value_type > & output ,
              bool use_vc = true ,
              int nthreads = ncores
            )
 {
+  enum { dim_in = coordinate_traits < coordinate_type > :: dimension } ;
+
   // check shape compatibility
   
   if ( output.shape() != warp.shape() )
@@ -343,7 +315,10 @@ void remap ( const evaluator_type < dim_in , value_type , coordinate_type > & ev
   // pretty much the set of parameters we've received here, with the subtle difference
   // that we can't pass anything on by reference and use pointers instead.
 
-  multithread ( & st_remap < value_type , coordinate_type , dim_in , dim_out > ,
+  multithread ( & st_remap < coordinate_type ,
+                             value_type ,
+                             interpolator_type ,
+                             dim_out > ,
                  nthreads , range , &ev , &warp , &output , use_vc ) ;
 } ;
 
@@ -354,20 +329,25 @@ void remap ( const evaluator_type < dim_in , value_type , coordinate_type > & ev
 template < int dimension >
 using bcv_type = vigra::TinyVector < bc_code , dimension > ;
 
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class coordinate_type , // usually TinyVector of float for coordinates
-           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-           int dim_out >           // number of dimensions of output array
-
-int remap ( MultiArrayView < dim_in , value_type > input ,
-            MultiArrayView < dim_out , coordinate_type > warp ,
+template < typename coordinate_type , // type of coordinates in the warp array
+           typename value_type ,      // type of values to produce
+           int dim_out >              // number of dimensions of output array
+int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dimension ,
+                             value_type > input ,
+            const MultiArrayView < dim_out , coordinate_type > warp ,
             MultiArrayView < dim_out , value_type > output ,
-            bcv_type < dim_in > bcv = bcv_type < dim_in > ( MIRROR ) ,
+            bcv_type < coordinate_traits < coordinate_type > :: dimension > bcv
+              = bcv_type < coordinate_traits < coordinate_type > :: dimension >
+                ( MIRROR ) ,
             int degree = 3 ,
             bool use_vc = true ,
             int nthreads = ncores
           )
 {
+  const int dim_in = coordinate_traits < coordinate_type > :: dimension ;
+  typedef typename coordinate_traits < coordinate_type > :: rc_type rc_type ;
+  typedef typename coordinate_traits < coordinate_type > :: ic_type ic_type ;
+
   // check shape compatibility
   
   if ( output.shape() != warp.shape() )
@@ -386,12 +366,13 @@ int remap ( MultiArrayView < dim_in , value_type > input ,
 
   // create an evaluator over the bspline
 
-  evaluator_type < dim_in , value_type , coordinate_type > ev ( bsp ) ;
+  typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
+  evaluator_type ev ( bsp ) ;
   
   // and call the remap variant taking a bspline object,
   // passing in the evaluator, the coordinate array and the target array
   
-  remap < value_type , coordinate_type , dim_in , dim_out >
+  remap < coordinate_type , value_type , evaluator_type , dim_out >
     ( ev , warp , output , use_vc , nthreads ) ;
     
   return 0 ;
@@ -405,7 +386,10 @@ int remap ( MultiArrayView < dim_in , value_type > input ,
 // TODO: runs, but needs more testing
 // TODO: we want an equivalent processing offset/tune pairs which would be even more compact
 
-template < class ic_type , class rc_type , int dimension , int coordinate_dimension >
+template < class ic_type ,
+           class rc_type ,
+           int dimension ,
+           int coordinate_dimension >
 struct warper
 {
   typedef vigra::TinyVector < ic_type , coordinate_dimension > nd_ic_type ;
@@ -443,9 +427,10 @@ struct warper
   } ;
 } ;  
 
-/// single-threaded remap taking separate arrays 'tune' and 'select', which are chunks of
-/// the arrays in a warper object. This routine is called by the one below it via
-/// 'multithread'
+/// single-threaded remap taking separate arrays 'tune' and 'select', which may be chunks
+/// of the arrays in a warper object. This routine is called by the one below it via
+/// 'multithread'. Since operation on split coordinates is specific to b-splines, we
+/// limit this code to use class evaluator.
 
 template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
            class ic_type , 
@@ -454,13 +439,15 @@ template < class value_type ,      // like, float, double, complex<>, pixels, Ti
            int dim_out >           // number of dimensions of output array
 void st_wremap ( shape_range_type < dim_out > range ,
                  const evaluator < dim_in , value_type , rc_type , ic_type > * const p_ev ,
-                 MultiArrayView < dim_out , vigra::TinyVector < ic_type , dim_in > > _select ,
-                 MultiArrayView < dim_out , vigra::TinyVector < rc_type , dim_in > > _tune ,
-                 MultiArrayView < dim_out , value_type > _output ,
+                 const MultiArrayView < dim_out ,
+                                  vigra::TinyVector < ic_type , dim_in > > _select ,
+                 const MultiArrayView < dim_out ,
+                                  vigra::TinyVector < rc_type , dim_in > > _tune ,
+                 MultiArrayView < dim_out ,
+                                  value_type > _output ,
                  bool use_vc = true
                )
 {
-  typedef bspline < value_type , dim_in > spline_type ;
   typedef vigra::TinyVector < rc_type , dim_in > nd_rc_type ;
   typedef vigra::TinyVector < ic_type , dim_in > nd_ic_type ;
   typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
@@ -503,7 +490,7 @@ void st_wremap ( shape_range_type < dim_out > range ,
            a < aggregates ;
            a++ , pselect += vsize , ptune += vsize , destination += vsize )
       {
-        p_ev->v_eval ( (ic_type*) pselect , (rc_type*) ptune , destination ) ;
+        p_ev->v_eval ( pselect , ptune , destination ) ;
       }
       target_it += aggregates * vsize ;
     }
@@ -515,7 +502,7 @@ void st_wremap ( shape_range_type < dim_out > range ,
       value_type target_buffer [ vsize ] ;
       for ( int a = 0 ; a < aggregates ; a++ , pselect += vsize , ptune += vsize )
       {
-        p_ev->v_eval ( (ic_type*) pselect , (rc_type*) ptune , target_buffer ) ;
+        p_ev->v_eval ( pselect , ptune , target_buffer ) ;
         for ( int e = 0 ; e < vsize ; e++ )
         {
           *target_it = target_buffer[e] ;
@@ -548,7 +535,7 @@ template < class value_type ,      // like, float, double, complex<>, pixels, Ti
            int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
            int dim_out >           // number of dimensions of output array
 void remap ( const evaluator < dim_in , value_type , rc_type , ic_type > & ev ,
-             warper < ic_type , rc_type , dim_out , dim_in > & warp ,
+             const warper < ic_type , rc_type , dim_out , dim_in > & warp ,
              MultiArrayView < dim_out , value_type > output ,
              bool use_vc = true ,
              int nthreads = ncores
@@ -834,8 +821,6 @@ public:
 } ;
 
 /// this function provides a generalized geometric transformation
-/// of a source image
-///
 /// In image transformations, often the source location at which interpolation
 /// of the source image is required is defined by a transformation of the location
 /// of the target coordinate. There are (at least) two approaches to handle this.
@@ -846,32 +831,33 @@ public:
 /// as it goes along, saving the construction of the warp array.
 ///
 /// The transformation is passed in by using a 'transformation' object, which is a
-/// wrapper around the actual transformation routine(s).
-
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class rc_type ,         // float or double, for coordinates
-           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-           int dim_out >           // number of dimensions of output array
+/// wrapper around the actual transformation routine(s). Note that this routine is
+/// also suitable for any type of interpolator satisfying the interface defined in
+/// class interpolator in interpolator.h.
+
+template < class value_type ,        // like, float, double, complex<>, pixels, TinyVectors etc.
+           class rc_type ,           // float or double, for coordinates
+           class interpolator_type , // type satisfying the interface in class interpolator
+           int dim_in ,              // number of dimensions of input array (and of 'warp' coordinates)
+           int dim_out >             // number of dimensions of output array
 void st_tf_remap ( shape_range_type < dim_out > range ,
-                   const evaluator < dim_in , value_type , rc_type , int > * p_ev ,
+                   const interpolator_type * const p_ev ,
                    transformation < value_type , rc_type , dim_in , dim_out > tf ,
                    MultiArrayView < dim_out , value_type > _output ,
                    bool use_vc = true
                  )
 {
-  typedef bspline < value_type , dim_in > spline_type ;
-  typedef evaluator < dim_in , value_type , rc_type , int > evaluator_type ;
-  typedef typename evaluator_type::ele_type ele_type ;
+  typedef typename interpolator_type::ele_type ele_type ;
   
   auto output = _output.subarray ( range[0] , range[1] ) ;
   auto offset = range[0] ;
 
 #ifdef USE_VC
 
-  typedef typename evaluator_type::ele_v ele_v ;
-  typedef typename evaluator_type::rc_v rc_v ;
-  typedef typename evaluator_type::mc_ele_v mc_ele_v ;
-  const int vsize = evaluator_type::vsize ;
+  typedef typename interpolator_type::ele_v ele_v ;
+  typedef typename interpolator_type::rc_v rc_v ;
+  typedef typename interpolator_type::mc_ele_v mc_ele_v ;
+  const int vsize = interpolator_type::vsize ;
 
    // incoming coordinates (into output, which has dim_out dims)
   vigra::TinyVector < rc_v , dim_out > c_in ;
@@ -944,9 +930,10 @@ void st_tf_remap ( shape_range_type < dim_out > range ,
 
 template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
            class rc_type ,         // float or double for coordinates
+           class interpolator_type ,
            int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
            int dim_out >           // number of dimensions of output array
-void tf_remap ( const evaluator < dim_in , value_type , rc_type , int > & ev ,
+void tf_remap ( const interpolator_type & ev ,
                 transformation < value_type , rc_type , dim_in , dim_out > tf ,
                 MultiArrayView < dim_out , value_type > output ,
                 bool use_vc = true ,
@@ -955,7 +942,7 @@ void tf_remap ( const evaluator < dim_in , value_type , rc_type , int > & ev ,
 {
   shape_range_type < dim_out > range ( shape_type < dim_out > () , output.shape() ) ;
   
-  multithread ( st_tf_remap < value_type , rc_type , dim_in , dim_out > ,
+  multithread ( st_tf_remap < value_type , rc_type , interpolator_type , dim_in , dim_out > ,
                 nthreads , range , &ev , tf , output , use_vc ) ;
 }
 
@@ -982,8 +969,9 @@ void tf_remap ( MultiArrayView < dim_in , value_type > input ,
   // copy in the data
   bsp.prefilter ( use_vc , nthreads , input ) ;
   // create an evaluator for this spline
-  evaluator < dim_in , value_type , rc_type , int > ev ( bsp ) ;
-  tf_remap<value_type,rc_type,dim_in,dim_out>
+  typedef evaluator < dim_in , value_type , rc_type , int > evaluator_type ;
+  evaluator_type ev ( bsp ) ;
+  tf_remap<value_type,rc_type,evaluator_type,dim_in,dim_out>
            ( ev , tf , output , use_vc , nthreads ) ;
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git