[vspline] 27/72: nice working state of pv

Sun Jul 2 09:02:39 UTC 2017

This is an automated email from the git hooks/post-receive script.

kfj-guest pushed a commit to branch master
in repository vspline.

commit fd9ed9e8bb2edfb155772d0d52cc72720ba3d8fd
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date:   Tue Jan 17 09:47:28 2017 +0100

    nice working state of pv
---
 basis.h              |   2 +-
 bspline.h            |   2 +-
 eval.h               |  26 ++++
 example/gradient.cc  |  86 +++++++++++
 example/roundtrip.cc |  80 ++++------
 interpolator.h       |   4 +
 mapping.h            |   6 +-
 poles.cc             | 104 +++++++------
 prefilter.h          |   4 +-
 prefilter_poles.cc   |   4 +-
 remap.h              | 427 ++++++++++++++-------------------------------------
 11 files changed, 322 insertions(+), 423 deletions(-)

diff --git a/basis.h b/basis.h
index d517d9f..27c337b 100644
--- a/basis.h
+++ b/basis.h
@@ -178,7 +178,7 @@ real_type bspline_basis_2 ( int x2 , int degree , int derivative )
     if ( abs ( x2 ) > degree )
       return real_type ( 0 ) ;
     // for derivative 0 we have precomputed values:
-    const long double * pk = precomputed_basis_function_values [ degree ] ;
+    const long double * pk = vspline_constants::precomputed_basis_function_values [ degree ] ;
     return pk [ abs ( x2 ) ] ;
   }
   else
diff --git a/bspline.h b/bspline.h
index bf6d574..02baed9 100644
--- a/bspline.h
+++ b/bspline.h
@@ -354,7 +354,7 @@ public:
 
     real_type max_pole = .00000000000000000001 ;
     if ( spline_degree > 1 )
-      max_pole = fabs ( precomputed_poles [ spline_degree ] [ 0 ] ) ;
+      max_pole = fabs ( vspline_constants::precomputed_poles [ spline_degree ] [ 0 ] ) ;
     if ( smoothing > max_pole )
       max_pole = smoothing ;
 
diff --git a/eval.h b/eval.h
index 5d5b044..4886e8f 100644
--- a/eval.h
+++ b/eval.h
@@ -609,6 +609,11 @@ public:
     return ORDER ;
   }
 
+  const int & get_degree() const
+  {
+    return spline_degree ;
+  }
+
   const shape_type & get_stride() const
   {
     return coefficients.stride() ;
@@ -986,6 +991,17 @@ public:
     eval ( select , tune , result ) ;
   }
   
+  value_type eval ( const nd_rc_type& c ) const // unsplit coordinate
+  {
+    nd_ic_type select ;
+    nd_rc_type tune ;
+    value_type result ;
+    
+    mmap ( c , select , tune ) ;  /// apply the mappings
+    eval ( select , tune , result ) ;
+    return result ;
+  }
+  
   /// variant taking a plain rc_type for a coordinate. only for 1D splines!
   /// This is to avoid hard-to-track errors which might ensue from allowing
   /// broadcasting of single rc_types to nd_rc_types for D>1. We convert the
@@ -1001,6 +1017,16 @@ public:
     eval ( cc , result ) ;
   }
 
+  value_type eval ( const rc_type& c ) const
+  {
+    static_assert ( dimension == 1 ,
+                    "evaluation at a single real coordinate is only allowed for 1D splines" ) ;
+    nd_rc_type cc ( c ) ;
+    value_type result ;
+    eval ( cc , result ) ;
+    return result ;
+  }
+
   /// alternative implementation of the last part of evaluation. Here, we calculate the
   /// tensor product of the component vectors in 'weight' and use flat_eval to obtain the
   /// weighted sum over the coefficient window.
diff --git a/example/gradient.cc b/example/gradient.cc
new file mode 100644
index 0000000..bce6301
--- /dev/null
+++ b/example/gradient.cc
@@ -0,0 +1,86 @@
+/************************************************************************/
+/*                                                                      */
+/*    vspline - a set of generic tools for creation and evaluation      */
+/*              of uniform b-splines                                    */
+/*                                                                      */
+/*            Copyright 2015, 2016 by Kay F. Jahnke                     */
+/*                                                                      */
+/*    Permission is hereby granted, free of charge, to any person       */
+/*    obtaining a copy of this software and associated documentation    */
+/*    files (the "Software"), to deal in the Software without           */
+/*    restriction, including without limitation the rights to use,      */
+/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
+/*    sell copies of the Software, and to permit persons to whom the    */
+/*    Software is furnished to do so, subject to the following          */
+/*    conditions:                                                       */
+/*                                                                      */
+/*    The above copyright notice and this permission notice shall be    */
+/*    included in all copies or substantial portions of the             */
+/*    Software.                                                         */
+/*                                                                      */
+/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
+/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
+/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
+/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
+/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
+/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
+/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
+/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
+/*                                                                      */
+/************************************************************************/
+
+/// gradient.cc
+///
+/// If we create a b-spline over an array containing, at each grid point,
+/// the sum of the grid point's coordinates, each 1D row, column, etc will
+/// hold a linear gradient with first derivative == 1. If we use NATURAL
+/// BCs, evaluating the spline with real coordinates anywhere inside the
+/// defined range should produce precisely the sum of the coordinates.
+/// This is a good test for both the precision of the evaluation and it's
+/// correct functioning, particularly with higher-D arrays. 
+
+#include <vspline/vspline.h>
+#include <random>
+
+using namespace std ;
+
+main ( int argc , char * argv[] )
+{
+  typedef vspline::bspline < double , 3 > spline_type ;
+  typedef typename spline_type::shape_type shape_type ;
+  typedef typename spline_type::view_type view_type ;
+  typedef typename spline_type::bcv_type bcv_type ;
+  
+  shape_type core_shape = { 10 , 4 , 13 } ;
+  spline_type bspl ( core_shape , 3 , bcv_type ( vspline::NATURAL ) ) ; // , vspline::EXPLICIT ) ;
+  view_type core = bspl.core ;
+  
+  for ( int d = 0 ; d < bspl.dimension ; d++ )
+  {
+    for ( int c = 0 ; c < core_shape[d] ; c++ )
+      core.bindAt ( d , c ) += c ;
+  }
+  
+  bspl.prefilter() ;
+
+  typedef vspline::evaluator < bspl.dimension , double , double , int > evaluator_type ;
+  typedef typename evaluator_type::nd_rc_type coordinate_type ;
+  
+  evaluator_type ev ( bspl ) ;
+//   double * ws = new double [ ev.workspace_size() ] ;
+  
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  // std::mt19937 gen(12345);   // fix starting value for reproducibility
+
+  coordinate_type c ;
+  
+  for ( int times = 0 ; times < 1 ; times++ )
+  {
+    for ( int d = 0 ; d < bspl.dimension ; d++ )
+      c[d] = ( core_shape[d] - 1 ) * std::generate_canonical<double, 20>(gen) ;
+    double delta = ev.eval ( c ) - sum ( c ) ;
+    if ( delta > 2.0e-14 )
+      cout << c << " -> delta = " << delta << endl ;
+  }
+}
diff --git a/example/roundtrip.cc b/example/roundtrip.cc
index c15b5c8..b533207 100644
--- a/example/roundtrip.cc
+++ b/example/roundtrip.cc
@@ -65,23 +65,23 @@ using namespace vigra ;
 /// translated straight to outgoing coordinates. If we use such a transformation
 /// we'd expect to recover the input.
 
-template < class rc_type > // float or double for coordinates
-void tf_identity ( const TinyVector < rc_type , 2 > & c_in ,
-                         TinyVector < rc_type , 2 > & c_out )
+template < class coordinate_type > // float or double for coordinates
+void tf_identity ( const TinyVector < coordinate_type , 2 > & c_in ,
+                         TinyVector < coordinate_type , 2 > & c_out )
 {
   c_out = c_in ;
 }
 
-#ifdef USE_VC
-
-template < class rc_v >
-void vtf_identity ( const TinyVector < rc_v , 2 > & c_in ,
-                          TinyVector < rc_v , 2 > & c_out )
-{
-  c_out = c_in ;
-}
-
-#endif
+// #ifdef USE_VC
+// 
+// template < class rc_v >
+// void vtf_identity ( const TinyVector < rc_v , 2 > & c_in ,
+//                           TinyVector < rc_v , 2 > & c_out )
+// {
+//   c_out = c_in ;
+// }
+// 
+// #endif
 
 template < class view_type >
 void check_diff ( const view_type& a , const view_type& b )
@@ -114,9 +114,17 @@ void roundtrip ( view_type & data ,
 
 #ifdef USE_VC
   
-//   const int vsize = Vc::Vector < real_type > :: Size ;
+  // we use simdized types with as many elements as vector_traits
+  // considers appropriate for a given real_type, which is the elementary
+  // type of the (pixel) data we process:
+  
   const int vsize = vspline::vector_traits < real_type > :: vsize ;
   
+  // for vectorized coordinates, we use simdized coordinates with as many
+  // elements as the simdized values hold:
+
+  typedef typename vspline::vector_traits < rc_type , vsize > :: type rc_v ;
+  
 #else
   
   const int vsize = 1 ;
@@ -164,17 +172,17 @@ void roundtrip ( view_type & data ,
   typedef typename eval_type::nd_rc_type coordinate_type ;
   typedef typename eval_type::split_coordinate_type split_type ;
   
-  typedef vspline::nd_mapping < int , rc_type , 2 , vsize > mmap ;
+//   typedef vspline::nd_mapping < int , rc_type , 2 , vsize > mmap ;
   
   // obtain the mapping from the evaluator:
-  const mmap map = ev.get_mapping() ;
+  const auto map = ev.get_mapping() ;
 
   typedef vigra::MultiArray<2, coordinate_type> coordinate_array ;
   
   typedef vigra::MultiArray<2, split_type> warp_array ;
   typedef vigra::MultiArrayView<2, split_type> warp_view ;
 
-  typedef vspline::warper < int , rc_type , 2 , 2 > warper_type ;
+//   typedef vspline::warper < int , rc_type , 2 , 2 > warper_type ;
 
   int Tx = Nx ;
   int Ty = Ny ;
@@ -185,15 +193,10 @@ void roundtrip ( view_type & data ,
   // to contain the result.
 
   coordinate_array fwarp ( Shape ( Tx , Ty ) ) ;
-  warper_type wwarp ( Shape ( Tx , Ty ) ) ;
   warp_array _warp ( Shape(Tx+5,Ty+4) ) ;
   warp_view warp = _warp.subarray ( Shape(2,1) , Shape(-3,-3) ) ;
   array_type target ( Shape(Tx,Ty) ) ;
   rc_type dfx = 0.0 , dfy = 0.0 ;
-//   if ( bcv[0] == vspline::REFLECT )
-//     dfx = .5 ;
-//   if ( bcv[1] == vspline::REFLECT )
-//     dfy = .5 ;
   
   for ( int times = 0 ; times < 1 ; times++ )
   {
@@ -208,20 +211,10 @@ void roundtrip ( view_type & data ,
         // and apply the mapping to (fx, fy), storing the result to warp[x,y]
         map ( fx , warp [ Shape ( x , y ) ] , 0 ) ;
         map ( fy , warp [ Shape ( x , y ) ] , 1 ) ;
-
-        map ( fx , wwarp.select [ Shape ( x , y ) ] [ 0 ] , wwarp.tune [ Shape ( x , y ) ] [ 0 ] , 0 ) ;
-        map ( fy , wwarp.select [ Shape ( x , y ) ] [ 1 ] , wwarp.tune [ Shape ( x , y ) ] [ 1 ] , 1 ) ;
       }
     }
   }
   
-//   coordinate_type c = { 1.0 , 1.0 } ;
-//   pixel_type px = ev ( c ) ;
-//   cout << c << " -> " << px << endl ;
-//   coordinate_type c1 = { -1.0 , -1.0 } ;
-//   pixel_type px1 = ev ( c1 ) ;
-//   cout << c1 << " -> " << px1 << endl ;
-
 #ifdef PRINT_ELAPSED
   start = std::chrono::system_clock::now();
 #endif
@@ -245,24 +238,6 @@ void roundtrip ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    vspline::remap<pixel_type,int,rc_type,2,2>
-      ( ev , wwarp , target , use_vc ) ;
-
-  
-#ifdef PRINT_ELAPSED
-  end = std::chrono::system_clock::now();
-  cout << "avg " << TIMES << " x remap1 using warper object:        "
-       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() / float(TIMES)
-       << " ms" << endl ;
-#endif
-       
-  check_diff<view_type> ( target , data ) ;
-  
-#ifdef PRINT_ELAPSED
-  start = std::chrono::system_clock::now();
-#endif
-  
-  for ( int times = 0 ; times < TIMES ; times++ )
     vspline::remap<coordinate_type,pixel_type,eval_type,2>
       ( ev , fwarp , target , use_vc ) ;
 
@@ -297,11 +272,11 @@ void roundtrip ( view_type & data ,
 #ifdef USE_VC
 
   vspline::transformation < rc_type , 2 , 2 , vsize >
-    tf ( tf_identity<rc_type> ) ; // , vtf_identity<rc_v>  ) ;
+    tf ( tf_identity<rc_type> , tf_identity<rc_v> ) ;
 
 #else
  
-  // using this call when USE_VC is defined will result in broadcasting
+  // using this call when USE_VC is defined would result in broadcasting
   // of the single-element coordinate transform. The effect is the same,
   // but the code is potentially slower.
 
@@ -317,7 +292,6 @@ void roundtrip ( view_type & data ,
   for ( int times = 0 ; times < TIMES ; times++ )
     vspline::tf_remap < pixel_type , pixel_type , rc_type , 2 , 2 >
       ( data , tf , target , bcv , DEGREE , use_vc ) ;
-
  
   // note:: with REFLECT this doesn't come out right, because of the .5 difference!
       
diff --git a/interpolator.h b/interpolator.h
index d24ef59..e7f120c 100644
--- a/interpolator.h
+++ b/interpolator.h
@@ -158,6 +158,10 @@ struct interpolator
       }
     }
   } ;
+
+#else
+
+  enum { vsize = 1 } ;
   
 #endif
 } ;
diff --git a/mapping.h b/mapping.h
index dd2d70a..d07e3be 100644
--- a/mapping.h
+++ b/mapping.h
@@ -461,8 +461,8 @@ public:
 
 /// with mapping mode TAG, out-of-bounds incoming coordinate values don't result in
 /// an exception, but offending values are 'tagged' by the remainder part being set to
-/// rc_out_of_bounds, which is a value which can't be valid. The calling code is responsible for
-/// taking appropriate action.
+/// rc_out_of_bounds, which is a value which can't be valid. The calling code is
+/// responsible for taking appropriate action.
 /// If the calling code fails to treat the 'impossible' value appropriately, this
 /// will result in a wrong result but not in an access to an invalid memory location,
 /// which might be the result of passing some impossible value for the int part.
@@ -1112,6 +1112,7 @@ map_func pick_mapping ( bc_code bc ,        ///< mapping mode
       }
       default:
       {
+        std::cerr << "unsupported BC code " << bc << std::endl ;
         throw not_supported ( "mapping for this BC code is not supported" ) ;
         break ;
       }
@@ -1185,6 +1186,7 @@ map_func pick_mapping ( bc_code bc ,        ///< mapping mode
       }
       default:
       {
+        std::cerr << "unsupported BC code " << bc << std::endl ;
         throw not_supported ( "mapping for this BC code is not supported" ) ;
         break ;
       }
diff --git a/poles.cc b/poles.cc
index 4079876..99878bf 100644
--- a/poles.cc
+++ b/poles.cc
@@ -41,42 +41,44 @@
 
 #ifndef VSPLINE_POLES_CC
 
-long double K0[] = {
+namespace vspline_constants {
+
+const long double K0[] = {
  1L ,   // basis(0)
  } ; 
-long double K1[] = {
+const long double K1[] = {
  1L ,   // basis(0)
  0.5L ,   // basis(0.5)
  } ; 
-long double K2[] = {
+const long double K2[] = {
  0.75L ,   // basis(0)
  0.5L ,   // basis(0.5)
  0.125L ,   // basis(1)
  } ; 
-double Poles_2[] = {
+const double Poles_2[] = {
 -0.17157287525381015314 ,
 } ;
-long double K3[] = {
+const long double K3[] = {
  0.66666666666666666668L ,   // basis(0)
  0.47916666666666666666L ,   // basis(0.5)
  0.16666666666666666667L ,   // basis(1)
  0.020833333333333333334L ,   // basis(1.5)
  } ; 
-double Poles_3[] = {
+const double Poles_3[] = {
 -0.26794919243112280682 ,
 } ;
-long double K4[] = {
+const long double K4[] = {
  0.59895833333333333332L ,   // basis(0)
  0.45833333333333333334L ,   // basis(0.5)
  0.19791666666666666667L ,   // basis(1)
  0.041666666666666666668L ,   // basis(1.5)
  0.0026041666666666666667L ,   // basis(2)
  } ; 
-double Poles_4[] = {
+const double Poles_4[] = {
 -0.36134122590021944266 ,
 -0.013725429297339164503 ,
 } ;
-long double K5[] = {
+const long double K5[] = {
  0.55000000000000000001L ,   // basis(0)
  0.4380208333333333333L ,   // basis(0.5)
  0.21666666666666666667L ,   // basis(1)
@@ -84,11 +86,11 @@ long double K5[] = {
  0.0083333333333333333337L ,   // basis(2)
  0.00026041666666666666668L ,   // basis(2.5)
  } ; 
-double Poles_5[] = {
+const double Poles_5[] = {
 -0.43057534709997430378 ,
 -0.043096288203264443428 ,
 } ;
-long double K6[] = {
+const long double K6[] = {
  0.51102430555555555553L ,   // basis(0)
  0.41944444444444444449L ,   // basis(0.5)
  0.22879774305555555554L ,   // basis(1)
@@ -97,12 +99,12 @@ long double K6[] = {
  0.001388888888888888889L ,   // basis(2.5)
  2.170138888888888889e-05L ,   // basis(3)
  } ; 
-double Poles_6[] = {
+const double Poles_6[] = {
 -0.48829458930304570075 ,
 -0.081679271076237264237 ,
 -0.0014141518083258114435 ,
 } ;
-long double K7[] = {
+const long double K7[] = {
  0.47936507936507936512L ,   // basis(0)
  0.4025964161706349206L ,   // basis(0.5)
  0.23630952380952380952L ,   // basis(1)
@@ -112,12 +114,12 @@ long double K7[] = {
  0.00019841269841269841271L ,   // basis(3)
  1.5500992063492063493e-06L ,   // basis(3.5)
  } ; 
-double Poles_7[] = {
+const double Poles_7[] = {
 -0.5352804307964414976 ,
 -0.12255461519232610512 ,
 -0.0091486948096082820747 ,
 } ;
-long double K8[] = {
+const long double K8[] = {
  0.45292096819196428568L ,   // basis(0)
  0.38737599206349206352L ,   // basis(0.5)
  0.24077768477182539682L ,   // basis(1)
@@ -128,13 +130,13 @@ long double K8[] = {
  2.4801587301587301589e-05L ,   // basis(3.5)
  9.6881200396825396832e-08L ,   // basis(4)
  } ; 
-double Poles_8[] = {
+const double Poles_8[] = {
 -0.57468690924881216109 ,
 -0.16303526929727354955 ,
 -0.023632294694844447475 ,
 -0.00015382131064169135559 ,
 } ;
-long double K9[] = {
+const long double K9[] = {
  0.43041776895943562614L ,   // basis(0)
  0.3736024025676532187L ,   // basis(0.5)
  0.24314925044091710759L ,   // basis(1)
@@ -146,13 +148,13 @@ long double K9[] = {
  2.7557319223985890654e-06L ,   // basis(4)
  5.3822889109347442683e-09L ,   // basis(4.5)
  } ; 
-double Poles_9[] = {
+const double Poles_9[] = {
 -0.60799738916862233751 ,
 -0.20175052019315406482 ,
 -0.04322260854048156492 ,
 -0.0021213069031808251541 ,
 } ;
-long double K10[] = {
+const long double K10[] = {
  0.41096264282441854056L ,   // basis(0)
  0.36109843474426807762L ,   // basis(0.5)
  0.24406615618885719797L ,   // basis(1)
@@ -165,14 +167,14 @@ long double K10[] = {
  2.7557319223985890654e-07L ,   // basis(4.5)
  2.6911444554673721342e-10L ,   // basis(5)
  } ; 
-double Poles_10[] = {
+const double Poles_10[] = {
 -0.63655066396958059904 ,
 -0.23818279837754796624 ,
 -0.065727033228304657109 ,
 -0.0075281946755491966489 ,
 -1.6982762823274620556e-05 ,
 } ;
-long double K11[] = {
+const long double K11[] = {
  0.39392556517556517558L ,   // basis(0)
  0.34970223188744306906L ,   // basis(0.5)
  0.24396028739778739779L ,   // basis(1)
@@ -186,14 +188,14 @@ long double K11[] = {
  2.5052108385441718776e-08L ,   // basis(5)
  1.2232474797578964246e-11L ,   // basis(5.5)
  } ; 
-double Poles_11[] = {
+const double Poles_11[] = {
 -0.66126606890063921451 ,
 -0.27218034929481393913 ,
 -0.089759599793708844118 ,
 -0.016669627366234951449 ,
 -0.00051055753444649576434 ,
 } ;
-long double K12[] = {
+const long double K12[] = {
  0.37884408454472999147L ,   // basis(0)
  0.33927295023649190319L ,   // basis(0.5)
  0.24313091801014469471L ,   // basis(1)
@@ -208,7 +210,7 @@ long double K12[] = {
  2.0876756987868098981e-09L ,   // basis(5.5)
  5.0968644989912351028e-13L ,   // basis(6)
  } ; 
-double Poles_12[] = {
+const double Poles_12[] = {
 -0.68286488419809487915 ,
 -0.30378079328817336746 ,
 -0.11435052002714780894 ,
@@ -216,7 +218,7 @@ double Poles_12[] = {
 -0.0025161662172618224839 ,
 -1.883305645063344802e-06 ,
 } ;
-long double K13[] = {
+const long double K13[] = {
  0.36537086948545281884L ,   // basis(0)
  0.32968987958591001189L ,   // basis(0.5)
  0.24178841798633465302L ,   // basis(1)
@@ -232,7 +234,7 @@ long double K13[] = {
  1.6059043836821614601e-10L ,   // basis(6)
  1.960332499612013501e-14L ,   // basis(6.5)
  } ; 
-double Poles_13[] = {
+const double Poles_13[] = {
 -0.701894251817016257 ,
 -0.33310723293052579841 ,
 -0.13890111319434489401 ,
@@ -240,7 +242,7 @@ double Poles_13[] = {
 -0.0067380314152448743045 ,
 -0.00012510011321441739246 ,
 } ;
-long double K14[] = {
+const long double K14[] = {
  0.35323915669918929845L ,   // basis(0)
  0.32085024502063192543L ,   // basis(0.5)
  0.24008299041558734203L ,   // basis(1)
@@ -257,7 +259,7 @@ long double K14[] = {
  1.1470745597729724715e-11L ,   // basis(6.5)
  7.0011874986143339324e-16L ,   // basis(7)
  } ; 
-double Poles_14[] = {
+const double Poles_14[] = {
 -0.71878378723766189751 ,
 -0.36031907191881451524 ,
 -0.16303351479903732679 ,
@@ -266,7 +268,7 @@ double Poles_14[] = {
 -0.00086402404095337124838 ,
 -2.0913096775274000322e-07 ,
 } ;
-long double K15[] = {
+const long double K15[] = {
  0.34224026135534072046L ,   // basis(0)
  0.31266660625176080971L ,   // basis(0.5)
  0.23812319491070731152L ,   // basis(1)
@@ -284,7 +286,7 @@ long double K15[] = {
  7.6471637318198164765e-13L ,   // basis(7)
  2.3337291662047779775e-17L ,   // basis(7.5)
  } ; 
-double Poles_15[] = {
+const double Poles_15[] = {
 -0.73387257168597164192 ,
 -0.3855857342780184549 ,
 -0.18652010845105168602 ,
@@ -293,7 +295,7 @@ double Poles_15[] = {
 -0.0028011514820764091618 ,
 -3.0935680451474410063e-05 ,
 } ;
-long double K16[] = {
+const long double K16[] = {
  0.33220826914249586032L ,   // basis(0)
  0.30506442781494322298L ,   // basis(0.5)
  0.23598831687663609049L ,   // basis(1)
@@ -312,7 +314,7 @@ long double K16[] = {
  4.7794773323873852978e-14L ,   // basis(7.5)
  7.2929036443899311795e-19L ,   // basis(8)
  } ; 
-double Poles_16[] = {
+const double Poles_16[] = {
 -0.74743238775188380885 ,
 -0.40907360475830745195 ,
 -0.2092287193405746315 ,
@@ -322,7 +324,7 @@ double Poles_16[] = {
 -0.00030156536330664312833 ,
 -2.3232486364235544612e-08 ,
 } ;
-long double K17[] = {
+const long double K17[] = {
  0.32300939415699870668L ,   // basis(0)
  0.29797995870819162778L ,   // basis(0.5)
  0.23373674923065111L ,   // basis(1)
@@ -342,7 +344,7 @@ long double K17[] = {
  2.8114572543455207634e-15L ,   // basis(8)
  2.144971660114685641e-20L ,   // basis(8.5)
  } ; 
-double Poles_17[] = {
+const double Poles_17[] = {
 -0.75968322407197097501 ,
 -0.43093965318021570932 ,
 -0.23108984359938430919 ,
@@ -352,7 +354,7 @@ double Poles_17[] = {
 -0.0011859331251521279364 ,
 -7.6875625812547303262e-06 ,
 } ;
-long double K18[] = {
+const long double K18[] = {
  0.31453440085864671822L ,   // basis(0)
  0.29135844665108330336L ,   // basis(0.5)
  0.2314117793664616011L ,   // basis(1)
@@ -373,7 +375,7 @@ long double K18[] = {
  1.5619206968586226463e-16L ,   // basis(8.5)
  5.958254611429682336e-22L ,   // basis(9)
  } ; 
-double Poles_18[] = {
+const double Poles_18[] = {
 -0.77080505126463716437 ,
 -0.45132873338515144823 ,
 -0.25207457469899424707 ,
@@ -384,7 +386,7 @@ double Poles_18[] = {
 -0.00010633735588702059982 ,
 -2.5812403962584360567e-09 ,
 } ;
-long double K19[] = {
+const long double K19[] = {
  0.3066931017379824246L ,   // basis(0)
  0.28515265744763108603L ,   // basis(0.5)
  0.22904564568118377632L ,   // basis(1)
@@ -406,7 +408,7 @@ long double K19[] = {
  8.2206352466243297175e-18L ,   // basis(9)
  1.5679617398499164042e-23L ,   // basis(9.5)
  } ; 
-double Poles_19[] = {
+const double Poles_19[] = {
 -0.78094644484628727987 ,
 -0.47037281947078746214 ,
 -0.27218037628176311449 ,
@@ -417,7 +419,7 @@ double Poles_19[] = {
 -0.00050841019468083302468 ,
 -1.9154786562122251559e-06 ,
 } ;
-long double K20[] = {
+const long double K20[] = {
  0.29941029032001264032L ,   // basis(0)
  0.27932165599364228926L ,   // basis(0.5)
  0.22666242185748694763L ,   // basis(1)
@@ -440,7 +442,7 @@ long double K20[] = {
  4.1103176233121648586e-19L ,   // basis(9.5)
  3.9199043496247910105e-25L ,   // basis(10)
  } ; 
-double Poles_20[] = {
+const double Poles_20[] = {
 -0.79023111767977516351 ,
 -0.48819126033675236398 ,
 -0.29142160165551617146 ,
@@ -452,7 +454,7 @@ double Poles_20[] = {
 -3.7746573197331790075e-05 ,
 -2.8679944881725126467e-10 ,
 } ;
-long double K21[] = {
+const long double K21[] = {
  0.29262268723143477922L ,   // basis(0)
  0.2738298047486301248L ,   // basis(0.5)
  0.22428009387883276411L ,   // basis(1)
@@ -476,7 +478,7 @@ long double K21[] = {
  1.9572941063391261232e-20L ,   // basis(10)
  9.3331055943447405012e-27L ,   // basis(10.5)
  } ; 
-double Poles_21[] = {
+const double Poles_21[] = {
 -0.79876288565466957436 ,
 -0.50489153745536197171 ,
 -0.30982319641503575092 ,
@@ -488,7 +490,7 @@ double Poles_21[] = {
 -0.00021990295763158517806 ,
 -4.7797646894259869337e-07 ,
 } ;
-long double K22[] = {
+const long double K22[] = {
  0.28627661405538603955L ,   // basis(0)
  0.26864594027689889732L ,   // basis(0.5)
  0.22191207309687150606L ,   // basis(1)
@@ -513,7 +515,7 @@ long double K22[] = {
  8.8967913924505732872e-22L ,   // basis(10.5)
  2.1211603623510773867e-28L ,   // basis(11)
  } ; 
-double Poles_22[] = {
+const double Poles_22[] = {
 -0.80662949916286152963 ,
 -0.52057023687190062677 ,
 -0.3274164733138280603 ,
@@ -526,7 +528,7 @@ double Poles_22[] = {
 -1.3458154983225084633e-05 ,
 -3.186643260432269507e-11 ,
 } ;
-long double K23[] = {
+const long double K23[] = {
  0.28032619854980754502L ,   // basis(0)
  0.26374269458034057742L ,   // basis(0.5)
  0.21956831005031718209L ,   // basis(1)
@@ -552,7 +554,7 @@ long double K23[] = {
  3.868170170630684038e-23L ,   // basis(11)
  4.6112181790240812755e-30L ,   // basis(11.5)
  } ; 
-double Poles_23[] = {
+const double Poles_23[] = {
 -0.81390562354320794558 ,
 -0.53531408371104993726 ,
 -0.34423627688965990901 ,
@@ -565,7 +567,7 @@ double Poles_23[] = {
 -9.5733943500721317005e-05 ,
 -1.1936918816067781773e-07 ,
 } ;
-long double K24[] = {
+const long double K24[] = {
  0.27473197352118810147L ,   // basis(0)
  0.25909593388549224613L ,   // basis(0.5)
  0.21725612218406020861L ,   // basis(1)
@@ -592,7 +594,7 @@ long double K24[] = {
  1.6117375710961183492e-24L ,   // basis(11.5)
  9.6067045396335026575e-32L ,   // basis(12)
  } ; 
-double Poles_24[] = {
+const double Poles_24[] = {
 -0.82065517417952760226 ,
 -0.54920097364808984075 ,
 -0.3603190653178175995 ,
@@ -606,6 +608,7 @@ double Poles_24[] = {
 -4.8126755630580574097e-06 ,
 -3.5407088073360672255e-12 ,
 } ;
+
 const double* precomputed_poles[] = {
   0, 
   0, 
@@ -633,6 +636,7 @@ const double* precomputed_poles[] = {
   Poles_23, 
   Poles_24, 
 } ;
+
 const long double* precomputed_basis_function_values[] = {
   K0, 
   K1, 
@@ -661,5 +665,7 @@ const long double* precomputed_basis_function_values[] = {
   K24, 
 } ;
 
+} ; // end of namespace vspline_constants
+
 #define VSPLINE_POLES_CC
-#endif
\ No newline at end of file
+#endif
diff --git a/prefilter.h b/prefilter.h
index 3331333..b92d653 100644
--- a/prefilter.h
+++ b/prefilter.h
@@ -187,7 +187,7 @@ void solve ( input_array_type & input ,
     double pole [ npoles ] ;
     pole[0] = smoothing ;
     for ( int i = 1 ; i < npoles ; i++ )
-      pole[i] = precomputed_poles [ degree ] [ i - 1 ] ;
+      pole[i] = vspline_constants::precomputed_poles [ degree ] [ i - 1 ] ;
     filter_nd < input_array_type , output_array_type , math_type >
               ( input ,
                 output ,
@@ -204,7 +204,7 @@ void solve ( input_array_type & input ,
                 output ,
                 bcv ,
                 degree / 2 ,
-                precomputed_poles [ degree ] ,
+                vspline_constants::precomputed_poles [ degree ] ,
                 tolerance ,
                 use_vc ,
                 nslices ) ;
diff --git a/prefilter_poles.cc b/prefilter_poles.cc
index 5165f43..5f2d11a 100644
--- a/prefilter_poles.cc
+++ b/prefilter_poles.cc
@@ -97,7 +97,7 @@ calculatePrefilterCoefficients(int DEGREE)
     const int r = DEGREE / 2;
     double a[2*r+1] ;
     double z[4*r+2] ;
-    cout << "long double K" << DEGREE << "[] = {" << endl ;
+    cout << "const long double K" << DEGREE << "[] = {" << endl ;
     // we calculate the basis function values at 0.5 intervals
     int imax = 2 * r ;
     if ( DEGREE & 1 )
@@ -148,7 +148,7 @@ void print_poles ( int degree )
   ArrayVector<double> res = calculatePrefilterCoefficients<double> ( degree ) ;
   if ( degree > 1 )
   {
-    cout << "double Poles_" << degree << "[] = {" << endl ;
+    cout << "const double Poles_" << degree << "[] = {" << endl ;
     for ( auto r : res )
       cout << r << " ," << endl ;
     cout << "} ;" << endl ;
diff --git a/remap.h b/remap.h
index 2cf7565..ef8bc72 100644
--- a/remap.h
+++ b/remap.h
@@ -34,7 +34,7 @@
 /// \brief set of generic remap functions
 ///
 /// My foremost reason to have efficient B-spline processing is the formulation of a generic
-/// remap function. This is a function which takes an array of real-valued coordinates and
+/// remap function. This is a function which takes an array of real-valued nD coordinates and
 /// an interpolator over a source array. Now each of the real-valued coordinates is fed into
 /// the interpolator in turn, yielding a value, which is placed in the output array at the same
 /// place the coordinate occupies in the coordinate array. To put it concisely, if we have
@@ -54,14 +54,14 @@
 ///
 /// all remap routines take several template arguments:
 ///
-/// - value_type:      like, float, double, complex<>, pixels, TinyVectors etc.
-/// - coordinate_type: TinyVector of float or double for coordinates, or split type
-/// - dim_in:          number of dimensions of input array (and of 'warp' coordinates)
-/// - dim_out:         number of dimensions of output array
+/// - coordinate_type:   TinyVector of float or double for coordinates, or split type
+/// - value_type:        like, float, double, complex<>, pixels, TinyVectors etc.
+/// - interpolator_type: functor object yielding values for coordinates
+/// - dim_out:           number of dimensions of output array
 ///
-/// You can see from the two dimension parameters that remaps to other-dimensional
-/// objects are supported. This makes it possible to, for example, remap from a
-/// volume to a 2D image, using a 2D warp array containing 3D coordinates.
+/// remaps to other-dimensional objects are supported. This makes it possible to,
+/// for example, remap from a volume to a 2D image, using a 2D warp array containing
+/// 3D coordinates.
 ///
 /// In these routines, we can switch the use of vectorization on or off. When using
 /// vectorization, this is done in a straightforward fashion by aggregating the input.
@@ -88,6 +88,9 @@
 /// In the 'example' folder, there is a program called pano_extract.cc, which demonstrates
 /// the use of a transformation-based remap. There's also pv.cc which uses remap extensively,
 /// and, being a panorama viewer, gives instant visual feedback showing the results as images.
+///
+/// This file also has code to evaluate a b-spline at positions in a mesh grid, which can be used
+/// for scaling, and for separable geometric transformations.
 
 #ifndef VSPLINE_REMAP_H
 #define VSPLINE_REMAP_H
@@ -116,15 +119,18 @@ using evaluator_type
               typename coordinate_traits < coordinate_type > :: ic_type > ;
 
 /// st_remap is the single-thread routine to perform a remap with a given warp array
-/// at p_warp (containing coordinates) into a target array _output, which takes the
+/// at p_warp (containing coordinates) into a target array at p_output, which takes the
 /// interpolated values at the locations the coordinates indicate.
+///              
 /// While calling this function directly is entirely possible, code wanting to perform
 /// a remap would rather call the next function which partitions the task at hand and
-/// uses a separate thread for each partial job.
-/// The first parameter, range, specifies a subarray of _warp and _output which this
+/// uses a thread pool to process the partial jobs.
+///              
+/// The first parameter, range, specifies a subarray of warp and output which this
 /// routine is meant to process. this may be the full range, but normally it will cover
 /// only equally sized parts of these arrays, where the precise extent is determined
-/// by multithred() or it's caller in the next routine, see there.
+/// by multithread() or it's caller in the next routine, see there.
+///              
 /// Note how I pass in several pointers. Normally I'd pass such data by reference,
 /// but I can't do this here (or haven't found a way yet) because of my parameter-handling
 /// in multithread() which I couldn't get to work with references. The effect is the same.
@@ -132,20 +138,21 @@ using evaluator_type
 /// in this routine is not specific to b-splines, it accepts not only b-spline evaluators
 /// but any object satisfying the interface defined in class interpolator. This opens the
 /// remapping code for other interpolation methods as well.
+///              
 /// The interface defined by class interpolator is minimal and does not contain pure
 /// virtual methods for many of the methods defined in class evaluator, but then many
 /// of class evaluator's methods only are meaningful for b-spline evaluation, like
 /// treatment of 'split coordinates'. The remap code in this method and it's companion
 /// below only use methods from the interpolator interface.
               
-template < typename coordinate_type , // type of coordinates in the warp array
-           typename value_type ,      // type of values to produce
+template < typename coordinate_type ,    // type of coordinates in the warp array
+           typename value_type ,         // type of values to produce
            typename interpolator_type  , // functor object yielding values for coordinates
-           int dim_out >              // number of dimensions of output array
-void st_remap ( shape_range_type < dim_out >                 range ,
+           int dim_out >                 // number of dimensions of output array
+void st_remap ( shape_range_type < dim_out >                  range ,
                 const interpolator_type * const               p_ev ,
                 const MultiArrayView < dim_out , coordinate_type > * const p_warp ,
-                MultiArrayView < dim_out , value_type >      *p_output ,
+                MultiArrayView < dim_out , value_type > *     p_output ,
                 bool use_vc = true
               )
 {
@@ -163,14 +170,7 @@ void st_remap ( shape_range_type < dim_out >                 range ,
 
   auto warp = p_warp->subarray ( range[0] , range[1] ) ;
   auto output = p_output->subarray ( range[0] , range[1] ) ;
-  
-#ifdef USE_VC
-
-  typedef typename interpolator_type::ele_v ele_v ;
-  const int vsize = interpolator_type::vsize ;
 
-#endif
-  
   auto source_it = warp.begin() ;
   auto target_it = output.begin() ;
   
@@ -186,6 +186,9 @@ void st_remap ( shape_range_type < dim_out >                 range ,
 
 #ifdef USE_VC
 
+  typedef typename interpolator_type::ele_v ele_v ;
+  const int vsize = interpolator_type::vsize ;
+
   if ( use_vc )
   {
     int aggregates = warp.elementCount() / vsize ;            // number of full vectors
@@ -284,10 +287,10 @@ void st_remap ( shape_range_type < dim_out >                 range ,
 /// threads, leaving it to them to split the containers accordingly, which results in
 /// simple and transparent code with only few limitations, see multithread.h
 
-template < typename coordinate_type , // type of coordinates in the warp array
-           typename value_type ,      // type of values to produce
+template < typename coordinate_type ,    // type of coordinates in the warp array
+           typename value_type ,         // type of values to produce
            typename interpolator_type  , // functor object yielding values for coordinates
-           int dim_out >     // number of dimensions of output array
+           int dim_out >                 // number of dimensions of output array
 void remap ( const interpolator_type & ev ,
              const MultiArrayView < dim_out , coordinate_type > & warp ,
              MultiArrayView < dim_out , value_type > & output ,
@@ -319,16 +322,21 @@ void remap ( const interpolator_type & ev ,
                              value_type ,
                              interpolator_type ,
                              dim_out > ,
-                 nthreads * 8 , range , &ev , &warp , &output , use_vc ) ;
+                nthreads * 8 , // heuristic. desired number of partitions
+                range ,        // 'full' range which is to be partitioned
+                &ev ,          // interpolator_type object
+                &warp ,        // warp array
+                &output ,      // target array
+                use_vc ) ;     // flag to switch use of Vc on/off
 } ;
 
+template < int dimension >
+using bcv_type = vigra::TinyVector < bc_code , dimension > ;
+
 /// This is a variant of remap, which directly takes an array of values and remaps it,
 /// internally creating a b-spline of given order just for the purpose. This is used for
 /// one-shot remaps where the spline isn't reused.
 
-template < int dimension >
-using bcv_type = vigra::TinyVector < bc_code , dimension > ;
-
 template < typename coordinate_type , // type of coordinates in the warp array
            typename value_type ,      // type of values to produce
            int dim_out >              // number of dimensions of output array
@@ -369,7 +377,7 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
   typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
   evaluator_type ev ( bsp ) ;
   
-  // and call the remap variant taking a bspline object,
+  // and call the other remap variant,
   // passing in the evaluator, the coordinate array and the target array
   
   remap < coordinate_type , value_type , evaluator_type , dim_out >
@@ -378,222 +386,8 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
   return 0 ;
 }
 
-/// definition of an object containing separate arrays for the components of split coordinates.
-/// This way the data can be accessed by gather/scatter operations, which should be more efficient
-/// than having these data in interleaved format, where, due to their mixed nature, we have to
-/// 'manually' de/interleave them. In my tests, this improved performance in unvectorized code,
-/// while for vectorized code it made no noticeable difference.
-// TODO: runs, but needs more testing
-// TODO: we want an equivalent processing offset/tune pairs which would be even more compact
-
-template < class ic_type ,
-           class rc_type ,
-           int dimension ,
-           int coordinate_dimension >
-struct warper
-{
-  typedef vigra::TinyVector < ic_type , coordinate_dimension > nd_ic_type ;
-  typedef vigra::TinyVector < rc_type , coordinate_dimension > nd_rc_type ;
-
-  vigra::MultiArray < dimension , nd_ic_type > _select ;
-  vigra::MultiArray < dimension , nd_rc_type > _tune ;
-  
-  vigra::MultiArrayView < dimension , nd_ic_type > select ;
-  vigra::MultiArrayView < dimension , nd_rc_type > tune ;
-
-  typedef typename vigra::MultiArrayShape < dimension > :: type shape_type ;
-  
-  shape_type shape ;
-
-  /// constructor taking a shape argument. This creates the containers for integral and
-  /// remainder parts of the coordinates internally.
-
-  warper ( const shape_type & _shape )
-  : shape ( _shape ) ,
-    _select ( _shape ) ,
-    _tune ( _shape )
-  {
-    select = _select ;
-    tune = _tune ;
-  } ;
-  
-  /// constructor taking MultiArrayViews to externally managed containers
-
-  warper ( vigra::MultiArrayView < dimension , nd_ic_type > & _select ,
-           vigra::MultiArrayView < dimension , nd_rc_type > & _tune )
-  {
-    select ( _select ) ;
-    tune ( _tune ) ;
-  } ;
-} ;  
-
-/// single-threaded remap taking separate arrays 'tune' and 'select', which may be chunks
-/// of the arrays in a warper object. This routine is called by the one below it via
-/// 'multithread'. Since operation on split coordinates is specific to b-splines, we
-/// limit this code to use class evaluator.
-
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class ic_type , 
-           class rc_type ,
-           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-           int dim_out >           // number of dimensions of output array
-void st_wremap ( shape_range_type < dim_out > range ,
-                 const evaluator < dim_in , value_type , rc_type , ic_type > * const p_ev ,
-                 const MultiArrayView < dim_out ,
-                                  vigra::TinyVector < ic_type , dim_in > > _select ,
-                 const MultiArrayView < dim_out ,
-                                  vigra::TinyVector < rc_type , dim_in > > _tune ,
-                 MultiArrayView < dim_out ,
-                                  value_type > _output ,
-                 bool use_vc = true
-               )
-{
-  typedef vigra::TinyVector < rc_type , dim_in > nd_rc_type ;
-  typedef vigra::TinyVector < ic_type , dim_in > nd_ic_type ;
-  typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
-  typedef typename evaluator_type::ele_type ele_type ;
-
-  auto select = _select.subarray ( range[0] , range[1] ) ;
-  auto tune = _tune.subarray ( range[0] , range[1] ) ;
-  auto output = _output.subarray ( range[0] , range[1] ) ;
-  
-#ifdef USE_VC
-
-  typedef typename evaluator_type::ele_v ele_v ;
-  const int vsize = evaluator_type::vsize ;
-
-#endif
-  
-  auto select_it = select.begin() ;
-  auto tune_it = tune.begin() ;
-  auto target_it = output.begin() ;
-  
-  int leftover = select.elementCount() ;
-
-#ifdef USE_VC
-
-  if ( use_vc )
-  {
-    int aggregates = select.elementCount() / vsize ;            // number of full vectors
-    leftover = select.elementCount() - aggregates * vsize ;     // any leftover single values
-    nd_ic_type * pselect = select.data() ;                      // acces to memory
-    nd_rc_type * ptune = tune.data() ;                          // acces to memory
-    value_type * destination = output.data() ;
-
-    // for now we require that both select and tune are unstrided!
-    assert ( select.isUnstrided() && tune.isUnstrided() ) ;
-    
-    if ( output.isUnstrided() )
-    {
-      // best case: output array has consecutive memory, no need to buffer
-      for ( int a = 0 ;
-           a < aggregates ;
-           a++ , pselect += vsize , ptune += vsize , destination += vsize )
-      {
-        p_ev->v_eval ( pselect , ptune , destination ) ;
-      }
-      target_it += aggregates * vsize ;
-    }
-    else
-    {
-      // we fall back to buffering and storing individual result values
-      // TODO while this is a straightforward implementation, it should be more efficient
-      // to (de)interleave here and call the fully vectorized evaluation code.
-      value_type target_buffer [ vsize ] ;
-      for ( int a = 0 ; a < aggregates ; a++ , pselect += vsize , ptune += vsize )
-      {
-        p_ev->v_eval ( pselect , ptune , target_buffer ) ;
-        for ( int e = 0 ; e < vsize ; e++ )
-        {
-          *target_it = target_buffer[e] ;
-          ++target_it ;
-        }
-      }
-    }
-    select_it += aggregates * vsize ;
-    tune_it += aggregates * vsize ;
-  }
-  
-#endif // USE_VC
-
-  // process leftovers, if any - if vc isn't used, this loop does all the processing
-  while ( leftover-- )
-  {
-    // process leftovers with single-value evaluation
-    p_ev->eval ( *select_it , *tune_it , *target_it ) ;
-    ++select_it ;
-    ++tune_it ;
-    ++target_it ;
-  }
-}
-
-/// overload of remap() using presplit coordinates packaged in a warper object
-
-template < class value_type ,      // like, float, double, complex<>, pixels, TinyVectors etc.
-           class ic_type ,         // integral coordinate part
-           class rc_type ,         // real coordinate part
-           int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-           int dim_out >           // number of dimensions of output array
-void remap ( const evaluator < dim_in , value_type , rc_type , ic_type > & ev ,
-             const warper < ic_type , rc_type , dim_out , dim_in > & warp ,
-             MultiArrayView < dim_out , value_type > output ,
-             bool use_vc = true ,
-             int nthreads = ncores
-           )
-{
-  typedef warper < ic_type , rc_type , dim_out , dim_in > warper_type ;
-  typedef typename warper_type::nd_ic_type nd_ic_type ;
-  typedef typename warper_type::nd_rc_type nd_rc_type ;
-
-  if ( output.shape() != warp.shape )
-  {
-    throw shape_mismatch ( "remap: the shapes of the warper and the output array do not match" ) ;
-  }
-  shape_range_type < dim_out > range ( shape_type < dim_out > () , output.shape() ) ;
-  
-  multithread ( & st_wremap < value_type , ic_type , rc_type , dim_in , dim_out > ,
-                 nthreads * 8 , range , &ev , warp.select , warp.tune , output , use_vc ) ;
-} ;
-
 // Next we have some collateral code to get ready for the transformation-based remap
 
-// /// type alias for a coordinate transformation functor, to (hopefully) make the signature
-// /// more legible. In words: 'transform' is a standard function transforming an n-dimensional
-// /// incoming coordinate to an m-dimensional outgoing coordinate. This function takes both
-// /// coordinates as references.
-// 
-// template < class rc_type ,       // elementary type for coordinates, usually float
-//            int dim_in ,          // number of dimensions of input array (and of 'warp' coordinates)
-//            int dim_out >         // number of dimensions of output array
-// using transform
-// = std::function < void ( const vigra::TinyVector < rc_type , dim_out > & ,
-//                                vigra::TinyVector < rc_type , dim_in > & ) > ;
-// 
-// #ifdef USE_VC
-// 
-// /// We use this type alias, since the type is rather unwieldy
-// /// what we need to define is a SimdArray of rc_type, which has just as
-// /// many elements as a simdized_type of value_type's elementary type:
-// 
-// template < class rc_type , class value_type >
-// using rc_v
-// = typename
-//    vector_traits < rc_type ,
-//                    vector_traits < typename ExpandElementResult < value_type > :: type > :: vsize
-//                  > :: type ;
-//                  
-// /// This type alias defines a vectorized coordinate transformation.
-// /// it is the equivalent of the unvectorized type above, taking TinyVectors
-// /// of the appropriate SimdArray objects instead of singular values.
-// 
-// template < class rc_v ,            // simdized type of vsize real coordinates
-//            int dim_in ,            // number of dimensions of input array (and of 'warp' coordinates)
-//            int dim_out >           // number of dimensions of output array
-// using vtransform
-// = std::function < void
-//                    ( const vigra::TinyVector < rc_v , dim_out > & ,
-//                            vigra::TinyVector < rc_v , dim_in > & ) > ;
-
 /// declaration of a generalized coordinate transformation. In vspline, we're using
 /// a 'reverse transformation': For every coordinate ct into the target array (the result
 /// of the 'remap' procedure, the final picture) we calculate a corresponding coordinate cs
@@ -609,7 +403,7 @@ void remap ( const evaluator < dim_in , value_type , rc_type , ic_type > & ev ,
 ///
 /// std::is_same < decltype ( cs[0] ) , decltype ( ct[0] ) > :: value == true
 
-template < class component_type ,  // 1D coordinate type, single value or Simdarray
+template < class component_type ,  // 1D coordinate type, single value or SimdArray
            int dim_target ,        // number of dimensions of target array, incoming coordinate
            int dim_source >        // number of dimensions of coefficient array, resultant coordinate
 using transform
@@ -619,7 +413,7 @@ using transform
 /// class transformation is a (multi-) functor, handling coordinate transformations
 /// this class simplifies pulling in transformations by automatically providing a brodcasting
 /// method to apply a single-value transformation to every element of an incoming SimdArray,
-/// if there is nor vectorized code at hand. This is less efficient, but often just enough,
+/// if there is no vectorized code at hand. This is less efficient, but often just enough,
 /// especially if the transformation is simple.
 ///
 /// class transformation is constructed either with a single coordinate transformation
@@ -627,27 +421,31 @@ using transform
 /// functor as second constructor argument. The functors have to satisfy the two type aliases
 /// transform and vtransform (see above). For illustration, consider this:
 ///
-/// template < class rc_type >
-/// void tf_identity ( const TinyVector < rc_type , 2 > & c_in ,
-///                          TinyVector < rc_type , 2 > & c_out )
-/// {
-///   c_out = c_in ;
-/// }
-/// 
-/// template < class rc_v >
-/// void vtf_identity ( const TinyVector < rc_v , 2 > & c_in ,
-///                           TinyVector < rc_v , 2 > & c_out )
+/// tf_identity defines a transformation function template leaving the coordinate unchanged
+///
+/// template < class coordinate_type >
+/// void tf_identity ( const TinyVector < coordinate_type , 2 > & c_in ,
+///                          TinyVector < coordinate_type , 2 > & c_out )
 /// {
 ///   c_out = c_in ;
 /// }
-/// 
-///   vspline::transformation < float , 2 , 2 >
-///      tf ( tf_identity<float> , vtf_identity<float_v>  ) ;
+///
+/// now we can create a vspline::transformation object, passing in two specializations
+/// of the transformation function, one for unvectorized and one for vectorized coordinates
+///
+/// vspline::transformation < float , 2 , 2 >
+///   tf ( tf_identity<float> , vtf_identity<Vc::float_v>  ) ;
+///
+/// we could also using the single-argument constructor to the same effect, but not using
+/// vectorized transformations:
+///                              
+/// vspline::transformation < float , 2 , 2 >
+///   tf ( tf_identity<float> ) ;
 
 template < class rc_type ,   /// elementary coordinate type
            int dim_target ,  /// dimension of incoming coordinates (== dim. of target array)
            int dim_source ,  /// dimension of outgoing coordinates (== dim. of coefficient array)
-           int vsize = 1     /// number of elements in Simdized coordinate
+           int vsize = 1     /// number of elements in simdized coordinate
          >
 class transformation
 {
@@ -676,41 +474,43 @@ class transformation
   /// of the transformation has been written and is passed into class transformation's constructor,
   /// bradcasting is no longer used.
 
-  void broadcast ( const target_nd_rc_v & c_target_v , source_nd_rc_v & c_source_v )
+  struct broadcast
   {
-    target_nd_rc_type c_target ;
-    source_nd_rc_type c_source ;
- 
-    for ( int e = 0 ; e < vsize ; e++ )
+    transform_type tf ;   /// functor for single-value coordinate transformation
+
+    broadcast ( transform_type _tf )
+    : tf ( _tf )
+    { } ;
+    
+    void operator() ( const target_nd_rc_v & c_target_v ,
+                      source_nd_rc_v & c_source_v )
     {
-      for ( int d = 0 ; d < dim_source ; d++ )
-        c_target[d] = c_target_v[d][e] ;
-      tf ( c_target , c_source ) ;
-      for ( int d = 0 ; d < dim_target ; d++ )
-        c_source_v[d][e] = c_source[d] ;
+      target_nd_rc_type c_target ;
+      source_nd_rc_type c_source ;
+  
+      for ( int e = 0 ; e < vsize ; e++ )
+      {
+        for ( int d = 0 ; d < dim_source ; d++ )
+          c_target[d] = c_target_v[d][e] ;
+        tf ( c_target , c_source ) ;
+        for ( int d = 0 ; d < dim_target ; d++ )
+          c_source_v[d][e] = c_source[d] ;
+      }
     }
-  }
+  } ;
 
 public:
 
-  /// the two-argument constructor takes tranformation functors for both unvectorized
-  /// and vectorized coordinate transformations:
+  /// the constructor takes tranformation functors for both unvectorized
+  /// and vectorized coordinate transformations. If only the unvectorized
+  /// transform is passed, it is automatically broadcasted.
   
-  transformation ( transform_type _tf , vtransform_type _vtf )
+  transformation ( transform_type _tf ,
+                   vtransform_type _vtf = 0 )
   : tf ( _tf ) ,
-    vtf ( _vtf )
+    vtf ( _vtf ? _vtf : broadcast ( _tf ) )
   { } ;
 
-  /// this constructor takes only the single-value transformation functor.
-  /// broadcast (see above) is used to emulate vectorized transformation
-  /// If the performance penalty for not using a vectorized transformation is irrelevant,
-  /// or to try out a transformation, this mode of operation is just as good.
-
-  transformation ( transform_type _tf )
-  : tf ( _tf )
-  {
-  } ;
-
 #else
 
 public:
@@ -726,7 +526,7 @@ public:
 public :
 
   /// class transformation's operator() delegates to the functor passed in at construction.
-  /// it transforms atarget coordinate, c_target, into a source coordinate c_source
+  /// it transforms a target coordinate, c_target, into a source coordinate c_source
 
   void operator() ( const target_nd_rc_type& c_target , source_nd_rc_type& c_source )
   {
@@ -739,10 +539,7 @@ public :
   
   void operator() ( const target_nd_rc_v& c_target , source_nd_rc_v& c_source )
   {
-    if ( vtf )
-      vtf ( c_target , c_source ) ;
-    else
-      broadcast ( c_target , c_source ) ;
+    vtf ( c_target , c_source ) ;
   }
 
 #endif
@@ -759,7 +556,7 @@ public :
 /// - the shape (end - start) must have more than vsize entries
 /// - the iteration doesn't proceed beyond the number of full vectors
 /// The vectorized coordinate delivered at the 'wrap-around-point', so one iteration
-/// after the last full set has ben taken, will be partly wrapped around to the start
+/// after the last full set has been taken, will be partly wrapped around to the start
 /// of the iteration. This makes sure the contained ordinary coordinates are valid.
 /// but accessing data through it will again touch elements which have already been
 /// seen. As long as the indices aren't used for an in-place operation, recalculating
@@ -842,7 +639,7 @@ public:
   } ;
 } ;
 
-// currently unused variant. seems to perform the same.
+// currently unused variant, using vigra's MultiCoordinateIterator. seems to perform the same.
 
 // template < class _rc_v , int _dimension >
 // struct coordinate_iterator_v
@@ -924,9 +721,9 @@ public:
 /// also suitable for any type of interpolator satisfying the interface defined in
 /// class interpolator in interpolator.h.
 
-template < class interpolator_type , ///< type satisfying the interface in class interpolator
-           int dim_target ,          ///< dimension of incoming coordinates (== dim. of target array)
-           int dim_source >          ///< dimension of outgoing coordinates (== dim. of coefficient array)
+template < class interpolator_type , // type satisfying the interface in class interpolator
+           int dim_target ,          // dimension of incoming coordinates (== dim. of target array)
+           int dim_source >          // dimension of outgoing coordinates (== dim. of coefficient array)
 void st_tf_remap ( shape_range_type < dim_target > range ,
                    const interpolator_type * const p_interpolator ,
                    transformation < typename interpolator_type::rc_type ,
@@ -1033,9 +830,9 @@ void st_tf_remap ( shape_range_type < dim_target > range ,
 /// This is just as for the warp-array-based remap. The target array is split into
 /// chunks, which are in turn processed in a thread each by the single-threaded routine.
 
-template < class interpolator_type , ///< type satisfying the interface in class interpolator
-           int dim_target ,          ///< dimension of incoming coordinates (== dim. of target array)
-           int dim_source >          ///< dimension of outgoing coordinates (== dim. of coefficient array)
+template < class interpolator_type , // type satisfying the interface in class interpolator
+           int dim_target ,          // dimension of incoming coordinates (== dim. of target array)
+           int dim_source >          // dimension of outgoing coordinates (== dim. of coefficient array)
 void tf_remap ( const interpolator_type & ev ,
                 transformation < typename interpolator_type::rc_type ,
                                  dim_target ,
@@ -1055,14 +852,14 @@ void tf_remap ( const interpolator_type & ev ,
 
 /// this highest-level transform-based remap takes an input array and creates
 /// a b-spline over it internally. Then it calles the previous routine, which
-/// takes a bspline as it's first parameter. Like with warp-array-based remap,
+/// takes an interpolator as it's first parameter. Like with warp-array-based remap,
 /// this is for on-the-fly remapping where the b-spline won't be reused.
 
-template < typename input_value_type ,  ///< type of values in input
-           typename output_value_type , /// type of values in output
-           typename rc_type ,           /// elementary type of coordinates
-           int dim_target ,             ///< dimension of incoming coordinates (== dim. of target array)
-           int dim_source >             ///< dimension of outgoing coordinates (== dim. of coefficient array)
+template < typename input_value_type ,  // type of values in input
+           typename output_value_type , // type of values in output
+           typename rc_type ,           // elementary type of coordinates
+           int dim_target ,             // dimension of incoming coordinates (== dim. of target array)
+           int dim_source >             // dimension of outgoing coordinates (== dim. of coefficient array)
 void tf_remap ( MultiArrayView < dim_source ,
                                  input_value_type > input ,
                 transformation < rc_type ,
@@ -1202,6 +999,8 @@ void make_warp_array ( transformation < rc_type ,
   
 }
 
+namespace detail // workhorse code for grid_eval
+{
 // in grid_weight, for every dimension we have a set of ORDER weights
 // for every position in this dimension. in grid_ofs, we have the
 // partial offset for this dimension for every position. these partial
@@ -1337,7 +1136,9 @@ struct _grid_eval < interpolator_type ,
     }
   }
 } ;
-  
+
+} ; // end of namespace detail
+
 // Here is the single-threaded code for the grid_eval function.
 // The first argument is a shape range, defining the subsets of data
 // to process in a single thread. the remainder are forwards of the
@@ -1405,10 +1206,10 @@ void st_grid_eval ( shape_range_type < target_type::actual_dimension > range ,
   MultiArray < 2 , weight_type > weight ( vigra::Shape2 ( ORDER , dim_target ) ) ;
   
   // now call the recursive workhorse routine
-  _grid_eval < interpolator_type ,
-               target_type ,
-               weight_type ,
-               dim_target - 1 >()
+  detail::_grid_eval < interpolator_type ,
+                       target_type ,
+                       weight_type ,
+                       dim_target - 1 >()
    ( 0 ,
      weight ,
      grid_weight ,
@@ -1429,7 +1230,7 @@ void st_grid_eval ( shape_range_type < target_type::actual_dimension > range ,
 // this is the multithreaded version of grid_eval, which sets up the
 // full range over 'result' and calls 'multithread' to do the rest
 
-/// grid_eval evaluates a b-spline object (TODO: general interpolator obj)
+/// grid_eval evaluates a b-spline object (TODO: general interpolator obj?)
 /// at points whose coordinates are distributed in a grid, so that for
 /// every axis there is a set of as many coordinates as this axis is long,
 /// which will be used in the grid as the coordinate for this axis at the
@@ -1472,7 +1273,7 @@ void grid_eval ( rc_type ** const grid_coordinate ,
   const int dim_target = target_type::actual_dimension ;
   shape_range_type < dim_target > range ( shape_type < dim_target > () , result.shape() ) ;
   multithread ( st_grid_eval < interpolator_type , target_type , weight_type , rc_type > ,
-                ncores , range , grid_coordinate , &itp , &result ) ;
+                ncores * 8 , range , grid_coordinate , &itp , &result ) ;
 }
 
 } ; // end of namespace vspline

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git