[vspline] 46/72: removed the use_vc flag in all its guises. Now use of Vc is either switched on (by defining USE_VC) or off (default). Compiles with USE_VC will always (try to) run the vector code. Switching off the use of the vector code in a compile with USE_VC defined is no longer an option.

Sun Jul 2 09:02:41 UTC 2017

This is an automated email from the git hooks/post-receive script.

kfj-guest pushed a commit to branch master
in repository vspline.

commit 4075e7b837abdccfa6ca2491f3854a8208de76e0
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date:   Thu Apr 20 09:52:32 2017 +0200

    removed the use_vc flag in all its guises.
    Now use of Vc is either switched on (by defining USE_VC) or off (default).
    Compiles with USE_VC will always (try to) run the vector code.
    Switching off the use of the vector code in a compile with USE_VC defined
    is no longer an option.
---
 bspline.h            |   7 +-
 doxy.h               |   4 +-
 example/roundtrip.cc |  19 +++--
 filter.h             |  19 ++---
 prefilter.h          |   3 -
 remap.h              | 200 ++++++++++++++++++++-------------------------------
 6 files changed, 92 insertions(+), 160 deletions(-)

diff --git a/bspline.h b/bspline.h
index 9e3f8ba..ac18706 100644
--- a/bspline.h
+++ b/bspline.h
@@ -490,8 +490,7 @@ public:
   /// after prefilter() terminates, so it's safe to pass in some MultiArrayView
   /// which is destroyed after the call to prefilter().
 
-  void prefilter ( bool use_vc = true ,           ///< use Vc by default
-                   int njobs = default_njobs ,    ///< intended number of jobs to use
+  void prefilter ( int njobs = default_njobs ,    ///< intended number of jobs to use
                    view_type data = view_type() ) ///< view to knot point data to use instead of 'core'
   {
     // if the user should have modified 'bcv' since the bspline object's creation,
@@ -550,7 +549,6 @@ public:
                 spline_degree ,
                 tolerance ,
                 smoothing ,
-                use_vc ,
                 njobs
               ) ;
         break ;
@@ -564,7 +562,6 @@ public:
                 spline_degree ,
                 tolerance ,
                 smoothing ,
-                use_vc ,
                 njobs
               ) ;
         // using the more general code here now, since the frame may be larger
@@ -594,7 +591,6 @@ public:
                 spline_degree ,
                 tolerance ,
                 smoothing ,
-                use_vc ,
                 njobs
               ) ;
         break ;
@@ -612,7 +608,6 @@ public:
                 spline_degree ,
                 tolerance ,
                 smoothing ,
-                use_vc ,
                 njobs
               ) ;
         break ;
diff --git a/doxy.h b/doxy.h
index 5ac8291..62badcd 100644
--- a/doxy.h
+++ b/doxy.h
@@ -53,7 +53,7 @@
  
  - <a href="https://compeng.uni-frankfurt.de/index.php?id=vc">Vc</a>, for the use of the CPU's vector units
  
- I find VIGRA indispensible, omitting it from vspline is not really an option. But it is possible not to use Vc: either it's use can be disabled at compile time (see 'Compilation' below), or the higher-level routines can be called with the flag use_vc set to false, which will prevent vector code from being used even if it has been compiled in. This is for situations where the binary can't be modified but the vectorized code doesn't work on the target system.
+ I find VIGRA indispensible, omitting it from vspline is not really an option. Use of Vc is optional, though, and has to be activated by defining 'USE_VC'. This should be done by passing -DUSE_VC to the compiler; defining USE_VC only for parts of a project may or may not work.
  
  I have made an attempt to generalize the code so that it can handle
 
@@ -116,8 +116,6 @@ On the evaluation side I provide
  
  All access to Vc in the code is inside #ifdef USE_VC .... #endif statements, so not defining USE_VC will effectively prevent it's use.
  
- For simplicity's sake, even if the code isn't compiled to use Vc, the higher level code may still accept the common use_vc flag in the call signatures, but it's value wont have an effect. When the code is compiled to use Vc, the unvectorized code is still built and available by calling the relevant routines with use_vc set to false. The documentation is built to contain text for vectorized operation, if this is unwanted, change the doxy file.
- 
  \section license_sec License
 
  vspline is free software, licensed under this license:
diff --git a/example/roundtrip.cc b/example/roundtrip.cc
index 066b864..5440e95 100644
--- a/example/roundtrip.cc
+++ b/example/roundtrip.cc
@@ -92,7 +92,6 @@ template < class view_type , typename real_type , typename rc_type >
 void run_test ( view_type & data ,
                 vspline::bc_code bc ,
                 int DEGREE ,
-                bool use_vc ,
                 int TIMES = 32 )
 {
   typedef typename view_type::value_type pixel_type ;
@@ -137,7 +136,7 @@ void run_test ( view_type & data ,
 #endif
   
   for ( int times = 0 ; times < TIMES ; times++ )
-    bsp.prefilter ( use_vc ) ;
+    bsp.prefilter() ;
   
 #ifdef PRINT_ELAPSED
   end = std::chrono::system_clock::now();
@@ -148,7 +147,7 @@ void run_test ( view_type & data ,
   
   // do it again, data above are useless after 10 times filtering
   bsp.core = data ;
-  bsp.prefilter ( use_vc ) ;
+  bsp.prefilter() ;
 
   // get a view to the core coefficients (those which aren't part of the brace)
   view_type cfview = bsp.core ;
@@ -201,7 +200,7 @@ void run_test ( view_type & data ,
   
   for ( int times = 0 ; times < TIMES ; times++ )
     vspline::remap < eval_type , 2 >
-      ( ev , fwarp , target , use_vc ) ;
+      ( ev , fwarp , target ) ;
 
   
 #ifdef PRINT_ELAPSED
@@ -222,7 +221,7 @@ void run_test ( view_type & data ,
   
   for ( int times = 0 ; times < TIMES ; times++ )
     vspline::remap < coordinate_type , pixel_type , 2 >
-      ( data , fwarp , target , bcv , DEGREE , use_vc ) ;
+      ( data , fwarp , target , bcv , DEGREE ) ;
 
  
 #ifdef PRINT_ELAPSED
@@ -245,7 +244,7 @@ void run_test ( view_type & data ,
   
   for ( int times = 0 ; times < TIMES ; times++ )
     vspline::index_remap < eval_type >
-      ( ev , target , use_vc ) ;
+      ( ev , target ) ;
 
 #ifdef PRINT_ELAPSED
   end = std::chrono::system_clock::now();
@@ -313,14 +312,14 @@ void process_image ( char * name )
     vspline::bc_code bc = bcs[b] ;
     for ( int spline_degree = 0 ; spline_degree < 8 ; spline_degree++ )
     {
-      cout << "testing bc code " << vspline::bc_name[bc]
-           << " spline degree " << spline_degree << endl ;
-      run_test < view_type , real_type , rc_type > ( imageArray , bc , spline_degree , false ) ;
 #ifdef USE_VC
       cout << "testing bc code " << vspline::bc_name[bc]
            << " spline degree " << spline_degree << " using Vc" << endl ;
-      run_test < view_type , real_type , rc_type > ( imageArray , bc , spline_degree , true ) ;
+#else
+      cout << "testing bc code " << vspline::bc_name[bc]
+           << " spline degree " << spline_degree << endl ;
 #endif
+      run_test < view_type , real_type , rc_type > ( imageArray , bc , spline_degree ) ;
     }
   }
 }
diff --git a/filter.h b/filter.h
index e80ce21..96a9cea 100644
--- a/filter.h
+++ b/filter.h
@@ -1435,7 +1435,6 @@ public:
                     int nbpoles ,
                     const double * pole ,
                     double tolerance ,
-                    bool use_vc = true ,
                     int njobs = default_njobs )  ///< number of jobs to use when multithreading
 {
   typedef typename input_array_type::value_type value_type ;
@@ -1469,10 +1468,9 @@ public:
 
   typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
 
-  if ( use_vc )
-    pf = & aggregating_filter < input_array_type ,
-                                output_array_type ,
-                                ele_type > ;
+  pf = & aggregating_filter < input_array_type ,
+                              output_array_type ,
+                              ele_type > ;
 
 #endif
   
@@ -1523,7 +1521,6 @@ public:
                     int nbpoles ,
                     const double * pole ,
                     double tolerance ,
-                    bool use_vc = true ,
                     int njobs = default_njobs )  ///< number of jobs to use
 {
   typedef typename input_array_type::value_type value_type ;
@@ -1547,10 +1544,7 @@ public:
   // if we can use vector code, the number of lanes is multiplied by the
   // number of elements a simdized type inside the vector code can handle
 
-  if ( use_vc )
-  {
-    lanes *= vsize ;
-  }
+  lanes *= vsize ;
 
 #endif
 
@@ -1748,7 +1742,6 @@ public:
       nbpoles ,
       pole ,
       tolerance ,
-      use_vc ,
       njobs ) ;
 
   // we now have filtered data in target, but the stripes along the magin
@@ -1785,7 +1778,6 @@ public:
 /// - nbpoles: number of filter poles
 /// - pole: pointer to nbpoles doubles containing the filter poles
 /// - tolerance: acceptable error
-/// - use_vc: flag whether to use vector code or not (if present)
 /// - njobs: number of jobs to use when multithreading
 
 template < typename input_array_type ,  // type of array with knot point data
@@ -1797,7 +1789,6 @@ void filter_nd ( input_array_type & input ,
                  int nbpoles ,
                  const double * pole ,
                  double tolerance ,
-                 bool use_vc = true ,
                  int njobs = default_njobs )
 {
   // check if operation is in-place. I assume that the test performed here
@@ -1858,7 +1849,6 @@ void filter_nd ( input_array_type & input ,
       nbpoles ,
       pole ,
       tolerance ,
-      use_vc ,
       njobs ) ;
 
   // but if degree <= 1 we're done already, since copying the data again
@@ -1877,7 +1867,6 @@ void filter_nd ( input_array_type & input ,
           nbpoles ,
           pole ,
           tolerance ,
-          use_vc ,
           njobs ) ;
   }
 }
diff --git a/prefilter.h b/prefilter.h
index 57d8a32..259fea5 100644
--- a/prefilter.h
+++ b/prefilter.h
@@ -185,7 +185,6 @@ void solve ( input_array_type & input ,
              int degree ,
              double tolerance ,
              double smoothing = 0.0 ,
-             bool use_vc = true ,
              int njobs = default_njobs )
 {
   if ( smoothing != 0.0 )
@@ -203,7 +202,6 @@ void solve ( input_array_type & input ,
                 npoles ,
                 pole ,
                 tolerance ,
-                use_vc ,
                 njobs ) ;
   }
   else
@@ -214,7 +212,6 @@ void solve ( input_array_type & input ,
                 degree / 2 ,
                 vspline_constants::precomputed_poles [ degree ] ,
                 tolerance ,
-                use_vc ,
                 njobs ) ;
 }
 
diff --git a/remap.h b/remap.h
index e1f7a4c..4346966 100644
--- a/remap.h
+++ b/remap.h
@@ -177,9 +177,7 @@ struct _fill
 {
   void operator() ( generator_type & gen ,
                     MultiArrayView < dim_out , typename generator_type::value_type >
-                      & output ,
-                    bool use_vc = true
-                  )
+                      & output )
   {
       // we're not yet at the intended lowest level of recursion,
       // so we slice output and generator and feed the slices to the
@@ -190,7 +188,7 @@ struct _fill
         auto sub_output = output.bindOuter ( c ) ;
         auto sub_gen = gen.bindOuter ( c ) ;
         _fill < decltype ( sub_gen ) , dim_out - 1 >()
-          ( sub_gen , sub_output , use_vc ) ;
+          ( sub_gen , sub_output ) ;
       }
   }
 } ;
@@ -202,9 +200,7 @@ struct _fill < generator_type , 1 >
 {
   void operator() ( generator_type & gen ,
                     MultiArrayView < 1 , typename generator_type::value_type >
-                      & output ,
-                    bool use_vc = true
-                  )
+                      & output )
   {
     typedef typename generator_type::value_type value_type ;
     typedef typename generator_type::functor_type functor_type ;
@@ -214,45 +210,42 @@ struct _fill < generator_type , 1 >
 
 #ifdef USE_VC
 
-    if ( use_vc )
-    {
-      const int vsize = generator_type::vsize ;
-      int aggregates = leftover / vsize ;        // number of full vectors
-      leftover -= aggregates * vsize ;           // remaining leftover single values
+    const int vsize = generator_type::vsize ;
+    int aggregates = leftover / vsize ;        // number of full vectors
+    leftover -= aggregates * vsize ;           // remaining leftover single values
 
-      if ( output.isUnstrided() )
+    if ( output.isUnstrided() )
+    {
+      // best case: output array has consecutive memory
+      // get a pointer to target memory        
+      value_type * destination = output.data() ;
+      
+      for ( int a = 0 ; a < aggregates ; a++ , destination += vsize )
       {
-        // best case: output array has consecutive memory
-        // get a pointer to target memory        
-        value_type * destination = output.data() ;
-        
-        for ( int a = 0 ; a < aggregates ; a++ , destination += vsize )
-        {
-          // pass pointer to target memory to the generator
-          gen ( destination ) ;
-        }
+        // pass pointer to target memory to the generator
+        gen ( destination ) ;
       }
-      else
-      {
-        const functor_type & f ( gen.get_functor() ) ;
-        typename functor_type::out_v target_buffer ;
-        value_type * destination = output.data() ;
-        
-        for ( int a = 0 ; a < aggregates ; a++ , destination += vsize * output.stride(0) )
-        {
-          // here we generate to a simdized target type
-          gen ( target_buffer ) ;
-          // and store it to destination using a helper routine of the functor
-          f.store ( target_buffer , destination , output.stride(0) ) ;
-        }
-      }        
-      // if there aren't any leftovers, we can return straight away.
-      if ( ! leftover )
-        return ;
-
-      // otherwise, advance target_it to remaining single values
-      target_it += aggregates * vsize ;
     }
+    else
+    {
+      const functor_type & f ( gen.get_functor() ) ;
+      typename functor_type::out_v target_buffer ;
+      value_type * destination = output.data() ;
+      
+      for ( int a = 0 ; a < aggregates ; a++ , destination += vsize * output.stride(0) )
+      {
+        // here we generate to a simdized target type
+        gen ( target_buffer ) ;
+        // and store it to destination using a helper routine of the functor
+        f.store ( target_buffer , destination , output.stride(0) ) ;
+      }
+    }        
+    // if there aren't any leftovers, we can return straight away.
+    if ( ! leftover )
+      return ;
+
+    // otherwise, advance target_it to remaining single values
+    target_it += aggregates * vsize ;
     
 #endif // USE_VC
 
@@ -275,8 +268,7 @@ template < typename generator_type  , // functor object yielding values
            int dim_out >              // number of dimensions of output array
 void st_fill ( shape_range_type < dim_out > range ,
                generator_type * const       p_gen ,
-               MultiArrayView < dim_out , typename generator_type::value_type > * p_output ,
-               bool use_vc = true )
+               MultiArrayView < dim_out , typename generator_type::value_type > * p_output )
 {
   // pick out output's subarray specified by 'range'
 
@@ -293,7 +285,7 @@ void st_fill ( shape_range_type < dim_out > range ,
   
   // have the results computed and put into the target
 
-  _fill < generator_type , dim_out >() ( gen , output , use_vc ) ;
+  _fill < generator_type , dim_out >() ( gen , output ) ;
 }
 
 /// multithreaded fill. This is the top-level fill routine. It takes a functor capable
@@ -306,9 +298,7 @@ template < typename generator_type  , // functor object yielding values
            int dim_target >              // number of dimensions of output array
 void fill ( generator_type & gen ,
             MultiArrayView < dim_target , typename generator_type::value_type >
-              & output ,
-            bool use_vc = true
-          )
+              & output )
 {
   // set up 'range' to cover a complete array of output's size
   
@@ -329,8 +319,7 @@ void fill ( generator_type & gen ,
                 njobs ,        // desired number of partitions
                 range ,        // 'full' range which is to be partitioned
                 &gen ,         // generator_type object
-                &output ,      // target array
-                use_vc ) ;     // flag to switch use of Vc on/off
+                &output ) ;    // target array
 } ;
 
 /// Next we code 'generators' for use with fill(). These objects can yield values
@@ -358,8 +347,6 @@ struct warp_generator
   
   const unary_functor_type & itp ;
   
-  const bool use_vc ;
-
   const unary_functor_type & get_functor()
   {
     return itp ;
@@ -367,26 +354,23 @@ struct warp_generator
   
   warp_generator
     ( const warp_array_type & _warp ,
-      const unary_functor_type & _itp ,
-      bool _use_vc = true
-    )
+      const unary_functor_type & _itp )
   : warp ( _warp ) ,
-    itp ( _itp ) ,
-    use_vc ( _use_vc )
+    itp ( _itp )
   { } ;
 
   warp_generator < dimension , unary_functor_type , strided_warp >
     subrange ( const shape_range_type < dimension > & range ) const
   {
     return warp_generator < dimension , unary_functor_type , strided_warp >
-             ( warp.subarray ( range[0] , range[1] ) , itp , use_vc ) ;
+             ( warp.subarray ( range[0] , range[1] ) , itp ) ;
   }
   
   warp_generator < dimension - 1 , unary_functor_type , strided_warp >
     bindOuter ( const int & c ) const
   {
     return warp_generator < dimension - 1 , unary_functor_type , strided_warp >
-             ( warp.bindOuter ( c ) , itp , use_vc ) ;
+             ( warp.bindOuter ( c ) , itp ) ;
   }  
 } ;
 
@@ -411,8 +395,6 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
   
   const unary_functor_type & itp ;
   
-  const bool use_vc ;
-  
   const unary_functor_type & get_functor()
   {
     return itp ;
@@ -420,22 +402,16 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
   
   warp_generator
     ( const warp_array_type & _warp ,
-      const unary_functor_type & _itp ,
-      bool _use_vc = true
-    )
+      const unary_functor_type & _itp )
   : warp ( _warp ) ,
     stride ( _warp.stride(0) ) ,
     itp ( _itp ) ,
     witer ( _warp.begin() ) ,
-    data ( _warp.data() ) ,
-    use_vc ( _use_vc )
+    data ( _warp.data() )
   {
 #ifdef USE_VC
-    if ( use_vc )
-    {
-      int aggregates = warp.size() / vsize ;
-      witer += aggregates * vsize ;
-    }
+    int aggregates = warp.size() / vsize ;
+    witer += aggregates * vsize ;
 #endif
   } ;
 
@@ -485,7 +461,7 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
     subrange ( const shape_range_type < 1 > & range ) const
   {
     return warp_generator < 1 , unary_functor_type , strided_warp >
-             ( warp.subarray ( range[0] , range[1] ) , itp , use_vc ) ;
+             ( warp.subarray ( range[0] , range[1] ) , itp ) ;
   }
 
 } ;
@@ -523,9 +499,6 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
 ///
 /// - a reference to a MultiArrayView to use as a target. This is where the resulting
 ///   data are put.
-///
-/// - a boolan flag 'use_vc' which can be set to false to switch off the use of vector
-///   code in compiles which have it activated
 
 template < typename unary_functor_type  , // functor object yielding values for coordinates
            int dim_target >               // number of dimensions of output array
@@ -533,8 +506,7 @@ void remap ( const unary_functor_type & ev ,
              const MultiArrayView < dim_target ,
                                     typename unary_functor_type::in_type > & warp ,
              MultiArrayView < dim_target ,
-                              typename unary_functor_type::out_type > & output ,
-             bool use_vc = true
+                              typename unary_functor_type::out_type > & output
            )
 {
   // check shape compatibility
@@ -555,15 +527,15 @@ void remap ( const unary_functor_type & ev ,
   {
     //                                set strided_warp to false vvv
     typedef warp_generator < dim_target , unary_functor_type , false > gen_t ;  
-    gen_t gen ( warp , ev , use_vc ) ;  
-    fill < gen_t , dim_target > ( gen , output , use_vc ) ;
+    gen_t gen ( warp , ev ) ;  
+    fill < gen_t , dim_target > ( gen , output ) ;
   }
   else
   {
     // warp array is strided even in dimension 0
     typedef warp_generator < dim_target , unary_functor_type , true > gen_t ;  
-    gen_t gen ( warp , ev , use_vc ) ;  
-    fill < gen_t , dim_target > ( gen , output , use_vc ) ;
+    gen_t gen ( warp , ev ) ;  
+    fill < gen_t , dim_target > ( gen , output ) ;
   }
 }
 
@@ -576,9 +548,7 @@ template < class unary_functor_type , // type satisfying the interface in class
            int dim_target >          // dimension of target array
 void apply ( const unary_functor_type & ev ,
               MultiArrayView < dim_target ,
-                               typename unary_functor_type::out_type > & output ,
-              bool use_vc = true           
-            )
+                               typename unary_functor_type::out_type > & output )
 {
   remap < unary_functor_type , dim_target > ( ev , output , output ) ;
 }
@@ -599,8 +569,7 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
             bcv_type < coordinate_traits < coordinate_type > :: dimension > bcv
               = bcv_type < coordinate_traits < coordinate_type > :: dimension >
                 ( MIRROR ) ,
-            int degree = 3 ,
-            bool use_vc = true )
+            int degree = 3 )
 {
   const int dim_in = coordinate_traits < coordinate_type > :: dimension ;
   typedef typename coordinate_traits < coordinate_type > :: rc_type rc_type ;
@@ -620,7 +589,7 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
   
   // prefilter, taking data in 'input' as knot point data
   
-  bsp.prefilter ( use_vc , ncores * 8 , input ) ;
+  bsp.prefilter ( ncores * 8 , input ) ;
 
   // create an evaluator over the bspline
 
@@ -630,7 +599,7 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
   // and call the other remap variant,
   // passing in the evaluator, the coordinate array and the target array
   
-  remap < evaluator_type , dim_out > ( ev , warp , output , use_vc ) ;
+  remap < evaluator_type , dim_out > ( ev , warp , output ) ;
     
   return 0 ;
 }
@@ -662,7 +631,6 @@ struct index_generator
   
   const unary_functor_type & itp ;
   const shape_range_type < dimension > range ;
-  const bool use_vc ;
   
   const unary_functor_type & get_functor()
   {
@@ -671,19 +639,16 @@ struct index_generator
   
   index_generator
     ( const unary_functor_type & _itp ,
-      const shape_range_type < dimension > _range ,
-      bool _use_vc = true
-    )
+      const shape_range_type < dimension > _range )
   : itp ( _itp ) ,
-    range ( _range ) ,
-    use_vc ( _use_vc )
+    range ( _range )
   { } ;
 
   index_generator < unary_functor_type , level >
     subrange ( const shape_range_type < dimension > range ) const
   {
     return index_generator < unary_functor_type , level >
-             ( itp , range , use_vc ) ;
+             ( itp , range ) ;
   }
   
   index_generator < unary_functor_type , level - 1 >
@@ -695,7 +660,7 @@ struct index_generator
     slice_end [ level ] = slice_start [ level ] + 1 ;
     
     return index_generator < unary_functor_type , level - 1 >
-             ( itp , shape_range_type < dimension > ( slice_start , slice_end ) , use_vc ) ;
+             ( itp , shape_range_type < dimension > ( slice_start , slice_end ) ) ;
   }  
 } ;
 
@@ -735,7 +700,6 @@ struct index_generator < unary_functor_type , 0 >
 
   const unary_functor_type & itp ;
   const shape_range_type < dimension > range ;
-  const bool use_vc ;
   
   const unary_functor_type & get_functor()
   {
@@ -744,31 +708,26 @@ struct index_generator < unary_functor_type , 0 >
   
   index_generator
     ( const unary_functor_type & _itp ,
-      const shape_range_type < dimension > _range ,
-      bool _use_vc = true
+      const shape_range_type < dimension > _range
     )
   : itp ( _itp ) ,
-    range ( _range ) ,
-    use_vc ( _use_vc )
+    range ( _range )
   {
     // initially, set the singular index to the beginning of the range
     current = index_type ( range[0] ) ;
     
 #ifdef USE_VC
     
-    if ( use_vc )
-    {
-      // initialize current_v to hold the first simdized index
-      for ( int d = 0 ; d < dimension ; d++ )
-        current_v[d] = index_ele_v ( range[0][d] ) ;
-      current_v[0] += index_ele_v::IndexesFromZero() ;
-      
-      // if vc is used, the singular index will only be used for mop-up action
-      // after all aggregates have been processed.
-      int size = range[1][0] - range[0][0] ;
-      int aggregates = size / vsize ;
-      current[0] += index_ele_type ( aggregates * vsize ) ; // for mop-up
-    }
+    // initialize current_v to hold the first simdized index
+    for ( int d = 0 ; d < dimension ; d++ )
+      current_v[d] = index_ele_v ( range[0][d] ) ;
+    current_v[0] += index_ele_v::IndexesFromZero() ;
+    
+    // if vc is used, the singular index will only be used for mop-up action
+    // after all aggregates have been processed.
+    int size = range[1][0] - range[0][0] ;
+    int aggregates = size / vsize ;
+    current[0] += index_ele_type ( aggregates * vsize ) ; // for mop-up
 
 #endif
 
@@ -805,7 +764,7 @@ struct index_generator < unary_functor_type , 0 >
     subrange ( const shape_range_type < dimension > range ) const
   {
     return index_generator < unary_functor_type , 0 >
-             ( itp , range , use_vc ) ;
+             ( itp , range ) ;
   }
 } ;
 
@@ -831,16 +790,11 @@ struct index_generator < unary_functor_type , 0 >
 ///
 /// - a reference to a MultiArrayView to use as a target. This is where the resulting
 ///   data are put.
-///
-/// - a boolan flag 'use_vc' which can be set to false to switch off the use of vector
-///   code in compiles which have it activated
 
 template < class unary_functor_type > // type satisfying the interface in class unary_functor
 void index_remap ( const unary_functor_type & ev ,
                    MultiArrayView < unary_functor_type::dim_in ,
-                                    typename unary_functor_type::out_type > & output ,
-                   bool use_vc = true           
-                )
+                                    typename unary_functor_type::out_type > & output )
 {
   enum { dim_target = unary_functor_type::dim_in } ;
   
@@ -849,8 +803,8 @@ void index_remap ( const unary_functor_type & ev ,
   typedef index_generator < unary_functor_type , dim_target - 1 > gen_t ;
 
   shape_range_type < dim_target > range ( nd_ic_type() , output.shape() ) ;  
-  gen_t gen ( ev , range , use_vc ) ;  
-  fill < gen_t , dim_target > ( gen , output , use_vc ) ;
+  gen_t gen ( ev , range ) ;  
+  fill < gen_t , dim_target > ( gen , output ) ;
 }
 
 namespace detail // workhorse code for grid_eval

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git