[vspline] 42/72: mainly work on remap.h
Kay F. Jahnke
kfj-guest at moszumanska.debian.org
Sun Jul 2 09:02:41 UTC 2017
This is an automated email from the git hooks/post-receive script.
kfj-guest pushed a commit to branch master
in repository vspline.
commit 9f272ab1be20e80287052c23451faee9bb11ecd8
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date: Mon Apr 10 11:42:38 2017 +0200
mainly work on remap.h
This was consolidation work and performance tuning, also streamlining of the code
to make it more compact. Added more comments, as well.
---
multithread.h | 31 +++
remap.h | 759 ++++++++++++++++++++------------------------------------
unary_functor.h | 64 ++---
3 files changed, 327 insertions(+), 527 deletions(-)
diff --git a/multithread.h b/multithread.h
index d32d151..ffa1c19 100644
--- a/multithread.h
+++ b/multithread.h
@@ -367,6 +367,37 @@ partition ( shape_range_type<d> range ,
return res ;
}
+/// specialization for 1D shape range
+
+template<>
+partition_type < shape_range_type<1> >
+partition ( shape_range_type<1> range ,
+ int nparts )
+{
+ auto size = range[1][0] - range[0][0] ;
+ auto part_size = size / nparts ;
+ if ( part_size < 1 )
+ part_size = size ;
+
+ nparts = int ( size / part_size ) ;
+ if ( nparts * part_size < size )
+ nparts++ ;
+
+ partition_type < shape_range_type<1> > res ( nparts ) ;
+
+ auto start = range[0] ;
+ auto stop = start + part_size ;
+ for ( auto & e : res )
+ {
+ e[0] = start ;
+ e[1] = stop ;
+ start = stop ;
+ stop = start + part_size ;
+ }
+ res[nparts-1][1] = range[1] ; // end the last part exactly at the range's end
+ return res ;
+}
+
/// action_wrapper wraps a functional into an outer function which
/// first calls the functional and then checks if this was the last
/// of a bunch of actions to complete, by incrementing the counter
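As an aside, the arithmetic of this 1D partitioning can be tried out in
isolation. Here is a minimal standalone sketch, with shape_range_type<1>
replaced by a plain pair of ints; the name partition_1d is hypothetical
and not part of the commit:

#include <iostream>
#include <vector>
#include <utility>

// split [ lo , hi ) into roughly equal consecutive parts, mirroring the
// arithmetic of the 1D specialization above
std::vector < std::pair < int , int > >
partition_1d ( int lo , int hi , int nparts )
{
  int size = hi - lo ;
  int part_size = size / nparts ;
  if ( part_size < 1 )
    part_size = size ;            // fewer values than parts: use a single part
  nparts = size / part_size ;
  if ( nparts * part_size < size )
    nparts++ ;                    // add one part for the remainder
  std::vector < std::pair < int , int > > res ( nparts ) ;
  int start = lo , stop = lo + part_size ;
  for ( auto & e : res )
  {
    e = { start , stop } ;
    start = stop ;
    stop = start + part_size ;
  }
  res [ nparts - 1 ] . second = hi ; // end the last part exactly at hi
  return res ;
}

int main()
{
  // prints 0..2 2..4 4..6 6..8 8..10 : ten values end up in five parts of two
  for ( auto const & e : partition_1d ( 0 , 10 , 4 ) )
    std::cout << e.first << ".." << e.second << " " ;
  std::cout << std::endl ;
}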
diff --git a/remap.h b/remap.h
index 8b6e7de..762c693 100644
--- a/remap.h
+++ b/remap.h
@@ -3,7 +3,7 @@
/* vspline - a set of generic tools for creation and evaluation */
/* of uniform b-splines */
/* */
-/* Copyright 2015, 2016 by Kay F. Jahnke */
+/* Copyright 2015 - 2017 by Kay F. Jahnke */
/* */
/* Permission is hereby granted, free of charge, to any person */
/* obtaining a copy of this software and associated documentation */
@@ -50,12 +50,12 @@
/// t[j] = i(a,c[j]) for all j
///
/// st_remap is the single_threaded implementation; remap itself partitions its work
-/// and creates several threads, each running one instance of st_remap.
+/// and feeds several threads, each running one instance of st_remap.
///
-/// all remap routines take two template arguments:
+/// remap takes two template arguments:
///
/// - unary_functor_type: functor object yielding values for coordinates
-/// - dim_target: number of dimensions of output array
+/// - dim_target: number of dimensions of output array
///
/// remaps to other-dimensional objects are supported. This makes it possible to,
/// for example, remap from a volume to a 2D image, using a 2D warp array containing
@@ -63,24 +63,22 @@
///
/// In these routines, we can switch the use of vectorization on or off. When using
/// vectorization, this is done in a straightforward fashion by aggregating the input.
-/// If the source/target array lend themselves to it, we can pass it's memory directly
-/// to the vectorized eval code. To keep the complexity down, if the memory is not
-/// suited, I 'manually' aggregate to a small buffer and pass the buffer to and fro.
-/// There is possibly room for improvement here - one might use gather/scatter operations
-/// where the data aren't strictly in sequence - but the bulk of the processing time
-/// needed is in the access to (possibly widely scattered) memory and the complex
-/// calculations needed to provide the result, so I don't include code for more
-/// data access modes here.
+/// If the source/target array lends itself to it, we can pass its memory directly
+/// to the vectorized eval code.
+/// If the memory is not suited, I 'manually' aggregate to a simdized type.
///
/// There is also a second set of remap functions in this file, which don't take a
/// 'warp' array. Instead, for every target location, the location's discrete coordinates
/// are passed to the unary_functor_type object. This way, transformation-based remaps
-/// can be implemented easily: the user code just has to provide a suitable 'interpolator'
+/// can be implemented easily: the user code just has to provide a suitable functor
/// to yield values for coordinates. This interpolator will internally take the discrete
/// incoming coordinates (into the target array) and transform them as required, internally
/// producing coordinates suitable for the 'actual' interpolation using a b-spline or some
/// other object capable of producing values for real coordinates. The routine offering
-/// this service is called index_remap.
+/// this service is called index_remap, and only takes one template argument, which is
+/// enough to derive all other types involved:
+///
+/// - unary_functor_type: functor object yielding values for coordinates
///
/// This file also has code to evaluate a b-spline at positions in a mesh grid, which can be used
/// for scaling, and for separable geometric transformations.
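As a reader's aid, the remap semantics t[j] = i(a,c[j]) quoted above come
down to the following scalar sketch. The functor call is simplified to
operator() - vspline's unary_functor uses eval ( in , out ) - and all names
are hypothetical:

#include <vector>
#include <cstddef>

// for every index j into the target, pick the coordinate from the warp
// array and deposit the functor's value for it; the source array 'a' is
// considered baked into the functor 'i'
template < typename functor_type ,
           typename coordinate_type ,
           typename value_type >
void remap_sketch ( const functor_type & i ,
                    const std::vector < coordinate_type > & warp ,
                    std::vector < value_type > & target )
{
  for ( std::size_t j = 0 ; j < target.size() ; j++ )
    target [ j ] = i ( warp [ j ] ) ;  // t[j] = i ( c[j] )
}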
@@ -111,181 +109,6 @@ using namespace vigra ;
template < int dimension >
using bcv_type = vigra::TinyVector < bc_code , dimension > ;
-/// struct coordinate_iterator_v provides the vectorized equivalent of vigra's
-/// MultiCoordinateIterator. The iterator itself is not very elaborate, it only
-/// supports dereferencing and preincrement, but for the intended purpose that's
-/// enough. The iterator is cyclical, so the number of times it's used has to be
-/// controlled by the calling code.
-///
-/// dereferencing this object will yield a vectorized coordinate which is guaranteed
-/// to deinterleave to vsize valid ordinary coordinates, but it is only guaranteed that
-/// these ordinary coordinates are unique if several preconditions are met:
-///
-/// - the shape (end - start) must have more than vsize entries
-///
-/// - the iteration doesn't proceed beyond the number of full vectors
-///
-/// The vectorized coordinate delivered at the 'wrap-around-point', so one iteration
-/// after the last full set has been taken, will be partly wrapped around to the start
-/// of the iteration. This makes sure the contained ordinary coordinates are valid.
-/// but accessing data through it will again touch elements which have already been
-/// seen. As long as the indices aren't used for an in-place operation, recalculating
-/// a few values does no harm.
-
-// TODO this is utility code and might be placed into another header, probably next to
-// the routine producing successive gather indices for vectorized array traversal
-
-// TODO when experimenting with coordinate iterators, I found that vectorized code for
-// creating gather/scatter indexes performed worse than simply filling the index vector
-// with successive single indices generated by vigra, and using vigra's facilities
-// also simplifies matters, see the use in filter.h, ele_aggregating_filter.
-
-#ifdef USE_VC
-
-template < class rc_type , class ic_type , int _dimension , int vsize >
-struct coordinate_iterator_v
-{
- enum { dimension = _dimension } ;
-
- typedef vigra::TinyVector < rc_type , dimension > nd_rc_type ;
- typedef vigra::TinyVector < ic_type , dimension > nd_ic_type ;
-
- typedef typename vector_traits < rc_type , vsize > :: type rc_v ;
- typedef typename vector_traits < ic_type , vsize > :: type ic_v ;
-
- typedef vigra::TinyVector < ic_v , dimension > nd_ic_v ;
- typedef vigra::TinyVector < rc_v , dimension > nd_rc_v ;
-
- const nd_ic_type start ;
- const nd_ic_type end ;
- const nd_ic_type shape ;
-
- nd_ic_v c ;
-
-public:
-
- coordinate_iterator_v ( nd_ic_type _start , nd_ic_type _end )
- : start ( _start ) ,
- end ( _end ) ,
- shape ( _end - _start )
- {
- vigra::MultiCoordinateIterator<dimension> mci_start ( shape ) ;
- auto mci_end = mci_start.getEndIterator() ;
- auto mci = mci_start ;
-
- for ( int i = 0 ; i < vsize ; i++ )
- {
- nd_ic_type index = *mci + start ;
- ++mci ;
- if ( mci >= mci_end )
- mci = mci_start ;
- for ( int d = 0 ; d < dimension ; d++ )
- c[d][i] = index[d] ;
- }
- } ;
-
- const nd_ic_v & operator*()
- {
- return c ;
- }
-
- coordinate_iterator_v & operator++()
- {
- c[0] += vsize ; // increase level 0 values
- auto mask = ( c[0] >= end[0] ) ; // mask overflow
- while ( any_of ( mask ) )
- {
- // we need to do more only if c[0] overflows
- c[0] ( mask ) -= shape[0] ; // fold back to range
- for ( int d = 1 ; d < dimension ; d++ )
- {
- c[d] ( mask ) += 1 ; // increase next-higher dimension's index
- mask = ( c[d] >= end[d] ) ; // check for range overflow
- // resultant mask is either empty or the same as before
- if ( none_of ( mask ) ) // if no overflow, we're done
- break ;
- c[d] ( mask ) = start[d] ; // set back to lower bound
- // with the next loop iteration, the next dimension's index is increased
- }
- // having increased c[0] by vsize can require several iterations
- // if shape[0] < vsize, so we need to mask, test, etc. again until
- // all of c[0] are back in range
- mask = ( c[0] >= end[0] ) ; // mask overflow
- }
- // otherwise, increasing input[0] landed inside the range,
- return *this ;
- } ;
-} ;
-
-#endif
-
-// currently unused variant, using vigra's MultiCoordinateIterator. seems to perform the same.
-
-// template < class _rc_v , int _dimension >
-// struct coordinate_iterator_v
-// {
-// enum { dimension = _dimension } ;
-// typedef vigra::TinyVector < int , dimension > shape_type ;
-//
-// typedef _rc_v rc_v ;
-// enum { vsize = rc_v::Size } ;
-// typedef vigra::TinyVector < rc_v , dimension > nd_rc_v ;
-//
-// const shape_type start ;
-// const shape_type end ;
-// const shape_type shape ;
-//
-// nd_rc_v c ;
-//
-// typedef vigra::MultiCoordinateIterator<dimension> iter_type ;
-//
-// const iter_type mci_start ;
-// const iter_type mci_end ;
-// iter_type mci ;
-//
-// public:
-//
-// coordinate_iterator_v ( const shape_type & _start ,
-// const shape_type & _end )
-// : mci_start ( _end - _start ) ,
-// mci_end ( iter_type ( _end - start ) . getEndIterator() ) ,
-// start ( _start ) ,
-// end ( _end ) ,
-// shape ( _end - _start )
-// {
-// mci = mci_start ;
-//
-// for ( int i = 0 ; i < vsize ; i++ )
-// {
-// shape_type index = *mci + start ;
-// ++mci ;
-// if ( mci >= mci_end )
-// mci = mci_start ;
-// for ( int d = 0 ; d < dimension ; d++ )
-// c[d][i] = index[d] ;
-// }
-// } ;
-//
-// const nd_rc_v& operator*() const
-// {
-// return c ;
-// }
-//
-// coordinate_iterator_v & operator++()
-// {
-// for ( int i = 0 ; i < vsize ; i++ )
-// {
-// shape_type index = *mci + start ;
-// ++mci ;
-// if ( mci >= mci_end )
-// mci = mci_start ;
-// for ( int d = 0 ; d < dimension ; d++ )
-// c[d][i] = index[d] ;
-// }
-// return *this ;
-// } ;
-// } ;
-
/// struct _fill contains the implementation of the 'engine' used for remap-like
/// functions. The design logic is this: a remap will ultimately produce an array
/// of results. This array is filled in standard scan order sequence by repeated
@@ -302,7 +125,7 @@ public:
/// to the caller, since the data are deposited in normal interleaved fashion. The
/// extra effort for vectorized operation is in the implementation of the generator
/// functor and reasonably straightforward. If only the standard remap functions
-/// are used, the user can remain ignorant of the vectorizetion.
+/// are used, the user can remain ignorant of the vectorization.
///
/// Why is _fill an object? It might be a function if partial specialization of functions
/// were allowed. Since this is not so, we have to use a class and put the function in its
@@ -321,6 +144,20 @@ public:
/// - it has to offer a subrange routine, limiting output to a subarray
/// of the 'whole' output
+// TODO might write an abstract base class specifying the interface
+
+/// In the current implementation, the hierarchical descent to subdimensional slices
+/// is always taken down to the lowest level, so the actual calls of the functor
+/// occur there. The hierarchical access may consume some processing time, mainly
+/// to establish the bounds for the 1D operation - though this may be optimized
+/// away - but the operation on 1D data allows optimizations which gain more than
+/// the hierarchical descent costs. Especially when vectorized code is used,
+/// operating on 1D data is very efficient, since the data can be accessed using
+/// load/store or gather/scatter operations, even when the arrays involved are
+/// strided. Taking the hierarchical descent down to level 0 is encoded in fill()
+/// and its workhorse code; the generator objects implemented here depend on the
+/// descent going all the way down.
+///
/// _fill is used by st_fill. It's an object implementing a hierarchical fill
/// of the target array. This is done recursively: while the generator's and the
/// target's dimension is greater than 1, they are sliced and the slices are processed
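The recursive slicing _fill performs can be sketched in isolation. This is
a hypothetical reduction - the real _fill slices the generator in step with
the output and runs the (possibly vectorized) workhorse loop at the lowest
level - but it shows the descent pattern:

#include <vigra/multi_array.hxx>

// bind the outermost axis repeatedly until a 1D view remains, then do the
// actual work on the 1D view
template < int dim >
struct descent_sketch
{
  template < typename view_type >
  void operator() ( view_type & v , int & counter )
  {
    for ( int c = 0 ; c < v.shape ( dim - 1 ) ; c++ )
    {
      auto slice = v.bindOuter ( c ) ;
      descent_sketch < dim - 1 >() ( slice , counter ) ;
    }
  }
} ;

template <>
struct descent_sketch < 1 >
{
  template < typename view_type >
  void operator() ( view_type & v , int & counter )
  {
    for ( auto & e : v )    // the 1D 'workhorse' loop
      e = counter++ ;
  }
} ;

// usage: vigra::MultiArray < 3 , int > a ( vigra::Shape3 ( 4 , 3 , 2 ) ) ;
//        int counter = 0 ;
//        descent_sketch < 3 >() ( a , counter ) ;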
@@ -328,7 +165,6 @@ public:
template < typename generator_type , // functor object yielding values
int dim_out > // number of dimensions of output array
-// int floor_dim = 1 > // intended lowest dimensionality
struct _fill
{
void operator() ( generator_type & gen ,
@@ -337,94 +173,22 @@ struct _fill
bool use_vc = true
)
{
-#ifdef USE_VC
-
-// const int vsize = generator_type::vsize ;
-
- if ( use_vc )
- {
-// if ( dim_out > floor_dim )
-// {
- // if we're not yet at the intended lowest level of recursion,
- // we slice output and generator and feed the slices to the
- // next lower recursion level
- for ( int c = 0 ; c < output.shape ( dim_out - 1 ) ; c++ )
- {
- // recursively call _fill for each slice along the highest axis
- auto sub_output = output.bindOuter ( c ) ;
- auto sub_gen = gen.bindOuter ( c ) ;
- _fill < decltype ( sub_gen ) , dim_out - 1 >()
- ( sub_gen , sub_output ) ;
- }
- // after processing all slices, we're done and can return straight away
- return ;
-// }
- }
-
-// // we are at the intended lowest level of recursion already, so we
-// // do the work here and now. This is the same code as the specialization
-// // for dim_out==1 uses, so this code could be factored out. But we
-// // want the option to do the processing without recursing all the way
-// // down if that isn't necessary. The calling code can figure this out
-// // and set floor_dim accordingly.
-//
-// int aggregates = leftover / vsize ; // number of full vectors
-// leftover -= aggregates * vsize ; // remaining leftover single values
-//
-// if ( output.isUnstrided() )
-// {
-// // best case: output array has consecutive memory
-// // get a pointer to target memory
-// value_type * destination = output.data() ;
-//
-// for ( int a = 0 ; a < aggregates ; a++ , destination += vsize )
-// {
-// // pass pointer to target memory to the generator
-// gen ( destination ) ;
-// }
-// // if there aren't any leftovers, we can return straight away.
-// if ( ! leftover )
-// return ;
-//
-// // otherwise, advance target_it to remaining single values
-// target_it += aggregates * vsize ;
-// }
-// else
-// {
-// // fall back to buffering and storing individual result values.
-// // have a buffer ready
-// value_type target_buffer [ vsize ] ;
-//
-// for ( int a = 0 ; a < aggregates ; a++ )
-// {
-// gen ( target_buffer ) ; // pass buffer to the generator
-// for ( int e = 0 ; e < vsize ; e++ )
-// {
-// *target_it = target_buffer[e] ; // and deposit in target
-// ++target_it ;
-// }
-// }
-// }
-// }
-
- #endif // USE_VC
-
- typedef typename generator_type::value_type value_type ;
-
- auto target_it = output.begin() ;
- int leftover = output.elementCount() ;
-
- // process leftovers. If vc isn't used, this loop does all the processing
-
- while ( leftover-- )
- {
- // process leftovers with single-value evaluation
- gen ( *target_it ) ;
- ++target_it ;
- }
+ // we're not yet at the intended lowest level of recursion,
+ // so we slice output and generator and feed the slices to the
+ // next lower recursion level
+ for ( int c = 0 ; c < output.shape ( dim_out - 1 ) ; c++ )
+ {
+ // recursively call _fill for each slice along the highest axis
+ auto sub_output = output.bindOuter ( c ) ;
+ auto sub_gen = gen.bindOuter ( c ) ;
+ _fill < decltype ( sub_gen ) , dim_out - 1 >()
+ ( sub_gen , sub_output ) ;
+ }
}
} ;
+/// specialization of _fill for level 0 ends the recursive descent
+
template < typename generator_type > // functor object yielding values
struct _fill < generator_type , 1 >
{
@@ -435,6 +199,7 @@ struct _fill < generator_type , 1 >
)
{
typedef typename generator_type::value_type value_type ;
+ typedef typename generator_type::functor_type functor_type ;
auto target_it = output.begin() ;
int leftover = output.elementCount() ;
@@ -458,41 +223,25 @@ struct _fill < generator_type , 1 >
// pass pointer to target memory to the generator
gen ( destination ) ;
}
- // if there aren't any leftovers, we can return straight away.
- if ( ! leftover )
- return ;
-
- // otherwise, advance target_it to remaining single values
- target_it += aggregates * vsize ;
}
else
{
- // TODO: we're level 0 now, so we can scatter
-// generator_type::nd_rc_v target_buffer ;
-// for ( int a = 0 ; a < aggregates ; a++ )
-// {
-// gen ( target_buffer ) ; // pass buffer to the generator
-//
-// for ( int d = 0 ; d < value_type::size() ; d++ )
-// {
-// target_buffer[d].scatter ( destination , rc_v::IndexesFromZero * d + d ) ;
-// }
+ const functor_type & f ( gen.get_functor() ) ;
+ typename functor_type::out_v target_buffer ;
+ value_type * destination = output.data() ;
- // fall back to buffering and storing individual result values.
- // have a buffer ready
-
- value_type target_buffer [ vsize ] ;
- for ( int a = 0 ; a < aggregates ; a++ )
+ for ( int a = 0 ; a < aggregates ; a++ , destination += vsize * output.stride(0) )
{
- gen ( target_buffer ) ; // pass buffer to the generator
-
- for ( int e = 0 ; e < vsize ; e++ )
- {
- *target_it = target_buffer[e] ; // and deposit in target
- ++target_it ;
- }
+ gen ( target_buffer ) ;
+ f.store ( target_buffer , destination , output.stride(0) ) ;
}
- }
+ }
+ // if there aren't any leftovers, we can return straight away.
+ if ( ! leftover )
+ return ;
+
+ // otherwise, advance target_it to remaining single values
+ target_it += aggregates * vsize ;
}
#endif // USE_VC
@@ -507,8 +256,6 @@ struct _fill < generator_type , 1 >
}
} ;
-
-
/// single-threaded fill. This routine receives the range to process and the generator
/// object capable of providing result values. The generator object is set up to provide
/// values for the desired subrange and then passed to _fill, which handles the calls to
@@ -562,9 +309,6 @@ void fill ( generator_type & gen ,
int njobs = 64 ; // = output.shape ( dim_target - 1 ) / 4 ;
-// if ( njobs < 16 ) // small thing, try at least 16
-// njobs = 16 ;
-
// call multithread(), specifying the single-threaded fill routine as the functor
// to invoke the threads, passing in 'range', which will be partitioned by multithread(),
// followed by all the other parameters the single-threaded fill needs, which is
@@ -579,119 +323,43 @@ void fill ( generator_type & gen ,
use_vc ) ; // flag to switch use of Vc on/off
} ;
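The dispatch pattern behind fill() - partition the range, run one
single-threaded worker per part, wait for all of them - can be sketched
like this; partition_1d is the sketch given with the multithread.h hunk
above, and the real multithread() uses vspline's own partitioning and
completion bookkeeping rather than a plain join loop:

#include <thread>
#include <vector>

// worker_type stands in for a bound st_fill; it is called once per part
// with the part's bounds
template < typename worker_type >
void multithread_sketch ( worker_type worker , int lo , int hi , int njobs )
{
  std::vector < std::thread > pool ;
  for ( auto const & part : partition_1d ( lo , hi , njobs ) )
    pool.emplace_back ( worker , part.first , part.second ) ;
  for ( auto & t : pool )
    t.join() ;
}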
-// /// struct warp_generator combines a warp array and an interpolator into a functor
-// /// producing values to store in the output. This functor handles the whole chain of
-// /// operations from picking the coordinates from the warp array to delivering target
-// /// values. It can be fed to fill() to produce a remap function, see below.
-//
-// template < int dimension ,
-// typename unary_functor_type ,
-// bool strided_warp >
-// struct _warp_generator
-// {
-// typedef typename unary_functor_type::in_type nd_rc_type ;
-// typedef typename unary_functor_type::out_type value_type ;
-//
-// typedef MultiArrayView < dimension , nd_rc_type > warp_array_type ;
-//
-// const warp_array_type warp ; // must not use reference here!
-// typename warp_array_type::const_iterator witer ;
-//
-// const unary_functor_type & itp ;
-//
-// _warp_generator
-// ( const warp_array_type & _warp ,
-// const unary_functor_type & _itp )
-// : warp ( _warp ) ,
-// itp ( _itp ) ,
-// witer ( _warp.begin() )
-// { } ;
-//
-// void operator() ( value_type & target )
-// {
-// itp.eval ( *witer , target ) ;
-// ++witer ;
-// }
-//
-// #ifdef USE_VC
-//
-// enum { vsize = unary_functor_type :: vsize } ;
-//
-// // operator() incorporates two variants, which depend on a template argument,
-// // so the conditional has no run-time effect. We use witer to stay consistent
-// // with the unvectorized version, which may be used to mop up leftovers.
-//
-// void operator() ( value_type * target )
-// {
-// if ( strided_warp )
-// {
-// nd_rc_type buffer [ vsize ] ;
-// for ( int e = 0 ; e < vsize ; e++ , witer++ )
-// buffer[e] = *witer ;
-// itp.eval ( buffer , target ) ;
-// }
-// else
-// {
-// itp.eval ( &(*witer) , target ) ;
-// witer += vsize ;
-// }
-// }
-//
-// #endif
-//
-// _warp_generator < dimension , unary_functor_type , strided_warp >
-// subrange ( const shape_range_type < dimension > & range ) const
-// {
-// return _warp_generator < dimension , unary_functor_type , strided_warp >
-// ( warp.subarray ( range[0] , range[1] ) , itp ) ;
-// }
-//
-// _warp_generator < dimension - 1 , unary_functor_type , strided_warp >
-// bindOuter ( const int & c ) const
-// {
-// return _warp_generator < dimension - 1 , unary_functor_type , strided_warp >
-// ( warp.bindOuter ( c ) , itp ) ;
-// }
-// } ;
-
-/// warp_generator for dimensions > 1. Here we provide 'subrange' and
+/// Next we code 'generators' for use with fill(). These objects can yield values
+/// to the fill routine, each in its specific way. The first type we define is
+/// warp_generator. This generator yields data from an array, which, in the context
+/// of a remap-like function, will provide the coordinates to feed to the interpolator.
+/// First is warp_generator for dimensions > 1. Here we provide 'subrange' and
/// 'bindOuter' to be used for the hierarchical descent in _fill. The current
-/// implementation relies of the hierarchical descent going all the way to 1D!
+/// implementation relies on the hierarchical descent going all the way to 1D,
+/// and does not implement operator() until the 1D specialization.
template < int dimension ,
typename unary_functor_type ,
bool strided_warp >
struct warp_generator
{
- typedef typename unary_functor_type::in_type nd_rc_type ;
+ typedef unary_functor_type functor_type ;
+
typedef typename unary_functor_type::out_type value_type ;
+ typedef typename unary_functor_type::in_type nd_rc_type ;
typedef MultiArrayView < dimension , nd_rc_type > warp_array_type ;
const warp_array_type warp ; // must not use reference here!
- const nd_rc_type * data ;
- typename warp_array_type::const_iterator witer ;
const unary_functor_type & itp ;
+
+ const unary_functor_type & get_functor()
+ {
+ return itp ;
+ }
warp_generator
( const warp_array_type & _warp ,
const unary_functor_type & _itp )
: warp ( _warp ) ,
- itp ( _itp ) ,
- witer ( _warp.begin() ) ,
- data ( _warp.data() )
+ itp ( _itp )
{ } ;
- // with Vc present, we only need operator() here for the code to compile,
- // it's not used. But the unvectorized version may use it.
-
- void operator() ( value_type & target )
- {
- itp.eval ( *witer , target ) ;
- ++witer ;
- }
-
warp_generator < dimension , unary_functor_type , strided_warp >
subrange ( const shape_range_type < dimension > & range ) const
{
@@ -708,41 +376,43 @@ struct warp_generator
} ;
/// specialization of warp_generator for dimension 1. Here we have
-/// (vectorized) evaluation code, but not for subrange and bindOuter.
+/// (vectorized) evaluation code and subrange(), but no bindOuter().
template < typename unary_functor_type ,
bool strided_warp >
struct warp_generator < 1 , unary_functor_type , strided_warp >
{
+ typedef unary_functor_type functor_type ;
+
typedef typename unary_functor_type::in_type nd_rc_type ;
typedef typename unary_functor_type::out_type value_type ;
typedef MultiArrayView < 1 , nd_rc_type > warp_array_type ;
const warp_array_type warp ; // must not use reference here!
+ const int stride ;
const nd_rc_type * data ;
typename warp_array_type::const_iterator witer ;
const unary_functor_type & itp ;
+ const unary_functor_type & get_functor()
+ {
+ return itp ;
+ }
+
warp_generator
( const warp_array_type & _warp ,
const unary_functor_type & _itp )
: warp ( _warp ) ,
+ stride ( _warp.stride(0) ) ,
itp ( _itp ) ,
witer ( _warp.begin() ) ,
data ( _warp.data() )
{
#ifdef USE_VC
- if ( ! strided_warp )
- {
- // if the warp array is unstrided, 'witer' will only be used
- // by the 'mop-up' action processing the leftover single values
- // so we advance witer to this location.
-
- int aggregates = warp.size() / vsize ;
- witer += aggregates * vsize ;
- }
+ int aggregates = warp.size() / vsize ;
+ witer += aggregates * vsize ;
#endif
} ;
@@ -761,36 +431,51 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
enum { vsize = unary_functor_type :: vsize } ;
// operator() incorporates two variants, which depend on a template argument,
- // so the conditional has no run-time effect.
+ // so the conditional has no run-time effect. The template argument target_type
+ // determines the evaluation target and picks the appropriate eval variant.
- void operator() ( value_type * target )
+ template < typename target_type >
+ void operator() ( target_type & target )
{
if ( strided_warp )
{
- // if the warp array is strided, we collect the values one by one
- nd_rc_type buffer [ vsize ] ;
- for ( int e = 0 ; e < vsize ; e++ , witer++ )
- buffer[e] = *witer ;
+ // if the warp array is strided, we use unary_functor's strided load
+ // routine to assemble a simdized input value with gather operations
+ typename unary_functor_type::in_v buffer ;
+ itp.load ( data , stride , buffer ) ;
+ // now we pass the simdized value to the evaluation routine
itp.eval ( buffer , target ) ;
+ data += vsize * stride ;
}
else
{
- // otherwise we can collect them with an efficient load operation,
- // which is affected by passing a pointer (data) to the evaluation
- // routine
+ // otherwise we can collect them with a more efficient load operation,
+ // which is effected by passing a pointer to the evaluation routine
itp.eval ( data , target ) ;
data += vsize ;
}
}
#endif
+
+ warp_generator < 1 , unary_functor_type , strided_warp >
+ subrange ( const shape_range_type < 1 > & range ) const
+ {
+ return warp_generator < 1 , unary_functor_type , strided_warp >
+ ( warp.subarray ( range[0] , range[1] ) , itp ) ;
+ }
+
} ;
/// implementation of remap() by delegation to the more general fill() routine, passing
-/// in the warp array and the interpolator via a generator object. warp_generator
-/// has a fast variant which requires the whole warp array to be unstrided, a requirement
-/// which can usually be fulfilled, since warp arrays tend to be made just for the purpose
-/// of a remap. Picking of the correct variant is done here.
+/// in the warp array and the interpolator via a generator object. Calling this routine
+/// remap() doesn't quite do its scope justice; it might be more appropriate to call it
+/// transform(), since it applies a functor to a set of inputs yielding a set of outputs.
+/// This is a generalization of a remap routine: the remap concept looks at the incoming
+/// data as coordinates, at the functor as an interpolator yielding values for coordinates,
+/// and at the output as an array of values generated this way. In this implementation
+/// of remap, incoming and outgoing data aren't necessarily coordinates or the results
+/// of an interpolation; they can be any pair of types which the functor can handle.
///
/// remap takes two template arguments:
///
@@ -820,7 +505,7 @@ struct warp_generator < 1 , unary_functor_type , strided_warp >
/// code in compiles which have it activated
template < typename unary_functor_type , // functor object yielding values for coordinates
- int dim_target > // number of dimensions of output array
+ int dim_target > // number of dimensions of output array
void remap ( const unary_functor_type & ev ,
const MultiArrayView < dim_target ,
typename unary_functor_type::in_type > & warp ,
@@ -859,6 +544,22 @@ void remap ( const unary_functor_type & ev ,
}
}
+/// we code 'apply' as a special variant of remap where the output
+/// is also used as 'warp', so the effect is to feed the unary functor
+/// each 'output' value in turn, let it process it and store the result
+/// back to the same location.
+
+template < class unary_functor_type , // type satisfying the interface in class unary_functor
+ int dim_target > // dimension of target array
+void apply ( const unary_functor_type & ev ,
+ MultiArrayView < dim_target ,
+ typename unary_functor_type::out_type > & output ,
+ bool use_vc = true
+ )
+{
+ remap < unary_functor_type , dim_target > ( ev , output , output , use_vc ) ;
+}
+
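In scalar terms, the semantics of the new apply() are simply these; the
template code above achieves the effect by calling remap with 'output'
doubling as the warp array:

#include <vector>

// feed every value to the functor and store the result back to the same
// location; hypothetical sketch, functor call simplified to operator()
template < typename functor_type , typename value_type >
void apply_sketch ( const functor_type & f ,
                    std::vector < value_type > & data )
{
  for ( auto & e : data )
    e = f ( e ) ;
}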
/// This is a variant of remap, which directly takes an array of values and remaps it,
/// internally creating a b-spline of given order just for the purpose. This is used for
/// one-shot remaps where the spline isn't reused.
@@ -900,7 +601,6 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
// create an evaluator over the bspline
-// typedef evaluator < dim_in , value_type , rc_type , ic_type > evaluator_type ;
typedef evaluator < coordinate_type , value_type > evaluator_type ;
evaluator_type ev ( bsp ) ;
@@ -912,19 +612,20 @@ int remap ( const MultiArrayView < coordinate_traits < coordinate_type > :: dime
return 0 ;
}
-/// index_generator is like warp_generator, but instead of picking coordinates from
-/// an array, the corresponding discrete coordinates are passed to the interpolator.
-/// This is a convenience
-/// routine, because it delivers the logic of both coordinate generation and value
-/// storage, in several threads (due to the use of fill) - and leaves the interesting
-/// work to be done in the unary_functor.
+/// class index_generator provides nD indices as input to its functor, indices which
+/// coincide with the location in the target array for which the functor is called.
+/// The data type of these indices is derived from the functor's input type. Again we
+/// presume that fill() will recurse until level 0, so index_generator's operator()
+/// will only be called at the lowest level of recursion, and we needn't even define
+/// it for higher levels.
-template < int dim_target , typename unary_functor_type >
+template < typename unary_functor_type , int level >
struct index_generator
{
+ typedef unary_functor_type functor_type ;
+
typedef typename unary_functor_type::out_type value_type ;
- enum { dim_source = unary_functor_type::dim_in } ;
+ enum { dimension = unary_functor_type::dim_in } ;
#ifdef USE_VC
@@ -937,92 +638,159 @@ struct index_generator
#endif
const unary_functor_type & itp ;
- const shape_range_type < dim_target > range ;
- vigra::MultiCoordinateIterator < dim_source > mci ;
+ const shape_range_type < dimension > range ;
+
+ const unary_functor_type & get_functor()
+ {
+ return itp ;
+ }
index_generator
( const unary_functor_type & _itp ,
- const shape_range_type < dim_target > _range
+ const shape_range_type < dimension > _range
)
: itp ( _itp ) ,
- range ( _range ) ,
- mci ( _range[1] - _range[0] )
+ range ( _range )
{ } ;
- void operator() ( value_type & target )
+ index_generator < unary_functor_type , level >
+ subrange ( const shape_range_type < dimension > range ) const
{
- itp.eval ( *mci + range[0] , target ) ;
- ++mci ;
+ return index_generator < unary_functor_type , level >
+ ( itp , range ) ;
}
+
+ index_generator < unary_functor_type , level - 1 >
+ bindOuter ( const int & c ) const
+ {
+ auto slice_start = range[0] , slice_end = range[1] ;
+
+ slice_start [ level ] += c ;
+ slice_end [ level ] = slice_start [ level ] + 1 ;
+
+ return index_generator < unary_functor_type , level - 1 >
+ ( itp , shape_range_type < dimension > ( slice_start , slice_end ) ) ;
+ }
+} ;
+
+/// specialization of index_generator for level 0. Here, the indices for all higher
+/// dimensions have been fixed by the hierarchical descent, and we only need to concern
+/// ourselves with the index for dimension 0, and supply the operator() implementations.
+/// Note how we derive the concrete type of index from the functor. This way, whatever
+/// the functor takes is provided without the need for type conversion, which would be necessary
+/// if we'd only produce integral indices here.
+template < typename unary_functor_type >
+struct index_generator < unary_functor_type , 0 >
+{
+ typedef unary_functor_type functor_type ;
+
+ typedef typename unary_functor_type::in_type index_type ;
+ typedef typename unary_functor_type::in_ele_type index_ele_type ;
+ typedef typename unary_functor_type::out_type value_type ;
+
+ enum { dimension = unary_functor_type::dim_in } ;
+
#ifdef USE_VC
+
+ enum { vsize = unary_functor_type::vsize } ;
+ typedef typename unary_functor_type::in_v index_v ;
+ typedef typename unary_functor_type::in_ele_v index_ele_v ;
+
+ index_v current_v ; // current vectorized index to feed to functor
+
+#else
+
+ enum { vsize = 1 } ;
+
+#endif
- typedef typename vigra::TinyVector < typename unary_functor_type::ic_v ,
- dim_source > nd_ic_v ;
+ index_type current ; // singular index
+
+ const unary_functor_type & itp ;
+ const shape_range_type < dimension > range ;
- void operator() ( value_type * target )
+ const unary_functor_type & get_functor()
{
- nd_ic_v index ;
- for ( int e = 0 ; e < vsize ; e++ )
- {
- for ( int d = 0 ; d < dim_source ; d++ )
- {
- index[d][e] = (*mci)[d] + range[0][d] ;
- }
- ++mci ;
- }
- itp.eval ( index , target ) ;
+ return itp ;
}
+
+ index_generator
+ ( const unary_functor_type & _itp ,
+ const shape_range_type < dimension > _range
+ )
+ : itp ( _itp ) ,
+ range ( _range )
+ {
+ // initially, set the singular index to the beginning of the range
+ current = index_type ( range[0] ) ;
+
+#ifdef USE_VC
+
+ // initialize current_v to hold the first simdized index
+ for ( int d = 0 ; d < dimension ; d++ )
+ current_v[d] = index_ele_v ( range[0][d] ) ;
+ current_v[0] += index_ele_v::IndexesFromZero() ;
+
+ // if vc is used, the singular index will only be used for mop-up action
+ // after all aggregates have been processed.
+ int size = range[1][0] - range[0][0] ;
+ int aggregates = size / vsize ;
+ current[0] += index_ele_type ( aggregates * vsize ) ; // for mop-up
#endif
- index_generator < dim_target , unary_functor_type >
- subrange ( const shape_range_type < dim_target > range ) const
+ } ;
+
+ /// single-value evaluation. This will be used for all values if vc isn't used,
+ /// or only for mop-up action after all full vectors are processed.
+
+ void operator() ( value_type & target )
{
- return index_generator < dim_target , unary_functor_type >
- ( itp , range ) ;
+ itp.eval ( current , target ) ;
+ current[0] += index_ele_type ( 1 ) ;
}
-
- index_generator < dim_target , unary_functor_type >
- bindOuter ( const int & c ) const
+
+#ifdef USE_VC
+
+ /// vectorized evaluation. The hierarchical descent has left us with only the
+ /// level-0 coordinate to increase, making this code very efficient.
+
+ template < typename target_type >
+ void operator() ( target_type & target )
{
- auto slice_start = range[0] , slice_end = range[1] ;
+ itp.eval ( current_v , target ) ;
+ current_v[0] += index_ele_v ( vsize ) ;
+ }
- slice_start [ dim_target - 1 ] += c ;
- slice_end [ dim_target - 1 ] = slice_start [ dim_target - 1 ] + 1 ;
-
- return index_generator < dim_target , unary_functor_type >
- ( itp , shape_range_type < dim_target > ( slice_start , slice_end ) ) ;
- }
+#endif
+
+ /// while we are at the lowest level here, we still need the subrange routine
+ /// for cases where the data are 1D in the first place: in this situation,
+ /// we need to split up the range as well.
+
+ index_generator < unary_functor_type , 0 >
+ subrange ( const shape_range_type < dimension > range ) const
+ {
+ return index_generator < unary_functor_type , 0 >
+ ( itp , range ) ;
+ }
} ;
/// index_remap() is very similar to remap(), but while remap() picks coordinates from
/// an array (the 'warp' array), index_remap feeds the unary_functor_type object the
/// discrete coordinates of the successive places data should be rendered to.
-/// So here we need a different type of unary_functor_type object: in remap(), we would use
-/// one which takes nD real coordinates, here we use one which takes nD discrete
-/// coordinates. Typically the unary_functor_type object we use here will therefore have
-/// some stage of converting the incoming discrete coordinates to 'interesting' real
-/// coordinates. If, for example, incoming coordinates are like
-/// (0,0), (1,0), (2,0) ...
-/// the unary_functor_type object might, in a first step, use a real factor on them to
-/// produce real coordinates like
-/// (0.0,0.0), (0.1,0.0), (0.2,0.0)...
-/// which in turn are used to perform some evaluation, yielding target data
-/// (R,G,B), (R,G,B),...
-/// which might be taken up again by index_remap to be stored in the target.
+/// Since the data type of the coordinates is derived from the unary functor's input type,
+/// index_generator can produce integral and real indices, as needed.
///
-/// index_remap takes two template arguments:
+/// index_remap takes one template argument:
///
/// - 'unary_functor_type', which is a class satisfying the interface laid down in
/// unary_functor.h. This is an object which can provide values given coordinates,
/// like class evaluator, but generalized to allow for arbitrary ways of achieving
-/// it's goal
-///
-/// - 'dim_target' - the number of dimensions of the target array. While the number of
-/// dimensions of the source data is apparent from the unary_functor_type object, the
-/// target array's dimensionality may be different, like when picking 2D slices from
-/// a volume.
+/// its goal. The unary functor's in_type determines the number of dimensions of
+/// the indices - since they are indices into the target array, the functor's input
+/// type has to have the same number of dimensions as the target array.
///
/// index_remap takes three parameters:
///
@@ -1035,17 +803,18 @@ struct index_generator
/// - a boolean flag 'use_vc' which can be set to false to switch off the use of vector
/// code in compiles which have it activated
-template < class unary_functor_type , // type satisfying the interface in class unary_functor
- int dim_target > // dimension of target array
+template < class unary_functor_type > // type satisfying the interface in class unary_functor
void index_remap ( const unary_functor_type & ev ,
- MultiArrayView < dim_target ,
+ MultiArrayView < unary_functor_type::dim_in ,
typename unary_functor_type::out_type > & output ,
bool use_vc = true
)
{
+ enum { dim_target = unary_functor_type::dim_in } ;
+
typedef typename unary_functor_type::out_type value_type ;
typedef TinyVector < int , dim_target > nd_ic_type ;
- typedef index_generator < dim_target , unary_functor_type > gen_t ;
+ typedef index_generator < unary_functor_type , dim_target - 1 > gen_t ;
shape_range_type < dim_target > range ( nd_ic_type() , output.shape() ) ;
gen_t gen ( ev , range ) ;
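In scalar terms, index_remap's semantics come down to this sketch for the
2D case; 'f' stands in for a unary functor taking discrete 2D coordinates,
and the call is again simplified to operator():

#include <vigra/multi_array.hxx>

// every target location receives the functor's value for that location's
// own discrete coordinates
template < typename functor_type , typename value_type >
void index_remap_sketch ( const functor_type & f ,
                          vigra::MultiArrayView < 2 , value_type > & output )
{
  for ( int y = 0 ; y < output.shape ( 1 ) ; y++ )
    for ( int x = 0 ; x < output.shape ( 0 ) ; x++ )
      output ( x , y ) = f ( vigra::TinyVector < int , 2 > ( x , y ) ) ;
}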
@@ -1283,7 +1052,7 @@ void st_grid_eval ( shape_range_type < target_type::actual_dimension > range ,
// this is the multithreaded version of grid_eval, which sets up the
// full range over 'result' and calls 'multithread' to do the rest
-/// grid_eval evaluates a b-spline object (TODO: general interpolator obj?)
+/// grid_eval evaluates a b-spline object
/// at points whose coordinates are distributed in a grid, so that for
/// every axis there is a set of as many coordinates as this axis is long,
/// which will be used in the grid as the coordinate for this axis at the
@@ -1305,7 +1074,7 @@ void st_grid_eval ( shape_range_type < target_type::actual_dimension > range ,
/// splines this saves quite some time, since the generation of weights
/// is computationally expensive.
///
-/// grid_eval is useful for generating scaled representation of the original
+/// grid_eval is useful for generating a scaled representation of the original
/// data, but when scaling down, aliasing will occur and the data should be
/// low-pass-filtered adequately before processing. Let me hint here that
/// low-pass filtering can be achieved by using b-spline reconstruction on
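To make the grid semantics concrete, here is a naive 2D sketch. It only
shows the indexing - the coordinate evaluated for target location (x,y) is
( grid_x[x] , grid_y[y] ) - and recomputes the per-axis weights for every
point, whereas the actual code's speedup comes from generating each axis'
weights only once per grid value and reusing them:

#include <vector>
#include <vigra/multi_array.hxx>

// ev stands in for a b-spline evaluator taking real 2D coordinates
template < typename evaluator_type , typename value_type , typename rc_type >
void grid_eval_sketch ( const evaluator_type & ev ,
                        const std::vector < rc_type > & grid_x ,
                        const std::vector < rc_type > & grid_y ,
                        vigra::MultiArrayView < 2 , value_type > & result )
{
  for ( std::size_t y = 0 ; y < grid_y.size() ; y++ )
    for ( std::size_t x = 0 ; x < grid_x.size() ; x++ )
      result ( x , y ) = ev ( vigra::TinyVector < rc_type , 2 >
                                ( grid_x [ x ] , grid_y [ y ] ) ) ;
}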
diff --git a/unary_functor.h b/unary_functor.h
index 6f6ccde..5ddfdd6 100644
--- a/unary_functor.h
+++ b/unary_functor.h
@@ -238,22 +238,22 @@ struct unary_functor
}
}
-// /// variant of load() taking a stride, currently unused
-//
-// template < typename T >
-// void load ( const T * const & pc ,
-// const int & stride ,
-// typename vector_traits < T , vsize > :: type & cv ) const
-// {
-// typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
-// enum { dimension = vigra::ExpandElementResult < T > :: size } ;
-//
-// for ( int d = 0 ; d < dimension ; d++ )
-// {
-// cv[d].gather ( (ele_type*) pc ,
-// ic_v::IndexesFromZero() * dimension * stride + d ) ;
-// }
-// }
+ /// variant of load() taking a stride
+
+ template < typename T >
+ void load ( const T * const & pc ,
+ const int & stride ,
+ typename vector_traits < T , vsize > :: type & cv ) const
+ {
+ typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
+ enum { dimension = vigra::ExpandElementResult < T > :: size } ;
+
+ for ( int d = 0 ; d < dimension ; d++ )
+ {
+ cv[d].gather ( (ele_type*) pc ,
+ ic_v::IndexesFromZero() * dimension * stride + d ) ;
+ }
+ }
/// method to store/scatter a simdized value to interleaved memory
@@ -278,22 +278,22 @@ struct unary_functor
}
} ;
-// /// variant of store taking a stride, currently unused
-//
-// template < typename T >
-// void store ( const typename vector_traits < T , vsize > :: type & ev ,
-// T * const & result ,
-// const int & stride ) const
-// {
-// typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
-// enum { dimension = vigra::ExpandElementResult < T > :: size } ;
-//
-// for ( int d = 0 ; d < dimension ; d++ )
-// {
-// ev[d].scatter ( (ele_type*) result ,
-// ic_v::IndexesFromZero() * dimension * stride + d ) ;
-// }
-// } ;
+ /// variant of store taking a stride
+
+ template < typename T >
+ void store ( const typename vector_traits < T , vsize > :: type & ev ,
+ T * const & result ,
+ const int & stride ) const
+ {
+ typedef typename vigra::ExpandElementResult < T > :: type ele_type ;
+ enum { dimension = vigra::ExpandElementResult < T > :: size } ;
+
+ for ( int d = 0 ; d < dimension ; d++ )
+ {
+ ev[d].scatter ( (ele_type*) result ,
+ ic_v::IndexesFromZero() * dimension * stride + d ) ;
+ }
+ } ;
void eval ( const in_type * const pmc ,
out_type * result ) const
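The index arithmetic in the strided load/store variants reactivated above,
ic_v::IndexesFromZero() * dimension * stride + d, can be written out in
scalar form to see which elements of the interleaved sequence each vector
lane touches; a small worked instance:

#include <cstdio>

int main()
{
  const int vsize = 4 , dimension = 3 , stride = 2 ;
  // lane e of channel d accesses element e * dimension * stride + d ,
  // so channel 0 gathers from elements 0 , 6 , 12 , 18
  for ( int d = 0 ; d < dimension ; d++ )
    for ( int e = 0 ; e < vsize ; e++ )
      std::printf ( "channel %d lane %d <-> element %d\n" ,
                    d , e , e * dimension * stride + d ) ;
}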
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git