[vspline] 23/72: added code for evaluation of mesh grids to remap.h
Kay F. Jahnke
kfj-guest at moszumanska.debian.org
Sun Jul 2 09:02:39 UTC 2017
This is an automated email from the git hooks/post-receive script.
kfj-guest pushed a commit to branch master
in repository vspline.
commit 1e263420474aed458919fce03d627393f9565910
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date: Sun Dec 25 12:22:31 2016 +0100
added code for evaluation of mesh grids to remap.h
---
eval.h | 152 ++++++++++++++++++++++++++++++--
remap.h | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
thread_pool.h | 12 +--
3 files changed, 426 insertions(+), 11 deletions(-)
diff --git a/eval.h b/eval.h
index 317d7c1..5d5b044 100644
--- a/eval.h
+++ b/eval.h
@@ -604,6 +604,16 @@ private:
public:
+ const int & get_order() const
+ {
+ return ORDER ;
+ }
+
+ const shape_type & get_stride() const
+ {
+ return coefficients.stride() ;
+ }
+
/// this constructor is the most flexible variant and will ultimately be called by all other
/// constructor overloads. class evaluator is coded to be (thread)safe as a functor: all state
/// (in terms of member data) is fixed at construction time and remains constant afterwards.
@@ -749,6 +759,16 @@ public:
(*(fweight[axis])) ( weight.data() + axis * ORDER , *ci ) ;
}
+ /// obtain weight for a single axis
+
+ template < typename rc_type , typename weight_type >
+ void obtain_weights ( weight_type * p_weight ,
+ const int & axis ,
+ const rc_type& c ) const
+ {
+ (*(fweight[axis])) ( p_weight , c ) ;
+ }
+
/// _eval is the workhorse routine and implements the recursive arithmetic needed to
/// evaluate the spline. First the weights for the current dimension are obtained
/// from the weights object passed in. Once the weights are known, they are successively
@@ -817,11 +837,59 @@ public:
}
} ;
+ /// _xeval takes an ele_type** 'weight', pointing to 'level' ele_type*, each of
+ /// which points to ORDER weights. This variant is used when the per-axis
+ /// components of the weights appear repeatedly, like in mesh grid processing.
+ // TODO it has to be seen if this is actually faster than just copying together
+ // the weights into the 2D MultiArrayView used by the routines above
+ // ... This is hard to vectorize and introduces lots of new code, so I'm not
+ // using it, but it's left in for now
+
+ // tentatively I coded:
+
+// template < int level , class dtype >
+// struct _xeval
+// {
+// dtype operator() ( const dtype* & pdata ,
+// offset_iterator & ofs ,
+// const ele_type** const weight ,
+// const int & ORDER
+// ) const
+// {
+// dtype result = dtype() ;
+// for ( int i = 0 ; i < ORDER ; i++ )
+// {
+// result += weight [ level ] [ i ]
+// * _eval < level - 1 , dtype >() ( pdata , ofs , weight ) ;
+// }
+// return result ;
+// }
+// } ;
+//
+// template < class dtype >
+// struct _xeval < 0 , dtype >
+// {
+// dtype operator() ( const dtype* & pdata ,
+// offset_iterator & ofs ,
+// const ele_type** const weight ,
+// const int & ORDER
+// ) const
+// {
+// dtype result = dtype() ;
+// for ( int i = 0 ; i < ORDER ; i++ )
+// {
+// result += pdata [ *ofs ] * weight [ 0 ] [ i ] ;
+// ++ofs ;
+// }
+// return result ;
+// }
+// } ;
+
// next are evaluation routines. there are quite a few, since I've coded for operation
// from different starting points and both for vectorized and nonvectorized operation.
/// evaluation variant which takes an offset and a set of weights. This is the final delegate,
- /// calling the recursive _eval method. Having the weights passed in via a const MultiArrayViev &
+ /// calling the recursive _eval method. Having the weights passed in via a const MultiArrayView &
/// allows calling code to provide their own weights, together with their shape, in one handy
/// packet. And in the normal sequence of delegation inside class eval, the next routine 'up'
/// can also package the weights nicely in a MultiArrayView. Note that select is now an ic_type,
@@ -836,6 +904,19 @@ public:
result = _eval<level,value_type>() ( base , ofs , weight ) ;
}
+// /// x... variant, taking component weights by address.
+// /// this variant is used for mesh grid processing
+//
+// void xeval ( const ic_type & select ,
+// const ele_type** const weight ,
+// const int & ORDER ,
+// value_type & result ) const
+// {
+// const value_type * base = coefficients.data() + select ;
+// offset_iterator ofs = offsets.begin() ;
+// result = _xeval<level,value_type>() ( base , ofs , weight , ORDER ) ;
+// }
+
/// 'penultimate' delegate, taking a multidimensional index 'select' to the beginning of
/// the coefficient window to process. Here the multidimensional index is translated
/// into an offset from the coefficient array's base adress. Carrying the information as
@@ -1092,6 +1173,55 @@ public:
}
} ;
+// template < class dtype , int level >
+// struct _v_xeval
+// {
+// dtype operator() ( const component_base_type& base , ///< base addresses of components
+// const ic_v& origin , ///< offsets to evaluation window origins
+// offset_iterator & ofs , ///< offsets to coefficients inside this window
+// const ele_v ** const weight ,
+// const int & ORDER ) const
+// {
+// dtype sum = dtype() ; ///< to accumulate the result
+// dtype subsum ; ///< to pick up the result of the recursive call
+//
+// for ( int i = 0 ; i < ORDER ; i++ )
+// {
+// subsum = _v_xeval < dtype , level - 1 >() ( base , origin , ofs , weight , ORDER );
+// for ( int ch = 0 ; ch < channels ; ch++ )
+// {
+// sum[ch] += weight [ level ] [ i ] * subsum[ch] ;
+// }
+// }
+// return sum ;
+// }
+// } ;
+//
+// /// the level 0 routine terminates the recursion
+//
+// template < class dtype >
+// struct _v_xeval < dtype , 0 >
+// {
+// dtype operator() ( const component_base_type& base , ///< base addresses of components
+// const ic_v& origin , ///< offsets to evaluation window origins
+// offset_iterator & ofs , ///< offsets to coefficients in this window
+// const ele_v ** const weight ,
+// const int & ORDER ) const ///< weights to apply
+// {
+// dtype sum = dtype() ;
+//
+// for ( int i = 0 ; i < ORDER ; i++ )
+// {
+// for ( int ch = 0 ; ch < channels ; ch++ )
+// {
+// sum[ch] += weight [ 0 ] [ i ] * ele_v ( base[ch] , origin + *ofs ) ;
+// }
+// ++ofs ;
+// }
+// return sum ;
+// }
+// } ;
+
// vectorized variants of the evaluation routines:
/// this is the vectorized version of the final delegate, calling _v_eval. The first
@@ -1114,9 +1244,21 @@ public:
result = _v_eval < mc_ele_v , level >() ( component_base , select , ofs , weight ) ;
}
- /// in the penultimate delegate, we calculate the weights. We also use this
- /// method 'further down' when we operate on 'compact split coordinates',
- /// which contain offsets and fractional parts.
+// /// x... variant, taking component weights by address
+// /// used for mesh grid processing
+//
+// void v_xeval ( const ic_v& select ,
+// const ele_v ** const weight ,
+// const int & ORDER ,
+// mc_ele_v & result ) const
+// {
+// offset_iterator ofs = component_offsets.begin() ;
+// result = _v_xeval < mc_ele_v , level >() ( component_base , select , ofs , weight , ORDER ) ;
+// }
+//
+// /// in the penultimate delegate, we calculate the weights. We also use this
+// /// method 'further down' when we operate on 'compact split coordinates',
+// /// which contain offsets and fractional parts.
void v_eval ( const ic_v& select , // offsets to coefficient windows
const nd_rc_v& tune , // fractional parts of the coordinates
@@ -1191,7 +1333,7 @@ public:
/// data are strided. Anyway, it doesn't hurt to have the option.
void v_eval ( const split_coordinate_type * const psc , // pointer to vsize split coordinates
- value_type * result ) const // pointer to vsize result values
+ value_type * result ) const // pointer to vsize result values
{
nd_ic_v select ;
nd_rc_v tune ;
diff --git a/remap.h b/remap.h
index 2f04dbe..2cf7565 100644
--- a/remap.h
+++ b/remap.h
@@ -1202,6 +1202,279 @@ void make_warp_array ( transformation < rc_type ,
}
+// in grid_weight, for every dimension we have a set of ORDER weights
+// for every position in this dimension. in grid_ofs, we have the
+// partial offset for this dimension for every position. these partial
+// offsets are the product of the index for this dimension at the position
+// and the stride for this dimension, so that the sum of the partial
+// offsets for all dimensions yields the offset into the coefficient array
+// to the window of coefficients where the weights are to be applied.
+
+template < typename interpolator_type , // type offering eval()
+ typename target_type , // iterates over target array
+ typename weight_type , // singular weight data type
+ int level > // current axis
+struct _grid_eval
+{
+ void operator() ( int initial_ofs ,
+ MultiArrayView < 2 , weight_type > & weight ,
+ weight_type** const & grid_weight ,
+ const int & ORDER ,
+ int ** const & grid_ofs ,
+ const interpolator_type & itp ,
+ target_type & result )
+ {
+ for ( int ofs = 0 ; ofs < result.shape ( level ) ; ofs++ )
+ {
+ for ( int e = 0 ; e < ORDER ; e++ )
+ weight [ vigra::Shape2 ( e , level ) ] = grid_weight [ level ] [ ORDER * ofs + e ] ;
+ int cum_ofs = initial_ofs + grid_ofs [ level ] [ ofs ] ;
+ auto region = result.bindAt ( level , ofs ) ;
+ _grid_eval < interpolator_type , decltype ( region ) , weight_type , level-1 >()
+ ( cum_ofs , weight , grid_weight , ORDER , grid_ofs , itp , region ) ;
+ }
+ }
+} ;
+
+template < typename interpolator_type ,
+ typename target_type ,
+ typename weight_type >
+struct _grid_eval < interpolator_type ,
+ target_type ,
+ weight_type ,
+ 0 >
+{
+ void operator() ( int initial_ofs ,
+ MultiArrayView < 2 , weight_type > & weight ,
+ weight_type** const & grid_weight ,
+ const int & ORDER ,
+ int ** const & grid_ofs ,
+ const interpolator_type & itp ,
+ target_type & region )
+ {
+ auto iter = region.begin() ;
+ int ofs_start = 0 ;
+
+#ifdef USE_VC
+
+ // on my system, using clang++, the vectorized code is slightly slower
+ // than the unvectorized code. With g++, the vectorized code is faster
+ // than either clang version, but the unvectorized code is much slower.
+
+ const int vsize = interpolator_type::vsize ;
+ const int channels = interpolator_type::channels ;
+ typedef typename interpolator_type::value_type value_type ;
+ typedef typename interpolator_type::ele_type ele_type ;
+ typedef typename interpolator_type::ic_v ic_v ;
+ typedef typename interpolator_type::ele_v ele_v ;
+ typedef typename interpolator_type::mc_ele_v mc_ele_v ;
+
+ // number of vectorized results
+ int aggregates = region.size() / vsize ;
+ // vectorized weights
+ MultiArray < 2 , ele_v > vweight ( weight.shape() ) ;
+ // vectorized offset
+ ic_v select ;
+ // buffer for target data
+ mc_ele_v vtarget ;
+
+ // initialize the vectorized weights for dimensions > 0
+ for ( int d = 1 ; d < weight.shape(1) ; d++ )
+ {
+ for ( int o = 0 ; o < ORDER ; o++ )
+ vweight [ vigra::Shape2 ( o , d ) ] = weight [ vigra::Shape2 ( o , d ) ] ;
+ }
+
+ // get a pointer to the target array's data (seen as elementary type)
+ ele_type * p_target = (ele_type*) ( region.data() ) ;
+ // and the stride, if any, also in terms of the elementary type, from
+ // one cluster of target data to the next
+ int stride = vsize * channels * region.stride(0) ;
+
+ for ( int a = 0 ; a < aggregates ; a++ )
+ {
+ // gather the individual weights into the vectorized form
+ for ( int o = 0 ; o < ORDER ; o++ )
+ {
+ vweight[ vigra::Shape2 ( o , 0 ) ].gather
+ ( grid_weight [ 0 ] + ORDER * a * vsize ,
+ ORDER * ic_v::IndexesFromZero() + o ) ;
+ }
+ select.load ( grid_ofs [ 0 ] + a * vsize ) ; // get the offsets from grid_ofs
+ select += initial_ofs ; // add cumulated offsets from higher dimensions
+ select *= channels ; // offsets are in terms of value_type, expand
+
+ // now we can call the vectorized eval routine
+ itp.v_eval ( select , vweight , vtarget ) ;
+
+ // finally we scatter the vectorized result to target memory
+ for ( int ch = 0 ; ch < channels ; ch++ )
+ vtarget[ch].scatter ( p_target + ch ,
+ channels * ic_v::IndexesFromZero() ) ;
+
+ // and set p_target to the next cluster of target values
+ p_target += stride ;
+ }
+
+ // adapt the iterator into target array
+ iter += aggregates * vsize ;
+ // and the initial offset
+ ofs_start += aggregates * vsize ;
+
+#endif
+
+ // if Vc wasn't used, we start with ofs = 0 and this loop
+ // does all the processing:
+ for ( int ofs = ofs_start ; ofs < region.shape ( 0 ) ; ofs++ )
+ {
+ for ( int e = 0 ; e < ORDER ; e++ )
+ weight [ vigra::Shape2 ( e , 0 ) ] = grid_weight [ 0 ] [ ORDER * ofs + e ] ;
+ int cum_ofs = initial_ofs + grid_ofs [ 0 ] [ ofs ] ;
+ // TODO: here's the place to vectorize: gather weights into ORDER vectors
+ // of weights, have a target buffer, call v_eval
+ itp.eval ( cum_ofs , weight , *iter ) ;
+ ++iter ;
+ }
+ }
+} ;
+
+// Here is the single-threaded code for the grid_eval function.
+// The first argument is a shape range, defining the subsets of data
+// to process in a single thread. The remainder are forwards of the
+// arguments to grid_eval, partly as pointers. The call is effected
+// via 'multithread()' which sets up the partitioning and distribution
+// to threads from a thread pool.
+
+template < typename interpolator_type , // type offering eval()
+ typename target_type , // type of target MultiArrayView
+ typename weight_type , // singular weight data type
+ typename rc_type > // singular real coordinate
+void st_grid_eval ( shape_range_type < target_type::actual_dimension > range ,
+ rc_type ** const _grid_coordinate ,
+ const interpolator_type * itp ,
+ target_type * p_result )
+{
+ const int ORDER = itp->get_order() ;
+ const int dim_target = target_type::actual_dimension ;
+
+ // pick the subarray of the 'whole' target array pertaining to this thread's range
+ auto result = p_result->subarray ( range[0] , range[1] ) ;
+
+ // pick the subset of coordinates pertaining to this thread's range
+ const rc_type * grid_coordinate [ dim_target ] ;
+ for ( int d = 0 ; d < dim_target ; d++ )
+ grid_coordinate[d] = _grid_coordinate[d] + range[0][d] ;
+
+ // set up storage for precalculated weights and offsets
+
+ weight_type * grid_weight [ dim_target ] ;
+ int * grid_ofs [ dim_target ] ;
+
+ // get some metrics
+ TinyVector < int , dim_target > shape ( result.shape() ) ;
+ TinyVector < int , dim_target > stride ( itp->get_stride() ) ;
+
+ // allocate space for the per-axis weights and offsets
+ for ( int d = 0 ; d < dim_target ; d++ )
+ {
+ grid_weight[d] = new weight_type [ ORDER * shape [ d ] ] ;
+ grid_ofs[d] = new int [ shape [ d ] ] ;
+ }
+
+ int select ;
+ rc_type tune ;
+
+ // obtain the evaluator's mapping
+ auto mmap = itp->get_mapping() ;
+
+ // fill in the weights and offsets, using the interpolator's mapping to split
+ // the coordinates received in grid_coordinate, the interpolator's obtain_weights
+ // method to produce the weight components, and the strides of the coefficient array
+ // to convert the integral parts of the coordinates into offsets.
+ for ( int d = 0 ; d < dim_target ; d++ )
+ {
+ for ( int c = 0 ; c < shape [ d ] ; c++ )
+ {
+ mmap ( grid_coordinate [ d ] [ c ] , select , tune , d ) ;
+ itp->obtain_weights ( grid_weight [ d ] + ORDER * c , d , tune ) ;
+ grid_ofs [ d ] [ c ] = select * stride [ d ] ;
+ }
+ }
+
+ // allocate storage for a set of singular weights
+ MultiArray < 2 , weight_type > weight ( vigra::Shape2 ( ORDER , dim_target ) ) ;
+
+ // now call the recursive workhorse routine
+ _grid_eval < interpolator_type ,
+ target_type ,
+ weight_type ,
+ dim_target - 1 >()
+ ( 0 ,
+ weight ,
+ grid_weight ,
+ ORDER ,
+ grid_ofs ,
+ *itp ,
+ result ) ;
+
+ // clean up
+ for ( int d = 0 ; d < dim_target ; d++ )
+ {
+ delete[] grid_weight[d] ;
+ delete[] grid_ofs[d] ;
+ }
+
+}
+
+// this is the multithreaded version of grid_eval, which sets up the
+// full range over 'result' and calls 'multithread' to do the rest
+
+/// grid_eval evaluates a b-spline object (TODO: general interpolator obj)
+/// at points whose coordinates are distributed in a grid, so that for
+/// every axis there is a set of as many coordinates as this axis is long,
+/// which will be used in the grid as the coordinate for this axis at the
+/// corresponding position. The resulting coordinate matrix (which remains
+/// implicit) is like a mesh grid made from the per-axis coordinates.
+///
+/// If we have two dimensions and x coordinates x0, x1 and x2, and y
+/// coordinates y0 and y1, the resulting implicit coordinate matrix is
+///
+/// (x0,y0) (x1,y0) (x2,y0)
+///
+/// (x0,y1) (x1,y1) (x2,y1)
+///
+/// since the offsets and weights needed to perform an interpolation
+/// only depend on the coordinates, this highly redundant coordinate array
+/// can be processed more efficiently by precalculating the offset component
+/// and weight component for all axes and then simply permuting them to
+/// obtain the result. Especially for higher-degree and higher-dimensional
+/// splines this saves quite some time, since the generation of weights
+/// is computationally expensive.
+///
+/// grid_eval is useful for generating scaled representation of the original
+/// data, but when scaling down, aliasing will occur and the data should be
+/// low-pass-filtered adequately before processing.
+
+// TODO: I intend to implement a process where evaluation to a smaller target
+// is performed on unfiltered data with a b-spline evaluator of suitable degree,
+// resulting in a suitably filtered scaled-down version. This process is then
+// repeated several times, resulting in a pyramid which can be used to obtain
+// antialiased interpolates at all scales.
+
+template < typename interpolator_type , // type offering eval()
+ typename target_type ,
+ typename weight_type , // singular weight data type
+ typename rc_type > // singular real coordinate
+void grid_eval ( rc_type ** const grid_coordinate ,
+ const interpolator_type & itp ,
+ target_type & result )
+{
+ const int dim_target = target_type::actual_dimension ;
+ shape_range_type < dim_target > range ( shape_type < dim_target > () , result.shape() ) ;
+ multithread ( st_grid_eval < interpolator_type , target_type , weight_type , rc_type > ,
+ ncores , range , grid_coordinate , &itp , &result ) ;
+}
+
} ; // end of namespace vspline
#endif // VSPLINE_REMAP_H
diff --git a/thread_pool.h b/thread_pool.h
index a917def..4b4318e 100644
--- a/thread_pool.h
+++ b/thread_pool.h
@@ -43,12 +43,6 @@
namespace vspline
{
-/// code to run a worker thread
-/// We use a thread pool of worker threads. These threads have a very
-/// simple cycle: They try and obtain a task (std::function<void()>).
-/// If there is one to be had, it is invoked, otherwise they wait on
-/// task_cv.
-
class thread_pool
{
// used to switch off the worker threads at program termination
@@ -74,6 +68,12 @@ public:
private:
+ /// code to run a worker thread
+ /// We use a thread pool of worker threads. These threads have a very
+ /// simple cycle: They try and obtain a task (std::function<void()>).
+ /// If there is one to be had, it is invoked, otherwise they wait on
+ /// task_cv.
+
void worker_thread()
{
std::function < void() > task ;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git
More information about the debian-science-commits
mailing list