[vspline] 12/72: factored out multithreading code into multithread.h, using it in filter.h Now that I rewrote the multithreading code, I need to make some changes to the code using multithreading as well, since I introduced the passing of ranges together with the 'whole' arguments to the single-thread code and the signatures are slightly different. In this intermediate commit, the old code in common.h (split_array_to_chunks, divide_and_conquer) is still present and used for evaluation of the spline, but the (pre)filtering code already uses the new multithreading code in multithread.h.
Kay F. Jahnke
kfj-guest at moszumanska.debian.org
Sun Jul 2 09:02:38 UTC 2017
This is an automated email from the git hooks/post-receive script.
kfj-guest pushed a commit to branch master
in repository vspline.
commit a29afea2a33c7fed04ec5740a2ebd4c086a38a8a
Author: Kay F. Jahnke <kfjahnke at gmail.com>
Date: Sun Nov 20 11:54:15 2016 +0100
factored out multithreading code into multithread.h, using it in filter.h
Now that I rewrote the multithreading code, I need to make some changes to
the code using multithreading as well, since I introduced the passing of
ranges together with the 'whole' arguments to the single-thread code and
the signatures are slightly different. In this intermediate commit, the
old code in common.h (split_array_to_chunks, divide_and_conquer) is still
present and used for evaluation of the spline, but the (pre)filtering code
already uses the new multithreading code in multithread.h.
---
common.h | 2 +-
example/impulse_response.cc | 461 +-------------------------------------------
filter.h | 219 +++++++++++----------
multithread.h | 354 ++++++++++++++++++++++++++++++++++
4 files changed, 474 insertions(+), 562 deletions(-)
diff --git a/common.h b/common.h
index b07acd9..aeb9768 100644
--- a/common.h
+++ b/common.h
@@ -43,7 +43,7 @@
#define VSPLINE_COMMON
#include <vigra/multi_array.hxx>
-#include <thread>
+#include <vspline/multithread.h>
#ifdef USE_VC
diff --git a/example/impulse_response.cc b/example/impulse_response.cc
index 71fef16..f9a42ec 100644
--- a/example/impulse_response.cc
+++ b/example/impulse_response.cc
@@ -72,459 +72,8 @@
#include <iomanip>
#include <assert.h>
+#include <vspline/multithread.h>
#include <vspline/vspline.h>
-#include <vigra/multi_math.hxx>
-#include <random>
-#include <tuple>
-
-// iterator_splitter will try to set up n ranges from a range. the partial
-// ranges are stored in a std::vector. The split may succeed producing n
-// or less ranges, and if iter_range can't be split at all, a single range
-// encompassing the whole of iter_range will be returned in the result vector.
-
-template < class _iterator_type >
-struct iterator_splitter
-{
- typedef _iterator_type iterator_type ;
- typedef vigra::TinyVector < iterator_type , 2 > range_type ;
- typedef std::vector < range_type > partition_type ;
-
- static partition_type part ( const range_type & iter_range ,
- int n )
- {
- std::vector < range_type > res ;
- assert ( n > 0 ) ;
-
- iterator_type start = iter_range [ 0 ] ;
- iterator_type end = iter_range [ 1 ] ;
- int size = end - start ;
- if ( n > size )
- n = size ;
-
- int chunk_size = size / n ; // will be at least 1
-
- for ( int i = 0 ; i < n - 1 ; i++ )
- {
- res.push_back ( range_type ( start , start + chunk_size ) ) ;
- start += chunk_size ;
- }
- res.push_back ( range_type ( start , end ) ) ;
- return res ;
- }
-} ;
-
-// shape_splitter will try to split a shape into n ranges by 'chopping' it
-// along the outermost axis that can be split n-ways. The additional parameter
-// 'forbid' prevents that particular axis from being split. The split may succeed
-// producing n or less ranges, and if 'shape' can't be split at all, a single range
-// encompassing the whole of 'shape' will be returned in the result vector.
-
-template < int dim >
-struct shape_splitter
-{
- typedef typename vigra::MultiArrayShape < dim > :: type shape_type ;
- typedef vigra::TinyVector < shape_type , 2 > range_type ;
- typedef std::vector < range_type > partition_type ;
-
- static partition_type part ( const shape_type & shape , ///< shape to be split n-ways
- int n , ///< intended number of chunks
- int forbid = -1 ) ///< axis which shouldn't be split
- {
- partition_type res ;
-
- // find the outermost dimension that can be split n ways, and it's extent
- int split_dim = -1 ;
- int max_extent = -1 ;
- for ( int md = dim - 1 ; md >= 0 ; md-- )
- {
- if ( md != forbid
- && shape[md] > max_extent
- && shape[md] >= n )
- {
- max_extent = shape[md] ;
- split_dim = md ;
- break ;
- }
- }
-
- // if the search did not yet succeed:
- if ( max_extent == -1 )
- {
- // repeat process with relaxed conditions: now the search will also succeed
- // if there is an axis which can be split less than n ways
- for ( int md = dim - 1 ; md >= 0 ; md-- )
- {
- if ( md != forbid
- && shape[md] > 1 )
- {
- max_extent = shape[md] ;
- split_dim = md ;
- break ;
- }
- }
- }
-
- if ( split_dim == -1 )
- {
- // we have not found a dimension for splitting. We pass back res with
- // a range over the whole initial shape as it's sole member
- res.push_back ( range_type ( shape_type() , shape ) ) ;
- }
- else
- {
- // we can split the shape along split_dim
-
- int w = shape [ split_dim ] ; // extent of the dimension we can split
- n = std::min ( n , w ) ; // just in case, if that is smaller than n
-
- int cut [ n ] ; // where to chop up this dimension
-
- for ( int i = 0 ; i < n ; i++ )
- cut[i] = ( (i+1) * w ) / n ; // roughly equal chunks, but certainly last cut == a.end()
-
- shape_type start , end = shape ;
-
- for ( int i = 0 ; i < n ; i++ )
- {
- end [ split_dim ] = cut [ i ]; // apply the cut locations
- res.push_back ( range_type ( start , end ) ) ;
- start [ split_dim ] = end [ split_dim ] ;
- }
- }
- return res ; // return the number of chunks
- }
-} ;
-
-// // we have a std::vector of tuples of bunch_type to which we want to assign
-// // array chunks, so that the first tuple in the vector receives the first chunks
-// // of all elements of bunch, the second tuple receives the second set of chunks
-// // and so forth. We can't iterate over tuple elements in a loop (get<K> requires
-// // a const K), but we can recurse over the elements, filling the Kth member
-// // of all tuples in the target vector in every step of the recursion:
-//
-// template < class bunch_type , int e , int dimension >
-// struct push_members
-// {
-// static void push ( std::vector < bunch_type > & target ,
-// const bunch_type & bunch ,
-// const typename shape_splitter < dimension > :: partition_type & ranges )
-// {
-// // extract member e-1 from the unsplit tuple
-// auto current = std::get<e-1> ( bunch ) ;
-//
-// // to perform the shape consistency check, we compare to the first member:
-// if ( e > 1 )
-// {
-// const auto & first = std::get<0> ( bunch ) ;
-// if ( first.shape() != current.shape() )
-// throw vspline::shape_mismatch ( "coprocess: all arrays must have the same size" ) ;
-// }
-//
-// // obtain the parts by applying 'ranges'
-// // and fill in the parts at position e-1 in every tuple in 'target'
-// for ( int k = 0 ; k < ranges.size() ; k++ )
-// std::get < e - 1 > ( target [ k ] ) = current.subarray ( ranges[k][0] , ranges[k][1] ) ;
-// // and recurse to the next-lower member
-// push_members < bunch_type , e - 1 , dimension > :: push ( target , bunch , ranges ) ;
-// }
-// } ;
-//
-// // terminate the recursion over e, the ordinal number of the tuple's element
-// // with a partial specialization for e == 0
-//
-// template < class bunch_type , int dimension >
-// struct push_members < bunch_type , 0 , dimension >
-// {
-// static void push ( ... ) { } // doesn't do anything at this level.
-// } ;
-//
-// // using C++ magic gleaned from first answer in
-// // http://stackoverflow.com/questions/7858817/unpacking-a-tuple-to-call-a-matching-function-pointer?noredirect=1&lq=1
-//
-// // seq is an empty struct with a variadic template argument list of int
-// template<int ...>
-// struct seq { };
-//
-// // level N 'gens' inherits from level N-1 'gens'
-// template<int N, int ...S>
-// struct gens : gens<N-1, N-1, S...> { };
-//
-// // level 0 'gens' is partially specialized (N==0) and typedefs 'type'
-// // this typedef is inherited by all higher-level 'gens'
-// template<int ...S>
-// struct gens<0, S...> {
-// typedef seq<S...> type;
-// };
-//
-// // why all the artistry? we have our parameter sets in tuples, but the
-// // function we want to call expects individual arguments. while we might
-// // require that the function be rewritten to accept a tuple instead of
-// // individual arguments (which is trivial), being able to use the function
-// // 'as is' is more intuitive, especially if the function's definition is
-// // somewhere else and we'd have to code a wrapper for it. We use two steps
-// // to achieve the call to func:
-// //
-// // here we have the 'inner' callFunc.
-// // the actual call to func happens here: _callFunc takes func, an argument
-// // tuple and a 'seq' type 'S' with a template parameter list of ints 0...N-1
-// // with N being the number of arguments func expects.
-// // the sequence is submitted to pack expansion with a pattern which
-// // produces the tuple's kth element for each k in S. The comma-separated
-// // list of the tuple members which results is passed as argument list to func.
-//
-// template < typename return_type , typename ...Args , int ...S >
-// return_type _callFunc ( std::function < return_type ( Args... ) > func ,
-// std::tuple < Args... > params ,
-// seq<S...> )
-// {
-// return func ( std::get < S > ( params ) ... ) ; // expand pack params and call func
-// }
-//
-// // the 'outer' callFunc receives func and a tuple of arguments.
-// // Here, the number of arguments is established (sizeof...(Args)) and used to
-// // create a 'seq' object, whose type reflects this number of arguments and
-// // which, by pack expansion, can produce individual arguments from the tuple
-// // in the previous routine _callFunc. func and params are passed on to the
-// // 'inner' _callFunc, together with the seq object. The inner _callFunc
-// // performs the actual function call.
-//
-// template < typename return_type , typename ...Args >
-// return_type callFunc ( std::function < return_type ( Args... ) > func ,
-// std::tuple < Args... > params )
-// {
-// return _callFunc ( func , params , typename gens<sizeof...(Args)>::type() ) ;
-// }
-//
-// // with the infrastructure code at hand, we can proceed to formulate a generalized
-// // multithreading routine for sets of MultiArrayViews. Here we have 'coprocess'
-// // which receives a std::function taking one or several MultiArrayViews, the
-// // desired number of threads, and as many MultiArrayViews as 'func' can process.
-// // TODO: I haven't managed yet to pass the views per reference
-// // coprocess tries to create a set of tuples of the same type as the tuple
-// // which was created from the arguments passed to coprocess (bunch_type),
-// // so that each tuple in the set contains arguments for a call to func which
-// // operates on a set of subarrays of the original arguments. With this set of tuples,
-// // representing a partition of the work, the work can be delegated to several threads.
-//
-// template<class ... Types>
-// int coprocess ( std::function < void ( Types... ) > func , int parts , Types ...args )
-// {
-// // we gather the arguments in a tuple
-// auto bunch = std::make_tuple ( args... ) ;
-// typedef decltype ( bunch ) bunch_type ;
-// const int members = std::tuple_size < bunch_type > :: value ;
-//
-// auto first = std::get<0> ( bunch ) ;
-// typedef decltype ( first ) view_type ;
-// const int dimension = view_type::actual_dimension ;
-//
-// auto shape = first.shape() ;
-// auto ranges = shape_splitter < dimension > :: part ( shape , parts ) ;
-// parts = ranges.size() ;
-//
-// // set up the set of tuples containing arguments for the partitioned job,
-//
-// std::vector < bunch_type > partition ( parts ) ;
-//
-// // fill in the tuples corresponding to the ranges
-// push_members < bunch_type , members , dimension > :: push ( partition , bunch , ranges ) ;
-//
-// // set up thread objects for the additional threads we'll use
-// int new_threads = parts - 1 ;
-// std::thread * t[new_threads] ;
-//
-// // start the additional threads, passing callFunc as the thread's functional,
-// // and func and the tuple containing the partial job's arguments. the new threads
-// // will proceed to 'explode' the tuple and feed the individual arguments to func:
-// int s ;
-// for ( s = 0 ; s < new_threads ; s++ )
-// {
-// t[s] = new std::thread ( callFunc < void , Types... > , func , partition[s] ) ;
-// }
-// // do the last partial - or only - job in this very thread:
-// callFunc ( func , partition[s] ) ;
-//
-// // finally, join the additional threads and delete their thread objects
-// for ( s = 0 ; s < new_threads ; s++ )
-// {
-// t[s]->join() ;
-// delete t[s] ;
-// }
-// return parts ;
-// }
-//
-// // the simpler task is to formulate coprocess using 'bunched' arguments in the first
-// // place. This makes the formulation of 'func' a bit more involved, as func now has
-// // to accept a tuple as it's sole argument and unpack the tuple's contents, but the
-// // processing in this variant of 'coprocess' is much more straightforward, since the
-// // artistry needed to call func with an 'exploded' tuple is not needed here:
-//
-// template < class bunch_type >
-// int coprocess ( std::function < void ( bunch_type ) > func , int parts , bunch_type bunch )
-// {
-// const int members = std::tuple_size < bunch_type > :: value ;
-//
-// auto first = std::get<0> ( bunch ) ;
-// typedef decltype ( first ) view_type ;
-// const int dimension = view_type::actual_dimension ;
-//
-// auto shape = first.shape() ;
-// auto ranges = shape_splitter < dimension > :: part ( shape , parts ) ;
-// parts = ranges.size() ;
-//
-// // set up the set of tuples containing arguments for the partitioned job
-// std::vector < bunch_type > partition ( parts ) ;
-//
-// // fill in the tuples corresponding to the ranges
-// push_members < bunch_type , members , dimension > :: push ( partition , bunch , ranges ) ;
-//
-// // set up thread objects for the additional threads we'll use (if any)
-// int new_threads = parts - 1 ;
-// std::thread * t[new_threads] ;
-//
-// // start the additional threads, passing func as the thread's functional and the tuple
-// // containing the partial job's arguments. the new threads will proceed to pass their tuple
-// // straight to func:
-// int s ;
-// for ( s = 0 ; s < new_threads ; s++ )
-// {
-// t[s] = new std::thread ( func , partition[s] ) ;
-// }
-// // do the last partial job in this very thread. this may be the only job, if push_members
-// // has returned 1, failing to partition the job
-// func ( partition[s] ) ;
-//
-// // finally, join the additional threads and delete their thread objects
-// for ( s = 0 ; s < new_threads ; s++ )
-// {
-// t[s]->join() ;
-// delete t[s] ;
-// }
-// // and return the number of threads which was used.
-// return parts ;
-// }
-//
-// // sample function to run with coprocess:
-//
-// void twosome ( vigra::MultiArrayView < 1 , double > in1 , vigra::MultiArrayView < 1 , double > in2 )
-// {
-// // std::cout << "shape 1 : " << in1 . shape() << std::endl ;
-// // std::cout << "shape 2 : " << in2 . shape() << std::endl ;
-// for ( int i = 0 ; i < in1.shape(0) ; i++ )
-// in2[i] = in1[i] + 42.0 ;
-// }
-//
-// // sample function to run with the bunched version of coprocess:
-//
-// void btwosome ( std::tuple < vigra::MultiArrayView < 1 , double > ,
-// vigra::MultiArrayView < 1 , double > > in )
-// {
-// for ( int i = 0 ; i < std::get<1>(in).shape(0) ; i++ )
-// std::get<1>(in) [ i ] += std::get<0>(in) [ i ] + 43.0 ;
-// }
-
-// given limit_type, we define range_type as a TinyVector of two limit_types,
-// the first denoting the beginning of the range and the second it's end, with
-// end being outside of the range.
-
-template < class limit_type >
-using range_type = vigra::TinyVector < limit_type , 2 > ;
-
-// given range_type, we define partition_type as a std::vector of range_type.
-// This data type is used to hold the partitioning of a range into subranges.
-
-template < class range_type >
-using partition_type = std::vector < range_type > ;
-
-// given a dimension, we define a shape_type as a TinyVector of long
-// of this dimension. This is equivalent to vigra's shape type.
-
-template < int dimension >
-using shape_type = vigra::TinyVector < long , dimension > ;
-
-// given a dimension, we define shape_range_type as a range defined by
-// two shapes of the given dimension. This definition allows us to directly
-// pass the two shapes as arguments to a call of subarray() on a MultiArrayView
-// of the given dimension
-
-template < int dimension >
-using shape_range_type = range_type < shape_type < dimension > > ;
-
-// for any dimension d, we specialize function partition() for shape_range_type<d>.
-// we use the extant class shape_splitter to perform the split
-
-template < int d >
-partition_type < shape_range_type<d> > partition ( shape_range_type<d> range , int nparts )
-{
- // perform the split, silently assuming that the range passed in starts at origin
- // TODO while we don't ever call this specialization of partition() with a range which
- // doensn't start at origin, we might either accomodate such use, forbid it, or rethink
- // the logic...
-
- return shape_splitter < d > :: part ( range[1] , nparts ) ;
-}
-
-/// multithread is a routine to perform multithreading with functors accessing several
-/// containers, iterators etc. ('manifolds') of identical structure together.
-/// when such a functor is invoked for a specific thread, it will only process a subset
-/// of each of the original manifold objects. This 'subset' notion is codified as range_type:
-/// range_type specifies a starting point and an end point inside the manifolds defining
-/// the extent of the subset. While it is tempting to code so that the multithreading
-/// routine feeds the subsets to the individual threads, I found that passing the
-/// range to the thread, together with the original variadic arguments to multithread,
-/// produces clean and compact code with a slightly higher burden on the functor, since
-/// it has to 'cut out' the subset it is meant to process itself. Why is this better?
-/// Because it unburdens the multithreading code from knowing any specifics about the
-/// 'cutting out' procedure, and at the same time allows the functor to access the
-/// 'whole' manifold and it's metrics if this is needed.
-/// The partitioning of the incoming range is left to function 'partition' which has to
-/// be specialized for any concrete range_type used, and multithread simply relies on
-/// a specialized version being present.
-
-template < class range_type , class ...Types >
-int multithread ( std::function < void ( range_type , Types... ) > func ,
- int nparts ,
- range_type range ,
- Types ...args )
-{
- // partition the range and update nparts in case the partitioning produced
- // less ranges than we initially asked for. Rely on the presence of a function
- // partition() which can process range_type:
-
- auto partitioning = partition ( range , nparts ) ;
- nparts = partitioning.size() ;
-
- // set up thread objects for the additional threads we'll use (if any)
-
- int new_threads = nparts - 1 ;
- std::thread * t[new_threads] ;
-
- // start the additional threads (if any), passing func as the thread's functional,
- // followed by the range over which func is meant to operate, followed by any other
- // arguments passed to multithread as parameter pack 'args'
-
- for ( int s = 0 ; s < new_threads ; s++ )
- {
- t[s] = new std::thread ( func , partitioning[s] , args... ) ;
- }
-
- // do the last partial job in this very thread. this may be the only job,
- // if partition() has failed to partition the job
-
- func ( partitioning [ new_threads ] , args... ) ;
-
- // join the additional threads (if any) and delete their thread objects
-
- for ( int s = 0 ; s < new_threads ; s++ )
- {
- t[s]->join() ;
- delete t[s] ;
- }
-
- // and return the number of threads which was used.
-
- return nparts ;
-}
/// sample function to run with multithread
/// for any dimension d, we define 'rtwosome' as a function taking a shape_range_type
@@ -533,7 +82,7 @@ int multithread ( std::function < void ( range_type , Types... ) > func ,
/// the functions used with multithread might be more or less specialized.
template < int dimension >
-void rtwosome ( shape_range_type<dimension> r ,
+void rtwosome ( vspline::shape_range_type<dimension> r ,
vigra::MultiArrayView < dimension , double > _in1 ,
vigra::MultiArrayView < dimension , double > _in2 )
{
@@ -627,9 +176,9 @@ int main ( int argc , char * argv[] )
// auto t = std::make_tuple ( va , vb ) ;
// coprocess ( bfun , 4 , t ) ;
- shape_range_type<1> range ( shape_type() , va.shape() ) ;
- std::function < void ( shape_range_type<1> , view_type , view_type ) > rfun ( rtwosome<1> ) ;
- multithread ( rfun , 4 , range , va , vb ) ;
+ vspline::shape_range_type<1> range ( shape_type(3) , va.shape() ) ;
+ std::function < void ( vspline::shape_range_type<1> , view_type , view_type ) > rfun ( rtwosome<1> ) ;
+ vspline::multithread ( rfun , 4 , range , va , vb ) ;
for ( auto v : vb )
std::cout << v << std::endl ;
diff --git a/filter.h b/filter.h
index 6fa0cb7..e784109 100644
--- a/filter.h
+++ b/filter.h
@@ -62,7 +62,7 @@ namespace vspline {
/// overall_gain is a helper routine:
/// Simply executing the filtering code by itself will attenuate the signal.
-/// Here we calculate the gain which, applied to the signal, will cancel this effect.
+/// Here we calculate the gain which, pre-applied to the signal, will cancel this effect.
/// While this code was initially part of the filter's constructor, I took it out
/// to gain some flexibility by passing in the gain as a parameter.
@@ -821,25 +821,33 @@ scatter ( const T* source ,
/// While the buffering consumes some time, it saves time on the actual filter calculation,
/// especially with higher-order filters. On my system, I found I broke even even with only
/// one pole, so there is no special treatment here for low-order filtering (TODO confirm)
-/// TODO: what if dim == 1? should it have special treatment or is source.bindAt ( axis , 0 )
-/// safe in this case?
+/// note the use of range_type<T>, which is from multithread.h
template < class source_type ,
class target_type >
-int nonaggregating_filter ( source_type &source ,
- target_type &target ,
- int axis ,
- double gain ,
- bc_code bc ,
- int nbpoles ,
- const double * pole ,
- double tolerance
- )
+void nonaggregating_filter ( range_type < typename source_type::difference_type > range ,
+ source_type original_source ,
+ target_type original_target ,
+ int axis ,
+ double gain ,
+ bc_code bc ,
+ int nbpoles ,
+ const double * pole ,
+ double tolerance
+ )
{
const int dim = source_type::actual_dimension ;
typedef typename source_type::value_type value_type ;
typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+ // we're in the single-threaded code now. multithread() has simply forwarded
+ // the source and target MultiArrayViews and a range, here we use the range
+ // to pick out the subarrays of original_source and original_target which we
+ // are meant to process in this thread:
+
+ auto source = original_source.subarray ( range[0] , range[1] ) ;
+ auto target = original_target.subarray ( range[0] , range[1] ) ;
+
int count = source.shape ( axis ) ;
/// we use a buffer of count value_types
@@ -892,7 +900,7 @@ int nonaggregating_filter ( source_type &source ,
while ( source_sliter < source_sliter_end )
{
- // copy from the array to the buffer
+ // copy from the array to the buffer with a monadic gather
source_index = &(*source_sliter) - source_base_adress ;
@@ -904,8 +912,7 @@ int nonaggregating_filter ( source_type &source ,
solver.solve ( buffer.begin() ) ;
- // and perform extended scatter with extrusion parameters to write the filtered data
- // to the destination
+ // and perform a monadic scatter to write the filtered data to the destination
scatter<value_type> ( buffer_base_adress , target_base_adress + source_index ,
source_stride , count ) ;
@@ -943,7 +950,6 @@ int nonaggregating_filter ( source_type &source ,
++target_sliter ;
}
}
- return 0 ;
}
// the use of Vc has to be switched on with the flag USE_VC.
@@ -1083,42 +1089,33 @@ scatter ( const typename VT::value_type * source ,
}
/// aggregating_filter (below) keeps a buffer of vector-aligned memory, which it fills
-/// with 1D subarrays of the source array which are collinear to the processing
-/// axis, The buffer is then submitted to vectorized forward-backward recursive filtering
+/// with 1D subarrays of the source array which are collinear to the processing axis.
+/// The buffer is then submitted to vectorized forward-backward recursive filtering
/// and finally stored back to the corresponding memory area in target, which may
/// be the same as source, in which case the operation is seemingly performed
/// in-place (while in fact the buffer is still used). Buffering takes the bulk
/// of the processing time (on my system), the vectorized maths are fast by
-/// comparison. Depending on array size and spline degree, sometimes the
-/// nonvectorized code is faster. But as both grow, bufering soon comes out
-/// the winner.
+/// comparison. Depending on data type, array size and spline degree, sometimes the
+/// nonvectorized code is faster. But as both grow, bufering soon comes out on top.
/// ele_aggregating_filter is a subroutine processing arrays of elementary value_type.
/// It's used by aggregating_filter, either directly if the arrays' value_type is already
/// an elementary type, or after element-expanding the array(s).
template < class source_type ,
class target_type >
-int ele_aggregating_filter ( source_type &source ,
- target_type &target ,
- int axis ,
- double gain ,
- bc_code bc ,
- int nbpoles ,
- const double * pole ,
- double tolerance
- )
+void ele_aggregating_filter ( source_type &source ,
+ target_type &target ,
+ int axis ,
+ double gain ,
+ bc_code bc ,
+ int nbpoles ,
+ const double * pole ,
+ double tolerance
+ )
{
typedef typename source_type::value_type ele_type ; ///< elementary type in source
- // Initially I used a plain Vc::Vector as the simdized type, but I found that using a SimdArray
- // of twice that size performed slightly better. Here's the one place where this type can be fixed,
- // it's choice 'trickles down' to the remainder of the code, so experimentation is cheap. vsize has
- // to be a multiple of the genuine vector's size, though, because in gather/scatter above we explicitly
- // use aligned load/stores to the buffer. If we use plain load/stores to the buffer there, other
- // vsizes can be used, but performance suffers - we want to use the fastest ops possible after all.
- // TODO: This should be tested with different systems
-
- // we now have a traits class fixing the simdized type (in common.h):
+ // we have a traits class fixing the simdized type (in common.h):
typedef typename vector_traits < ele_type > :: type simdized_type ;
@@ -1263,19 +1260,28 @@ int ele_aggregating_filter ( source_type &source ,
target_indexes , mask , target_stride , count ) ;
}
}
- return 0 ;
}
+/// here we provide a common routine 'aggregating_filter', which works for elementary
+/// value_types and also for aggregate value_types. Processing is different for these
+/// two cases, because the vector code can only process elementary types, and if
+/// value_type isn't elementary, we need to element-expand the source and target
+/// arrays. Since this routine is the functor passed to multithread() and therefore
+/// receives a range parameter to pick out a subset of the data to process in the
+/// single thread, we also take the opportunity here to pick out the subarrays
+/// for further processing.
+
template < class source_type ,
class target_type >
-int aggregating_filter ( source_type &source ,
- target_type &target ,
- int axis ,
- double gain ,
- bc_code bc ,
- int nbpoles ,
- const double * pole ,
- double tolerance
+void aggregating_filter ( range_type < typename source_type::difference_type > range ,
+ source_type original_source ,
+ target_type original_target ,
+ int axis ,
+ double gain ,
+ bc_code bc ,
+ int nbpoles ,
+ const double * pole ,
+ double tolerance
)
{
const int dim = source_type::actual_dimension ;
@@ -1284,6 +1290,11 @@ int aggregating_filter ( source_type &source ,
"aggregating_filter: both arrays must have the same value_type" ) ;
typedef typename vigra::ExpandElementResult < value_type > :: type ele_type ;
+ // continue processing on the subarrays of source and target specified by 'range':
+
+ auto source = original_source.subarray ( range[0] , range[1] ) ;
+ auto target = original_target.subarray ( range[0] , range[1] ) ;
+
if ( vigra::ExpandElementResult < value_type > :: size != 1 )
{
// value_type is an aggregate type, but we want to operate on elementary types
@@ -1291,9 +1302,9 @@ int aggregating_filter ( source_type &source ,
// arrays with elementary types.
auto expanded_source = source.expandElements ( 0 ) ;
auto expanded_target = target.expandElements ( 0 ) ;
- return ele_aggregating_filter ( expanded_source , expanded_target ,
- axis + 1 , gain , bc ,
- nbpoles , pole , tolerance ) ;
+ ele_aggregating_filter ( expanded_source , expanded_target ,
+ axis + 1 , gain , bc ,
+ nbpoles , pole , tolerance ) ;
}
else
{
@@ -1306,20 +1317,18 @@ int aggregating_filter ( source_type &source ,
es ( source.shape() , source.stride() , (ele_type*)(source.data()) ) ;
vigra::MultiArrayView < target.actual_dimension , ele_type >
et ( target.shape() , target.stride() , (ele_type*)(target.data()) ) ;
- return ele_aggregating_filter ( es , et ,
- axis , gain , bc ,
- nbpoles , pole , tolerance ) ;
+ ele_aggregating_filter ( es , et ,
+ axis , gain , bc ,
+ nbpoles , pole , tolerance ) ;
}
}
#endif
/// Now we have the routines which perform the buffering and filtering for a chunk of data,
-/// We add code for multithreading. This is done by using utility code from common.h, namely
-/// the routine 'divide_and_conquer_2', which splits two arrays in the same way and applies
-/// a functor to each pair of chunks in a separate thread.
+/// We add code for multithreading. This is done by using utility code from multithread.h.
/// filter_1d, which is the routine processing nD arrays along a specific axis, might as well
-/// be a function. But functions can't be patially specialized (at least not with my compiler)
+/// be a function. But functions can't be partially specialized (at least not with my compiler)
/// so I use a functor instead, which, as a class, can be partially specialized. We want a
/// partial specialization for 1D arrays, where all of our usual schemes of multithreading and
/// vectorization don't intrinsically work and we have to employ a different method, see there.
@@ -1331,28 +1340,37 @@ class filter_1d
{
public:
void operator() ( input_array_type &input , ///< source data. the routine can also operate in-place
- output_array_type &output , ///< where input == output.
- int axis ,
- double gain ,
- bc_code bc , ///< boundary treatment for this solver
- int nbpoles ,
- const double * pole ,
- double tolerance ,
- bool use_vc = true ,
- int nslices = ncores ) ///< number of threads to use
+ output_array_type &output , ///< where input == output.
+ int axis ,
+ double gain ,
+ bc_code bc , ///< boundary treatment for this solver
+ int nbpoles ,
+ const double * pole ,
+ double tolerance ,
+ bool use_vc = true ,
+ int nslices = ncores ) ///< number of threads to use
{
typedef typename input_array_type::value_type value_type ;
+ typedef typename input_array_type::difference_type shape_type ;
+ typedef range_type < shape_type > range_t ;
+ typedef partition_type < range_t > partition_t ;
typedef typename vigra::MultiArray < 1 , value_type > ::iterator iter_type ;
- using namespace std::placeholders ;
-
- // we use bind to create a functor which we can pass to divide_and_conquer_2.
- // divide_and_conquer_2 will split input and output into chunks and then apply
- // the functor to pairs of chunks in separate threads.
-
- typedef decltype ( & nonaggregating_filter < input_array_type , output_array_type > )
- filter_functor_type ;
-
+ // multithread() expects a std::function, so here we fix the type of
+ // std::function we want to pass to it, create an instance of the std::function
+ // and assign the routine appropriate to the task:
+
+ typedef
+ std::function < void ( range_t ,
+ input_array_type ,
+ output_array_type ,
+ int ,
+ double ,
+ bc_code ,
+ int ,
+ const double * ,
+ double ) > filter_functor_type ;
+
filter_functor_type filter_functor ;
#ifdef USE_VC
@@ -1368,30 +1386,32 @@ public:
#endif
- auto chunk_func_2
- = std::bind ( filter_functor ,
- _1 , // placeholders to accept data chunks
- _2 , // from divide_and_conquer
- axis , // axis to process
+ // obtain a partitioning of the data array into subranges. We do this 'manually' here
+ // because we must instruct shape_splitter not to chop up the current processing axis
+ // (by passing the 'forbid' parameter, which is taken as -1 (no effect) in the default
+ // partitioning of nD arrays.
+
+ partition_t partitioning = shape_splitter < dim > :: part ( input.shape() , nslices , axis ) ;
+
+ multithread ( filter_functor ,
+ partitioning ,
+ input ,
+ output ,
+ axis ,
gain ,
bc ,
nbpoles ,
pole ,
tolerance ) ;
-
-
- // divide_and_conquer_2 performs the array splitting and multithreading
-
- divide_and_conquer_2 < input_array_type , output_array_type >
- :: run ( input , // knot point data
- output , // space for coefficients
- chunk_func_2 , // functor from above to apply the solver
- axis , // forbid splitting along processing axis
- nslices ) ; // use nslices threads
}
} ;
-/// now here's the specialization for 1D arrays.
+/// now here's the specialization for 1D arrays. It may come as a surprise that it looks
+/// nothing like the nD routine. This is due to the fact that we follow a specific strategy:
+/// We 'fold up' the 1D array into a 'fake 2D' array, process this 2D array with the nD code
+/// which is very efficient, and 'mend' the stripes along the margins of the fake 2D array
+/// which contain wrong results due to the fact that some boundary condition appropriate
+/// for the 2D case was applied.
template < typename input_array_type , ///< type of array with knot point data
typename output_array_type > ///< type of array for coefficients (may be the same)
@@ -1426,7 +1446,6 @@ public:
#ifdef USE_VC
-// typedef vector_traits < ele_type > simdized_type ;
const int vsize = vector_traits < ele_type > :: vsize ;
// if we can use vector code, the number of lanes is multiplied by the
@@ -1439,13 +1458,7 @@ public:
#endif
-// std::cout << "have " << lanes << " lanes" << std::endl ;
-
// we give the filter some space to run up to precision
- // TODO we may want to use the proper horizon values here
- // TODO and make sure 64 is enough.
-
-// std::cout << "tolerance: " << tolerance << std::endl ;
if ( tolerance <= 0.0 )
{
@@ -1459,7 +1472,7 @@ public:
int horizon = ceil ( log ( tolerance ) / log ( fabs ( pole[0] ) ) ) ;
- // tis is just was much as we want for the filter to run up to precision
+ // this is just as much as we want for the filter to run up to precision
// starting with BC code 'IGNORE' at the margins
runup = horizon ;
@@ -1473,12 +1486,11 @@ public:
if ( input.shape(0) < min_length )
{
-// std::cout << "input too short for fake 2D with the given tolerance" << std::endl ;
lanes = 1 ;
}
else
{
- // input's larger than the absolute minimum, maybe we can even increase
+ // input is larger than the absolute minimum, maybe we can even increase
// the number of lanes some more? we'd like to do this if the input is
// very large, since we use buffering and don't want the buffers to become
// overly large. But the smaller the run along the split x axis, the more
@@ -1508,7 +1520,6 @@ public:
if ( lanes == 1 )
{
-// std::cout << "using simple 1D filter" << std::endl ;
// this is a simple single-threaded implementation
filter_type solver ( input.shape(0) ,
gain ,
@@ -1522,8 +1533,6 @@ public:
// the input qualifies for fake 2D processing.
-// std::cout << "input qualifies for fake 2D, using " << lanes << " lanes" << std::endl ;
-
// we want as many chunks as we have lanes. There may be some data left
// beyond the chunks (tail_size of value_type)
diff --git a/multithread.h b/multithread.h
new file mode 100644
index 0000000..00bff78
--- /dev/null
+++ b/multithread.h
@@ -0,0 +1,354 @@
+/************************************************************************/
+/* */
+/* vspline - a set of generic tools for creation and evaluation */
+/* of uniform b-splines */
+/* */
+/* Copyright 2015, 2016 by Kay F. Jahnke */
+/* */
+/* Permission is hereby granted, free of charge, to any person */
+/* obtaining a copy of this software and associated documentation */
+/* files (the "Software"), to deal in the Software without */
+/* restriction, including without limitation the rights to use, */
+/* copy, modify, merge, publish, distribute, sublicense, and/or */
+/* sell copies of the Software, and to permit persons to whom the */
+/* Software is furnished to do so, subject to the following */
+/* conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the */
+/* Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
+/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
+/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
+/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
+/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
+/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
+/* OTHER DEALINGS IN THE SOFTWARE. */
+/* */
+/************************************************************************/
+
+/// \file multithread.h
+///
+/// \brief code to distribute the processing of bulk data to several threads
+///
+/// The code in this header provides a reasonably general method to perform
+/// processing of manifolds of data with several threads in parallel. In vspline,
+/// there are several areas where potentially large numbers of individual values
+/// have to be processed independently of each other or in a dependence which
+/// can be preserved in partitioning. To process such 'bulk' data effectively,
+/// vspline employs two strategies: multithreading and vectorization.
+/// This file handles the multithreading.
+///
+/// To produce generic code for the purpose, we first introduce a model of what
+/// we intend to do. This model looks at the data as occupying a 'range' having
+/// a defined starting point and end point. We keep with the convention of defining
+/// ranges so that the start point is inside and the end point outside the data
+/// set described by the range, just like iterators obtained by begin() and end().
+/// This range is made explicit, even if it is implicit in the data which we want to
+/// submit to multithreading, and there is a type for the purpose, struct range_type.
+/// range_type merely captures the concept of a range, taking 'limit_type' as its
+/// template parameter, so that any type of range can be accommodated.
+/// Next we define an object holding a set of ranges, modeling a partitioning of
+/// an 'original' range into subranges, which, within the context of this code,
+/// are disparate and in sequence. This object is modeled as struct partition_type,
+/// taking a range_type as its template argument.
+/// With these types, we model concrete ranges and partitionings. The most important
+/// group is dealing with multidimensional shapes, where a range extends from a 'lower'
+/// coordinate to a 'higer' coordinate. These two coordinates can be used directly
+/// to call vigra's 'subarray' function.
+/// Next we provide code to partition such ranges into sets of subranges.
+/// Finally we can express a generalized multithreading routine. This routine takes
+/// a functor capable of processing a range specification and a parameter pack of
+/// arbitrary further parameters, of which some will usually be referring to manifolds
+/// of data for which the given range makes sense. We call this routine with a
+/// partitioning of the original range and the same parameter pack that is to be passed
+/// to the functor. The multithreading routine proceeds to set up threads as needed,
+/// starting each with the functor as its functional, receiving a subrange from
+/// the partitioning and the parameter pack as arguments. There's also an overload
+/// to multithread() where we pass in a range over the data and a desired number of
+/// threads, leaving the partitioning to code inside multithread(), resulting in some
+/// default partitioning provided by a suitable overload of function partition().
+/// This is the commonly used variant, since it's usually not necessary to obtain
+/// a partitioning other than the default one.
+
+#ifndef VSPLINE_MULTITHREAD_H
+#define VSPLINE_MULTITHREAD_H
+
+#include <assert.h>
+#include <vigra/tinyvector.hxx>
+#include <thread>
+
+namespace vspline
+{
+
+/// given limit_type, we define range_type as a TinyVector of two limit_types,
+/// the first denoting the beginning of the range and the second its end, with
+/// end being outside of the range.
+
+template < class limit_type >
+using range_type = vigra::TinyVector < limit_type , 2 > ;
+
+/// given range_type, we define partition_type as a std::vector of range_type.
+/// This data type is used to hold the partitioning of a range into subranges.
+
+template < class range_type >
+using partition_type = std::vector < range_type > ;
+
+/// given a dimension, we define a shape_type as a TinyVector of long
+/// of this dimension. This is equivalent to vigra's shape type.
+
+template < int dimension >
+using shape_type = vigra::TinyVector < long , dimension > ;
+
+/// given a dimension, we define shape_range_type as a range defined by
+/// two shapes of the given dimension. This definition allows us to directly
+/// pass the two shapes as arguments to a call of subarray() on a MultiArrayView
+/// of the given dimension.
+
+template < int dimension >
+using shape_range_type = range_type < shape_type < dimension > > ;
+
+// #include <iomanip>
+// #include <vspline/vspline.h>
+// #include <vigra/multi_math.hxx>
+// #include <random>
+// #include <tuple>
+
+// currently unused
+// // iterator_splitter will try to set up n ranges from a range. the partial
+// // ranges are stored in a std::vector. The split may succeed producing n
+// // or less ranges, and if iter_range can't be split at all, a single range
+// // encompassing the whole of iter_range will be returned in the result vector.
+//
+// template < class _iterator_type >
+// struct iterator_splitter
+// {
+// typedef _iterator_type iterator_type ;
+// typedef vigra::TinyVector < iterator_type , 2 > range_type ;
+// typedef std::vector < range_type > partition_type ;
+//
+// static partition_type part ( const range_type & iter_range ,
+// int n )
+// {
+// std::vector < range_type > res ;
+// assert ( n > 0 ) ;
+//
+// iterator_type start = iter_range [ 0 ] ;
+// iterator_type end = iter_range [ 1 ] ;
+// int size = end - start ;
+// if ( n > size )
+// n = size ;
+//
+// int chunk_size = size / n ; // will be at least 1
+//
+// for ( int i = 0 ; i < n - 1 ; i++ )
+// {
+// res.push_back ( range_type ( start , start + chunk_size ) ) ;
+// start += chunk_size ;
+// }
+// res.push_back ( range_type ( start , end ) ) ;
+// return res ;
+// }
+// } ;
+
+/// shape_splitter will try to split a shape into n ranges by 'chopping' it
+/// along the outermost axis that can be split n-ways. The additional parameter
+/// 'forbid' prevents that particular axis from being split. The split may succeed
+/// producing n or fewer ranges, and if 'shape' can't be split at all, a single range
+/// encompassing the whole of 'shape' will be returned in the result vector.
+
+template < int dim >
+struct shape_splitter
+{
+ typedef shape_type < dim > shape_t ;
+ typedef range_type < shape_t > range_t ;
+ typedef partition_type < range_t > partition_t ;
+
+ static partition_t part ( const shape_t & shape , ///< shape to be split n-ways
+ int n , ///< intended number of chunks
+ int forbid = -1 ) ///< axis which shouldn't be split
+ {
+ partition_t res ;
+
+ // find the outermost dimension that can be split n ways, and its extent.
+ // note the 'break' below: the scan stops at the first (outermost) axis
+ // satisfying the condition, so max_extent merely acts as a 'found' flag
+ // here rather than tracking a true maximum over all axes.
+ int split_dim = -1 ;
+ int max_extent = -1 ;
+ for ( int md = dim - 1 ; md >= 0 ; md-- )
+ {
+ if ( md != forbid
+ && shape[md] > max_extent
+ && shape[md] >= n )
+ {
+ max_extent = shape[md] ;
+ split_dim = md ;
+ break ;
+ }
+ }
+
+ // if the search did not yet succeed:
+ if ( max_extent == -1 )
+ {
+ // repeat process with relaxed conditions: now the search will also succeed
+ // if there is an axis which can be split less than n ways
+ for ( int md = dim - 1 ; md >= 0 ; md-- )
+ {
+ if ( md != forbid
+ && shape[md] > 1 )
+ {
+ max_extent = shape[md] ;
+ split_dim = md ;
+ break ;
+ }
+ }
+ }
+
+ if ( split_dim == -1 )
+ {
+ // we have not found a dimension for splitting. We pass back res with
+ // a range over the whole initial shape as its sole member
+ res.push_back ( range_t ( shape_t() , shape ) ) ;
+ }
+ else
+ {
+ // we can split the shape along split_dim
+
+ int w = shape [ split_dim ] ; // extent of the dimension we can split
+ n = std::min ( n , w ) ; // just in case, if that is smaller than n
+
+ // NOTE(review): 'int cut[n]' is a variable-length array, a g++/clang
+ // extension rather than standard C++; a std::vector<int> would be portable.
+ int cut [ n ] ; // where to chop up this dimension
+
+ for ( int i = 0 ; i < n ; i++ )
+ cut[i] = ( (i+1) * w ) / n ; // roughly equal chunks; the last cut is exactly w
+
+ // 'start' is default-constructed; presumably TinyVector zero-initializes,
+ // so the first subrange begins at the origin - TODO confirm against vigra docs
+ shape_t start , end = shape ;
+
+ for ( int i = 0 ; i < n ; i++ )
+ {
+ end [ split_dim ] = cut [ i ]; // apply the cut locations
+ res.push_back ( range_t ( start , end ) ) ;
+ start [ split_dim ] = end [ split_dim ] ;
+ }
+ }
+ return res ;
+ }
+} ;
+
+/// for any dimension d, we specialize function partition() for shape_range_type<d>.
+/// we use shape_splitter to perform the split. This function template
+/// provides the default partitioning mechanism for a shape_range, which is used by
+/// the second variant of multithread(), which is called with a range and a number
+/// of desired threads instead of a ready-made partitioning.
+
+template < int d >
+partition_type < shape_range_type<d> > partition ( shape_range_type<d> range , int nparts )
+{
+ if ( range[0].any() )
+ {
+ // the lower limit of the range is not at the origin, so get the shape
+ // of the region between range[0] and range[1], call shape_splitter with
+ // this shape, and add the offset to the lower limit of the original range
+ // to the partial ranges in the result
+ auto shape = range[1] - range[0] ;
+ auto res = shape_splitter < d > :: part ( shape , nparts ) ;
+ for ( auto & r : res )
+ {
+ r[0] += range[0] ;
+ r[1] += range[0] ;
+ }
+ return res ;
+ }
+ // if range[0] is at the origin, we don't have to use an offset
+ return shape_splitter < d > :: part ( range[1] , nparts ) ;
+}
+
+/// multithread is a routine to perform multithreading with functors accessing several
+/// containers, iterators etc. ('manifolds') of identical structure together.
+/// when such a functor is invoked for a specific thread, it will only process a subset
+/// of each of the original manifold objects. This 'subset' notion is codified as range_type:
+/// range_type specifies a starting point and an end point inside the manifolds defining
+/// the extent of the subset. While it is tempting to code so that the multithreading
+/// routine feeds the subsets to the individual threads, I found that passing the
+/// range to the thread, together with the original variadic arguments to multithread,
+/// produces clean and compact code with a slightly higher burden on the functor, since
+/// it has to 'cut out' the subset(s) it is meant to process itself. Why is this better?
+/// Because it unburdens the multithreading code from knowing any specifics about the
+/// 'cutting out' procedure, and at the same time allows the functor to access the
+/// 'whole' manifold and its metrics if this is needed, like in code looking at the
+/// coordinates into an array, which should refer to the 'whole' array, not to the
+/// subarrays produced by partitioning.
+/// I have also taken the actual partitioning of the original range out of the core
+/// multithreading routine, because at times the calling code needs to influence this
+/// process. The situation inside vspline where this is necessary is in the prefiltering
+/// code, where the partitioning has to avoid splitting up the current processing axis,
+/// passing the axis as additional parameter 'forbid' to class shape_splitter.
+
+template < class range_type , class ...Types >
+int multithread ( std::function < void ( range_type , Types... ) > func ,
+ partition_type < range_type > partitioning ,
+ Types ...args )
+{
+ // get the number of ranges in the partitioning
+
+ int nparts = partitioning.size() ;
+
+ assert ( nparts > 0 ) ; // make sure, for now, that there is at least one range
+
+ // set up thread objects for the additional threads we'll use (if any)
+ // NOTE(review): 'std::thread * t[new_threads]' is a variable-length array,
+ // a g++/clang extension rather than standard C++ (and of length zero when
+ // nparts == 1, also an extension); a std::vector<std::thread> would be
+ // portable and would remove the manual new/delete below.
+
+ int new_threads = nparts - 1 ;
+ std::thread * t[new_threads] ;
+
+ // start the additional threads (if any), passing func as the thread's functional,
+ // followed by the range over which func is meant to operate, followed by any other
+ // arguments passed to multithread as parameter pack 'args'.
+ // note that std::thread copies its constructor arguments, so each thread
+ // receives its own copy of 'args' - cheap for view types like MultiArrayView.
+
+ for ( int s = 0 ; s < new_threads ; s++ )
+ {
+ t[s] = new std::thread ( func , partitioning[s] , args... ) ;
+ }
+
+ // do the last partial job in this very thread. this may be the only job,
+ // if partitioning only contains a single range
+ // NOTE(review): if this call throws, the spawned threads are neither joined
+ // nor deleted - there is no exception safety here; acceptable only as long
+ // as func does not throw.
+
+ func ( partitioning [ new_threads ] , args... ) ;
+
+ // join the additional threads (if any) and delete their thread objects
+
+ for ( int s = 0 ; s < new_threads ; s++ )
+ {
+ t[s]->join() ;
+ delete t[s] ;
+ }
+
+ // and return the number of threads which was used.
+
+ return nparts ;
+}
+
+/// variant of multithread taking the desired number of threads and a range instead
+/// of the partitioning. This variant relies on the presence of a suitable function
+/// partition() to provide the partitioning of the incoming range into at most nparts
+/// subranges. If the calling code doesn't need to influence the partitioning process,
+/// this is the variant to use.
+
+template < class range_type , class ...Types >
+int multithread ( std::function < void ( range_type , Types... ) > func ,
+ int nparts ,
+ range_type range ,
+ Types ...args )
+{
+ // partition the range and update nparts in case the partitioning produced
+ // fewer ranges than we initially asked for. Rely on the presence of a function
+ // partition() which can process range_type:
+ // (note: the reassignment of nparts is redundant, since the overload we
+ // delegate to recomputes the same value and returns it)
+
+ partition_type < range_type > partitioning = partition ( range , nparts ) ;
+ nparts = partitioning.size() ;
+
+ return multithread ( func , partitioning , args... ) ;
+}
+
+} ; // end of namespace vspline
+
+#endif // #ifndef VSPLINE_MULTITHREAD_H
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vspline.git
More information about the debian-science-commits
mailing list