[Shootout-list] Another benchmark

Sebastien Loisel Sebastien Loisel <sloisel@gmail.com>
Fri, 25 Mar 2005 06:44:17 -0800


------=_Part_574_12567035.1111761857589
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

In the previous email
http://lists.alioth.debian.org/pipermail/shootout-list/2005-March/001272.ht=
ml,
I introduced myself and explained what kind of benchmark I would like
to contribute. I proposed a benchmark from a semifamous contest as an
example which was easy to understand so that everyone understands
where I'd like to go, in C++, ocaml and python.

Now I am attaching another example benchmark. This one is not so
simple as the first one, which is why it's the second one I'm
proposing. However, I think it is a more important and relevant
benchmark. It uses several nontrivial numerical algorithms embedded
into each other, making cheating optimizers less able to see the
structure. I'm not certain (I haven't tried yet) but it may also be
challenging to write versions in C, ocaml, python, lisp and so on,
which are similarly concise.

This benchmark is not yet ready for translation in all the languages.
The entire program is scalar, a vector version would be better as it
would add another layer of complexity. I'm not sure how to write that
without bloating everything, but I'll try. I'll also try to work in
some nontrivial traditional data structures.

So I would like to solicit comments. Can a benchmark like this
eventually get included?

Thanks,

S=E9bastien Loisel

------=_Part_574_12567035.1111761857589
Content-Type: text/plain; name=rootfinder.cpp; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="rootfinder.cpp"

#include <math.h>
#include <iostream>
#include <complex>
#include <stdio.h>

/*
 * NOTATION
 * This is mathematical software, and we use "operator" so often that
 * I find it more readable to use the word "op" instead. It also makes
 * the program much more concise, in the "ml" style.
 */
#define op operator
using namespace std;

/*
 * UTILITY FUNCTIONS
 *
 * These are tiny little polymorphic functions useful all the time
 */

// fac -- factorial
int fac(int k) { int i,ret=1; for(i=2;i<=k;i++) ret*=i; return ret; }
// sqr -- the square of x
template <class F>
F sqr(const F &x) { return x*x; }
// pow -- raising x to a non-negative integer power
template <class F>
F pow(const F &x, int i)
{ if(i<=0) return F(1); if(i&1) return x*pow(x,i-1); return sqr(pow(x,i/2)); }

// These three operators make the complex<> types more convenient for this
// benchmark
template <class F>
complex<F> op *(const complex<F> &x, int i) { return x*complex<F>(i); }
template <class F>
complex<F> op /(const complex<F> &x, int i) { return x/complex<F>(i); }
template <class F>
complex<F> op +(const complex<F> &x, int i) { return x+complex<F>(i); }

/*
 * THE AUTOMATIC DIFFERENTIATION CLASS
 *
 * This class is a bit dense, but you don't need to pay too much attention
 * to the details. It essentially defines a new scalar type, much like complex
 * or int. We don't use the < operator on it anywhere, so I didn't define it.
 * Further mathematical functions can be defined on ad, like sin and log, but
 * for the benchmark I'm not doing it. Everything in the class is to define
 * basic constructors, and the operator =*+-/ as well as the convenient []
 * indexing operator to fetch the Taylor coefficients. If you write a polymorphic
 * function g that takes a template scalar F for input and feed it an ad<double>,
 * the output will be a truncated Taylor series expansion of g about the input
 * parameter.
 */
template <class F, int N>
struct ad
{
  F x[N];
  void clr() { int i; for(i=0;i<N;i++) x[i]=0; }
  ad() { clr(); }
  ad(int y) { clr(); x[0]=y; }
  ad(const F &y) { clr(); x[0]=y; }
  ad & op =(const F &y) { clr(); x[0]=y; return *this; }
  ad & op =(int y) { clr(); x[0]=y; return *this; }
  F & op [](int i) { return x[i]; }
  const F & op [](int i) const { return x[i]; }
  ad op +(const ad &y) const
  { ad ret; int i; for(i=0;i<N;i++) ret[i]=x[i]+y[i]; return ret; }
  ad op +(int y) const { ad ret(*this); ret[0]=ret[0]+y; return ret; }
  ad op -(const ad &y) const
  { ad ret; int i; for(i=0;i<N;i++) ret[i]=x[i]-y[i]; return ret; }
  ad op -(const F &y) const { ad ret(*this); ret[0]=ret[0]-y; return ret; }
  ad op *(int y) const
  { ad ret; int i; for(i=0;i<N;i++) ret[i]=x[i]*y; return ret; }
  ad op *(const F & y) const
  { ad ret; int i; for(i=0;i<N;i++) ret[i]=x[i]*y; return ret; }
  ad op *(const ad &y) const
  {
    ad ret; int i,j;
    for(i=0;i<N;i++) for(j=0;j<=i;j++) ret[i]=ret[i]+x[j]*y[i-j];
    return ret;
  }
  ad compose(const ad &y) const
  {
    int i; ad ret,temp;
    temp=y; temp[0]=0; for(i=0;i<N;i++) ret=ret+pow(temp,i)*x[i];
    return ret;
  }
  ad inv() const
  {
    int i; ad temp(F(1)/x[0]);
    for(i=1;i<N;i++) temp[i]=F((1-(i&1)*2))/pow(x[0],i+1);
    return temp.compose(*this);
  }
  ad op / (const ad &y) const { return (*this)*y.inv(); }
};

// Convenient output function for ad scalars
template <class F, int N>
ostream & op <<(ostream &o, const ad<F,N> &x)
{ int i; for(i=0;i<N;i++) { if(i>0) o<<','; o<<x[i]; } return o; }

// A rational function that can be used with doubles or ad<double>s
template <class F>
F rat(const F &x)
{ return (x*2+pow(x,2)*3+pow(x,6)*7+pow(x,11)*5+1)/(x*5-pow(x,3)*6-pow(x,7)*3+2); }

// This function is a very good approximation of sqrt(x). Between 1 and 10,
// it is much more precise than long double. However, it has significant error
// at 1,000,000. This function is given by a program, but the Taylor series is
// computed efficiently by ad<double>. If one tries to write an algebraic
// expression for the Taylor coefficients, one ends up with hundreds of pages
// of formulae
template <class F>
F mysqrt(const F &x)
{ int i; F ret(1); for(i=0;i<10;i++) ret=(ret+x/ret)/F(2); return ret; }

// the Newton iteration for finding zeros of functions
template <class F, int N, class fun>
F newton(F x0, int k, int n, fun &g, const ad<F,N> &dummy)
{
  ad<F,N> val;
  int i;
  for(i=0;i<n;i++) { val=g(x0); x0=x0-val[k]/(val[k+1]*(k+1)); }
  return x0;
}

// A helper macro to create root-finders for newton
#define finder(x) \
template <class F, int N> \
struct x##finder \
{ \
  F a; \
  x##finder() {} \
  x##finder(const F &z) : a(z) {} \
  ad<F,N> op () (const F &z) { ad<F,N> z1(z); z1[1]=F(1); return x(z1)-a; } \
}
// Zero-finding for rat, mysqrt, sqrt and sqr
finder(rat);
finder(mysqrt);
finder(sqrt);
finder(sqr);

/*
 * TRAPEZOID METHOD
 *
 * To integrate the ODE
 *
 *    y'=f(t,y)
 *
 * using the Trapezoidal method, one uses the approximation
 *
 *    f(t+dt,y[k+1])+f(t,y[k])-2*dt*(y[k+1]-y[k])=0
 *
 * The left-hand-side is a function of y[k+1] and is implemented by the
 * class trapezoid_method_rooter, which can be fed into newton to compute
 * the solution y[k+1].
 *
 * The function trapezoid_method does this for you, numsteps many times.
 */
template <class F, class fun>
struct trapezoid_method_rooter
{
  fun g;
  ad<F,2> g0;
  F y0,t0,t1;
  trapezoid_method_rooter() {}
  trapezoid_method_rooter(fun &G, const F &Y0, const F &T0, const F &T1) :
    g(G),y0(Y0),t0(T0),t1(T1),g0(G(T0,Y0)) {}
  ad<F,2> op () (const F &y1)
  {
    ad<F,2> Y1(y1),g1;
    Y1[1]=F(1); g1=g(ad<F,2>(t1),Y1);
    return (g1+g0)*((t1-t0)/2)+ad<F,2>(y0)-Y1;
  }
};

// Integration with trapezoid method
template <class F, class fun>
F trapezoid_method(F t0, const F &dt, F y0,
		   fun &g, int numsteps)
{
  int i;
  for(i=0;i<numsteps;i++)
    {
      trapezoid_method_rooter<F,fun> solver(g,y0,t0,t0+dt);
      y0=newton(y0,0,10,solver,ad<F,2>());
      t0=t0+dt;
    }
  return y0;
}

// An integrand leading to the solution exp(-1/x^2)
template <class F>
struct integrand1
{ F op ()(const F &t, const F &y) { return y/pow(t,3)*(2); } };

// these results were checked in maple or otherwise and are known to be exact
void sanity_check(void)
{
  ad<double,10> x;
  x[0]=0.25;
  x[1]=1;
  cout<<"rational taylor series: "<<rat(x)<<endl; //verified in Maple
  x[0]=1000000;
  cout<<"mysqrt taylor series: "<<mysqrt(x)<<endl; //verified in http://www.math.mcgill.ca/loisel/ad-matlab.pdf
  sqrfinder<double,2> foo(2.0);
  cout<<"newton-sqrt 2: "<<newton(1.0,0,10,foo,ad<double,2>())<<endl; //verified with calculator
  integrand1<ad<double,2> > baz;
  cout<<"exp(-1/4): "<<trapezoid_method(1.0,0.1,exp(-1.0),baz,10)<<endl; // verified from analytical solution
}

#define make_integrand(x) \
template <class F> \
struct x##integrand { F op () (const F &t, const F &y) { return x(y); } }
make_integrand(sqr);
make_integrand(mysqrt);
make_integrand(rat);

template <class F>
void integrate_functions(F x0, int n)
{
  F t0(1),dt(1.0/n);
  sqrintegrand<ad<F,2> > i1;
  mysqrtintegrand<ad<F,2> > i2;
  ratintegrand<ad<F,2> > i3;
  cout<<"sizeof(F)="<<sizeof(F)
      <<"\ni1="<<trapezoid_method(t0,dt,x0,i1,n)
      <<"\ni2="<<trapezoid_method(t0,dt,x0,i2,n)
      <<"\ni3="<<trapezoid_method(t0,dt,x0,i3,n)<<endl;
}

int main(void)
{
  cout.precision(30); // bug? in gcc doesn't let us do that for long doubles
  sanity_check();
  integrate_functions((float)0.001,1000);
  integrate_functions((double)0.001,1000);
  integrate_functions((long double)0.001,1000);
  integrate_functions(complex<float>(0.001,0.001),200);
  integrate_functions(complex<double>(0.001,0.001),200);
  integrate_functions(complex<long double>(0.001,0.001),200);
  return 0;
}


------=_Part_574_12567035.1111761857589--