[Pkg-ofed-commits] [rds-tools] 01/02: Imported Upstream version 1.4.1-OFED-1.4.2

Ana Beatriz Guerrero López ana at moszumanska.debian.org
Wed Jul 2 14:34:58 UTC 2014


This is an automated email from the git hooks/post-receive script.

ana pushed a commit to branch master
in repository rds-tools.

commit bff58facfa627c9fa256488b770fdb905f008e0c
Author: Ana Guerrero López <ana at ekaia.org>
Date:   Wed Jul 2 16:34:33 2014 +0200

    Imported Upstream version 1.4.1-OFED-1.4.2
---
 Makefile                  |  106 ++
 Makefile.in               |  106 ++
 README                    |    9 +
 configure                 | 2126 +++++++++++++++++++++++++++++++++++
 configure.in              |   10 +
 docs/rds-architecture.txt |  356 ++++++
 examples/Makefile         |    6 +
 examples/README           |    6 +
 examples/rds-sample.c     |  347 ++++++
 kernel-list.h             |  194 ++++
 net/ib_rds.h              |  265 +++++
 net/rds.h                 |   50 +
 options.c                 |  481 ++++++++
 pfhack.c                  |  124 +++
 pfhack.h                  |   60 +
 rds-gen.1                 |   89 ++
 rds-gen.c                 |  322 ++++++
 rds-info.1                |  162 +++
 rds-info.c                |  363 ++++++
 rds-ping.1                |   69 ++
 rds-ping.c                |  385 +++++++
 rds-rdma.7                |  427 +++++++
 rds-sink.1                |    1 +
 rds-sink.c                |  250 +++++
 rds-stress.1              |  174 +++
 rds-stress.c              | 2715 +++++++++++++++++++++++++++++++++++++++++++++
 rds-tools.spec            |   38 +
 rds-tools.spec.in         |   38 +
 rds-tools.txt             |   39 +
 rds.7                     |  445 ++++++++
 rdstool.h                 |  112 ++
 stap/README               |   15 +
 stap/rds.stp              |   35 +
 stats.c                   |  227 ++++
 34 files changed, 10152 insertions(+)

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f52710e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,119 @@
+# Installation directories.  DESTDIR (the staging root) is applied exactly once,
+# on the final install directories; prefix and exec_prefix stay unstaged so the
+# nested expansions below do not repeat DESTDIR.
+prefix		= /usr
+exec_prefix	= ${prefix}
+bindir		= $(DESTDIR)${exec_prefix}/bin
+mandir		= $(DESTDIR)${prefix}/share/man
+incdir		= $(DESTDIR)${prefix}/include
+
+all: all-programs
+
+CFLAGS = -O2 -Wall
+# -MD -MP -MF make each compile also write a hidden .<name>.d dependency file.
+CPPFLAGS = -DDEBUG_EXE -MD -MP -MF $(@D)/.$(basename $(@F)).d
+
+HEADERS = kernel-list.h rdstool.h pfhack.h net/rds.h net/ib_rds.h
+COMMON_SOURCES = options.c stats.c pfhack.c
+SOURCES = $(addsuffix .c,$(PROGRAMS)) $(COMMON_SOURCES)
+CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
+
+# This is the default
+DYNAMIC_PF_RDS = true
+
+ifneq ($(DYNAMIC_PF_RDS),)
+CPPFLAGS += -DDYNAMIC_PF_RDS
+COMMON_OBJECTS = $(subst .c,.o,$(COMMON_SOURCES))
+else
+COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
+endif
+
+PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
+
+# These targets are commands, not files.
+.PHONY: all all-programs install clean distclean
+
+all-programs: $(PROGRAMS)
+
+# Install the programs, the section 1 and 7 man pages and net/rds.h.
+install: $(PROGRAMS)
+	install -d $(bindir)
+	install -m 555 -s $(PROGRAMS) $(bindir)
+	install -d $(mandir)/man1
+	install -d $(mandir)/man7
+	install -m 644 *.1 $(mandir)/man1
+	install -m 644 *.7 $(mandir)/man7
+	install -d $(incdir)/net
+	install -m 444 net/rds.h $(incdir)/net
+
+clean:
+	rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
+
+distclean: clean
+	rm -f .*.d
+
+# Link each program from its own object plus the common objects.  $(CC) is used
+# instead of a hard-coded gcc so the link honours the compiler chosen for the
+# implicit compile rule.
+$(PROGRAMS) : % : %.o $(COMMON_OBJECTS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+# Pull in dependency files left by earlier compiles, if there are any.
+LOCAL_DFILES := $(wildcard .*.d)
+ifneq ($(LOCAL_DFILES),)
+.PHONY: $(LOCAL_DFILES)
+-include $(LOCAL_DFILES)
+endif
+
+VERSION := 1.4
+RELEASE := 1
+
+TAR_PREFIX := rds-tools-$(VERSION)-$(RELEASE)
+TAR_FILE := $(TAR_PREFIX).tar.gz
+
+EXTRA_DIST := 	rds-info.1 \
+		rds-gen.1 \
+		rds-sink.1 \
+		rds-stress.1 \
+		rds-ping.1 \
+		rds.7 \
+		rds-rdma.7 \
+		Makefile.in \
+		rds-tools.spec.in \
+		configure.in \
+		configure \
+		README \
+		rds-tools.txt \
+		stap/rds.stp \
+		stap/README \
+		docs/rds-architecture.txt \
+		examples/Makefile \
+		examples/rds-sample.c \
+		examples/README
+
+DISTFILES := $(SOURCES) $(HEADERS) $(EXTRA_DIST)
+
+# Stage the prerequisites and every DISTFILES entry that exists under
+# $(TAR_PREFIX)/, keeping its relative directory, then archive the staging
+# tree.  Missing files are skipped on purpose; a failed mkdir or cp aborts.
+$(TAR_FILE): Makefile rds-tools.spec
+	@rm -rf $@ $(TAR_PREFIX) || :
+	@mkdir $(TAR_PREFIX)
+	for a in $^ $(DISTFILES); do    \
+                if [ ! -f $$a ]; then                                   \
+                        continue;                                       \
+                fi;                                                     \
+                targ=$(TAR_PREFIX)/$$(dirname $$a);                     \
+                mkdir -p $$targ || exit 1;                              \
+                cp $$a $$targ || exit 1;                                \
+        done
+
+	tar -zcf $@ $(TAR_PREFIX)
+
+.PHONY: rpm
+rpm: $(TAR_FILE)
+	rpmbuild -ta $^
+
+.PHONY: dist
+dist: $(TAR_FILE)
+
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..088ee69
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,119 @@
+# Installation directories; the @...@ placeholders are filled in by configure.
+# DESTDIR (the staging root) is applied exactly once, on the final install
+# directories: the substituted bindir/mandir/includedir values may themselves
+# reference ${exec_prefix} and ${prefix}, so those two must stay unstaged.
+prefix		= @prefix@
+exec_prefix	= @exec_prefix@
+bindir		= $(DESTDIR)@bindir@
+mandir		= $(DESTDIR)@mandir@
+incdir		= $(DESTDIR)@includedir@
+
+all: all-programs
+
+CFLAGS = -O2 -Wall
+# -MD -MP -MF make each compile also write a hidden .<name>.d dependency file.
+CPPFLAGS = -DDEBUG_EXE -MD -MP -MF $(@D)/.$(basename $(@F)).d
+
+HEADERS = kernel-list.h rdstool.h pfhack.h net/rds.h net/ib_rds.h
+COMMON_SOURCES = options.c stats.c pfhack.c
+SOURCES = $(addsuffix .c,$(PROGRAMS)) $(COMMON_SOURCES)
+CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
+
+# This is the default
+DYNAMIC_PF_RDS = true
+
+ifneq ($(DYNAMIC_PF_RDS),)
+CPPFLAGS += -DDYNAMIC_PF_RDS
+COMMON_OBJECTS = $(subst .c,.o,$(COMMON_SOURCES))
+else
+COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
+endif
+
+PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
+
+# These targets are commands, not files.
+.PHONY: all all-programs install clean distclean
+
+all-programs: $(PROGRAMS)
+
+# Install the programs, the section 1 and 7 man pages and net/rds.h.
+install: $(PROGRAMS)
+	install -d $(bindir)
+	install -m 555 -s $(PROGRAMS) $(bindir)
+	install -d $(mandir)/man1
+	install -d $(mandir)/man7
+	install -m 644 *.1 $(mandir)/man1
+	install -m 644 *.7 $(mandir)/man7
+	install -d $(incdir)/net
+	install -m 444 net/rds.h $(incdir)/net
+
+clean:
+	rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
+
+distclean: clean
+	rm -f .*.d
+
+# Link each program from its own object plus the common objects.  $(CC) is used
+# instead of a hard-coded gcc so the link honours the compiler chosen for the
+# implicit compile rule.
+$(PROGRAMS) : % : %.o $(COMMON_OBJECTS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+# Pull in dependency files left by earlier compiles, if there are any.
+LOCAL_DFILES := $(wildcard .*.d)
+ifneq ($(LOCAL_DFILES),)
+.PHONY: $(LOCAL_DFILES)
+-include $(LOCAL_DFILES)
+endif
+
+VERSION := @VERSION@
+RELEASE := @RELEASE@
+
+TAR_PREFIX := rds-tools-$(VERSION)-$(RELEASE)
+TAR_FILE := $(TAR_PREFIX).tar.gz
+
+EXTRA_DIST := 	rds-info.1 \
+		rds-gen.1 \
+		rds-sink.1 \
+		rds-stress.1 \
+		rds-ping.1 \
+		rds.7 \
+		rds-rdma.7 \
+		Makefile.in \
+		rds-tools.spec.in \
+		configure.in \
+		configure \
+		README \
+		rds-tools.txt \
+		stap/rds.stp \
+		stap/README \
+		docs/rds-architecture.txt \
+		examples/Makefile \
+		examples/rds-sample.c \
+		examples/README
+
+DISTFILES := $(SOURCES) $(HEADERS) $(EXTRA_DIST)
+
+# Stage the prerequisites and every DISTFILES entry that exists under
+# $(TAR_PREFIX)/, keeping its relative directory, then archive the staging
+# tree.  Missing files are skipped on purpose; a failed mkdir or cp aborts.
+$(TAR_FILE): Makefile rds-tools.spec
+	@rm -rf $@ $(TAR_PREFIX) || :
+	@mkdir $(TAR_PREFIX)
+	for a in $^ $(DISTFILES); do    \
+                if [ ! -f $$a ]; then                                   \
+                        continue;                                       \
+                fi;                                                     \
+                targ=$(TAR_PREFIX)/$$(dirname $$a);                     \
+                mkdir -p $$targ || exit 1;                              \
+                cp $$a $$targ || exit 1;                                \
+        done
+
+	tar -zcf $@ $(TAR_PREFIX)
+
+.PHONY: rpm
+rpm: $(TAR_FILE)
+	rpmbuild -ta $^
+
+.PHONY: dist
+dist: $(TAR_FILE)
+
diff --git a/README b/README
new file mode 100644
index 0000000..0c6a8d6
--- /dev/null
+++ b/README
@@ -0,0 +1,9 @@
+
+== Short build instructions ==
+
+	autoconf
+	./configure
+	make rpm
+
+This should result in an rds-tools rpm which is versioned by the VERSION
+in the Makefile and the subversion rev that was checked out.
diff --git a/configure b/configure
new file mode 100755
index 0000000..67b6316
--- /dev/null
+++ b/configure
@@ -0,0 +1,2126 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.59.
+#
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization.  ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+  set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  as_unset=unset
+else
+  as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+  LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+  LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+  LC_TELEPHONE LC_TIME
+do
+  if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+    eval $as_var=C; export $as_var
+  else
+    $as_unset $as_var
+  fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)$' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+  	  /^X\/\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\/\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  echo "#! /bin/sh" >conf$$.sh
+  echo  "exit 0"   >>conf$$.sh
+  chmod +x conf$$.sh
+  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+    PATH_SEPARATOR=';'
+  else
+    PATH_SEPARATOR=:
+  fi
+  rm -f conf$$.sh
+fi
+
+
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2"  || {
+  # Find who we are.  Look in the path if we contain no path at all
+  # relative or not.
+  case $0 in
+    *[\\/]* ) as_myself=$0 ;;
+    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+       ;;
+  esac
+  # We did not find ourselves, most probably we were run as `sh COMMAND'
+  # in which case we are not to be found in the path.
+  if test "x$as_myself" = x; then
+    as_myself=$0
+  fi
+  if test ! -f "$as_myself"; then
+    { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
+   { (exit 1); exit 1; }; }
+  fi
+  case $CONFIG_SHELL in
+  '')
+    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for as_base in sh bash ksh sh5; do
+	 case $as_dir in
+	 /*)
+	   if ("$as_dir/$as_base" -c '
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2" ') 2>/dev/null; then
+	     $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+	     $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+	     CONFIG_SHELL=$as_dir/$as_base
+	     export CONFIG_SHELL
+	     exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+	   fi;;
+	 esac
+       done
+done
+;;
+  esac
+
+  # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+  # uniformly replaced by the line number.  The first 'sed' inserts a
+  # line-number line before each line; the second 'sed' does the real
+  # work.  The second script uses 'N' to pair each line-number line
+  # with the numbered line, and appends trailing '-' during
+  # substitution so that $LINENO is not a special case at line end.
+  # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+  # second 'sed' script.  Blame Lee E. McMahon for sed's syntax.  :-)
+  sed '=' <$as_myself |
+    sed '
+      N
+      s,$,-,
+      : loop
+      s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+      t loop
+      s,-$,,
+      s,^['$as_cr_digits']*\n,,
+    ' >$as_me.lineno &&
+  chmod +x $as_me.lineno ||
+    { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+   { (exit 1); exit 1; }; }
+
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensible to this).
+  . ./$as_me.lineno
+  # Exit status is that of the last command.
+  exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+  *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T='	' ;;
+  *c*,*  ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+  *)       ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+  # We could just check for DJGPP; but this test a) works b) is more generic
+  # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+  if test -f conf$$.exe; then
+    # Don't use ln at all; we don't have any links
+    as_ln_s='cp -p'
+  else
+    as_ln_s='ln -s'
+  fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+  as_ln_s=ln
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p=:
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" 	$as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+exec 6>&1
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_config_libobj_dir=.
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Maximum number of lines to put in a shell here document.
+# This variable seems obsolete.  It should probably be removed, and
+# only ac_max_sed_lines should be used.
+: ${ac_max_here_lines=38}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS VERSION RELEASE LIBOBJS LTLIBOBJS'
+ac_subst_files=''
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+ac_prev=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval "$ac_prev=\$ac_option"
+    ac_prev=
+    continue
+  fi
+
+  ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_option in
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+  | --da=*)
+    datadir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+      { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+   { (exit 1); exit 1; }; }
+    ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+    eval "enable_$ac_feature=no" ;;
+
+  -enable-* | --enable-*)
+    ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+      { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+   { (exit 1); exit 1; }; }
+    ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+    case $ac_option in
+      *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+      *) ac_optarg=yes ;;
+    esac
+    eval "enable_$ac_feature='$ac_optarg'" ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst \
+  | --locals | --local | --loca | --loc | --lo)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+  | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+      { echo "$as_me: error: invalid package name: $ac_package" >&2
+   { (exit 1); exit 1; }; }
+    ac_package=`echo $ac_package| sed 's/-/_/g'`
+    case $ac_option in
+      *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+      *) ac_optarg=yes ;;
+    esac
+    eval "with_$ac_package='$ac_optarg'" ;;
+
+  -without-* | --without-*)
+    ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+      { echo "$as_me: error: invalid package name: $ac_package" >&2
+   { (exit 1); exit 1; }; }
+    ac_package=`echo $ac_package | sed 's/-/_/g'`
+    eval "with_$ac_package=no" ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+   { (exit 1); exit 1; }; }
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+      { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+   { (exit 1); exit 1; }; }
+    ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+    eval "$ac_envvar='$ac_optarg'"
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  { echo "$as_me: error: missing argument to $ac_option" >&2
+   { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute paths.
+for ac_var in exec_prefix prefix
+do
+  eval ac_val=$`echo $ac_var`
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+    *)  { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+# Be sure to have absolute paths.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+	      localstatedir libdir includedir oldincludedir infodir mandir
+do
+  eval ac_val=$`echo $ac_var`
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* ) ;;
+    *)  { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+    echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+    If a cross compiler is detected then cross compile mode will be used." >&2
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then its parent.
+  ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$0" : 'X\(//\)[^/]' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r $srcdir/$ac_unique_file; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+  if test "$ac_srcdir_defaulted" = yes; then
+    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+   { (exit 1); exit 1; }; }
+  else
+    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+   { (exit 1); exit 1; }; }
+  fi
+fi
+(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
+  { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+   { (exit 1); exit 1; }; }
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+  cat <<_ACEOF
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+			  [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+			  [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR           user executables [EPREFIX/bin]
+  --sbindir=DIR          system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR       program executables [EPREFIX/libexec]
+  --datadir=DIR          read-only architecture-independent data [PREFIX/share]
+  --sysconfdir=DIR       read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR   modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR    modifiable single-machine data [PREFIX/var]
+  --libdir=DIR           object code libraries [EPREFIX/lib]
+  --includedir=DIR       C header files [PREFIX/include]
+  --oldincludedir=DIR    C header files for non-gcc [/usr/include]
+  --infodir=DIR          info documentation [PREFIX/info]
+  --mandir=DIR           man documentation [PREFIX/man]
+_ACEOF
+
+  cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+  cat <<\_ACEOF
+
+_ACEOF
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  ac_popdir=`pwd`
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    test -d $ac_dir || continue
+    ac_builddir=.
+
+if test "$ac_dir" != .; then
+  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+  # A "../" for each directory in $ac_dir_suffix.
+  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+  ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+  .)  # No --srcdir option.  We are building in place.
+    ac_srcdir=.
+    if test -z "$ac_top_builddir"; then
+       ac_top_srcdir=.
+    else
+       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+    fi ;;
+  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir ;;
+  *) # Relative path.
+    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+  case "$ac_dir" in
+  .) ac_abs_builddir=`pwd`;;
+  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+  *) ac_abs_builddir=`pwd`/"$ac_dir";;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+  case ${ac_top_builddir}. in
+  .) ac_abs_top_builddir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+  case $ac_srcdir in
+  .) ac_abs_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+  case $ac_top_srcdir in
+  .) ac_abs_top_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+  esac;;
+esac
+
+    cd $ac_dir
+    # Check for guested configure; otherwise get Cygnus style configure.
+    if test -f $ac_srcdir/configure.gnu; then
+      echo
+      $SHELL $ac_srcdir/configure.gnu  --help=recursive
+    elif test -f $ac_srcdir/configure; then
+      echo
+      $SHELL $ac_srcdir/configure  --help=recursive
+    elif test -f $ac_srcdir/configure.ac ||
+	   test -f $ac_srcdir/configure.in; then
+      echo
+      $ac_configure --help
+    else
+      echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi
+    cd $ac_popdir
+  done
+fi
+
+test -n "$ac_init_help" && exit 0
+if $ac_init_version; then
+  cat <<\_ACEOF
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit 0
+fi
+exec 5>config.log
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.59.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo               = `(hostinfo) 2>/dev/null               || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+      ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+    2)
+      ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+	    esac
+	    ;;
+	  -* ) ac_must_keep_next=true ;;
+	esac
+      fi
+      ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+      # Get rid of the leading space.
+      ac_sep=" "
+      ;;
+    esac
+  done
+done
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+{
+  (set) 2>&1 |
+    case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+    *ac_space=\ *)
+      sed -n \
+	"s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+      ;;
+    *)
+      sed -n \
+	"s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+      ;;
+    esac;
+}
+    echo
+
+    cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=$`echo $ac_var`
+      echo "$ac_var='"'"'$ac_val'"'"'"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=$`echo $ac_var`
+	echo "$ac_var='"'"'$ac_val'"'"'"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+      echo
+      sed "/^$/d" confdefs.h | sort
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      echo "$as_me: caught signal $ac_signal"
+    echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core &&
+  rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+     ' 0
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+  if test "x$prefix" != xNONE; then
+    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+  else
+    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+  fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+  if test -r "$ac_site_file"; then
+    { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file"
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . $cache_file;;
+      *)                      . ./$cache_file;;
+    esac
+  fi
+else
+  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in `(set) 2>&1 |
+	       sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val="\$ac_cv_env_${ac_var}_value"
+  eval ac_new_val="\$ac_env_${ac_var}_value"
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	{ echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	{ echo "$as_me:$LINENO:   former value:  $ac_old_val" >&5
+echo "$as_me:   former value:  $ac_old_val" >&2;}
+	{ echo "$as_me:$LINENO:   current value: $ac_new_val" >&5
+echo "$as_me:   current value: $ac_new_val" >&2;}
+	ac_cache_corrupted=:
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+      ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+VERSION=1.4
+RELEASE=1
+
+
+
+
+                    ac_config_files="$ac_config_files Makefile rds-tools.spec"
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+{
+  (set) 2>&1 |
+    case `(ac_space=' '; set | grep ac_space) 2>&1` in
+    *ac_space=\ *)
+      # `set' does not quote correctly, so add quotes (double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \).
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;;
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n \
+	"s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+      ;;
+    esac;
+} |
+  sed '
+     t clear
+     : clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     : end' >>confcache
+if diff $cache_file confcache >/dev/null 2>&1; then :; else
+  if test -w $cache_file; then
+    test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+    cat confcache >$cache_file
+  else
+    echo "not updating unwritable cache $cache_file"
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[	 ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[	 ]*$//;
+}'
+fi
+
+# Transform confdefs.h into DEFS.
+# Protect against shell expansion while executing Makefile rules.
+# Protect against Makefile macro expansion.
+#
+# If the first sed substitution is executed (which looks for macros that
+# take arguments), then we branch to the quote section.  Otherwise,
+# look for a macro that doesn't take arguments.
+cat >confdef2opt.sed <<\_ACEOF
+t clear
+: clear
+s,^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 (][^	 (]*([^)]*)\)[	 ]*\(.*\),-D\1=\2,g
+t quote
+s,^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 ][^	 ]*\)[	 ]*\(.*\),-D\1=\2,g
+t quote
+d
+: quote
+s,[	 `~#$^&*(){}\\|;'"<>?],\\&,g
+s,\[,\\&,g
+s,\],\\&,g
+s,\$,$$,g
+p
+_ACEOF
+# We use echo to avoid assuming a particular line-breaking character.
+# The extra dot is to prevent the shell from consuming trailing
+# line-breaks from the sub-command output.  A line-break within
+# single-quotes doesn't work because, if this script is created in a
+# platform that uses two characters for line-breaks (e.g., DOS), tr
+# would break.
+ac_LF_and_DOT=`echo; echo .`
+DEFS=`sed -n -f confdef2opt.sed confdefs.h | tr "$ac_LF_and_DOT" ' .'`
+rm -f confdef2opt.sed
+
+
+ac_libobjs=
+ac_ltlibobjs=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_i=`echo "$ac_i" |
+	 sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
+  # 2. Add them.
+  ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
+  ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+## --------------------- ##
+## M4sh Initialization.  ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x perform word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+  set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  as_unset=unset
+else
+  as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+  LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+  LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+  LC_TELEPHONE LC_TIME
+do
+  if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+    eval $as_var=C; export $as_var
+  else
+    $as_unset $as_var
+  fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)$' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+  	  /^X\/\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\/\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  echo "#! /bin/sh" >conf$$.sh
+  echo  "exit 0"   >>conf$$.sh
+  chmod +x conf$$.sh
+  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+    PATH_SEPARATOR=';'
+  else
+    PATH_SEPARATOR=:
+  fi
+  rm -f conf$$.sh
+fi
+
+
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2"  || {
+  # Find who we are.  Look in the path if we contain no path at all
+  # relative or not.
+  case $0 in
+    *[\\/]* ) as_myself=$0 ;;
+    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+       ;;
+  esac
+  # We did not find ourselves, most probably we were run as `sh COMMAND'
+  # in which case we are not to be found in the path.
+  if test "x$as_myself" = x; then
+    as_myself=$0
+  fi
+  if test ! -f "$as_myself"; then
+    { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+   { (exit 1); exit 1; }; }
+  fi
+  case $CONFIG_SHELL in
+  '')
+    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for as_base in sh bash ksh sh5; do
+	 case $as_dir in
+	 /*)
+	   if ("$as_dir/$as_base" -c '
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2" ') 2>/dev/null; then
+	     $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+	     $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+	     CONFIG_SHELL=$as_dir/$as_base
+	     export CONFIG_SHELL
+	     exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+	   fi;;
+	 esac
+       done
+done
+;;
+  esac
+
+  # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+  # uniformly replaced by the line number.  The first 'sed' inserts a
+  # line-number line before each line; the second 'sed' does the real
+  # work.  The second script uses 'N' to pair each line-number line
+  # with the numbered line, and appends trailing '-' during
+  # substitution so that $LINENO is not a special case at line end.
+  # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+  # second 'sed' script.  Blame Lee E. McMahon for sed's syntax.  :-)
+  sed '=' <$as_myself |
+    sed '
+      N
+      s,$,-,
+      : loop
+      s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+      t loop
+      s,-$,,
+      s,^['$as_cr_digits']*\n,,
+    ' >$as_me.lineno &&
+  chmod +x $as_me.lineno ||
+    { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+   { (exit 1); exit 1; }; }
+
+  # Don't try to exec as it changes $0, causing all sorts of problems
+  # (the dirname of $0 is not the place where we might find the
+  # original, and so on).  Autoconf is especially sensitive to this.
+  . ./$as_me.lineno
+  # Exit status is that of the last command.
+  exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+  *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T='	' ;;
+  *c*,*  ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+  *)       ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+  # We could just check for DJGPP; but this test a) works b) is more generic
+  # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+  if test -f conf$$.exe; then
+    # Don't use ln at all; we don't have any links
+    as_ln_s='cp -p'
+  else
+    as_ln_s='ln -s'
+  fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+  as_ln_s=ln
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p=:
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" 	$as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+exec 6>&1
+
+# Open the log real soon, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.  Logging --version etc. is OK.
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.59.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+_ACEOF
+
+# Files that config.status was made for.
+if test -n "$ac_config_files"; then
+  echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_headers"; then
+  echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_links"; then
+  echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_commands"; then
+  echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number, then exit
+  -q, --quiet      do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+  --file=FILE[:TEMPLATE]
+		   instantiate the configuration file FILE
+
+Configuration files:
+$config_files
+
+Report bugs to <bug-autoconf at gnu.org>."
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.59,
+  with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=$srcdir
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no files are specified by the user, then we need to provide default
+# values.  But first we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=*)
+    ac_option=`expr "x$1" : 'x\([^=]*\)='`
+    ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  -*)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  *) # This is not an option, so the user has probably given explicit
+     # arguments.
+     ac_option=$1
+     ac_need_defaults=false;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --vers* | -V )
+    echo "$ac_cs_version"; exit 0 ;;
+  --he | --h)
+    # Conflict between --help and --header
+    { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+   { (exit 1); exit 1; }; };;
+  --help | --hel | -h )
+    echo "$ac_cs_usage"; exit 0 ;;
+  --debug | --d* | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+    ac_need_defaults=false;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+   { (exit 1); exit 1; }; } ;;
+
+  *) ac_config_targets="$ac_config_targets $1" ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+if \$ac_cs_recheck; then
+  echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+  exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+fi
+
+_ACEOF
+
+
+
+
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_config_target in $ac_config_targets
+do
+  case "$ac_config_target" in
+  # Handling of arguments.
+  "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+  "rds-tools.spec" ) CONFIG_FILES="$CONFIG_FILES rds-tools.spec" ;;
+  *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason to put it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+  trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+  trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=./confstat$$-$RANDOM
+  (umask 077 && mkdir $tmp)
+} ||
+{
+   echo "$me: cannot create a temporary directory in ." >&2
+   { (exit 1); exit 1; }
+}
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+  # Protect against being on the right side of a sed subst in config.status.
+  sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
+   s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
+s, at SHELL@,$SHELL,;t t
+s, at PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
+s, at PACKAGE_NAME@,$PACKAGE_NAME,;t t
+s, at PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
+s, at PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
+s, at PACKAGE_STRING@,$PACKAGE_STRING,;t t
+s, at PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
+s, at exec_prefix@,$exec_prefix,;t t
+s, at prefix@,$prefix,;t t
+s, at program_transform_name@,$program_transform_name,;t t
+s, at bindir@,$bindir,;t t
+s, at sbindir@,$sbindir,;t t
+s, at libexecdir@,$libexecdir,;t t
+s, at datadir@,$datadir,;t t
+s, at sysconfdir@,$sysconfdir,;t t
+s, at sharedstatedir@,$sharedstatedir,;t t
+s, at localstatedir@,$localstatedir,;t t
+s, at libdir@,$libdir,;t t
+s, at includedir@,$includedir,;t t
+s, at oldincludedir@,$oldincludedir,;t t
+s, at infodir@,$infodir,;t t
+s, at mandir@,$mandir,;t t
+s, at build_alias@,$build_alias,;t t
+s, at host_alias@,$host_alias,;t t
+s, at target_alias@,$target_alias,;t t
+s, at DEFS@,$DEFS,;t t
+s, at ECHO_C@,$ECHO_C,;t t
+s, at ECHO_N@,$ECHO_N,;t t
+s, at ECHO_T@,$ECHO_T,;t t
+s, at LIBS@,$LIBS,;t t
+s, at VERSION@,$VERSION,;t t
+s, at RELEASE@,$RELEASE,;t t
+s, at LIBOBJS@,$LIBOBJS,;t t
+s, at LTLIBOBJS@,$LTLIBOBJS,;t t
+CEOF
+
+_ACEOF
+
+  cat >>$CONFIG_STATUS <<\_ACEOF
+  # Split the substitutions into bite-sized pieces for seds with
+  # small command number limits, like on Digital OSF/1 and HP-UX.
+  ac_max_sed_lines=48
+  ac_sed_frag=1 # Number of current file.
+  ac_beg=1 # First line for current file.
+  ac_end=$ac_max_sed_lines # Line after last line for current file.
+  ac_more_lines=:
+  ac_sed_cmds=
+  while $ac_more_lines; do
+    if test $ac_beg -gt 1; then
+      sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+    else
+      sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+    fi
+    if test ! -s $tmp/subs.frag; then
+      ac_more_lines=false
+    else
+      # The purpose of the label and of the branching condition is to
+      # speed up the sed processing (if there are no `@' at all, there
+      # is no need to browse any of the substitutions).
+      # These are the two extra sed commands mentioned above.
+      (echo ':t
+  /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+      if test -z "$ac_sed_cmds"; then
+	ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+      else
+	ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+      fi
+      ac_sed_frag=`expr $ac_sed_frag + 1`
+      ac_beg=$ac_end
+      ac_end=`expr $ac_end + $ac_max_sed_lines`
+    fi
+  done
+  if test -z "$ac_sed_cmds"; then
+    ac_sed_cmds=cat
+  fi
+fi # test -n "$CONFIG_FILES"
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+  case $ac_file in
+  - | *:- | *:-:* ) # input from stdin
+	cat >$tmp/stdin
+	ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  * )   ac_file_in=$ac_file.in ;;
+  esac
+
+  # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+  ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+  { if $as_mkdir_p; then
+    mkdir -p "$ac_dir"
+  else
+    as_dir="$ac_dir"
+    as_dirs=
+    while test ! -d "$as_dir"; do
+      as_dirs="$as_dir $as_dirs"
+      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    done
+    test ! -n "$as_dirs" || mkdir $as_dirs
+  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+   { (exit 1); exit 1; }; }; }
+
+  ac_builddir=.
+
+if test "$ac_dir" != .; then
+  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+  # A "../" for each directory in $ac_dir_suffix.
+  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+  ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+  .)  # No --srcdir option.  We are building in place.
+    ac_srcdir=.
+    if test -z "$ac_top_builddir"; then
+       ac_top_srcdir=.
+    else
+       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+    fi ;;
+  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir ;;
+  *) # Relative path.
+    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+  case "$ac_dir" in
+  .) ac_abs_builddir=`pwd`;;
+  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+  *) ac_abs_builddir=`pwd`/"$ac_dir";;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+  case ${ac_top_builddir}. in
+  .) ac_abs_top_builddir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+  case $ac_srcdir in
+  .) ac_abs_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+  case $ac_top_srcdir in
+  .) ac_abs_top_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+  esac;;
+esac
+
+
+
+  if test x"$ac_file" != x-; then
+    { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+    rm -f "$ac_file"
+  fi
+  # Let's still pretend it is `configure' which instantiates (i.e., don't
+  # use $as_me), people would be surprised to read:
+  #    /* config.h.  Generated by config.status.  */
+  if test x"$ac_file" = x-; then
+    configure_input=
+  else
+    configure_input="$ac_file.  "
+  fi
+  configure_input=$configure_input"Generated from `echo $ac_file_in |
+				     sed 's,.*/,,'` by configure."
+
+  # First look for the input files in the build tree, otherwise in the
+  # src tree.
+  ac_file_inputs=`IFS=:
+    for f in $ac_file_in; do
+      case $f in
+      -) echo $tmp/stdin ;;
+      [\\/$]*)
+	 # Absolute (can't be DOS-style, as IFS=:)
+	 test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 echo "$f";;
+      *) # Relative
+	 if test -f "$f"; then
+	   # Build tree
+	   echo "$f"
+	 elif test -f "$srcdir/$f"; then
+	   # Source tree
+	   echo "$srcdir/$f"
+	 else
+	   # /dev/null tree
+	   { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 fi;;
+      esac
+    done` || { (exit 1); exit 1; }
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+  sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s, at configure_input@,$configure_input,;t t
+s, at srcdir@,$ac_srcdir,;t t
+s, at abs_srcdir@,$ac_abs_srcdir,;t t
+s, at top_srcdir@,$ac_top_srcdir,;t t
+s, at abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s, at builddir@,$ac_builddir,;t t
+s, at abs_builddir@,$ac_abs_builddir,;t t
+s, at top_builddir@,$ac_top_builddir,;t t
+s, at abs_top_builddir@,$ac_abs_top_builddir,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+  rm -f $tmp/stdin
+  if test x"$ac_file" != x-; then
+    mv $tmp/out $ac_file
+  else
+    cat $tmp/out
+    rm -f $tmp/out
+  fi
+
+done
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || { (exit 1); exit 1; }
+fi
+
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..9cccaff
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,10 @@
+dnl Autoconf input for rds-tools, from which the configure script is generated.
+dnl Refuse to run with an autoconf older than 2.55.
+AC_PREREQ(2.55)
+AC_INIT()
+
+dnl Version and release numbers that get substituted into the output files.
+VERSION=1.4
+RELEASE=1
+
+dnl Export both shell variables as @VERSION@ and @RELEASE@ substitutions.
+AC_SUBST(VERSION)
+AC_SUBST(RELEASE)
+
+dnl Output files to instantiate from their .in templates.
+AC_OUTPUT(Makefile rds-tools.spec)
diff --git a/docs/rds-architecture.txt b/docs/rds-architecture.txt
new file mode 100644
index 0000000..c67077c
--- /dev/null
+++ b/docs/rds-architecture.txt
@@ -0,0 +1,356 @@
+
+Overview
+========
+
+This readme tries to provide some background on the hows and whys of RDS,
+and will hopefully help you find your way around the code.
+
+In addition, please see this email about RDS origins:
+http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
+
+RDS Architecture
+================
+
+RDS provides reliable, ordered datagram delivery by using a single
+reliable connection between any two nodes in the cluster. This allows
+applications to use a single socket to talk to any other process in the
+cluster - so in a cluster with N processes you need N sockets, in contrast
+to N*N if you use a connection-oriented socket transport like TCP.
+
+RDS is not Infiniband-specific; it was designed to support different
+transports.  The current implementation used to support RDS over TCP as well
+as IB. Work is in progress to support RDS over iWARP, and using DCE to
+guarantee no dropped packets on Ethernet, it may be possible to use RDS over
+UDP in the future.
+
+The high-level semantics of RDS from the application's point of view are
+
+ *	Addressing
+        RDS uses IPv4 addresses and 16bit port numbers to identify
+        the end point of a connection. All socket operations that involve
+        passing addresses between kernel and user space generally
+        use a struct sockaddr_in.
+
+        The fact that IPv4 addresses are used does not mean the underlying
+        transport has to be IP-based. In fact, RDS over IB uses a
+        reliable IB connection; the IP address is used exclusively to
+        locate the remote node's GID (by ARPing for the given IP).
+
+        The port space is entirely independent of UDP, TCP or any other
+        protocol.
+
+ *	Socket interface
+        RDS sockets work *mostly* as you would expect from a BSD
+        socket. The next section will cover the details. At any rate,
+        all I/O is performed through the standard BSD socket API.
+        Some additions like zerocopy support are implemented through
+        control messages, while other extensions use the getsockopt/
+        setsockopt calls.
+
+        Sockets must be bound before you can send or receive data.
+        This is needed because binding also selects a transport and
+        attaches it to the socket. Once bound, the transport assignment
+        does not change. RDS will tolerate IPs moving around (e.g. in
+        an active-active HA scenario), but only as long as the address
+        doesn't move to a different transport.
+
+ *	sysctls
+        RDS supports a number of sysctls in /proc/sys/net/rds
+
+
+Socket Interface
+================
+
+  AF_RDS, PF_RDS, SOL_RDS
+        These constants haven't been assigned yet, because RDS isn't in
+        mainline yet. Currently, the kernel module assigns some constant
+        and publishes it to user space through two sysctl files
+                /proc/sys/net/rds/pf_rds
+                /proc/sys/net/rds/sol_rds
+
+  fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+        This creates a new, unbound RDS socket.
+
+  setsockopt(SOL_SOCKET): send and receive buffer size
+        RDS honors the send and receive buffer size socket options.
+        You are not allowed to queue more than SO_SNDBUF bytes to
+        a socket. A message is queued when sendmsg is called, and
+        it leaves the queue when the remote system acknowledges
+        its arrival.
+
+        The SO_RCVBUF option controls the maximum receive queue length.
+        This is a soft limit rather than a hard limit - RDS will
+        continue to accept and queue incoming messages, even if that
+        takes the queue length over the limit. However, it will also
+        mark the port as "congested" and send a congestion update to
+        the source node. The source node is supposed to throttle any
+        processes sending to this congested port.
+
+  bind(fd, &sockaddr_in, ...)
+        This binds the socket to a local IP address and port, and a
+        transport.
+
+  sendmsg(fd, ...)
+        Sends a message to the indicated recipient. The kernel will
+        transparently establish the underlying reliable connection
+        if it isn't up yet.
+
+        An attempt to send a message that exceeds SO_SNDBUF will
+        return with -EMSGSIZE.
+
+        An attempt to send a message that would take the total number
+        of queued bytes over the SO_SNDBUF threshold will return
+        EAGAIN.
+
+        An attempt to send a message to a destination that is marked
+        as "congested" will return ENOBUFS.
+
+  recvmsg(fd, ...)
+        Receives a message that was queued to this socket. The socket's
+        recv queue accounting is adjusted, and if the queue length
+        drops below SO_RCVBUF, the port is marked uncongested, and
+        a congestion update is sent to all peers.
+
+        Applications can ask the RDS kernel module to receive
+        notifications via control messages (for instance, there is a
+        notification when a congestion update arrived, or when an RDMA
+        operation completes). These notifications are received through
+        the msg.msg_control buffer of struct msghdr. The format of the
+        messages is described in manpages.
+
+  poll(fd)
+        RDS supports the poll interface to allow the application
+        to implement async I/O.
+
+        POLLIN handling is pretty straightforward. When there's an
+        incoming message queued to the socket, or a pending notification,
+        we signal POLLIN.
+
+        POLLOUT is a little harder. Since you can essentially send
+        to any destination, RDS will always signal POLLOUT as long as
+        there's room on the send queue (ie the number of bytes queued
+        is less than the sendbuf size).
+
+        However, the kernel will refuse to accept messages to
+        a destination marked congested - in this case you will loop
+        forever if you rely on poll to tell you what to do.
+        This isn't a trivial problem, but applications can deal with
+        this - by using congestion notifications, and by checking for
+        ENOBUFS errors returned by sendmsg.
+
+  setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
+        This allows the application to discard all messages queued to a
+        specific destination on this particular socket.
+
+        This allows the application to cancel outstanding messages if
+        it detects a timeout. For instance, if it tried to send a message,
+        and the remote host is unreachable, RDS will keep trying forever.
+        The application may decide it's not worth it, and cancel the
+        operation. In this case, it would use RDS_CANCEL_SENT_TO to
+        nuke any pending messages.
+
+
+RDMA for RDS
+============
+
+  see rds-rdma(7) manpage (available in rds-tools)
+
+
+Congestion Notifications
+========================
+
+  see rds(7) manpage
+
+
+RDS Protocol
+============
+
+  Message header
+
+    The message header is a 'struct rds_header' (see rds.h):
+    Fields:
+      h_sequence:
+          per-packet sequence number
+      h_ack:
+          piggybacked acknowledgment of last packet received
+      h_len:
+          length of data, not including header
+      h_sport:
+          source port
+      h_dport:
+          destination port
+      h_flags:
+          CONG_BITMAP - this is a congestion update bitmap
+          ACK_REQUIRED - receiver must ack this packet
+          RETRANSMITTED - packet has previously been sent
+      h_credit:
+          indicate to other end of connection that
+          it has more credits available (i.e. there is
+          more send room)
+      h_padding[4]:
+          unused, for future use
+      h_csum:
+          header checksum
+      h_exthdr:
+          optional data can be passed here. This is currently used for
+          passing RDMA-related information.
+
+  ACK and retransmit handling
+
+      One might think that with reliable IB connections you wouldn't need
+      to ack messages that have been received.  The problem is that IB
+      hardware generates an ack message before it has DMAed the message
+      into memory.  This creates a potential message loss if the HCA is
+      disabled for any reason between when it sends the ack and before
+      the message is DMAed and processed.  This is only a potential issue
+      if another HCA is available for fail-over.
+
+      Sending an ack immediately would allow the sender to free the sent
+      message from their send queue quickly, but could cause excessive
+      traffic to be used for acks. RDS piggybacks acks on sent data
+      packets.  Ack-only packets are reduced by only allowing one to be
+      in flight at a time, and by the sender only asking for acks when
+      its send buffers start to fill up. All retransmissions are also
+      acked.
+
+  Flow Control
+
+      RDS's IB transport uses a credit-based mechanism to verify that
+      there is space in the peer's receive buffers for more data. This
+      eliminates the need for hardware retries on the connection.
+
+  Congestion
+
+      Messages waiting in the receive queue on the receiving socket
+      are accounted against the socket's SO_RCVBUF option value.  Only
+      the payload bytes in the message are accounted for.  If the
+      number of bytes queued equals or exceeds rcvbuf then the socket
+      is congested.  All sends attempted to this socket's address
+      should block or return -EWOULDBLOCK.
+
+      Applications are expected to be reasonably tuned such that this
+      situation very rarely occurs.  An application encountering this
+      "back-pressure" is considered a bug.
+
+      This is implemented by having each node maintain bitmaps which
+      indicate which ports on bound addresses are congested.  As the
+      bitmap changes it is sent through all the connections which
+      terminate in the local address of the bitmap which changed.
+
+      The bitmaps are allocated as connections are brought up.  This
+      avoids allocation in the interrupt handling path which queues
+      messages on sockets.  The dense bitmaps let transports send the
+      entire bitmap on any bitmap change reasonably efficiently.  This
+      is much easier to implement than some finer-grained
+      communication of per-port congestion.  The sender does a very
+      inexpensive bit test to test if the port it's about to send to
+      is congested or not.
+
+
+RDS Transport Layer
+===================
+
+  As mentioned above, RDS is not IB-specific. Its code is divided
+  into a general RDS layer and a transport layer.
+
+  The general layer handles the socket API, congestion handling,
+  loopback, stats, usermem pinning, and the connection state machine.
+
+  The transport layer handles the details of the transport. The IB
+  transport, for example, handles all the queue pairs, work requests,
+  CM event handlers, and other Infiniband details.
+
+
+RDS Kernel Structures
+=====================
+
+  struct rds_message
+    aka possibly "rds_outgoing", the generic RDS layer copies data to
+    be sent and sets header fields as needed, based on the socket API.
+    This is then queued for the individual connection and sent by the
+    connection's transport.
+  struct rds_incoming
+    a generic struct referring to incoming data that can be handed from
+    the transport to the general code and queued by the general code
+    while the socket is awoken. It is then passed back to the transport
+    code to handle the actual copy-to-user.
+  struct rds_socket
+    per-socket information
+  struct rds_connection
+    per-connection information
+  struct rds_transport
+    pointers to transport-specific functions
+  struct rds_statistics
+    non-transport-specific statistics
+  struct rds_cong_map
+    wraps the raw congestion bitmap, contains rbnode, waitq, etc.
+
+Connection management
+=====================
+
+  Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
+  ERROR states.
+
+  The first time an attempt is made by an RDS socket to send data to
+  a node, a connection is allocated and connected. That connection is
+  then maintained forever -- if there are transport errors, the
+  connection will be dropped and re-established.
+
+  Dropping a connection while packets are queued will cause queued or
+  partially-sent datagrams to be retransmitted when the connection is
+  re-established.
+
+
+The send path
+=============
+
+  rds_sendmsg()
+    struct rds_message built from incoming data
+    CMSGs parsed (e.g. RDMA ops)
+    transport connection alloced and connected if not already
+    rds_message placed on send queue
+    send worker awoken
+  rds_send_worker()
+    calls rds_send_xmit() until queue is empty
+  rds_send_xmit()
+    transmits congestion map if one is pending
+    may set ACK_REQUIRED
+    calls transport to send either non-RDMA or RDMA message
+    (RDMA ops never retransmitted)
+  rds_ib_xmit()
+    allocs work requests from send ring
+    adds any new send credits available to peer (h_credit)
+    maps the rds_message's sg list
+    piggybacks ack
+    populates work requests
+    post send to connection's queue pair
+
+The recv path
+=============
+
+  rds_ib_recv_cq_comp_handler()
+    looks at write completions
+    unmaps recv buffer from device
+    no errors, call rds_ib_process_recv()
+    refill recv ring
+  rds_ib_process_recv()
+    validate header checksum
+    copy header to rds_ib_incoming struct if start of a new datagram
+    add to ibinc's fraglist
+    if completed datagram:
+      update cong map if datagram was cong update
+      call rds_recv_incoming() otherwise
+      note if ack is required
+  rds_recv_incoming()
+    drop duplicate packets
+    respond to pings
+    find the sock associated with this datagram
+    add to sock queue
+    wake up sock
+    do some congestion calculations
+  rds_recvmsg
+    copy data into user iovec
+    handle CMSGs
+    return to application
+
+
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 0000000..ef35c1f
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,6 @@
+
+# Build the RDS sample program using make's built-in compile and link rules.
+.PHONY: all
+all: rds-sample
+
+rds-sample: rds-sample.o
+
+# rds-sample.c includes "ib_rds.h", which lives in ../net.  The include path
+# is a preprocessor flag, so it is appended to CPPFLAGS rather than assigned
+# to CFLAGS; `override' keeps it in place even when CPPFLAGS or CFLAGS are
+# given on the make command line.
+override CPPFLAGS += -I../net
diff --git a/examples/README b/examples/README
new file mode 100644
index 0000000..3433656
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,6 @@
+The source in this directory is meant to serve as an aid for
+becoming familiar with RDS socket programming.
+
+Questions about this or other rds-tools code are welcomed on the
+rds-devel list: http://oss.oracle.com/mailman/listinfo/rds-devel
+
diff --git a/examples/rds-sample.c b/examples/rds-sample.c
new file mode 100644
index 0000000..b7cd325
--- /dev/null
+++ b/examples/rds-sample.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2008 Chelsio, Inc. All rights reserved.
+ *
+ * Author: Jon Mason <jon at opengridcomputing.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* FIXME - this is a hack to get around RDS not exporting any header files.
+ * This is a local copy.
+ */
+#include "ib_rds.h"
+/* These are defined in rds.h....but that file is not happily included */
+#define SOL_RDS		272
+#define PF_RDS		28
+
+
+#define TESTPORT	4000
+#define BUFSIZE		94
+
+/*
+ * Issue an RDMA read using the control buffer attached to `msg`.
+ *
+ * The first control message header in `msg` is rewritten in place as an
+ * RDS_CMSG_RDMA_ARGS request that transfers BUFSIZE bytes into `buf`, and
+ * the request is submitted with sendmsg().  After a short pause, recvmsg()
+ * is called on the same msghdr; this is intended to pick up the completion
+ * notification requested through RDS_RDMA_NOTIFY_ME.
+ *
+ * Returns 0 on success, or -1 if the control buffer cannot hold a control
+ * message header or if either socket call fails.
+ */
+static int do_rdma_read(int sock, struct msghdr *msg, void *buf)
+{
+	struct rds_rdma_args *args;
+	struct rds_iovec iov;
+	struct cmsghdr *cmsg;
+	int rc;
+
+	/* CMSG_FIRSTHDR() yields NULL when msg_controllen is too small. */
+	cmsg = CMSG_FIRSTHDR(msg);
+	if (!cmsg) {
+		printf("%s: control buffer too small for a cmsghdr\n", __func__);
+		return -1;
+	}
+	args = (struct rds_rdma_args *)CMSG_DATA(cmsg);
+
+	/* Do a sendmsg call to perform the RDMA */
+	cmsg->cmsg_level = SOL_RDS;
+	cmsg->cmsg_type = RDS_CMSG_RDMA_ARGS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args));
+
+	/* Go through unsigned long so the pointer-to-integer conversion is
+	 * well formed on 32-bit builds too.
+	 */
+	iov.addr = (uint64_t)(unsigned long) buf;
+	iov.bytes = BUFSIZE * sizeof(char);
+
+	args->remote_vec.addr = 0;
+	args->remote_vec.bytes = BUFSIZE * sizeof(char);
+	args->local_vec_addr = (uint64_t)(unsigned long) &iov;
+	args->nr_local = 1;
+	args->flags = RDS_RDMA_NOTIFY_ME;
+	args->user_token = 0;
+
+	msg->msg_controllen = CMSG_SPACE(sizeof(struct rds_rdma_args));
+
+	rc = sendmsg(sock, msg, 0);
+	if (rc < 0) {
+		printf("%s: Error sending message: %d %d\n", __func__, rc, errno);
+		return -1;
+	}
+
+	/* Crude wait for the operation before asking for the notification. */
+	sleep(1);
+
+	rc = recvmsg(sock, msg, 0);
+	if (rc < 0) {
+		printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Receiving side of the sample.
+ *
+ * Binds an RDS socket to `address` on TESTPORT, waits for a single message
+ * and prints its payload.  If the message arrives with control data,
+ * do_rdma_read() is called to fetch the payload before it is printed.
+ * Failures are reported on stdout; every resource acquired here (buffers
+ * and the socket) is released before returning.
+ */
+static void server(char *address)
+{
+	struct sockaddr_in sin, din;
+	void *buf, *ctlbuf;
+	struct msghdr msg;
+	struct iovec *iov;
+	int rc, sock;
+
+	buf = calloc(BUFSIZE, sizeof(char));
+	if (!buf) {
+		printf("%s: calloc failed\n", __func__);
+		return;
+	}
+
+	sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (sock < 0) {
+		printf("%s: Error creating Socket: %d\n", __func__, sock);
+		goto out;
+	}
+
+	/* NOTE: the port is stored without htons(); client() does the same,
+	 * so the two sides agree on the value.
+	 */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = inet_addr(address);
+	sin.sin_port = TESTPORT;
+
+	rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+	if (rc < 0) {
+		printf("%s: Error binding to address: %d %d\n", __func__, rc, errno);
+		goto out_close;
+	}
+
+	/* The recv iov could contain a regular RDS packet or an RDMA RDS
+	 * packet, so set it up for the worst case for both.
+	 */
+	iov = calloc(1, sizeof(struct iovec));
+	if (!iov) {
+		printf("%s: calloc failed\n", __func__);
+		goto out_close;
+	}
+
+	/* The control buffer must hold a cmsghdr plus the rds_rdma_args
+	 * payload, i.e. exactly the msg_controllen advertised below.
+	 */
+	ctlbuf = calloc(1, CMSG_SPACE(sizeof(struct rds_rdma_args)));
+	if (!ctlbuf) {
+		printf("%s: calloc failed\n", __func__);
+		goto out1;
+	}
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = BUFSIZE * sizeof(char);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &din;
+	msg.msg_namelen = sizeof(din);
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = ctlbuf;
+	msg.msg_controllen = CMSG_SPACE(sizeof(struct rds_rdma_args));
+
+	printf("server listening on %s\n", inet_ntoa(sin.sin_addr));
+
+	rc = recvmsg(sock, &msg, 0);
+	if (rc < 0) {
+		printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
+		goto out2;
+	}
+
+	/* rc is the number of bytes actually received. */
+	printf("Received a packet len %d, cmsg len %d, on port %d\n",
+	       rc,
+	       (uint32_t) msg.msg_controllen,
+	       din.sin_port);
+
+	if (msg.msg_controllen) {
+		rc = do_rdma_read(sock, &msg, buf);
+		if (rc < 0)
+			goto out2;
+	}
+
+	/* A full BUFSIZE payload leaves no room for a terminating NUL, so
+	 * bound the print by the buffer size.
+	 */
+	printf("payload contains:  %.*s\n", BUFSIZE, (char *)buf);
+
+out2:
+	free(ctlbuf);
+out1:
+	free(iov);
+out_close:
+	close(sock);
+out:
+	free(buf);
+}
+
+/*
+ * Fill the first BUFSIZE bytes of `buf` with consecutive character codes
+ * starting at 0x21.  No terminating NUL is written.
+ */
+static void create_message(char *buf)
+{
+	char *cursor = buf;
+	char *const limit = buf + BUFSIZE;
+	char value = 0x21;
+
+	while (cursor < limit)
+		*cursor++ = value++;
+}
+
+/*
+ * Set up `msg` as a control-only message carrying an RDS_CMSG_RDMA_MAP
+ * request for the BUFSIZE bytes at `buf`, with `cookie` given as the
+ * cookie address.
+ *
+ * A control buffer is allocated and stored in msg->msg_control; the caller
+ * owns it afterwards.  msg->msg_iov is cleared.  `sock` is not used.
+ *
+ * Returns 0 on success, -1 if the control buffer cannot be allocated (in
+ * which case `msg` is left untouched).
+ */
+static int build_rds_rdma_packet(int sock, struct msghdr *msg, void *buf, uint64_t *cookie)
+{
+	const size_t ctl_len = CMSG_SPACE(sizeof(struct rds_get_mr_args));
+	struct rds_get_mr_args request;
+	struct cmsghdr *hdr;
+	void *control;
+
+	control = calloc(1, ctl_len);
+	if (control == NULL) {
+		printf("%s: calloc failed\n", __func__);
+		return -1;
+	}
+
+	/* Describe the memory region to be mapped. */
+	request.vec.addr = (uint64_t) buf;
+	request.vec.bytes = BUFSIZE * sizeof(char);
+	request.cookie_addr = (uint64_t) cookie;
+	request.flags = RDS_RDMA_READWRITE;
+
+	/* No data payload: the message consists of control data only. */
+	msg->msg_iov = NULL;
+	msg->msg_iovlen = 0;
+	msg->msg_control = control;
+	msg->msg_controllen = ctl_len;
+
+	hdr = CMSG_FIRSTHDR(msg);
+	hdr->cmsg_level = SOL_RDS;
+	hdr->cmsg_type = RDS_CMSG_RDMA_MAP;
+	hdr->cmsg_len = CMSG_LEN(sizeof(request));
+	memcpy(CMSG_DATA(hdr), &request, sizeof(request));
+
+	return 0;
+}
+
+/*
+ * Attach a single-entry iovec to `msg` that covers BUFSIZE bytes of `buf`.
+ * The iovec is heap-allocated and stored in msg->msg_iov; the caller owns
+ * it afterwards.
+ *
+ * Returns 0 on success, -1 if the iovec cannot be allocated.
+ */
+static int build_rds_packet(struct msghdr *msg, char *buf)
+{
+	struct iovec *vec = calloc(1, sizeof(*vec));
+
+	if (vec == NULL) {
+		printf("%s: calloc failed\n", __func__);
+		return -1;
+	}
+
+	vec->iov_base = buf;
+	vec->iov_len = BUFSIZE * sizeof(char);
+
+	msg->msg_iovlen = 1;
+	msg->msg_iov = vec;
+
+	return 0;
+}
+
+/*
+ * Sending side of the sample.
+ *
+ * Binds an RDS socket to `localaddr` and sends one message to `remoteaddr`
+ * on TESTPORT.  With `rdma` set, the message is the control-only request
+ * built by build_rds_rdma_packet() and a recvmsg() follows the send;
+ * otherwise the message carries the test pattern from create_message()
+ * as ordinary data.  Failures are reported on stdout; all buffers and the
+ * socket are released before returning.
+ */
+static void client(char *localaddr, char *remoteaddr, int rdma)
+{
+	struct sockaddr_in sin, din;
+	struct msghdr msg;
+	uint64_t cookie = 0;
+	int rc, sock;
+	void *buf;
+
+	buf = calloc(BUFSIZE, sizeof(char));
+	if (!buf) {
+		printf("%s: calloc failed\n", __func__);
+		return;
+	}
+
+	create_message((char *)buf);
+
+	sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (sock < 0) {
+		printf("%s: Error creating Socket: %d\n", __func__, sock);
+		goto out;
+	}
+
+	/* The local port is left at 0 in the bind address. */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = inet_addr(localaddr);
+
+	rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+	if (rc < 0) {
+		printf("%s: Error binding to address: %d %d\n", __func__, rc, errno);
+		goto out_close;
+	}
+
+	/* Zeroing msg also leaves msg_control and msg_iov NULL, so the
+	 * cleanup at out1 is safe from here on.
+	 */
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &din;
+	msg.msg_namelen = sizeof(din);
+
+	/* NOTE: the port is stored without htons(); server() does the same,
+	 * so the two sides agree on the value.
+	 */
+	memset(&din, 0, sizeof(din));
+	din.sin_family = AF_INET;
+	din.sin_addr.s_addr = inet_addr(remoteaddr);
+	din.sin_port = TESTPORT;
+
+	if (rdma) {
+		rc = build_rds_rdma_packet(sock, &msg, buf, &cookie);
+		if (rc < 0)
+			goto out1;
+
+		printf("Client Sending RDMA message from %s to %s\n",
+		       localaddr, remoteaddr);
+	} else {
+		rc = build_rds_packet(&msg, buf);
+		if (rc < 0)
+			goto out1;
+
+		/* buf is completely filled and has no terminating NUL, so
+		 * bound the print; report the destination port, not the
+		 * (unset) local one.
+		 */
+		printf("client sending %d byte message %.*s from %s to %s on port %d\n",
+		       (uint32_t) msg.msg_iov->iov_len,
+		       BUFSIZE, (char *)buf,
+		       localaddr,
+		       remoteaddr,
+		       din.sin_port);
+	}
+
+	rc = sendmsg(sock, &msg, 0);
+	if (rc < 0) {
+		printf("%s: Error sending message: %d %d\n", __func__, rc, errno);
+		goto out1;
+	}
+
+	if (rdma) {
+		/* reuse the same msg, as it should no longer be necessary and this incoming
+		 * msg should be empty
+		 */
+		rc = recvmsg(sock, &msg, 0);
+		if (rc < 0) {
+			printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
+		}
+	}
+
+out1:
+	/* free(NULL) is a no-op, so no guards are needed. */
+	free(msg.msg_control);
+	free(msg.msg_iov);
+out_close:
+	close(sock);
+out:
+	free(buf);
+}
+
+/*
+ * Parse the command line and run the sample as server or client.
+ *
+ *   -s ADDR   server address
+ *   -c ADDR   client (local) address
+ *   --rdma    use the RDMA variant on the client side
+ *
+ * With only -s the server runs; with both -s and -c the client runs.
+ * Returns -1 when fewer than two arguments are given, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	char *serveraddr = NULL, *clientaddr = NULL;
+	int rdma = 0;
+	int idx = 1;
+
+	if (argc < 3) {
+		printf("not enough args\n");
+		return -1;
+	}
+
+	while (idx < argc) {
+		const char *opt = argv[idx];
+
+		/* -s and -c consume the following argument as their value. */
+		if (strcmp("-s", opt) == 0)
+			serveraddr = argv[++idx];
+		else if (strcmp("-c", opt) == 0)
+			clientaddr = argv[++idx];
+		else if (strcmp("--rdma", opt) == 0)
+			rdma = 1;
+		else
+			printf("Invalid param\n");
+
+		idx++;
+	}
+
+	/* Nothing runs unless a server address was given. */
+	if (!serveraddr)
+		return 0;
+
+	if (clientaddr)
+		client(clientaddr, serveraddr, rdma);
+	else
+		server(serveraddr);
+
+	return 0;
+}
+
diff --git a/kernel-list.h b/kernel-list.h
new file mode 100644
index 0000000..9e25b59
--- /dev/null
+++ b/kernel-list.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* Link node embedded in every structure that sits on a circular list. */
+struct list_head {
+	struct list_head *next;
+	struct list_head *prev;
+};
+
+/* An empty list is a head whose two links both point back at itself. */
+#define LIST_HEAD_INIT(name)	{ &(name), &(name) }
+#define LIST_HEAD(name)		struct list_head name = LIST_HEAD_INIT(name)
+#define INIT_LIST_HEAD(ptr) \
+	do { (ptr)->next = (ptr); (ptr)->prev = (ptr); } while (0)
+
+/* Compilers other than GCC and Watcom get __inline__ defined away. */
+#if !defined(__GNUC__) && !defined(__WATCOMC__)
+# define __inline__
+#endif
+
+/*
+ * Link @new into the gap between @prev and @next.
+ *
+ * Internal helper: the caller must already know that @prev and @next
+ * are adjacent entries of the same list.
+ */
+static __inline__ void
+__list_add(struct list_head *new, struct list_head *prev,
+	   struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/* Push @new onto the front of the list, i.e. directly after @head. */
+static __inline__ void
+list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+	__list_add(new, head, first);
+}
+
+/* Append @new at the back of the list, i.e. directly before @head. */
+static __inline__ void
+list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+	__list_add(new, last, head);
+}
+
+/*
+ * Close the gap between @prev and @next, dropping whatever entry
+ * used to sit between them.
+ *
+ * Internal helper: the caller supplies the neighbours of the entry
+ * being removed.
+ */
+static __inline__ void
+__list_del(struct list_head *prev, struct list_head *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev, *after = entry->next;
+	__list_del(before, after);	/* entry's own links are left stale */
+}
+
+/**
+ * list_del_init - unlink an entry and leave it as an empty list of its own.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);	/* entry now points at itself */
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static __inline__ void list_move(struct list_head *list, struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static __inline__ void list_move_tail(struct list_head *list,
+				      struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add_tail(list, head);
+}
+
+static __inline__ int list_empty(struct list_head *head)
+{
+	return head == head->next;	/* a head linked to itself is empty */
+}
+
+/*
+ * Insert every entry of @list right after @head; @list is not reinitialised.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+	struct list_head *front = list->next, *back, *resume;
+
+	if (front == list)	/* nothing to splice */
+		return;
+
+	back = list->prev;
+	resume = head->next;
+
+	front->prev = head;
+	head->next = front;
+	back->next = resume;
+	resume->prev = back;
+}
+
+#define list_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+#define list_for_each(pos, head) \
+        for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_for_each_safe(pos, n, head) \
+	        for (pos = (head)->next, n = pos->next; pos != (head); \
+		                     pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop counter.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#endif
diff --git a/net/ib_rds.h b/net/ib_rds.h
new file mode 100644
index 0000000..992139c
--- /dev/null
+++ b/net/ib_rds.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2008 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef IB_RDS_H
+#define IB_RDS_H
+
+#include <linux/types.h>
+
+/* These sparse annotated types shouldn't be in any user
+ * visible header file. We should clean this up rather
+ * than kludging around them. */
+#ifndef __KERNEL__
+#define __be16	u_int16_t
+#define __be32	u_int32_t
+#define __be64	u_int64_t
+#endif
+
+#define RDS_IB_ABI_VERSION		0x301
+
+/*
+ * setsockopt/getsockopt for SOL_RDS
+ */
+#define RDS_CANCEL_SENT_TO      	1
+#define RDS_GET_MR			2
+#define RDS_FREE_MR			3
+/* deprecated: RDS_BARRIER 4 */
+#define RDS_RECVERR			5
+#define RDS_CONG_MONITOR		6
+
+/*
+ * Control message types for SOL_RDS.
+ *
+ * RDS_CMSG_RDMA_ARGS (sendmsg)
+ *	Request a RDMA transfer to/from the specified
+ *	memory ranges.
+ *	The cmsg_data is a struct rds_rdma_args.
+ * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
+ *	Kernel informs application about intended
+ *	source/destination of a RDMA transfer
+ * RDS_CMSG_RDMA_MAP (sendmsg)
+ *	Application asks kernel to map the given
+ *	memory range into a IB MR, and send the
+ *	R_Key along in an RDS extension header.
+ *	The cmsg_data is a struct rds_get_mr_args,
+ *	the same as for the GET_MR setsockopt.
+ * RDS_CMSG_RDMA_STATUS (recvmsg)
+ *	Returns the status of a completed RDMA operation.
+ */
+#define RDS_CMSG_RDMA_ARGS		1
+#define RDS_CMSG_RDMA_DEST		2
+#define RDS_CMSG_RDMA_MAP		3
+#define RDS_CMSG_RDMA_STATUS		4
+#define RDS_CMSG_CONG_UPDATE		5
+
+#define RDS_INFO_COUNTERS		10000
+#define RDS_INFO_CONNECTIONS		10001
+/* 10002 aka RDS_INFO_FLOWS is deprecated */
+#define RDS_INFO_SEND_MESSAGES		10003
+#define RDS_INFO_RETRANS_MESSAGES       10004
+#define RDS_INFO_RECV_MESSAGES          10005
+#define RDS_INFO_SOCKETS                10006
+#define RDS_INFO_TCP_SOCKETS            10007
+#define RDS_INFO_IB_CONNECTIONS		10008
+
+struct rds_info_counter {
+	u_int8_t	name[32];
+	u_int64_t	value;
+} __attribute__((packed));
+
+#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
+#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
+#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04
+
+struct rds_info_connection {
+	u_int64_t	next_tx_seq;
+	u_int64_t	next_rx_seq;
+	__be32		laddr;
+	__be32		faddr;
+	u_int8_t	transport[15];		/* null term ascii */
+	u_int8_t	flags;
+} __attribute__((packed));
+
+struct rds_info_flow {
+	__be32		laddr;
+	__be32		faddr;
+	u_int32_t	bytes;
+	__be16		lport;
+	__be16		fport;
+} __attribute__((packed));
+
+#define RDS_INFO_MESSAGE_FLAG_ACK               0x01
+#define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
+
+struct rds_info_message {
+	u_int64_t	seq;
+	u_int32_t	len;
+	__be32		laddr;
+	__be32		faddr;
+	__be16		lport;
+	__be16		fport;
+	u_int8_t	flags;
+} __attribute__((packed));
+
+struct rds_info_socket {
+	u_int32_t	sndbuf;
+	__be32		bound_addr;
+	__be32		connected_addr;
+	__be16		bound_port;
+	__be16		connected_port;
+	u_int32_t	rcvbuf;
+	uint64_t	inum;
+} __attribute__((packed));
+
+struct rds_info_socket_v1 {	/* field-for-field rds_info_socket, minus its trailing inum */
+	u_int32_t	sndbuf;
+	__be32		bound_addr;
+	__be32		connected_addr;
+	__be16		bound_port;
+	__be16		connected_port;
+	u_int32_t	rcvbuf;
+} __attribute__((packed));
+
+struct rds_info_tcp_socket {
+	__be32		local_addr;
+	__be16		local_port;
+	__be32		peer_addr;
+	__be16		peer_port;
+	u_int64_t	hdr_rem;
+	u_int64_t	data_rem;
+	u_int32_t	last_sent_nxt;
+	u_int32_t	last_expected_una;
+	u_int32_t	last_seen_una;
+} __attribute__((packed));
+
+#define RDS_IB_GID_LEN	16
+struct rds_info_ib_connection {	/* NOTE(review): not packed, unlike the other rds_info_* structs - confirm intended */
+	__be32		src_addr;
+	__be32		dst_addr;
+	uint8_t		src_gid[RDS_IB_GID_LEN];
+	uint8_t		dst_gid[RDS_IB_GID_LEN];
+
+	uint32_t	max_send_wr;
+	uint32_t	max_recv_wr;
+	uint32_t	max_send_sge;
+	uint32_t	rdma_fmr_max;
+	uint32_t	rdma_fmr_size;
+};
+
+/*
+ * Congestion monitoring.
+ * Congestion control in RDS happens at the host connection
+ * level by exchanging a bitmap marking congested ports.
+ * By default, a process sleeping in poll() is always woken
+ * up when the congestion map is updated.
+ * With explicit monitoring, an application can have more
+ * fine-grained control.
+ * The application installs a 64bit mask value in the socket,
+ * where each bit corresponds to a group of ports.
+ * When a congestion update arrives, RDS checks the set of
+ * ports that are now uncongested against the list bit mask
+ * installed in the socket, and if they overlap, we queue a
+ * cong_notification on the socket.
+ *
+ * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
+ * with the 64bit mask.
+ * Congestion updates are received via RDS_CMSG_CONG_UPDATE
+ * control messages.
+ *
+ * The correspondence between bits and ports is
+ *	1 << (portnum % 64)
+ */
+#define RDS_CONG_MONITOR_SIZE	64	/* number of bits in the monitor mask */
+#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
+#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
+
+/*
+ * RDMA related types
+ */
+
+/*
+ * This encapsulates a remote memory location.
+ * In the current implementation, it contains the R_Key
+ * of the remote memory region, and the offset into it
+ * (so that the application does not have to worry about
+ * alignment).
+ */
+typedef u_int64_t	rds_rdma_cookie_t;
+
+struct rds_iovec {
+	u_int64_t	addr;
+	u_int64_t	bytes;
+};
+
+struct rds_get_mr_args {
+	struct rds_iovec vec;
+	u_int64_t	cookie_addr;
+	uint64_t	flags;
+};
+
+struct rds_free_mr_args {
+	rds_rdma_cookie_t cookie;
+	u_int64_t	flags;
+};
+
+struct rds_rdma_args {
+	rds_rdma_cookie_t cookie;
+	struct rds_iovec remote_vec;
+	u_int64_t	local_vec_addr;
+	u_int64_t	nr_local;
+	u_int64_t	flags;
+	u_int64_t	user_token;
+};
+
+struct rds_rdma_notify {
+	u_int64_t	user_token;
+	int32_t		status;
+};
+
+#define RDS_RDMA_SUCCESS	0
+#define RDS_RDMA_REMOTE_ERROR	1
+#define RDS_RDMA_CANCELED	2
+#define RDS_RDMA_DROPPED	3
+#define RDS_RDMA_OTHER_ERROR	4
+
+/*
+ * Common set of flags for all RDMA related structs
+ */
+#define RDS_RDMA_READWRITE	0x0001
+#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
+#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
+#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
+#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
+#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
+
+#endif /* IB_RDS_H */
diff --git a/net/rds.h b/net/rds.h
new file mode 100644
index 0000000..ec9aa6c
--- /dev/null
+++ b/net/rds.h
@@ -0,0 +1,50 @@
+/*
+ * net/rds.h - user space interface for RDS
+ *
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NET_RDS_H
+#define __NET_RDS_H
+
+#include "ib_rds.h"
+
+/* Sign (-1, 0, 1) of id1 - id2 taken as a wrapped signed 64-bit distance. */
+static inline int rds_rdma_id_sign(uint64_t id1, uint64_t id2)
+{
+	int64_t delta = id1 - id2;
+
+	return (delta > 0) - (delta < 0);
+}
+
+#define rds_rdma_id_cmp(id1, cmp, id2)	(rds_rdma_id_sign((id1), (id2)) cmp 0)
+
+#endif /* __NET_RDS_H */
diff --git a/options.c b/options.c
new file mode 100644
index 0000000..0956b25
--- /dev/null
+++ b/options.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * options.c - options and stuff
+ */
+
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <errno.h>
+
+#include "kernel-list.h"
+#include "rdstool.h"
+
+
+/* This gets changed in parse_options() */
+char *progname = "rds-generic-tool";
+unsigned int verbose = 1;
+
+sig_atomic_t running = 1;
+
+
+/*
+ * Take "address:port" and return a sockaddr(_in) that describes it.
+ * Since RDS is IPv4 only, we don't worry about PF_INET6.
+ *
+ * XXX: Should we try a default IP or default port?  RDS is very
+ * endpoint-oriented; right now we require explicitness.
+ *
+ * Since getaddrinfo(3) returns multiple addresses, we simply find the
+ * first SOCK_DGRAM AF_INET result.  Note that RDS actually uses
+ * SOCK_SEQPACKET, but we're lying to getaddrinfo(3).
+ *
+ * Returns 0 with nep->re_addr filled in, a negative errno for local
+ * failures, or the getaddrinfo(3) error code if resolution fails.
+ */
+static int parse_endpoint(struct rds_endpoint *nep)
+{
+	int rc;
+	char *host, *port;
+	struct addrinfo *list = NULL, *try;
+	struct addrinfo hint = {
+		.ai_family	= PF_INET,
+		.ai_socktype	= SOCK_DGRAM,
+	};
+
+	host = strdup(nep->re_name);
+	if (!host) {
+		verbosef(0, stderr, "%s: Unable to allocate memory\n",
+			 progname);
+		return -ENOMEM;
+	}
+
+	port = strchr(host, ':');
+	if (!port) {
+		rc = -EINVAL;
+		verbosef(0, stderr, "%s: Invalid endpoint: %s\n",
+			 progname, nep->re_name);
+		goto out;
+	}
+	*port++ = '\0';		/* split host and port in place */
+
+	rc = getaddrinfo(host, port, &hint, &list);
+	if (rc) {
+		verbosef(0, stderr, "%s: Unable to resolve \"%s\": %s\n",
+			 progname, nep->re_name, gai_strerror(rc));
+		goto out;
+	}
+
+	for (try = list; try; try = try->ai_next) {
+		if ((try->ai_family == PF_INET) &&
+		    (try->ai_socktype == SOCK_DGRAM) &&
+		    (try->ai_addrlen == sizeof(struct sockaddr_in)))
+			break;
+	}
+
+	if (try) {
+		memcpy(&nep->re_addr, try->ai_addr, sizeof(struct sockaddr_in));
+	} else {
+		/* Don't report success with re_addr left unset. */
+		rc = -EINVAL;
+		verbosef(0, stderr, "%s: No usable IPv4 address for \"%s\"\n",
+			 progname, nep->re_name);
+	}
+
+out:
+	if (list)
+		freeaddrinfo(list);
+	free(host);		/* strdup()ed scratch copy, never kept */
+	return rc;
+}
+
+/*
+ * Turn the "address:port" string @endpoint into a new entry on @list.
+ * Returns 0, or a non-zero error with nothing left allocated.
+ */
+static int add_endpoint(const char *endpoint, struct list_head *list)
+{
+	struct rds_endpoint *ep = malloc(sizeof(*ep));
+	int err;
+
+	if (!ep)
+		return -ENOMEM;
+	ep->re_name = strdup(endpoint);
+	if (!ep->re_name) {
+		free(ep);
+		return -ENOMEM;
+	}
+	err = parse_endpoint(ep);
+	if (err) {
+		free(ep->re_name);
+		free(ep);
+		return err;
+	}
+	list_add_tail(&ep->re_item, list);
+	return 0;
+}
+
+/*
+ * Parse @arg as an unsigned number (any base strtoull(3) accepts) with an
+ * optional single-letter unit: b/B (bytes), k/K, m/M or g/G (powers of
+ * 1024).  Stores the result in *@res and returns 0, or returns -EINVAL
+ * for no digits, an unknown or trailing character, or a value that is
+ * UINT64_MAX or would overflow once scaled; *@res is then untouched.
+ */
+static int get_number(char *arg, uint64_t *res)
+{
+	char *ptr = NULL;
+	uint64_t num;
+	unsigned int shift;
+
+	num = strtoull(arg, &ptr, 0);
+	if ((ptr == arg) || (num == UINT64_MAX))
+		return -EINVAL;
+
+	switch (*ptr) {
+	case 'g': case 'G':
+		shift = 30;
+		break;
+	case 'm': case 'M':
+		shift = 20;
+		break;
+	case 'k': case 'K':
+		shift = 10;
+		break;
+	case 'b': case 'B': case '\0':
+		shift = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Nothing may follow the unit, and scaling must not wrap. */
+	if ((*ptr && ptr[1]) || (num > (UINT64_MAX >> shift)))
+		return -EINVAL;
+
+	*res = num << shift;
+	return 0;
+}
+
+extern char *optarg;
+extern int optopt;
+extern int optind;
+extern int opterr;
+int parse_options(int argc, char *argv[], const char *opts,
+		  struct rds_context *ctxt)
+{	/* Fills @ctxt from argv using getopt(3) string @opts; returns 0 or the first error. */
+	int c, rc = 0;
+	uint64_t val;
+	struct list_head saddrs;	/* collects the single -s endpoint */
+
+	if (argc && argv[0])
+		progname = basename(argv[0]);	/* prefix of every message below */
+
+	INIT_LIST_HEAD(&saddrs);
+	opterr = 0;	/* keep getopt(3) quiet; the '?' and ':' cases print our own messages */
+	while ((c = getopt(argc, argv, opts)) != EOF) {
+		switch (c) {
+			case 's':	/* source endpoint, at most one */
+				if (!list_empty(&saddrs)) {
+					verbosef(0, stderr,
+						 "%s: Only one source address allowed\n",
+						 progname);
+					rc = -EINVAL;
+				} else
+					rc = add_endpoint(optarg, &saddrs);
+				break;
+
+			case 'd':	/* destination endpoint, may be repeated */
+				rc = add_endpoint(optarg, &ctxt->rc_daddrs);
+				break;
+
+			case 'm':	/* message size, must fit in 32 bits */
+				rc = get_number(optarg, &val);
+				if (rc) {
+					verbosef(0, stderr,
+						 "%s: Invalid number: %s\n",
+						 progname, optarg);
+					break;
+				}
+
+				if (val > UINT32_MAX) {
+					rc = -EINVAL;
+					verbosef(0, stderr,
+						 "%s: Message size too large: %"PRIu64"\n",
+						 progname, val);
+				} else
+					ctxt->rc_msgsize = (uint32_t)val;
+				break;
+
+			case 'l':	/* parsed straight into ctxt->rc_total */
+				rc = get_number(optarg, &ctxt->rc_total);
+				if (rc) {
+					verbosef(0, stderr,
+						 "%s: Invalid number: %s\n",
+						 progname, optarg);
+				}
+				break;
+
+			case 'f':	/* file name; also calls stats_extended(1) */
+				ctxt->rc_filename = optarg;
+				stats_extended(1);
+				break;
+
+			case 'i':	/* value handed to stats_init(), must fit in a long */
+				rc = get_number(optarg, &val);
+				if (rc) {
+					verbosef(0, stderr,
+						 "%s: Invalid number: %s\n",
+						 progname, optarg);
+					break;
+				}
+
+				if (val > LONG_MAX) {
+					rc = -EINVAL;
+					verbosef(0, stderr,
+						 "%s: Sleep interval too large: %"PRIu64"\n",
+						 progname, val);
+				} else {
+					rc = stats_init((long)val);
+				}
+
+				break;
+
+
+			case 'v':
+				verbose++;
+				break;
+
+			case 'q':
+				if (verbose)	/* never drops below zero */
+					verbose--;
+				break;
+
+			case 'V':
+				print_version();
+				break;
+
+			case 'h':
+				print_usage(0);
+				break;
+
+			case '-':	/* NOTE(review): presumably needs "-:" in @opts so "--help" yields optarg "help" - confirm */
+				if (!strcmp(optarg, "help"))
+					print_usage(0);
+				else if (!strcmp(optarg, "version"))
+					print_version();
+				else {
+					rc = -EINVAL;
+					verbosef(0, stderr,
+						 "%s: Invalid argument: \'--%s\'\n",
+						 progname, optarg);
+				}
+				break;
+
+			case '?':
+				verbosef(0, stderr,
+					 "%s: Invalid option \'-%c\'\n",
+					 progname, optopt);
+				rc = -EINVAL;
+				break;
+				
+			case ':':
+				verbosef(0, stderr,
+					 "%s: Option \'-%c\' requires an argument\n",
+					 progname, optopt);
+				rc = -EINVAL;
+				break;
+				
+			default:
+				verbosef(0, stderr,
+					 "%s: Shouldn't get here %c %c\n",
+					 progname, optopt, c);
+				rc = -EINVAL;
+				break;
+		}
+
+		if (rc)	/* the first failure stops option parsing */
+			goto out;
+	}
+
+	if (list_empty(&saddrs)) {
+		verbosef(0, stderr, "%s: Source endpoint address required\n",
+			 progname);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ctxt->rc_saddr = list_entry(saddrs.prev, struct rds_endpoint,
+				    re_item);	/* NOTE(review): re_item still links to the local saddrs head */
+
+out:
+	return rc;
+}
+
+/*
+ * Create the RDS socket for ctxt->rc_saddr and bind it to that endpoint's
+ * address.  Returns 0 with re_fd set, or -errno after logging the failure.
+ */
+int rds_bind(struct rds_context *ctxt)
+{
+	struct rds_endpoint *e = ctxt->rc_saddr;
+	int err, fd;
+
+	fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (fd < 0) {
+		err = -errno;
+		verbosef(0, stderr, "%s: Unable to create socket: %s\n",
+			 progname, strerror(-err));
+		return err;
+	}
+
+	e->re_fd = fd;
+	if (!bind(fd, (struct sockaddr *)&e->re_addr,
+		  sizeof(struct sockaddr_in)))
+		return 0;
+
+	err = -errno;
+	verbosef(0, stderr, "%s: Unable to bind socket: %s\n",
+		 progname, strerror(-err));
+	close(fd);
+	e->re_fd = -1;
+	return err;
+}
+
+/*
+ * Make @fd refer to ctxt->rc_filename opened with @flags ("-" leaves @fd
+ * alone).  Returns 0, or a negative errno after logging the failure.
+ */
+int dup_file(struct rds_context *ctxt, int fd, int flags)
+{
+	int tmp_fd, rc = 0;
+	char *type;
+
+	/* "-" is stdin/stdout */
+	if (!strcmp(ctxt->rc_filename, "-"))
+		goto out;
+
+	tmp_fd = open64(ctxt->rc_filename, flags);
+	if (tmp_fd < 0) {
+		rc = -errno;
+		verbosef(0, stderr, "%s: Unable to open file \"%s\": %s\n",
+			 progname, ctxt->rc_filename, strerror(-rc));
+		goto out;
+	}
+
+	if (tmp_fd == fd)	/* open64() already landed on the wanted slot */
+		goto out;
+
+	rc = dup2(tmp_fd, fd);
+	if (rc < 0) {
+		rc = -errno;
+		switch (fd) {
+		case STDIN_FILENO:
+			type = "stdin";
+			break;
+		case STDOUT_FILENO:
+			type = "stdout";
+			break;
+		case STDERR_FILENO:
+			type = "stderr";
+			break;
+		default:
+			type = "random fd";
+			break;
+		}
+		verbosef(0, stderr, "%s: Unable to set file \"%s\" as %s: %s\n",
+			 progname, ctxt->rc_filename, type, strerror(-rc));
+	} else if (rc != fd) {
+		verbosef(0, stderr, "%s: dup2(2) failed for some reason!\n",
+			 progname);
+		rc = -EBADF;
+	} else
+		rc = 0;
+
+	/* @fd now holds its own reference (or dup2 failed); drop the temporary. */
+	close(tmp_fd);
+out:
+	return rc;
+}
+
+int runningp(void)
+{
+	return running;	/* starts at 1; handler() sets it to 0 */
+}
+
+void handler(int signum)
+{
+	running = 0;	/* signum is ignored; callers see the change via runningp() */
+}
+
+/*
+ * Route SIGTERM and SIGINT to handler() and ignore SIGPIPE.
+ * Returns 0, or -EINVAL as soon as one sigaction(2) call fails.
+ */
+int setup_signals(void)
+{
+	static const int stop_signals[] = { SIGTERM, SIGINT };
+	struct sigaction act;
+	size_t i;
+
+	sigemptyset(&act.sa_mask);
+	act.sa_handler = handler;
+	act.sa_flags = 0;
+
+	for (i = 0; i < sizeof(stop_signals) / sizeof(stop_signals[0]); i++)
+		if (sigaction(stop_signals[i], &act, NULL))
+			return -EINVAL;
+
+	act.sa_handler = SIG_IGN;
+	if (sigaction(SIGPIPE, &act, NULL))  /* Get EPIPE instead */
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/pfhack.c b/pfhack.c
new file mode 100644
index 0000000..7e320db
--- /dev/null
+++ b/pfhack.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * pfhack.c - discover the RDS constants 
+ *
+ * PF_RDS and SOL_RDS should be assigned constants.  However, we don't have
+ * official values yet.  There is a hack to overload an existing PF_ value
+ * (21).  This dynamic code detects what the running kernel is using.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+
+#include "kernel-list.h"
+#include "pfhack.h"
+#include "rdstool.h"
+
+#define PF_RDS_PATH	"/proc/sys/net/rds/pf_rds"
+#define SOL_RDS_PATH	"/proc/sys/net/rds/sol_rds"
+
+/*
+ * Report on stderr why an RDS constant is unusable and exit with status 1.
+ */
+static void explode(const char *reason)
+{
+	fprintf(stderr, "%s: Unable to determine RDS constant: %s\n",
+		progname, reason);
+	exit(1);
+}
+
+/*
+ * Return the RDS constant kept in the /proc file @path, caching it in
+ * *@found (negative: not looked up yet).  @official is used when the file
+ * cannot be opened; bad contents are fatal, via explode() and its exit(1).
+ */
+static int discover_constant(const char *path, int official, int *found)
+{
+	int fd;
+	ssize_t ret = 0;
+	size_t total = 0;
+	char buf[PATH_MAX], *ptr;
+	unsigned long val;
+
+	if (*found >= 0)
+		return *found;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		/* no more constants in /proc, so use the official value */
+		*found = official;
+		return official;
+	}
+
+	/* Keep one byte spare so the text can be NUL-terminated below. */
+	while (total < sizeof(buf) - 1) {
+		ret = read(fd, buf + total, sizeof(buf) - 1 - total);
+		if (ret <= 0)
+			break;
+		total += ret;
+	}
+	close(fd);
+	if (ret < 0)
+		explode("Error reading address constant");
+	buf[total] = '\0';	/* strtoul() needs a terminated string */
+	val = strtoul(buf, &ptr, 0);
+	if ((ptr == buf) || (val > INT_MAX) || (*ptr && (*ptr != '\n')))
+		explode("Invalid address constant");
+	*found = (int)val;
+	return (int)val;
+}
+
+int discover_pf_rds()
+{
+	static int	pf_rds = -1;	/* cache slot; < 0 means not looked up yet */
+
+	return discover_constant(PF_RDS_PATH, OFFICIAL_PF_RDS, &pf_rds);
+}
+
+int discover_sol_rds()
+{
+	static int	sol_rds = -1;	/* cache slot; < 0 means not looked up yet */
+
+	return discover_constant(SOL_RDS_PATH, OFFICIAL_SOL_RDS, &sol_rds);
+}
diff --git a/pfhack.h b/pfhack.h
new file mode 100644
index 0000000..2a55b25
--- /dev/null
+++ b/pfhack.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * pfhack.h - discover the RDS constants 
+ *
+ * PF_RDS and SOL_RDS should be assigned constants.  However, we don't have
+ * official values yet.  There is a hack to overload an existing PF_ value
+ * (21).  This dynamic code detects what the running kernel is using.
+ */
+
+#ifndef __PF_HACK_H
+#define __PF_HACK_H
+
+#define OFFICIAL_PF_RDS		21
+#define OFFICIAL_SOL_RDS	276
+
+
+#ifdef DYNAMIC_PF_RDS
+extern int discover_pf_rds();
+extern int discover_sol_rds();
+
+#define AF_RDS discover_pf_rds()
+#define PF_RDS AF_RDS
+#define SOL_RDS discover_sol_rds()
+#endif  /* DYNAMIC_PF_RDS */
+
+#endif  /* __PF_HACK_H */
diff --git a/rds-gen.1 b/rds-gen.1
new file mode 100644
index 0000000..f203f59
--- /dev/null
+++ b/rds-gen.1
@@ -0,0 +1,89 @@
+.Dd October 30, 2006
+.Dt RDS-GEN-SINK 1
+.Os
+.Sh NAME
+.Nm rds-gen
+.Nd write data from a file to an RDS socket
+.Pp
+.Nm rds-sink
+.Nd write data from an RDS socket to a file
+.Sh SYNOPSIS
+.Nm rds-gen
+.Bk -words
+.Op Fl s Ar source_address:source_port
+.Op Fl d Ar destination_address:destination_port
+.Op Fl f Ar input_file
+.Op Fl m Ar message_size
+.Op Fl l Ar total_bytes
+.Op Fl i Ar interval
+
+.Nm rds-sink
+.Bk -words
+.Op Fl s Ar listen_address:listen_port
+.Op Fl f Ar output_file
+.Op Fl i Ar interval
+
+.Sh DESCRIPTION
+The
+.Nm
+and
+.Nm rds-sink
+utilities are used to stream data through RDS sockets.  rds-gen
+reads data from a file descriptor and sends it as messages
+down an RDS socket.  rds-sink receives messages from an RDS
+socket and writes them to a file descriptor.
+
+The following options are shared between rds-gen and rds-sink:
+.Bl -tag -width Ds
+.It Fl s Ar address:port
+Binds the RDS socket to the given address and port.  rds-gen will
+send messages from this address and port.  rds-sink will receive messages
+sent to this address and port.
+.It Fl f Ar file
+rds-gen will read data from this file and rds-sink will write
+data to this file.  If '-' is given as the filename then rds-gen
+will use standard input and rds-sink will use standard output.
+.It Fl i Ar interval_seconds
+An iterative summary of the number and size of messages that are sent and
+received is written to standard error at this interval.
+.El
+.Pp
+
+In addition, rds-gen supports the following options:
+.Bl -tag -width Ds
+.It Fl d Ar address:port
+Messages are sent to this destination address and port.  If this option
+is specified multiple times then the messages are sent to each destination
+address in a round-robin fashion.
+.It Fl m Ar message_size
+Specifies the size of the messages that are sent down the RDS socket.  The default
+message size is 4k.  The message size must not be greater than the buffer size.
+.It Fl l Ar total_bytes
+Specifies the number of bytes that will be sent out the socket before rds-gen
+exits.  If this is not specified and rds-gen was given a source file then it
+will run until it gets EOF from the file.  If no file was given and this
+option is not specified then rds-gen will send data indefinitely.
+.El
+.Pp
+
+.Sh EXAMPLES
+rds-gen on host src sends an endless stream of data to rds-sink on dest,
+which prints out the amount of data it receives every second.
+.Pp
+
+.Dl $ rds-sink -s dest:22222 -i 1
+.Dl $ rds-gen -s src:11111 -d dest:22222
+.Pp
+
+Read 100M from /dev/zero on src and write it to /dev/null on dest,
+printing stats on both sides every minute.
+
+.Dl $ rds-sink -s dest:22222 -f /dev/null -i 60
+.Dl $ rds-gen -s src:11111 -f /dev/zero -d dest:22222 -i 60
+.Pp
+
+Watch rds-gen write data as fast as it can into a local black hole because
+there is no bound receiving socket.
+
+.Dl $ rds-gen -s src:11111 -d localhost:31337 -i 1
+.Pp
diff --git a/rds-gen.c b/rds-gen.c
new file mode 100644
index 0000000..f9420c7
--- /dev/null
+++ b/rds-gen.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * rds-gen.c: Spew some RDS packets
+ */
+
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "kernel-list.h"
+#include "rdstool.h"
+
+/*
+ * Print the rds-gen command-line synopsis and terminate the process with
+ * exit status 'rc'.  The text goes to stderr when 'rc' is non-zero and to
+ * stdout otherwise.  Continuation lines are padded with "%*s" so that they
+ * line up underneath the program name.
+ */
+void print_usage(int rc)
+{
+	FILE *output;
+	int pad = strlen(progname);
+
+	if (rc)
+		output = stderr;
+	else
+		output = stdout;
+
+	verbosef(0, output,
+		 "Usage: %s -s <source_ip>:<source_port> [[-d <dest_ip>:<dest_port>] ...]\n"
+		 "       %*s [-f <input_file>] [-m <msg_size>]\n"
+		 "       %*s [-l <total_bytes>] [-i <interval>]\n"
+		 "       %*s [-v ...] [-q ...]\n"
+		 "       %s -h\n"
+		 "       %s -V\n",
+		 progname,
+		 pad, "",
+		 pad, "",
+		 pad, "",
+		 progname,
+		 progname);
+
+	exit(rc);
+}
+
+/*
+ * Print the program name and version line to stdout through verbosef() at
+ * level 0, then exit with status 0.
+ *
+ * NOTE(review): "VERSION" sits inside the string literal, so it is printed
+ * verbatim rather than being substituted; it looks like a build-time macro
+ * or placeholder was intended -- confirm.
+ */
+void print_version()
+{
+	verbosef(0, stdout, "%s version VERSION\n", progname);
+
+	exit(0);
+}
+
+/*
+ * Choose the destination that follows 'de' on ctxt->rc_daddrs.
+ *
+ * Selection is plain round-robin: a NULL 'de' starts at the first entry,
+ * and stepping past the last entry wraps around to the first one again.
+ */
+static struct rds_endpoint *pick_dest(struct rds_context *ctxt,
+				      struct rds_endpoint *de)
+{
+	struct list_head *head = &ctxt->rc_daddrs;
+	struct list_head *pos = de ? de->re_item.next : head->next;
+
+	/* The list head is not an endpoint: skip over it when we wrap. */
+	if (pos == head)
+		pos = head->next;
+
+	return list_entry(pos, struct rds_endpoint, re_item);
+}
+
+/*
+ * Fill 'bytes' with 'len' bytes read from standard input.
+ *
+ * Once a filename other than "-" is configured the static 'first' flag is
+ * cleared on the first call, and every later call returns 0 immediately
+ * without touching the buffer.
+ * NOTE(review): main() rewrites "-" to "<standard input>" before the send
+ * loop starts, so the strcmp() below looks like it always clears 'first'
+ * -- confirm that this read-once behaviour is intended.
+ *
+ * Returns 0 when the buffer was filled or when EOF was seen before any
+ * byte was read, -EPIPE when EOF arrives part-way through the buffer, and
+ * otherwise the negated errno from read().  EINTR is retried for as long
+ * as runningp() holds.
+ */
+static ssize_t fill_stdin(struct rds_context *ctxt, char *bytes,
+			  ssize_t len)
+{
+	static int first = 1;
+	char *cursor = bytes;
+	ssize_t ret = 0;
+
+	if (!first)
+		return ret;
+
+	if (ctxt->rc_filename && strcmp(ctxt->rc_filename,"-") != 0)
+		first = 0;
+
+	while (len && runningp()) {
+		stats_print();
+		ret = read(STDIN_FILENO, cursor, len);
+
+		if (ret > 0) {
+			/* Partial or full read: account for it and go on. */
+			stats_add_read(ret);
+			cursor += ret;
+			len -= ret;
+			ret = 0;  /* A filled buffer is reported as success */
+			continue;
+		}
+
+		if (ret == 0) {
+			/* EOF is only an error in the middle of a buffer. */
+			if (cursor != bytes) {
+				verbosef(0, stderr,
+					 "%s: Unexpected end of file reading from %s\n",
+					 progname, ctxt->rc_filename);
+				ret = -EPIPE;
+			}
+			break;
+		}
+
+		ret = -errno;
+		if (ret == -EINTR)
+			continue;
+
+		verbosef(0, stderr,
+			 "%s: Error reading from %s: %s\n",
+			 progname, ctxt->rc_filename,
+			 strerror(-ret));
+		break;
+	}
+	verbosef(3, stderr, "Read %zd bytes from stdin\n",
+		 cursor - bytes);
+
+	return ret;
+}
+
+/*
+ * Buffer source used when no input file was given: the buffer is zeroed
+ * on the very first call and left untouched on every later one.
+ * 'ctxt' is not used.  Always returns 0.
+ */
+static ssize_t fill_pattern(struct rds_context *ctxt, char *bytes,
+			    ssize_t len)
+{
+	static int zeroed;	/* becomes 1 once the buffer has been cleared */
+
+	stats_print();
+
+	if (!zeroed) {
+		zeroed = 1;
+		memset(bytes, 0, len);
+	}
+
+	return 0;
+}
+
+/*
+ * Produce the next message payload in 'bytes'.  Data comes from standard
+ * input when a filename is configured, from the built-in pattern
+ * otherwise.  The chosen helper is responsible for calling stats_print().
+ * Returns whatever the helper returns.
+ */
+static ssize_t fill_buff(struct rds_context *ctxt, char *bytes, ssize_t len)
+{
+	if (!ctxt->rc_filename)
+		return fill_pattern(ctxt, bytes, len);
+
+	return fill_stdin(ctxt, bytes, len);
+}
+
+/*
+ * Send one message on the socket of endpoint 'se', retrying for as long
+ * as runningp() holds when sendmsg() is interrupted by a signal.
+ *
+ * Returns the sendmsg() byte count on success, the negated errno on
+ * failure (after logging it), or 0 if runningp() was already false.
+ */
+static ssize_t send_buff(struct rds_endpoint *se, struct msghdr *msg)
+{
+	ssize_t ret = 0;
+
+	while (runningp()) {
+		stats_print();
+
+		ret = sendmsg(se->re_fd, msg, 0);
+		if (ret >= 0)
+			break;		/* Success */
+
+		ret = -errno;
+		if (ret == -EINTR)
+			continue;
+
+		verbosef(0, stderr,
+			 "%s: Error from sendmsg: %s\n",
+			 progname, strerror(-ret));
+		break;
+	}
+
+	return ret;
+}
+
+
+/*
+ * Main send loop: repeatedly fill a message buffer, pick the next
+ * destination round-robin and send the buffer to it, until runningp()
+ * goes false, an error occurs, or ctxt->rc_total bytes have been sent
+ * (when rc_total is non-zero).
+ *
+ * Returns 0 on success or a negative errno value.  The byte count of the
+ * last sendmsg() is deliberately kept out of the return value: main()
+ * uses this result as the process exit status, so a positive count would
+ * turn a successful run into a non-zero exit code.
+ */
+static int wli_do_send(struct rds_context *ctxt)
+{
+	char bytes[ctxt->rc_msgsize];
+	int ret = 0;
+	ssize_t sent;
+	struct rds_endpoint *de = NULL, *se = ctxt->rc_saddr;
+	struct iovec iov = {
+		.iov_base = bytes,
+		.iov_len = ctxt->rc_msgsize,
+	};
+	struct msghdr msg = {
+		.msg_name = NULL,  /* Picked later */
+		.msg_namelen = sizeof(struct sockaddr_in),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = NULL,
+		.msg_controllen = 0,
+		.msg_flags = 0,
+	};
+
+	verbosef(2, stderr, "Starting send loop\n");
+
+	stats_start();
+
+	while (runningp()) {
+		/* Calls stats_print() */
+		ret = fill_buff(ctxt, bytes, ctxt->rc_msgsize);
+		if (ret) {
+			if (ret == -EINTR)
+				continue;
+			else
+				break;
+		}
+
+		de = pick_dest(ctxt, de);
+		verbosef(2, stderr, "Destination %s\n", de->re_name);
+
+		msg.msg_name = &de->re_addr;
+		/* Trim the final message so we never exceed rc_total. */
+		if (ctxt->rc_total &&
+		    ((stats_get_send() + ctxt->rc_msgsize) > ctxt->rc_total))
+			iov.iov_len = ctxt->rc_total - stats_get_send();
+
+		/* Calls stats_print() */
+		sent = send_buff(se, &msg);
+		if (sent < 0) {
+			ret = sent;
+			break;
+		}
+
+		/* 'ret' is still 0 here, so a clean exit reports success. */
+		stats_add_send(sent);
+
+		if (ctxt->rc_total && (stats_get_send() >= ctxt->rc_total))
+			break;
+	}
+	verbosef(2, stderr, "Stopping send loop\n");
+
+	stats_total();
+
+	return ret;
+}
+
+
+/*
+ * rds-gen entry point: parse the command line, bind the source endpoint,
+ * redirect the optional input file onto standard input, install the
+ * signal handlers and run the send loop.  The process exit status is the
+ * result of the last step that ran.
+ */
+int main(int argc, char *argv[])
+{
+	struct rds_context ctxt = {
+		.rc_msgsize = RDS_DEFAULT_MSG_SIZE,
+	};
+	struct rds_endpoint *dest;
+	char addrstr[INET_ADDRSTRLEN];
+	int rc;
+
+	INIT_LIST_HEAD(&ctxt.rc_daddrs);
+
+	rc = parse_options(argc, argv, RDS_TOOL_BASE_OPTS RDS_GEN_OPTS,
+			   &ctxt);
+	if (rc != 0)
+		print_usage(rc);
+
+	/* At least one -d destination is mandatory. */
+	if (list_empty(&ctxt.rc_daddrs)) {
+		verbosef(0, stderr,
+			 "%s: Destination endpoint address required\n",
+			 progname);
+		print_usage(-EINVAL);
+	}
+
+	inet_ntop(PF_INET, &ctxt.rc_saddr->re_addr.sin_addr, addrstr,
+		  sizeof(addrstr));
+	verbosef(2, stderr, "Binding endpoint %s:%d\n",
+		 addrstr, ntohs(ctxt.rc_saddr->re_addr.sin_port));
+
+	rc = rds_bind(&ctxt);
+	if (rc != 0)
+		goto out;
+
+	if (ctxt.rc_filename != NULL) {
+		rc = dup_file(&ctxt, STDIN_FILENO, O_RDONLY);
+		if (rc != 0)
+			goto out;
+		/* Use a readable name for stdin in later diagnostics. */
+		if (strcmp(ctxt.rc_filename, "-") == 0)
+			ctxt.rc_filename = "<standard input>";
+	}
+
+	list_for_each_entry(dest, &ctxt.rc_daddrs, re_item) {
+		inet_ntop(PF_INET, &dest->re_addr.sin_addr, addrstr,
+			  sizeof(addrstr));
+		verbosef(2, stderr,
+			 "Adding destination %s:%d\n", addrstr,
+			 ntohs(dest->re_addr.sin_port));
+	}
+
+	rc = setup_signals();
+	if (rc != 0) {
+		verbosef(0, stderr, "%s: Unable to initialize signals\n",
+			 progname);
+		goto out;
+	}
+
+	rc = wli_do_send(&ctxt);
+
+out:
+	free(ctxt.rc_saddr->re_name);
+	free(ctxt.rc_saddr);
+
+	return rc;
+}
diff --git a/rds-info.1 b/rds-info.1
new file mode 100644
index 0000000..499b72c
--- /dev/null
+++ b/rds-info.1
@@ -0,0 +1,162 @@
+.Dd October 30, 2006
+.Dt RDS-INFO 1
+.Os
+.Sh NAME
+.Nm rds-info
+.Nd display information from the RDS kernel module
+.Pp
+.Sh SYNOPSIS
+.Nm rds-info
+.Op Fl v
+.Bk -words
+.Op Fl cknrstIT
+
+.Sh DESCRIPTION
+The
+.Nm
+utility presents various sources of information that
+the RDS kernel module maintains.  When run without any optional arguments
+.Nm
+will output all the information it knows of.  When options are specified then
+only the information associated with those options is displayed.
+
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl v
+Requests verbose output. When this option is given, some classes of information
+will display additional data.
+
+.It Fl c
+Display global counters.  Each counter increments as its event
+occurs.  The counters may not be reset.  The set of supported counters
+may change over time.
+
+.Bl -tag -width 4
+.It CounterName
+The name of the counter.  These names come from the kernel and can change
+depending on the capability of the kernel module.
+.It Value
+The number of times that the counter has been incremented since the kernel
+module was loaded.
+.El
+
+.It Fl k
+Display all the RDS sockets in the system.  There will always be one socket
+listed that is neither bound to nor connected to any addresses because
+.Nm
+itself uses an unbound socket to collect information.
+
+.Bl -tag -width 4
+.It BoundAddr, BPort
+The IP address and port that the socket is bound to.  0.0.0.0 0 indicates that
+the socket has not been bound.
+.It ConnAddr, CPort
+The IP address and port that the socket is connected to.  0.0.0.0 0 indicates
+that the socket has not been connected.
+.It SndBuf, RcvBuf
+The number of bytes of message payload which can be queued for sending or
+receiving on the socket, respectively.
+.It Inode
+The number of the inode object associated with the socket. Can be used to
+locate the process owning a given socket by searching /proc/*/fd for
+open files referencing a socket with this inode number.
+.El
+
+.It Fl n
+Display all RDS connections.  RDS connections are maintained between
+nodes by transports.  
+
+.Bl -tag -width 4
+.It LocalAddr
+The IP address of this node.  For connections that originate and terminate on
+the same node the local address indicates which address initiated the
+connection establishment.
+.It RemoteAddr
+The IP address of the remote end of the connection.  
+.It NextTX
+The sequence number that will be given to the next message that is sent
+over the connection.
+.It NextRX
+The sequence number that is expected from the next message to arrive over
+the connection.  Any incoming messages with sequence numbers less than this
+will be dropped.
+.It Flg
+Flags which indicate the state of the connection. 
+.Bl -tag -width 4
+.It s
+A process is currently sending a message down the connection.
+.It c
+The transport is attempting to connect to the remote address.
+.It C
+The connection to the remote host is connected and active.
+.El
+.El
+
+.It Fl r, Fl s, Fl t
+Display the messages in the receive, send, or retransmit queues respectively.
+.Bl -tag -width 4
+.It LocalAddr, LPort
+The local IP address and port on this node associated with the message. For
+sent messages this is the source address, for receive messages it is the
+destination address.
+.It RemoteAddr, RPort
+The remote IP address and port associated with the message. For sent messages
+this is the destination address, for receive messages it is the source address.
+.It Seq
+The sequence number of the message.
+.It Bytes
+The number of bytes in the message payload.
+.El
+
+The following information sources are dependent on specific transports which
+may not always be available. 
+
+.It Fl I
+Display the IB connections which the IB transport is using to provide
+RDS connections.
+
+.Bl -tag -width 4
+.It LocalAddr
+The local IP address of this connection.
+.It RemoteAddr
+The remote IP address of this connection.
+.It LocalDev
+The local IB Global Identifier, printed in IPv6 address syntax.
+.It RemoteDev
+The remote IB Global Identifier, printed in IPv6 address syntax.
+.El
+
+If verbose output is requested, per-connection settings such as the
+maximum number of send and receive work requests will be displayed
+in addition.
+
+.It Fl T
+Display the TCP sockets which the TCP transport is using to provide
+RDS connections.
+
+.Bl -tag -width 4
+.It LocalAddr, LPort
+The local IP address and port of this socket.
+.It RemoteAddr, RPort
+The remote IP address and port that this socket is connected to.
+.It HdrRemain
+The number of bytes that must be read off the socket to complete the next
+full RDS header.
+.It DataRemain
+The number of bytes that must be read off the socket to complete the data
+payload of the message which is being received.
+.It SentNxt
+The TCP sequence number of the first byte of the last message that we sent
+down the connection.
+.It ExpectUna
+The TCP sequence number of the byte past the last byte of the last message
+that we sent down the connection.  When we see that the remote side has
+acked up to this byte then we know that the remote side has received all
+our RDS messages.
+.It SeenUna
+The TCP sequence number of the byte past the last byte which has been
+acked by the remote host.
+.El
+
+.El
+.Pp
diff --git a/rds-info.c b/rds-info.c
new file mode 100644
index 0000000..d90cc16
--- /dev/null
+++ b/rds-info.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <string.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "net/rds.h"
+#include "rdstool.h"
+
+/* Yield 'letter' when the named connection flag is set, '-' otherwise. */
+#define rds_conn_flag(conn, flag, letter) \
+	(((conn).flags & RDS_INFO_CONNECTION_FLAG_##flag) ? (letter) : '-')
+
+/*
+ * Arguments are fully parenthesized so that expressions such as
+ * min(a | b, c) group as written.  Both macros evaluate their arguments
+ * more than once, so avoid arguments with side effects.
+ */
+#define min(a, b) ((a) < (b) ? (a) : (b))
+/* Element count of a real array (not of a pointer). */
+#define array_size(foo) (sizeof(foo) / sizeof((foo)[0]))
+
+/*
+ * Zero 'var', then copy one record from 'data' into it, never copying more
+ * than sizeof(var) bytes.  A record shorter than 'var' leaves the trailing
+ * fields zeroed.  Always evaluates to 1 so it can sit in a loop condition.
+ */
+#define copy_into(var, data, each) ({				\
+	int __ret = 1;						\
+	memset(&(var), 0, sizeof(var));				\
+	memcpy(&(var), (data), min((each), sizeof(var)));	\
+	__ret;							\
+})
+
+/*
+ * Walk a buffer of 'len' bytes holding records of 'each' bytes, loading
+ * each record into 'var' in turn.  'data' and 'len' are modified as the
+ * loop advances.  Callers in this file pass a void pointer for 'data', so
+ * the pointer arithmetic relies on the GNU C extension.
+ */
+#define for_each(var, data, each, len) 				\
+	for (;(len) > 0 && copy_into(var, data, each);		\
+	     (data) += (each), (len) -= min((len), (each)))
+
+/* Incremented once per -v; print_ib_conns() prints extra fields when set. */
+static int	opt_verbose = 0;
+
+/*
+ * Format the address at 'addrp' (family 'af') as text, in the manner of
+ * inet_ntoa().  Results rotate through eight static buffers, so up to
+ * eight return values may be in use at once (for instance as arguments of
+ * a single printf call) before the oldest one is overwritten.
+ */
+static const char *paddr(int af, const void *addrp)
+{
+	static char nbuf[8][INET6_ADDRSTRLEN];
+	static int which = 0;
+	char *slot = nbuf[which++];
+
+	/* Wrap around to the first buffer after the last one. */
+	if (which == 8)
+		which = 0;
+
+	inet_ntop(af, addrp, slot, sizeof(nbuf[0]));
+	return slot;
+}
+
+/* Format an IPv4 address, passed by value, using paddr(). */
+static const char *ipv4addr(uint32_t addr)
+{
+	const void *raw = &addr;
+
+	return paddr(AF_INET, raw);
+}
+
+/* Format the IPv6-style address stored at 'addr' using paddr(). */
+static const char *ipv6addr(const void *addr)
+{
+	const char *text = paddr(AF_INET6, addr);
+
+	return text;
+}
+
+/*
+ * Print the global counter table.
+ *
+ * data:  buffer of counter records
+ * each:  size in bytes of one record, as handed over by main()
+ * len:   number of valid bytes in 'data'
+ * extra: unused
+ */
+static void print_counters(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_counter counter;
+
+	printf("\nCounters:\n%25s %16s\n", "CounterName", "Value");
+
+	for_each(counter, data, each, len) {
+		printf("%25s %16"PRIu64"\n", counter.name, counter.value);
+	}
+}
+
+/*
+ * Print the socket table for the older record layout
+ * (struct rds_info_socket_v1), which has no inode column.
+ * Parameters are as for the other print_* callbacks; 'extra' is unused.
+ */
+static void print_sockets_v1(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_socket_v1 rec;
+	const char *bound;
+	const char *peer;
+
+	printf("\nRDS Sockets:\n%15s %5s %15s %5s %10s %10s\n",
+		"BoundAddr", "BPort", "ConnAddr", "CPort", "SndBuf",
+		"RcvBuf");
+
+	for_each(rec, data, each, len) {
+		bound = ipv4addr(rec.bound_addr);
+		peer = ipv4addr(rec.connected_addr);
+
+		printf("%15s %5u %15s %5u %10u %10u\n",
+			bound, ntohs(rec.bound_port),
+			peer, ntohs(rec.connected_port),
+			rec.sndbuf, rec.rcvbuf);
+	}
+}
+
+/*
+ * Print the RDS socket table, including the inode column.
+ *
+ * When the record size 'each' matches struct rds_info_socket_v1 the data
+ * is in the older layout, so the work is handed to print_sockets_v1().
+ *
+ * data:  buffer of socket records
+ * each:  size in bytes of one record, as handed over by main()
+ * len:   number of valid bytes in 'data'
+ * extra: unused
+ */
+static void print_sockets(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_socket sk;
+
+	if (each == sizeof(struct rds_info_socket_v1)) {
+		print_sockets_v1(data, each, len, extra);
+		return;
+	}
+
+	printf("\nRDS Sockets:\n%15s %5s %15s %5s %10s %10s %8s\n",
+		"BoundAddr", "BPort", "ConnAddr", "CPort", "SndBuf",
+		"RcvBuf", "Inode");
+
+	for_each(sk, data, each, len) {
+		/*
+		 * "ll" is the standard length modifier for unsigned long
+		 * long; "L" is only defined for long double conversions.
+		 */
+		printf("%15s %5u %15s %5u %10u %10u %8llu\n",
+			ipv4addr(sk.bound_addr),
+			ntohs(sk.bound_port),
+			ipv4addr(sk.connected_addr),
+			ntohs(sk.connected_port),
+			sk.sndbuf, sk.rcvbuf,
+			(unsigned long long) sk.inum);
+	}
+}
+
+/*
+ * Print the RDS connection table: local and remote address, next
+ * transmit and receive sequence numbers, and one letter per state flag
+ * ('s' sending, 'c' connecting, 'C' connected, '-' when the flag is clear).
+ * Parameters are as for the other print_* callbacks; 'extra' is unused.
+ */
+static void print_conns(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_connection cn;
+	const char *local;
+	const char *remote;
+
+	printf("\nRDS Connections:\n%15s %15s %16s %16s %3s\n",
+		"LocalAddr", "RemoteAddr", "NextTX", "NextRX", "Flg");
+
+	for_each(cn, data, each, len) {
+		local = ipv4addr(cn.laddr);
+		remote = ipv4addr(cn.faddr);
+
+		printf("%15s %15s %16"PRIu64" %16"PRIu64" %c%c%c\n",
+			local, remote,
+			cn.next_tx_seq, cn.next_rx_seq,
+			rds_conn_flag(cn, SENDING, 's'),
+			rds_conn_flag(cn, CONNECTING, 'c'),
+			rds_conn_flag(cn, CONNECTED, 'C'));
+	}
+}
+
+/*
+ * Print one message queue.  'extra' points to the queue's display name
+ * (a C string) used in the heading; the remaining parameters are as for
+ * the other print_* callbacks.
+ */
+static void print_msgs(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_message rec;
+	const char *local;
+	const char *remote;
+
+	printf("\n%s Message Queue:\n%15s %5s %15s %5s %16s %10s\n",
+		(char *)extra,
+		"LocalAddr", "LPort", "RemoteAddr", "RPort", "Seq", "Bytes");
+
+	for_each(rec, data, each, len) {
+		local = ipv4addr(rec.laddr);
+		remote = ipv4addr(rec.faddr);
+
+		printf("%15s %5u %15s %5u %16"PRIu64" %10u\n",
+			local, ntohs(rec.lport),
+			remote, ntohs(rec.fport),
+			rec.seq, rec.len);
+	}
+}
+
+/*
+ * Print the sockets used by the TCP transport: both endpoints, the bytes
+ * still outstanding for the current header and payload, and the three
+ * recorded TCP sequence numbers.
+ * Parameters are as for the other print_* callbacks; 'extra' is unused.
+ */
+static void print_tcp_socks(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_tcp_socket sock;
+	const char *local;
+	const char *peer;
+
+	printf("\nTCP Connections:\n"
+		"%15s %5s %15s %5s %10s %10s %10s %10s %10s\n",
+		"LocalAddr", "LPort", "RemoteAddr", "RPort",
+		"HdrRemain", "DataRemain", "SentNxt", "ExpectUna", "SeenUna");
+
+	for_each(sock, data, each, len) {
+		local = ipv4addr(sock.local_addr);
+		peer = ipv4addr(sock.peer_addr);
+
+		printf("%15s %5u %15s %5u %10"PRIu64" %10"PRIu64" %10u %10u %10u\n",
+			local, ntohs(sock.local_port),
+			peer, ntohs(sock.peer_port),
+			sock.hdr_rem, sock.data_rem, sock.last_sent_nxt,
+			sock.last_expected_una, sock.last_seen_una);
+	}
+}
+
+/*
+ * Print the connections used by the IB transport: IPv4 endpoints plus the
+ * local and remote GIDs rendered in IPv6 address syntax.  When verbose
+ * output was requested the per-connection limits are appended to each row.
+ * Parameters are as for the other print_* callbacks; 'extra' is unused.
+ */
+static void print_ib_conns(void *data, int each, socklen_t len, void *extra)
+{
+	struct rds_info_ib_connection conn;
+	const char *src, *dst;
+	const char *src_gid, *dst_gid;
+
+	printf("\nRDS IB Connections:\n%15s %15s %32s %32s\n",
+		"LocalAddr", "RemoteAddr", "LocalDev", "RemoteDev");
+
+	for_each(conn, data, each, len) {
+		/* Four results in flight; paddr() keeps eight buffers. */
+		src = ipv4addr(conn.src_addr);
+		dst = ipv4addr(conn.dst_addr);
+		src_gid = ipv6addr(conn.src_gid);
+		dst_gid = ipv6addr(conn.dst_gid);
+
+		printf("%15s %15s %32s %32s", src, dst, src_gid, dst_gid);
+
+		if (opt_verbose != 0) {
+			printf("  send_wr=%u", conn.max_send_wr);
+			printf(", recv_wr=%u", conn.max_recv_wr);
+			printf(", send_sge=%u", conn.max_send_sge);
+			printf(", rdma_fmr_max=%u", conn.rdma_fmr_max);
+			printf(", rdma_fmr_size=%u", conn.rdma_fmr_size);
+		}
+
+		printf("\n");
+	}
+}
+
+/*
+ * One information source.  The infos[] table below is indexed by the
+ * command-line option character that selects the source; slots whose
+ * opt_val is zero are unused.
+ */
+struct info {
+	int opt_val;		/* option name passed to getsockopt() */
+	char *description;	/* text shown in the usage message */
+	void (*print)(void *data, int each, socklen_t len, void *extra);
+	void *extra;		/* passed through to print() unchanged */
+	int option_given;	/* set when the option was on the command line */
+};
+
+struct info infos[] = {
+	['c'] = {
+		.opt_val	= RDS_INFO_COUNTERS,
+		.description	= "statistic counters",
+		.print		= print_counters,
+	},
+	['k'] = {
+		.opt_val	= RDS_INFO_SOCKETS,
+		.description	= "sockets",
+		.print		= print_sockets,
+	},
+	['n'] = {
+		.opt_val	= RDS_INFO_CONNECTIONS,
+		.description	= "connections",
+		.print		= print_conns,
+	},
+	['r'] = {
+		.opt_val	= RDS_INFO_RECV_MESSAGES,
+		.description	= "recv queue messages",
+		.print		= print_msgs,
+		.extra		= "Receive",
+	},
+	['s'] = {
+		.opt_val	= RDS_INFO_SEND_MESSAGES,
+		.description	= "send queue messages",
+		.print		= print_msgs,
+		.extra		= "Send",
+	},
+	['t'] = {
+		.opt_val	= RDS_INFO_RETRANS_MESSAGES,
+		.description	= "retransmit queue messages",
+		.print		= print_msgs,
+		.extra		= "Retransmit",
+	},
+	['T'] = {
+		.opt_val	= RDS_INFO_TCP_SOCKETS,
+		.description	= "TCP transport sockets",
+		.print		= print_tcp_socks,
+	},
+	['I'] = {
+		.opt_val	= RDS_INFO_IB_CONNECTIONS,
+		.description	= "IB transport connections",
+		.print		= print_ib_conns,
+	},
+};
+
+/*
+ * Print the list of source-selection options and exit with status 'rc'.
+ * All of the text goes to one stream: stderr when 'rc' is non-zero,
+ * stdout otherwise.
+ */
+void print_usage(int rc)
+{
+	FILE *output = rc ? stderr : stdout;
+	int i;
+
+	verbosef(0, output, "The following options limit output to the given "
+		 "sources:\n");
+
+	for (i = 0; i < array_size(infos); i++) {
+		if (!infos[i].opt_val)
+			continue;
+		/*
+		 * Write to 'output' like the surrounding text; plain printf()
+		 * would split an error-path usage message between stdout
+		 * and stderr.
+		 */
+		fprintf(output, "    -%c %s\n", i, infos[i].description);
+	}
+
+	verbosef(0, output,
+		"\n\nIf no options are given then all sources are used.\n");
+	exit(rc);
+}
+
+/*
+ * Intentionally empty: rds-info prints no version information.
+ * NOTE(review): presumably defined only because shared tool code expects
+ * the symbol to exist -- confirm against rdstool.h.
+ */
+void print_version()
+{
+}
+
+/*
+ * rds-info entry point.
+ *
+ * Builds the getopt() option string from the infos[] table, records which
+ * sources were requested, then queries each selected source through
+ * getsockopt() on an RDS socket and hands the result to the source's
+ * print callback.  With no source options every source is printed.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int main(int argc, char **argv)
+{
+	/*
+	 * NOTE(review): '+' is only special to GNU getopt as the first
+	 * character of the string; in second position it is an ordinary
+	 * option letter -- confirm whether "+v" was intended.
+	 */
+	char optstring[258] = "v+";
+	int given_options = 0;
+	socklen_t len = 0;
+	void *data = NULL;
+	void *grown;
+	int fd;
+	int each;
+	int c;
+	char *last;
+	int i;
+
+	/* quickly append all our info options to the optstring */
+	last = &optstring[strlen(optstring)];
+	for (i = 0; i < array_size(infos); i++) {
+		if (!infos[i].opt_val)
+			continue;
+		*last = (char)i;
+		last++;
+		*last = '\0';
+	}
+
+	/* getopt() signals the end of the options with -1. */
+	while ((c = getopt(argc, argv, optstring)) != -1) {
+		if (c == 'v') {
+			opt_verbose++;
+			continue;
+		}
+
+		if (c >= array_size(infos) || !infos[c].opt_val) {
+			verbosef(0, stderr, "%s: Invalid option \'-%c\'\n",
+				 progname, optopt);
+			print_usage(1);
+		}
+
+		infos[c].option_given = 1;
+		given_options++;
+	}
+
+	fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (fd < 0) {
+		verbosef(0, stderr, "%s: Unable to create socket: %s\n",
+			 progname, strerror(errno));
+		return 1;
+	}
+
+	for (i = 0; i < array_size(infos); i++) {
+		if (!infos[i].opt_val ||
+		    (given_options && !infos[i].option_given))
+			continue;
+
+		/*
+		 * Read in the info until we get a full snapshot.  On ENOSPC
+		 * the call has updated 'len', so grow the buffer to that
+		 * size and retry.  A non-negative return value is passed to
+		 * the print callback as the per-record size.
+		 */
+		while ((each = getsockopt(fd, SOL_RDS, infos[i].opt_val, data,
+				   &len)) < 0) {
+			if (errno != ENOSPC) {
+				verbosef(0, stderr,
+					 "%s: Unable to get statistics: %s\n",
+					 progname, strerror(errno));
+				free(data);
+				close(fd);
+				return 1;
+			}
+
+			/*
+			 * realloc(NULL, n) behaves like malloc(n).  Keep the
+			 * old pointer until the call succeeds so it can still
+			 * be freed on failure.
+			 */
+			grown = realloc(data, len);
+			if (grown == NULL) {
+				verbosef(0, stderr,
+					 "%s: Unable to allocate memory "
+					 "for %u bytes of info: %s\n",
+					 progname, len, strerror(errno));
+				free(data);
+				close(fd);
+				return 1;
+			}
+			data = grown;
+		}
+
+		infos[i].print(data, each, len, infos[i].extra);
+
+		/* Stop early once every requested source has been shown. */
+		if (given_options && --given_options == 0)
+			break;
+	}
+
+	free(data);
+	close(fd);
+
+	return 0;
+}
diff --git a/rds-ping.1 b/rds-ping.1
new file mode 100644
index 0000000..ae06787
--- /dev/null
+++ b/rds-ping.1
@@ -0,0 +1,69 @@
+.Dd Apr 22, 2008
+.Dt RDS-PING 1
+.Os
+.Sh NAME
+.Nm rds-ping
+.Nd test reachability of remote node over RDS
+.Pp
+.Sh SYNOPSIS
+.Nm rds-ping
+.Bk -words
+.Op Fl c Ar count
+.Op Fl i Ar interval
+.Op Fl I Ar local_addr
+.Ar remote_addr
+
+.Sh DESCRIPTION
+.Nm rds-ping
+is used to test whether a remote node is reachable over RDS.
+Its interface is designed to operate pretty much like the standard
+.Xr ping 8
+utility, even though the way it works is pretty different.
+.Pp
+.Nm rds-ping
+opens several RDS sockets and sends packets to port 0 on
+the indicated host. This is a special port number to which
+no socket is bound; instead, the kernel processes incoming
+packets and responds to them.
+.Sh OPTIONS
+The following options are available for use on the command line:
+.Bl -tag -width Ds
+.It Fl c Ar count
+Causes
+.Nm rds-ping
+to exit after sending (and receiving) the specified number of
+packets.
+.It Fl I Ar address
+By default,
+.Nm rds-ping
+will pick the local source address for the RDS socket based
+on routing information for the destination address (i.e. if
+packets to the given destination would be routed through interface
+.Nm ib0 ,
+then it will use the IP address of
+.Nm ib0
+as source address).
+Using the
+.Fl I
+option, you can override this choice.
+.It Fl i Ar timeout
+By default,
+.Nm rds-ping
+will wait for one second between sending packets. Use this option
+to specify a different interval. The timeout value is given in
+seconds, and can be a floating point number. Optionally, append
+.Nm msec
+or
+.Nm usec
+to specify a timeout in milliseconds or microseconds, respectively.
+.It
+Specifying a timeout considerably smaller than the packet round-trip
+time will produce unexpected results.
+.El
+.Sh AUTHORS
+.Nm rds-ping
+was written by Olaf Kirch <olaf.kirch at oracle.com>.
+.Sh SEE ALSO
+.Xr rds 7 ,
+.Xr rds-info 1 ,
+.Xr rds-stress 1 .
diff --git a/rds-ping.c b/rds-ping.c
new file mode 100644
index 0000000..e9c88fc
--- /dev/null
+++ b/rds-ping.c
@@ -0,0 +1,385 @@
+/*
+ * rds-ping utility
+ *
+ * Test reachability of a remote RDS node by sending a packet to port 0.
+ *
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include "net/rds.h"
+
+#ifdef DYNAMIC_PF_RDS
+#include "pfhack.h"
+#endif
+
+/*
+ * die - print a printf-style message to stderr and exit with status 1.
+ * No newline is appended; the caller's format string must supply it.
+ */
+#define die(fmt...) do {		\
+	fprintf(stderr, fmt);		\
+	exit(1);			\
+} while (0)
+
+/*
+ * die_errno - like die(), but the message is followed by the current
+ * errno value, its strerror() text and a newline. fmt must be a string
+ * literal because it is pasted together with the errno suffix.
+ */
+#define die_errno(fmt, args...) do {				\
+	fprintf(stderr, fmt ", errno: %d (%s)\n", ##args , errno,\
+		strerror(errno));				\
+	exit(1);						\
+} while (0)
+
+/*
+ * Command line settings.
+ * opt_wait:    -i, delay between two packets (default exactly 1s).
+ * opt_count:   -c, number of packets to send; 0 means no limit.
+ * opt_srcaddr: -I, local address; left zero, rds_socket() picks one.
+ * opt_dstaddr: the node to ping.
+ */
+static struct timeval	opt_wait = { 1, 0 };		/* 1s */
+static unsigned long	opt_count;
+static struct in_addr	opt_srcaddr;
+static struct in_addr	opt_dstaddr;
+
+/* For reasons of simplicity, RDS ping does not use a packet
+ * payload that is being echoed, the way ICMP does.
+ * Instead, we open a number of sockets on different ports, and
+ * match packet sequence numbers with ports.
+ *
+ * do_ping() sends on the sockets in round-robin order, so at most
+ * NSOCKETS packets can be told apart at any one time.
+ */
+#define NSOCKETS	8
+
+/* Per-socket bookkeeping for the packet most recently sent on it. */
+struct socket {
+	int fd;				/* RDS socket returned by rds_socket() */
+	unsigned int sent_id;		/* sequence number of the last packet sent */
+	struct timeval sent_ts;		/* time at which that packet was sent */
+	unsigned int nreplies;		/* reports printed for sent_id; >0 means DUP */
+};
+
+
+static int	do_ping(void);
+static void	report_packet(struct socket *sp, const struct timeval *now,
+			const struct in_addr *from, int err);
+static void	usage(const char *complaint);
+static int	rds_socket(struct in_addr *src, struct in_addr *dst);
+static int	parse_timeval(const char *, struct timeval *);
+static int	parse_long(const char *ptr, unsigned long *);
+static int	parse_addr(const char *ptr, struct in_addr *);
+
+/*
+ * Entry point: parse the options (-c count, -i interval, -I source
+ * address) and the single destination argument into the opt_* globals,
+ * then run the ping loop.
+ *
+ * Returns the exit status computed by do_ping(). Any command line
+ * error terminates the process with status 1 via die() or usage().
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "c:i:I:")) != -1) {
+		switch (c) {
+		case 'c':
+			if (!parse_long(optarg, &opt_count))
+				die("Bad packet count <%s>\n", optarg);
+			break;
+
+		case 'I':
+			if (!parse_addr(optarg, &opt_srcaddr))
+				die("Unknown source address <%s>\n", optarg);
+			break;
+
+		case 'i':
+			if (!parse_timeval(optarg, &opt_wait))
+				die("Bad wait time <%s>\n", optarg);
+			break;
+
+		default:
+			usage("Unknown option");
+		}
+	}
+
+	/* Exactly one positional argument is expected; tell the two
+	 * failure modes apart so the complaint matches the mistake. */
+	if (optind >= argc)
+		usage("Missing destination address");
+	if (optind + 1 != argc)
+		usage("Too many arguments");
+	if (!parse_addr(argv[optind], &opt_dstaddr))
+		die("Cannot parse destination address <%s>\n", argv[optind]);
+
+	return do_ping();
+}
+
+/*
+ * Returns a - b in microseconds.
+ *
+ * The result is negative when a is earlier than b. All arithmetic is
+ * done in signed long; multiplying by an unsigned constant would turn
+ * the whole expression unsigned and make negative results depend on
+ * an implementation-defined conversion back to long.
+ */
+static inline long
+usec_sub(const struct timeval *a, const struct timeval *b)
+{
+	long secs = (long) (a->tv_sec - b->tv_sec);
+	long usecs = (long) (a->tv_usec - b->tv_usec);
+
+	return secs * 1000000L + usecs;
+}
+
+/*
+ * Run the ping loop.
+ *
+ * Opens NSOCKETS RDS sockets, then sends one empty packet to port 0 of
+ * opt_dstaddr every opt_wait, cycling through the sockets, and reports
+ * every reply (or error) through report_packet(). With opt_count set,
+ * the loop ends once that many replies have arrived, or when the next
+ * send time comes up after opt_count packets have been sent.
+ *
+ * Returns 0 if at least one reply was received, 1 otherwise.
+ */
+static int
+do_ping(void)
+{
+	struct sockaddr_in sin;
+	unsigned int	sent = 0, recv = 0;
+	struct timeval	next_ts;
+	struct socket	socket[NSOCKETS];
+	struct pollfd	pfd[NSOCKETS];
+	int		i, next = 0;
+
+	/* Start from a known state: a reply can show up on a socket
+	 * before anything was sent on it, and report_packet() reads
+	 * sent_id, sent_ts and nreplies. */
+	memset(socket, 0, sizeof(socket));
+	memset(pfd, 0, sizeof(pfd));
+
+	for (i = 0; i < NSOCKETS; ++i) {
+		int fd;
+
+		fd = rds_socket(&opt_srcaddr, &opt_dstaddr);
+
+		socket[i].fd = fd;
+		pfd[i].fd = fd;
+		pfd[i].events = POLLIN;
+	}
+
+	/* Destination: port 0 (sin_port stays zero) on the remote node */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr = opt_dstaddr;
+
+	gettimeofday(&next_ts, NULL);
+	while (1) {
+		struct timeval	now;
+		struct sockaddr_in from;
+		socklen_t	alen;
+		long		deadline;
+		int		ret;
+
+		/* Fast way out - if we have received all packets, bail now.
+		 * If we're still waiting for some to come back, we need
+		 * to do the poll() below */
+		if (opt_count && recv >= opt_count)
+			break;
+
+		gettimeofday(&now, NULL);
+		if (timercmp(&now, &next_ts, >=)) {
+			struct socket *sp = &socket[next];
+			int err = 0;
+
+			if (opt_count && sent >= opt_count)
+				break;
+
+			timeradd(&next_ts, &opt_wait, &next_ts);
+			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+				err = errno;
+			sp->sent_id = ++sent;
+			sp->sent_ts = now;
+			sp->nreplies = 0;
+			next = (next + 1) % NSOCKETS;
+
+			if (err) {
+				static unsigned int nerrs = 0;
+
+				report_packet(sp, NULL, NULL, err);
+				/* Print the hint only for the first EINVAL */
+				if (err == EINVAL && nerrs++ == 0)
+					printf("      Maybe your kernel does not support rds ping yet\n");
+			}
+		}
+
+		/* If we have fallen behind schedule, next_ts is already in
+		 * the past. poll() treats a negative timeout as "wait
+		 * forever", so clamp to zero and just check for replies. */
+		deadline = usec_sub(&next_ts, &now);
+		if (deadline < 0)
+			deadline = 0;
+		ret = poll(pfd, NSOCKETS, deadline / 1000);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("poll");
+		}
+		if (ret == 0)
+			continue;
+
+		for (i = 0; i < NSOCKETS; ++i) {
+			struct socket *sp = &socket[i];
+			int rerr;
+
+			if (!(pfd[i].revents & POLLIN))
+				continue;
+
+			/* recvfrom() overwrites alen, so reset it each time */
+			alen = sizeof(from);
+			ret = recvfrom(sp->fd, NULL, 0, MSG_DONTWAIT,
+					(struct sockaddr *) &from, &alen);
+			/* Save errno before another library call can touch it */
+			rerr = errno;
+			gettimeofday(&now, NULL);
+
+			if (ret < 0) {
+				if (rerr != EAGAIN &&
+				    rerr != EINTR)
+					report_packet(sp, &now, NULL, rerr);
+			} else {
+				report_packet(sp, &now, &from.sin_addr, 0);
+				recv++;
+			}
+		}
+	}
+
+	/* Program exit code: signal success if we received any response. */
+	return recv == 0;
+}
+
+/*
+ * Print one result line for the packet last sent on sp and count it
+ * as a reply for that packet.
+ *
+ * now:       time of the event; when given, the delay since the packet
+ *            was sent is printed in microseconds.
+ * from_addr: sender of the reply; printed only when it differs from
+ *            the address being pinged.
+ * err:       errno value to report, or 0.
+ *
+ * A line for a packet that was already reported is tagged "DUP!".
+ */
+static void
+report_packet(struct socket *sp, const struct timeval *now,
+		const struct in_addr *from_addr, int err)
+{
+	/* Remember how often this packet was reported before this call */
+	unsigned int earlier_reports = sp->nreplies++;
+
+	printf(" %3u:", sp->sent_id);
+
+	if (now != NULL)
+		printf(" %ld usec", usec_sub(now, &sp->sent_ts));
+
+	if (from_addr != NULL && from_addr->s_addr != opt_dstaddr.s_addr)
+		printf(" (%s)", inet_ntoa(*from_addr));
+
+	if (earlier_reports != 0)
+		printf(" DUP!");
+
+	if (err != 0)
+		printf(" ERROR: %s", strerror(err));
+
+	putchar('\n');
+}
+
+/*
+ * Create an RDS socket bound to the local address *src, port 0.
+ *
+ * If *src is zero, a local address is chosen first: a UDP socket is
+ * connected to *dst and the address the kernel assigned to it is
+ * stored in *src, so later calls reuse it.
+ *
+ * Returns the socket descriptor. Every failure ends the process
+ * through die_errno().
+ */
+static int
+rds_socket(struct in_addr *src, struct in_addr *dst)
+{
+	struct sockaddr_in addr;
+	int rds_fd;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+
+	rds_fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	if (rds_fd < 0)
+		die_errno("unable to create RDS socket");
+
+	if (src->s_addr == 0) {
+		/* No source address yet: ask the routing code which local
+		 * address would be used to reach the destination. */
+		socklen_t addr_len = sizeof(addr);
+		int probe_fd = socket(PF_INET, SOCK_DGRAM, 0);
+
+		if (probe_fd < 0)
+			die_errno("unable to create UDP socket");
+
+		addr.sin_addr = *dst;
+		addr.sin_port = htons(1);
+		if (connect(probe_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
+			die_errno("unable to connect to %s",
+					inet_ntoa(*dst));
+
+		if (getsockname(probe_fd, (struct sockaddr *) &addr, &addr_len) < 0)
+			die_errno("getsockname failed");
+
+		*src = addr.sin_addr;
+		close(probe_fd);
+	}
+
+	/* Bind to the source address with port 0 */
+	addr.sin_addr = *src;
+	addr.sin_port = 0;
+
+	if (bind(rds_fd, (struct sockaddr *) &addr, sizeof(addr)) != 0)
+		die_errno("bind() failed");
+
+	return rds_fd;
+}
+
+/*
+ * Print complaint followed by a usage summary to stderr and exit
+ * with status 1. The summary lists every option main() accepts.
+ */
+static void
+usage(const char *complaint)
+{
+	fprintf(stderr,
+		"%s\nUsage: rds-ping [options] dst_addr\n"
+		"Options:\n"
+		" -c count      limit packet count\n"
+		" -i interval   wait time between packets, in seconds\n"
+		"               (a suffix of msec or usec is accepted)\n"
+		" -I address    source IP address\n",
+		complaint);
+	exit(1);
+}
+
+/*
+ * Parse a time interval into *ret.
+ *
+ * ptr holds a floating point number of seconds, optionally followed
+ * directly by "ms"/"msec" (milliseconds) or "us"/"usec" (microseconds).
+ *
+ * Returns 1 on success and 0 if the string holds no number, has an
+ * unknown suffix, or names a negative, not-a-number or absurdly large
+ * interval. *ret is left untouched on failure.
+ */
+static int
+parse_timeval(const char *ptr, struct timeval *ret)
+{
+	double	seconds;
+	long	usecs;
+	char *endptr;
+
+	seconds = strtod(ptr, &endptr);
+	if (endptr == ptr)
+		return 0;	/* empty string or no digits at all */
+
+	if (!strcmp(endptr, "ms") || !strcmp(endptr, "msec"))
+		seconds *= 1e-3;
+	else if (!strcmp(endptr, "us") || !strcmp(endptr, "usec"))
+		seconds *= 1e-6;
+	else if (*endptr)
+		return 0;
+
+	/* Phrased so that NaN fails the test as well. The upper bound
+	 * keeps the conversion to long below well defined. */
+	if (!(seconds >= 0 && seconds <= 1e9))
+		return 0;
+
+	ret->tv_sec = (long) seconds;
+
+	/* Round to the nearest microsecond; plain truncation can turn
+	 * e.g. 3ms into 2999us because of binary floating point. */
+	usecs = (long) ((seconds - ret->tv_sec) * 1e6 + 0.5);
+	if (usecs >= 1000000) {
+		ret->tv_sec++;
+		usecs -= 1000000;
+	}
+	ret->tv_usec = usecs;
+	return 1;
+}
+
+/*
+ * Parse an unsigned count into *ret.
+ *
+ * The number may be decimal, octal (leading 0) or hexadecimal (leading
+ * 0x) and may carry one suffix: k/K (times 2^10), m/M (times 2^20) or
+ * g/G (times 2^30).
+ *
+ * Returns 1 on success and 0 if the string holds no digits, is
+ * negative, has trailing garbage, or does not fit an unsigned long.
+ * *ret is left untouched on failure.
+ */
+static int
+parse_long(const char *ptr, unsigned long *ret)
+{
+	unsigned long long val;
+	unsigned int shift = 0;
+	char *endptr;
+
+	/* strtoull() accepts "-N" and silently negates it */
+	if (strchr(ptr, '-'))
+		return 0;
+
+	errno = 0;
+	val = strtoull(ptr, &endptr, 0);
+	if (endptr == ptr || errno == ERANGE)
+		return 0;
+
+	switch (*endptr) {
+	case 'k': case 'K':
+		shift = 10;
+		break;
+
+	case 'm': case 'M':
+		shift = 20;
+		break;
+
+	case 'g': case 'G':
+		shift = 30;
+		break;
+	}
+	if (shift)
+		endptr++;
+
+	if (*endptr)
+		return 0;
+
+	/* Refuse values whose top bits the suffix would shift out */
+	if (val > (~0ULL >> shift))
+		return 0;
+	val <<= shift;
+
+	/* ... and values that do not survive the narrowing below */
+	if ((unsigned long) val != val)
+		return 0;
+
+	*ret = (unsigned long) val;
+	return 1;
+}
+
+/*
+ * Resolve a host name or dotted-quad string to an IPv4 address.
+ *
+ * Returns 1 and stores the first address found in *ret, or returns 0
+ * if the lookup fails or does not yield an IPv4 address.
+ */
+static int
+parse_addr(const char *ptr, struct in_addr *ret)
+{
+	struct hostent *host = gethostbyname(ptr);
+
+	if (host == NULL)
+		return 0;
+	if (host->h_addrtype != AF_INET)
+		return 0;
+	if (host->h_length != sizeof(*ret))
+		return 0;
+
+	memcpy(ret, host->h_addr, sizeof(*ret));
+	return 1;
+}
+
+/*
+ * These are completely stupid.  options.c should be removed.
+ *
+ * Both functions are intentionally empty stubs.
+ * NOTE(review): presumably they exist only so that the shared
+ * options.c, which refers to them, links into this tool - confirm.
+ */
+void print_usage(int durr) { }
+void print_version() { }
diff --git a/rds-rdma.7 b/rds-rdma.7
new file mode 100644
index 0000000..20b1575
--- /dev/null
+++ b/rds-rdma.7
@@ -0,0 +1,427 @@
+.TH "RDS zerocopy" 7
+.SH NAME
+RDS-rdma \- Zerocopy Interface for RDMA over RDS
+.SH DESCRIPTION
+This manual page describes the zerocopy interface of RDS, which
+was added in RDSv3. For a description of the basic RDS interface,
+please refer to
+.BR rds (7).
+.PP
+The principal mode of operation for RDS zerocopy is like this:
+one participant (the client) wishes to initiate a direct transfer
+to or from some area of memory in its process address space.
+This memory does not have to be aligned.
+.PP
+The client obtains a handle for this region of memory, and
+passes it to the other participant (the server). This is called
+the RDMA cookie. To the application, the cookie is an opaque 64bit
+data type.
+.PP
+The client sends this handle to
+the server application, along with other details of the RDMA
+request (such as which data to transfer to that memory area).
+Throughout the following discussion, we will refer to this
+message as the RDMA request.
+.PP
+The server uses this RDMA cookie to initiate the requested RDMA
+transfer. The RDMA transfer is combined atomically with a
+normal RDS message, which is delivered to the client. This
+message is called the RDMA ACK throughout the following.  Atomic
+in this context means that either both the RDMA succeeds and the
+RDMA ACK is delivered, or neither succeeds.
+.PP
+Thus, when the client receives the RDMA ACK, it knows that
+the RDMA has completed successfully. It can then release the
+RDMA cookie for this memory region, if it wishes to.
+.PP
+RDMA operations are not reliable, in the sense that unlike normal
+RDS messages, RDS RDMA operations may fail, and get
+dropped.
+.\"-------------------------------
+.SH INTERFACE
+The interface is currently based on control messages (ancillary
+data) sent or received via the
+.BR sendmsg (2)
+and
+.BR recvmsg (2)
+system calls. Optionally, an older interface can be used that
+is based on the
+.BR setsockopt (2)
+system call. However, we recommend using control messages, as
+this reduces the number of system calls required.
+.\"-------------------------------
+.SS Control message interface
+With the control message interface, the RDMA cookie is passed to
+the server out-of-band, included in an extension header attached
+to the RDS message.
+.PP
+The following outlines the mode of operation; the data
+types used will be specified in detail in a subsequent section.
+.PP
+Initially, the client will send RDMA requests along with a
+.B RDS_CMSG_RDMA_MAP
+control message. The control message contains the address and
+length of the memory region for which to obtain a handle, some
+flags, and a pointer to a memory location (in the caller's address
+space) where the kernel will store the RDMA cookie.
+.PP
+Alternatively, if the application has already obtained a RDMA cookie
+for the memory range it wants to RDMA to/from, it can hand this
+cookie to the kernel using the
+.B RDS_CMSG_RDMA_DEST
+control message.
+.PP
+Either way, the kernel will include the resulting RDMA cookie
+in an extension header that is transmitted as part of the RDMA
+request to the server.
+.PP
+When the server receives the RDMA request, the kernel will deliver the
+cookie wrapped inside a
+.B RDS_CMSG_RDMA_DEST
+control message.
+.PP
+The server then initiates the data transfer by sending the RDMA ACK message
+along with a
+.B RDS_CMSG_RDMA_ARGS
+control message. This message contains the RDMA cookie, and the local
+memory to copy to or from.
+.PP
+The server process may request a notification when an RDMA operation
+completes. Notifications are delivered as
+.B RDS_CMSG_RDMA_STATUS
+control messages. When an application calls
+.BR recvmsg (2),
+it will either receive a regular RDS message (possibly with other RDMA
+related control messages), or an empty message with one or more
+status control messages.
+.PP
+In addition, when an RDMA operation fails for some reason and is discarded, the
+application can ask to receive notifications for failed messages as
+well, regardless of whether it asked for success notification of an
+individual message or not. This behavior is turned on by setting the
+.B RDS_RECVERR
+socket option.
+.\"-------------------------------
+.SS Setsockopt interface
+In addition to the control message interface, RDS allows a process
+to register and release memory ranges for RDMA through calls to
+.BR setsockopt (2).
+.TP
+.B RDS_GET_MR
+To obtain a RDMA cookie for a given memory range, the application can
+use
+.BR setsockopt " with " RDS_GET_MR .
+This operates essentially the same way as the
+.B RDS_CMSG_RDMA_MAP
+control message: the argument contains the address and length of the
+memory range to be registered, and a pointer to a RDMA cookie variable,
+in which the system call will store the cookie for the registered
+range.
+.TP
+.B RDS_FREE_MR
+Memory ranges can be released by calling
+.BR setsockopt " with " RDS_FREE_MR ,
+giving the RDMA cookie and additional flags as arguments.
+.TP
+.B RDS_RECVERR
+This is a boolean option which can be set as well as queried
+(using
+.BR getsockopt ).
+When enabled, RDS will send RDMA notification messages to
+the application for any RDMA operation that fails. This
+option defaults to off.
+.PP
+For all of these calls, the
+.B level
+argument to
+.B setsockopt
+is
+.BR SOL_RDS .
+.PP
+.\"-------------------------------
+.SH RDMA MACROS AND TYPES
+.fi
+.TP
+.B RDMA cookie
+.nf
+typedef u_int64_t       rds_rdma_cookie_t
+.fi
+.IP
+This encapsulates a memory location in the client process. In the
+current implementation, it contains the R_Key of the remote memory
+region, and the offset into it (so that the application does not
+have to worry about alignment).
+.IP
+The RDMA cookie is used in several struct types described below.
+The
+.BR RDS_CMSG_RDMA_DEST
+control message contains a rds_rdma_cookie_t all by itself as payload.
+.TP
+.B Mapping arguments
+The following data type is used with
+.B RDS_CMSG_RDMA_MAP
+control messages and with the
+.B RDS_GET_MR
+socket option:
+.IP
+.nf
+struct rds_iovec {
+        u_int64_t       addr;
+        u_int64_t       bytes;
+};
+
+struct rds_get_mr_args {
+        struct rds_iovec vec;
+        u_int64_t       cookie_addr;
+        uint64_t        flags;
+};
+.fi
+.IP
+The
+.B cookie_addr
+specifies a memory location where to store the RDMA cookie.
+.IP
+The
+.B flags
+value is a bitwise OR of any of the following flags:
+.RS
+.TP
+.B RDS_RDMA_USE_ONCE
+This tells the kernel that the allocated RDMA cookie is to be used
+exactly once. When the RDMA ACK message arrives, the kernel will
+automatically unbind the memory area and release any resources
+associated with the cookie.
+.IP
+If this flag is not set, it is the application's responsibility to
+release the memory region at a later time using the
+.BR RDS_FREE_MR
+socket option.
+.TP
+.B RDS_RDMA_INVALIDATE
+Normally, RDMA memory mappings are invalidated lazily, as this
+requires some relatively costly synchronization with the HCA. However,
+this means that the server application can continue to access the
+registered memory for some indeterminate amount of time.
+If this flag is set, the RDS code will invalidate
+the mapping at the time it is released (either upon arrival of the
+RDMA ACK, if
+.B USE_ONCE
+was specified; or when the application destroys it using
+.BR FREE_MR ).
+.RE
+.TP
+.B  RDMA Operation
+RDMA operations are initiated by the server using the
+.BR RDS_CMSG_RDMA_ARGS
+control message, which takes the following data as payload:
+.IP
+.nf
+struct rds_rdma_args {
+        rds_rdma_cookie_t cookie;
+        struct rds_iovec remote_vec;
+        u_int64_t       local_vec_addr;
+        u_int64_t       nr_local;
+        u_int64_t       flags;
+        u_int32_t       user_token;
+};
+.fi
+.IP
+The
+.B cookie
+argument contains the RDMA cookie received from the client.
+The local memory is given via an array of
+.BR rds_iovec s.
+The array address is given in
+.BR local_vec_addr ,
+and its number of elements is given in
+.BR nr_local .
+.IP
+The struct member
+.B remote_vec
+specifies a location relative to the memory area identified
+by the cookie:
+.BR remote_vec . addr
+is an offset into that region, and
+.BR remote_vec . bytes
+is the length of the memory window to copy to/from.
+This length must match the size of the local memory area,
+i.e. the sum of bytes in all members of the local iovec.
+.IP
+The flags field contains the bitwise OR of any of the following
+flags:
+.RS
+.TP
+.B RDS_RDMA_READWRITE
+If set, any RDMA WRITE is initiated from the server's memory
+to the client's. If not set, RDS will do a RDMA READ from the
+client's memory to the server's memory.
+.TP
+.B RDS_RDMA_FENCE
+By default, Infiniband makes no guarantee about the ordering of
+an RDMA READ with respect to subsequent SEND operations. Setting
+this flag asks that the RDMA READ should be fenced off the subsequent
+RDS ACK message. Setting this flag requires an additional round-trip
+of the IB fabric, but it is a good idea to set this flag
+by default, unless you are really sure you do not want it.
+.TP
+.B RDS_RDMA_NOTIFY_ME
+This flag requests a notification upon completion of the RDMA
+operation (successful or otherwise). The notification will contain
+the value of the
+.B user_token
+field passed in by the application. This allows the application to
+release resources (such as buffers) associated with the RDMA transfer.
+.RE
+.IP
+The
+.B user_token
+can be used to pass an application specific identifier to the
+kernel. This token is returned to the application when a status
+notification is generated (see the following section).
+.TP
+.B RDMA Notification
+The RDS kernel code is able to notify the server application when
+an RDMA operation completes. These notifications are delivered
+via
+.B RDS_CMSG_RDMA_STATUS
+control messages.
+.IP
+By default, no notifications are generated. There are two ways an
+application can request them. On one hand, status notifications can
+be enabled on a per-operation basis by setting the
+.B RDS_RDMA_NOTIFY_ME
+flag in the RDMA arguments. On the other hand, the application can
+request notifications for all RDMA operations that fail by setting
+the
+.B RDS_RECVERR
+socket option (see above).
+In both cases, the format of the notification is the same; and at
+most one notification will be sent per completed operation.
+.IP
+The message format is this:
+.IP
+.nf
+struct rds_rdma_notify {
+        u_int32_t       user_token;
+        int32_t         status;
+};
+.fi
+.IP
+The
+.B user_token
+field contains the value previously given to the kernel in the
+.BR RDS_CMSG_RDMA_ARGS
+control message. The
+.BR status
+field contains a status value, with 0 indicating success, and
+non-zero indicating an error.
+.IP
+The following status codes are currently defined:
+.RS
+.TP
+.B RDS_RDMA_SUCCESS
+The RDMA operation succeeded.
+.TP
+.B RDS_RDMA_REMOTE_ERROR
+The RDMA operation failed due to a remote access error. This is
+usually due to an invalid R_key, offset or transfer size.
+.TP
+.B RDS_RDMA_CANCELED
+The RDMA operation was canceled by the application.
+(This error code is not yet generated).
+.TP
+.B RDS_RDMA_DROPPED
+RDMA operations were discarded after the connection broke and
+was re-established. The RDMA operation may have been processed
+partially.
+.TP
+.B RDS_RDMA_OTHER_ERROR
+Any other failure.
+.RE
+.TP
+.B RDMA setsockopt arguments
+When using the
+.B RDS_GET_MR
+socket option to register a memory range, the application passes
+a pointer to a
+.B struct rds_get_mr_args
+variable, described above.
+.IP
+The
+.B RDS_FREE_MR
+call takes an argument of type
+.BR "struct rds_free_mr_args" :
+.IP
+.nf
+struct rds_free_mr_args {
+        rds_rdma_cookie_t cookie;
+        u_int64_t       flags;
+};
+.fi
+.IP
+.B cookie
+specifies the RDMA cookie to be released. RDMA access to the memory
+range will usually not be revoked instantly, because the operation is
+rather costly. However, if the
+.B flags
+argument contains
+.BR RDS_RDMA_INVALIDATE ,
+RDS will invalidate the indicated mapping immediately,
+as described in section
+.B "Mapping arguments"
+above.
+.IP
+If the
+.B cookie
+argument is 0, and
+.BR RDS_RDMA_INVALIDATE
+is set, RDS will invalidate old memory mappings on all devices.
+.\"-------------------------------
+.SH ERRORS
+In addition to the usual error codes returned by
+.BR sendmsg ", " recvmsg " and " setsockopt ,
+RDS returns the following error codes:
+.TP
+.BR EAGAIN
+RDS was unable to map a memory range because the limit was
+exceeded (returned by
+.BR RDS_CMSG_RDMA_MAP " and " RDS_GET_MR ).
+.TP
+.BR EINVAL
+When sending a message, there were conflicting control messages
+(e.g. two
+.B RDMA_MAP
+messages, or a
+.B RDMA_MAP " and a " RDMA_DEST
+message).
+.IP
+In a
+.BR RDS_CMSG_RDMA_MAP " or " RDS_GET_MR
+operation, the application specified a memory range greater than the
+maximum size supported.
+.IP
+When setting up an RDMA operation with
+.BR RDS_CMSG_RDMA_ARGS ,
+the size of the local memory (given in the
+.BR rds_iovec )
+did not match the size of the remote memory range.
+.TP
+.B EBUSY
+RDS was unable to obtain a DMA mapping for the indicated memory.
+.\"-------------------------------
+.SH LIMITS
+Currently, the following limits apply
+.IP \(bu
+The maximum size of a zerocopy transfer is 1MB. This can be
+adjusted via the
+.B fmr_message_size
+module parameter.
+.IP \(bu
+The maximum number of memory ranges that can be mapped is
+limited to 2048 at the moment. This can be adjusted via the
+.B fmr_pool_size
+module parameter. However, the actual limit imposed by the
+hardware may in fact be lower.
+.SH AUTHORS
+RDS was written and is Copyright (C) 2007-2008 by Oracle, Inc.
diff --git a/rds-sink.1 b/rds-sink.1
new file mode 100644
index 0000000..05c9d73
--- /dev/null
+++ b/rds-sink.1
@@ -0,0 +1 @@
+.so man1/rds-gen.1
diff --git a/rds-sink.c b/rds-sink.c
new file mode 100644
index 0000000..2d47ade
--- /dev/null
+++ b/rds-sink.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * rds-sink.c: Collect some RDS packets.
+ */
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include "kernel-list.h"
+#include "rdstool.h"
+
+/*
+ * Print the command line synopsis and exit with status rc.
+ * The text goes to stderr when rc is non-zero and to stdout otherwise.
+ * Continuation lines are indented by the width of the program name.
+ */
+void print_usage(int rc)
+{
+	FILE *output;
+	int namelen;
+
+	if (rc)
+		output = stderr;
+	else
+		output = stdout;
+	namelen = strlen(progname);
+
+	verbosef(0, output,
+		 "Usage: %s -s <source_ip>:<source_port>\n"
+		 "       %*s [-f <output_file>] [-i <interval>]\n"
+		 "       %*s [-v ...] [-q ...]\n"
+		 "       %s -h\n"
+		 "       %s -V\n",
+		 progname, namelen, "", namelen, "", progname, progname);
+
+	exit(rc);
+}
+
+/*
+ * Print the program name and version to stdout and exit with status 0.
+ * NOTE(review): "VERSION" sits inside the string literal, so the word
+ * itself is printed as written here - confirm whether the build is
+ * expected to substitute a real version number.
+ */
+void print_version()
+{
+	verbosef(0, stdout, "%s version VERSION\n", progname);
+
+	exit(0);
+}
+
+/*
+ * Write len bytes starting at bytes to standard output, retrying after
+ * short writes and after EINTR for as long as runningp() holds.
+ * If ctxt->rc_filename is NULL the data is discarded instead.
+ *
+ * Returns 0 when everything was written (or discarded), -EPIPE if
+ * write() returned 0, or the negated errno of a failed write(). If
+ * runningp() turns false right after an interrupted write, -EINTR is
+ * returned.
+ */
+static int empty_buff(struct rds_context *ctxt, char *bytes, ssize_t len)
+{
+	int ret = 0;
+	char *ptr = bytes;
+
+	if (!ctxt->rc_filename)
+		len = 0;  /* Throw it away */
+
+	while (len && runningp()) {
+		stats_print();
+
+		/* rc_filename is used for messages only; data goes to stdout */
+		ret = write(STDOUT_FILENO, ptr, len);
+		if (!ret) {
+			verbosef(0, stderr,
+				 "%s: Unexpected end of file writing to %s\n",
+				 progname, ctxt->rc_filename);
+			ret = -EPIPE;
+			break;
+		}
+		if (ret < 0) {
+			ret = -errno;
+			/* Interrupted: retry, unless runningp() now says stop */
+			if (ret == -EINTR)
+				continue;
+
+			verbosef(0, stderr,
+				 "%s: Error writing to %s: %s\n",
+				 progname, ctxt->rc_filename,
+				 strerror(-ret));
+			break;
+		}
+
+		/* Short or full write: account for it and advance */
+		stats_add_write(ret);
+		ptr += ret;
+		len -= ret;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Call recvmsg() on the endpoint's socket, retrying after EINTR for as
+ * long as runningp() holds.
+ *
+ * Returns the recvmsg() result on success or the negated errno on
+ * failure (after logging it). Returns 0 if runningp() was already
+ * false on entry, and -EINTR if it turned false after an interrupted
+ * call.
+ */
+static ssize_t recv_buff(struct rds_endpoint *e, struct msghdr *msg,
+			 int flags)
+{
+	ssize_t ret = 0;
+
+	while (runningp()) {
+		stats_print();
+
+		ret = recvmsg(e->re_fd, msg, flags);
+		if (ret < 0) {
+			ret = -errno;
+			if (ret == -EINTR)
+				continue;
+
+			verbosef(0, stderr,
+				 "%s: Error from recvmsg: %s\n",
+				 progname, strerror(-ret));
+		}
+
+		/* Done: a message was received, or a hard error was logged */
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Receive loop: read messages from the source endpoint and hand each
+ * one to empty_buff() until runningp() turns false or an error occurs.
+ *
+ * Each message is first peeked at with MSG_PEEK|MSG_TRUNC and an empty
+ * iovec to learn its length; the receive buffer is grown if needed and
+ * the message is then read for real.
+ *
+ * Returns 0 when the loop was stopped by runningp(), otherwise the
+ * negative error code that ended it (-ENOMEM if the buffer could not
+ * be grown).
+ */
+static int wli_do_recv(struct rds_context *ctxt)
+{
+	struct rds_endpoint *e = ctxt->rc_saddr;
+	ssize_t alloced = 0;
+	ssize_t ret = 0;
+	struct iovec iov = {
+		.iov_base = NULL,
+	};
+	struct msghdr msg = {
+		.msg_name = &e->re_addr,
+		.msg_namelen = sizeof(struct sockaddr_in),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	verbosef(2, stderr, "Starting receive loop\n");
+
+	stats_start();
+
+	while (runningp()) {
+		/* Calls stats_print() */
+		iov.iov_len = 0;
+		ret = recv_buff(e, &msg, MSG_PEEK|MSG_TRUNC);
+		if (ret < 0)
+			break;
+
+		if (ret > alloced) {
+			void *grown;
+
+			verbosef(3, stderr,
+				 "Growing buffer to %zd bytes\n",
+				 ret);
+			/* Keep the old pointer until realloc() succeeds so
+			 * that it can still be freed on failure. */
+			grown = realloc(iov.iov_base, ret);
+			if (grown == NULL) {
+				ret = -ENOMEM;
+				break;
+			}
+			iov.iov_base = grown;
+			alloced = ret;
+		}
+
+		/* Calls stats_print() */
+		iov.iov_len = ret;
+		ret = recv_buff(e, &msg, 0);
+		if (ret < 0)
+			break;
+
+		stats_add_recv(ret);
+
+		/* Calls stats_print() */
+		ret = empty_buff(ctxt, iov.iov_base, ret);
+		if (ret)
+			break;
+	}
+	verbosef(2, stderr, "Stopping receive loop\n");
+
+	stats_total();
+
+	/* The receive buffer is private to this function */
+	free(iov.iov_base);
+
+	return ret;
+}
+
+/*
+ * Entry point of rds-sink: parse the options, bind the source
+ * endpoint, redirect standard output to the output file and run the
+ * receive loop.
+ *
+ * Returns the first non-zero code reported by rds_bind(), dup_file()
+ * or wli_do_recv(), or 0. An option error ends in print_usage().
+ */
+int main(int argc, char *argv[])
+{
+	int rc;
+	char ipbuf[INET_ADDRSTRLEN];
+        struct rds_context ctxt = {
+                .rc_filename = "-",
+        };
+
+
+	INIT_LIST_HEAD(&ctxt.rc_daddrs);
+
+	rc = parse_options(argc, argv, RDS_TOOL_BASE_OPTS RDS_SINK_OPTS,
+			   &ctxt);
+	if (rc)
+		print_usage(rc);
+
+	/* NOTE(review): assumes a successful parse_options() always sets
+	 * ctxt.rc_saddr - confirm against options.c */
+	inet_ntop(PF_INET, &ctxt.rc_saddr->re_addr.sin_addr, ipbuf,
+		  INET_ADDRSTRLEN);
+	verbosef(2, stderr, "Binding endpoint %s:%d\n",
+		 ipbuf, ntohs(ctxt.rc_saddr->re_addr.sin_port));
+
+	rc = rds_bind(&ctxt);
+	if (rc)
+		goto out;
+
+	if (ctxt.rc_filename) {
+		rc = dup_file(&ctxt, STDOUT_FILENO, O_CREAT|O_WRONLY);
+		if (rc)
+			goto out;
+		/* From here on the name is only used in messages */
+		if (!strcmp(ctxt.rc_filename, "-"))
+			ctxt.rc_filename = "<standard output>";
+	}
+
+	/* NOTE(review): the result of setup_signals() is not stored in
+	 * rc, and rc is always 0 at this point, so the check below can
+	 * never fire. Presumably "rc = setup_signals();" was intended -
+	 * confirm the return type in rdstool.h. */
+	setup_signals();
+	if (rc) {
+		verbosef(0, stderr, "%s: Unable to initialize signals\n",
+			 progname);
+		goto out;
+	}
+
+	rc = wli_do_recv(&ctxt);
+
+out:
+	free(ctxt.rc_saddr->re_name);
+	free(ctxt.rc_saddr);
+
+	return rc;
+}
diff --git a/rds-stress.1 b/rds-stress.1
new file mode 100644
index 0000000..ec99887
--- /dev/null
+++ b/rds-stress.1
@@ -0,0 +1,174 @@
+.Dd May 15, 2007
+.Dt RDS-STRESS 1
+.Os
+.Sh NAME
+.Nm rds-stress
+.Nd send messages between processes over RDS sockets
+.Pp
+.Sh SYNOPSIS
+.Nm rds-stress
+.Bk -words
+.Op Fl p Ar port_number
+.Op Fl r Ar receive_address
+.Op Fl s Ar send_address
+.Op Fl a Ar ack_bytes
+.Op Fl q Ar request_bytes
+.Op Fl D Ar rdma_bytes
+.Op Fl d Ar queue_depth
+.Op Fl t Ar nr_tasks
+.Op Fl T Ar seconds
+.Op Fl c
+.Op Fl z
+.Op Fl R
+.Op Fl V
+.Op Fl v
+
+.Sh DESCRIPTION
+.Nm rds-stress
+sends messages between groups of tasks, usually running on separate
+machines.
+.Pp
+First a passive receiving instance is started.
+.Pp
+.Dl $ rds-stress
+.Pp
+Then an active sending instance is started, giving it
+the address and port at which it will find a listening
+passive receiver.  In addition, it is given configuration options which
+both instances will use.
+.Pp
+.Dl $ rds-stress -s recvhost -p 4000 -t 1 -d 1
+.Pp
+The active sender will parse the options, connect to the passive receiver, and
+send the options over this connection.  From this point on both instances
+exhibit the exact same behaviour.
+.Pp
+They will create a number of child tasks as specified by the -t option.  Once
+the children are created the parent sleeps for a second at a time, printing a
+summary of statistics at each interval. 
+.Pp
+Each child will open an RDS socket, each binding to a port number in order
+after the port number given on the command line.  The first child would bind to
+port 4001 in our example.  Each child sets the send and receive buffers to
+exactly fit the number of messages, requests and acks, that will be in flight
+as determined by the command line arguments.
+.Pp
+The children then enter their loop.  They will keep a number of sent messages
+outstanding as specified by the -d option.  When they reach this limit they
+will wait to receive acks which will allow them to send again.  As they receive
+messages from their peers they immediately send acks.
+.Pp
+Every second, the parent process will display statistics of the ongoing
+stress test. The output is described in section OUTPUT below.
+.Pp
+If the -T option is given, the test will terminate after the specified time,
+and a summary is printed.
+.Pp
+Each child maintains outstanding messages to all other children of the other instance.
+They do not send to their siblings.
+.Sh OPTIONS
+The following options are available for use on the command line:
+.Bl -tag -width Ds
+.It Fl p Ar port_number
+Each parent binds a TCP socket to this port number and their respective
+address.  They will trade the negotiated options over this socket.  Each
+child will bind an RDS socket to the range of ports immediately following
+this port number, for as many children as there are.
+.It Fl s Ar send_address
+A connection attempt is made to this address.  Once it is complete and the
+options are sent over it then children will be created and work will proceed.
+.It Fl r Ar receive_address
+This specifies the address that messages will be sent from.  If -s is not
+specified then rds-stress waits for a connection on this address before
+proceeding.
+.Pp
+If this option is not given, rds-stress will choose an appropriate address.
+The passive process will accept connections on all local interfaces, and
+obtain the address once the control connection is established.
+The active process will choose a local address based on the interface through
+which it connects to the destination address.
+.It Fl a Ar ack_bytes
+This specifies the size of the ack messages, in bytes. There is a minimum size
+which depends on the format of the ack messages, which may change over time.
+See section "Message Sizes" below.
+.It Fl q Ar request_bytes
+This specifies the size of the request messages, in bytes.
+It also has a minimum size which may change over time.
+See section "Message Sizes" below.
+.It Fl D Ar rdma_bytes
+RDSv3 is capable of transmitting part of a message via RDMA directly from
+application buffer to application buffer. This option enables RDMA support
+in rds-stress: request packets include parameters for an RDMA READ or WRITE
+operation, which the receiving process executes at the time the ACK packet
+is sent.
+See section "Message Sizes" below.
+.It Fl d Ar queue_depth
+Each child will try to maintain this many sent messages outstanding to each
+of its peers on the remote address.
+.It Fl t Ar nr_tasks
+Each parent will create this many children tasks.
+.It Fl T Ar seconds
+Specify the duration of the test run. After the specified number of seconds,
+all processes on both ends of the connection will terminate, and the
+active instance will print a summary. By default, rds-stress will keep
+on sending and receiving messages.
+.It Fl z
+This flag can be used in conjunction with -T. It suppresses the ongoing
+display of statistics, and prints a summary only.
+.It Fl c
+This causes rds-stress to create child tasks which just consume CPU cycles.
+One task is created for each CPU in the system.  First each child observes the
+maximum rate at which it can consume cycles.  This means that this option
+should only be given on an idle system.  rds-stress can then calculate the CPU
+use of the system by observing the lesser rate at which the children consume
+cycles.  This option is *not* shared between the active and passive instances.
+It must be specified on each rds-stress command line.
+.It Fl R
+This tells the rds-stress parent process to run with SCHED_RR priority,
+giving it precedence over the child processes. This is useful when running
+with lots of tasks, where there is a risk of the child processes starving
+the parent, and skewing the results.
+.It Fl v
+With this option enabled, packets are filled with a pattern that is
+verified by the receiver. This check can help detect data corruption
+occurring under high load.
+.El
+.Pp
+
+.Ss Message Sizes
+Options which set a message size (such as -a) specify a number of bytes
+by default. By appending \fBK\fP, \fBM\fP, or \fBG\fP, you can specify the size
+in kilobytes, megabytes or gigabytes, respectively. For instance,
+the following will run rds-stress with a message and ACK size of 1024
+bytes, and an RDMA message size of 1048576 bytes:
+.Pp
+.Dl rds-stress ... -q 1K -a 1K -D 1M
+.Pp
+.Pp
+.Sh OUTPUT
+Each parent outputs columns of statistics at a regular interval:
+.Bl -tag -width Ds
+.It tsks
+The number of child tasks which are running.
+.It tx/s
+The number of sendmsg() calls that all children are executing, per second. 
+.It tx+rx K/s
+The total number of bytes that are flowing through sendmsg() and recvmsg() for all children.
+This includes both request and ack messages.
+.It rw+rr K/s
+The total number of bytes that are being transferred via RDMA READs and
+WRITEs for all children.
+.It tx us/c
+The average number of microseconds spent in sendmsg() calls.
+.It rtt us
+The average round trip time for a request and ack message pair.  This measures
+the total time between when a task sends a request and when it finally receives
+the ack for that message.  Because it includes the time it takes for the
+receiver to wake up, receive the message, and send an ack, it can grow to be
+quite large under load. 
+.It cpu %
+This is the percentage of available CPU resources on this machine that are being
+consumed since rds-stress started running.  It will show -1.00 if -c is not
+given.  It is calculated based on the amount of CPU resources that CPU soaking
+tasks are able to consume.  This lets it measure CPU use by the system, say in
+interrupt handlers, that task-based CPU accounting does not include.
+For this to work rds-stress must be started with -c on an idle system.
+.El
diff --git a/rds-stress.c b/rds-stress.c
new file mode 100644
index 0000000..dfe68c5
--- /dev/null
+++ b/rds-stress.c
@@ -0,0 +1,2715 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
+#include <inttypes.h>
+#include <syscall.h>
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <getopt.h>
+#include <byteswap.h>
+#include "net/rds.h"
+
+#ifdef DYNAMIC_PF_RDS
+#include "pfhack.h"
+#endif
+
+/*
+ *
+ * TODO
+ *  - checksum the data some day.
+ *  - use poll to wait instead of blocking recvmsg?  doesn't seem great.
+ *  - measure us/call of nonblocking recvmsg
+ *  - do something about receiver congestion
+ *  - notice when parent tcp socket dies
+ *  - should the parent be at a higher priority?
+ *  - catch ctl-c
+ *  - final stats summary page
+ */
+
+/*
+ * Run-time parameters of one rds-stress instance.
+ *
+ * NOTE(review): the man page says the active side sends its options to the
+ * passive side over the control connection; presumably this struct is that
+ * payload, which would explain the packed, fixed-width layout -- confirm
+ * before reordering or resizing any member.
+ */
+struct options {
+	uint32_t	req_depth;	/* with req_size/ack_size/nr_tasks: sizes the socket buffers in rds_socket() */
+	uint32_t	req_size;
+	uint32_t	ack_size;
+	uint32_t	rdma_size;	/* bytes per RDMA buffer, see alloc_rdma_buffers() */
+	uint32_t	send_addr;
+	uint32_t	receive_addr;	/* host byte order: check_rdma_support() applies htonl(); 0 = not set */
+	uint16_t	starting_port;	/* host byte order: check_rdma_support() applies htons() */
+	uint16_t	nr_tasks;
+	uint32_t	run_time;
+	uint8_t		summary_only;
+	uint8_t		rtprio;
+	uint8_t		tracing;	/* enables the trace() macro */
+	uint8_t		verify;		/* fill and check message payload patterns */
+	uint8_t		show_params;
+	uint8_t		show_perfdata;
+	uint8_t		use_cong_monitor;	/* cleared by rds_socket() if the kernel lacks support */
+	uint8_t		rdma_use_once;	/* adds RDS_RDMA_USE_ONCE in get_rdma_key() */
+	uint8_t		rdma_use_get_mr;
+	uint8_t		rdma_use_fence;
+	uint8_t		rdma_cache_mrs;
+	uint8_t		rdma_key_o_meter;
+	uint8_t		suppress_warnings;	/* silences the buffer-size warnings in rds_socket() */
+
+	uint32_t	rdma_alignment;	/* byte offset added to the mmap'ed RDMA area */
+	uint32_t	connect_retries;
+} __attribute__((packed));
+
+static struct options	opt;
+static int		control_fd;
+
+/* Running statistics for one metric; updated by stat_inc(). */
+struct counter {
+	uint64_t	nr;	/* number of samples */
+	uint64_t	sum;	/* sum of all samples */
+	uint64_t	min;	/* smallest sample; 0 means "none yet" (see minz()) */
+	uint64_t	max;	/* largest sample */
+};
+
+/* Indices into the per-child counter arrays (child_control.cur / .last). */
+enum {
+	S_REQ_TX_BYTES = 0,
+	S_REQ_RX_BYTES,
+	S_ACK_TX_BYTES,
+	S_ACK_RX_BYTES,
+	S_RDMA_WRITE_BYTES,
+	S_RDMA_READ_BYTES,
+	S_SENDMSG_USECS,
+	S_RTT_USECS,
+	S__LAST		/* not a statistic: number of entries above */
+};
+
+#define NR_STATS S__LAST
+
+/*
+ * Parents share a mapped array of these with their children.  Each child
+ * gets one.  It's used to communicate between the child and the parent
+ * simply.
+ */
+struct child_control {
+	pid_t pid;
+	int ready;
+	struct timeval start;
+	struct counter cur[NR_STATS];	/* indexed by the S_* enum */
+	/* NOTE(review): presumably a snapshot of cur[] from the previous
+	 * reporting interval -- confirm against the parent's stats code */
+	struct counter last[NR_STATS];
+} __attribute__((aligned (256))); /* arbitrary */
+
+/*
+ * Bookkeeping for one soak process.
+ * NOTE(review): presumably one per CPU-soaking child created for the -c
+ * option described in the man page -- confirm against the soaker code.
+ */
+struct soak_control {
+	pid_t		pid;
+	uint64_t	per_sec;
+	uint64_t	counter;
+	uint64_t	last;
+	struct timeval	start;
+} __attribute__((aligned (256))); /* arbitrary */
+
+void stop_soakers(struct soak_control *soak_arr);
+
+/*
+ * Requests tend to be larger and we try to keep a certain number of them
+ * in flight at a time.  Acks are sent in response to requests and tend
+ * to be smaller.
+ */
+/* Values of header.op */
+#define OP_REQ		1
+#define OP_ACK		2
+
+/* Values of header.rdma_op; rdma_build_req() alternates between the two
+ * by applying RDMA_OP_TOGGLE to the task's rdma_next_op. */
+#define RDMA_OP_READ	1
+#define RDMA_OP_WRITE	2
+#define RDMA_OP_TOGGLE(x) (3 - (x))	/* read becomes write and vice versa */
+
+/*
+ * Every message sent with sendmsg gets a header.  This lets the receiver
+ * verify that it got what was sent.
+ */
+struct header {
+	uint32_t	seq;
+	uint32_t	from_addr;	/* always network byte order */
+	uint32_t	to_addr;	/* always network byte order */
+	uint16_t	from_port;	/* always network byte order */
+	uint16_t	to_port;	/* always network byte order */
+	uint16_t	index;
+	uint8_t		op;		/* OP_REQ or OP_ACK */
+
+	/* RDMA related.
+	 * rdma_op must be the first RDMA field: BASIC_HEADER_SIZE is
+	 * defined as its offset, and check_hdr() compares only that many
+	 * leading bytes of the header.
+	 */
+	uint8_t		rdma_op;	/* RDMA_OP_READ or RDMA_OP_WRITE */
+	uint64_t	rdma_addr;
+	uint64_t	rdma_phyaddr;
+	uint64_t	rdma_pattern;
+	uint64_t	rdma_key;
+	uint32_t	rdma_size;
+
+	uint8_t		data[0];	/* payload follows the header */
+} __attribute__((packed));
+
+/* Smallest message: a bare header with no payload. */
+#define MIN_MSG_BYTES		(sizeof(struct header))
+/* Size of the non-RDMA part of the header, i.e. the offset of rdma_op. */
+#define BASIC_HEADER_SIZE	(size_t)(&((struct header *) 0)->rdma_op)
+
+/* Print a printf-style message to stderr and exit with status 1.
+ * The caller supplies the trailing newline. */
+#define die(fmt...) do {		\
+	fprintf(stderr, fmt);		\
+	exit(1);			\
+} while (0)
+
+/* Like die(), but appends ", errno: <n> (<strerror>)" and a newline.
+ * fmt must be a string literal because it is concatenated with the
+ * suffix at compile time. */
+#define die_errno(fmt, args...) do {				\
+	fprintf(stderr, fmt ", errno: %d (%s)\n", ##args , errno,\
+		strerror(errno));				\
+	exit(1);						\
+} while (0)
+
+static int	mrs_allocated = 0;
+
+/* printf-style debug output to stderr, emitted only when opt.tracing is
+ * non-zero. */
+#define trace(fmt...) do {		\
+	if (opt.tracing)		\
+		fprintf(stderr, fmt);	\
+} while (0)
+
+/*
+ * Smaller / larger of two values.  Arguments are parenthesized so that
+ * expressions with low-precedence operators (e.g. "x & y") are compared as
+ * written.  Each argument is still evaluated twice: do not pass
+ * expressions with side effects.
+ */
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#define max(a,b) ((a) > (b) ? (a) : (b))
+
+static unsigned long	sys_page_size;
+
+/* This macro converts a pointer to an integer (unsigned long) that can
+   then be assigned to a uint64_t without producing warnings on either
+   32bit or 64bit platforms. At least with gcc, that is.
+ */
+#define ptr64(p)	((unsigned long) (p))
+
+/* zero is undefined */
+static inline uint64_t minz(uint64_t a, uint64_t b)
+{
+	/* both operands carry a value: ordinary minimum */
+	if (a != 0 && b != 0)
+		return min(a, b);
+
+	/* at least one is "undefined" (zero): hand back the other one */
+	return a ? a : b;
+}
+
+/*
+ * Parse an unsigned number with an optional K/M/G suffix (powers of 1024,
+ * either case).  The base is auto-detected by strtoull (0x.. hex, 0.. octal).
+ *
+ * Returns the scaled value if the whole string was consumed and the result
+ * does not exceed @max.  Any other input terminates the program via die():
+ * no digits at all (including a bare suffix such as "k"), trailing garbage,
+ * a value that strtoull reports as out of range, or a value that would
+ * overflow when the suffix is applied.
+ */
+static unsigned long long parse_ull(char *ptr, unsigned long long max)
+{
+	unsigned long long val;
+	unsigned int shift = 0;
+	char *endptr;
+
+	errno = 0;
+	val = strtoull(ptr, &endptr, 0);
+	/* endptr == ptr means strtoull consumed no digits */
+	if (endptr == ptr || errno == ERANGE)
+		die("invalid number '%s'\n", ptr);
+
+	switch (*endptr) {
+	case 'k': case 'K':
+		shift = 10;
+		endptr++;
+		break;
+
+	case 'm': case 'M':
+		shift = 20;
+		endptr++;
+		break;
+
+	case 'g': case 'G':
+		shift = 30;
+		endptr++;
+		break;
+	}
+
+	/* refuse values whose high bits would be shifted out */
+	if (shift && val > (~0ULL >> shift))
+		die("invalid number '%s'\n", ptr);
+	val <<= shift;
+
+	if (!*endptr && val <= max)
+		return val;
+
+	die("invalid number '%s'\n", ptr);
+}
+
+/*
+ * Resolve a host name or dotted quad with gethostbyname() and return the
+ * IPv4 address in host byte order.  Dies if the name does not resolve to
+ * an AF_INET address of the expected length.
+ */
+static uint32_t parse_addr(char *ptr)
+{
+	struct hostent *he = gethostbyname(ptr);
+	uint32_t ipv4;
+
+	if (!he || he->h_addrtype != AF_INET || he->h_length != sizeof(ipv4))
+		die("invalid host name or dotted quad '%s'\n", ptr);
+
+	memcpy(&ipv4, he->h_addr, sizeof(ipv4));
+	return ntohl(ipv4);
+}
+
+/*
+ * Print the command line summary to stdout and exit with status 2.
+ * The default ack size shown is MIN_MSG_BYTES; it is passed as
+ * unsigned int to match the %u conversion in the format string.
+ */
+static void usage(void)
+{
+	printf(
+	"\n"
+	"Send & Recv parameters:\n"
+	" -r [addr]         use this local address\n"
+	" -p [port, 4000]   starting port number\n"
+	"\n"
+	"Send parameters:\n"
+	" -s [addr]         send to this address (required)\n"
+	" -a [bytes, %u]    ack message length\n"
+	" -q [bytes, 1024]  request message length\n"
+	" -d [depth, 1]     request pipeline depth, nr outstanding\n"
+	" -t [nr, 1]        number of child tasks\n"
+	" -T [seconds, 0]   runtime of test, 0 means infinite\n"
+	" -D [bytes]        RDMA size (RDSv3 only)\n"
+	"\n"
+	"Optional flags:\n"
+	" -c                measure cpu use with per-cpu soak processes\n"
+	" -V                trace execution\n"
+	" -z                print a summary at end of test only\n"
+	"\n"
+	"Example:\n"
+	"  recv$ rds-stress\n"
+	"  send$ rds-stress -s recv -q 4096 -t 2 -d 2\n"
+	"\n", (unsigned int) MIN_MSG_BYTES);
+
+	exit(2);
+}
+
+/* Switch the calling process to SCHED_RR at priority 1; dies on failure. */
+static void set_rt_priority(void)
+{
+	struct sched_param	rr;
+	int			rc;
+
+	memset(&rr, 0, sizeof(rr));
+	rr.sched_priority = 1;
+
+	rc = sched_setscheduler(0, SCHED_RR, &rr);
+	if (rc < 0)
+		die_errno("sched_setscheduler(SCHED_RR) failed");
+}
+
+/* This hack lets children notice when their parents die.
+ * We could also use kill(0), but that results in false
+ * positives when the parent is a zombie (and that happens
+ * if you have a script parsing the output of rds-stress,
+ * and the parent dies).
+ */
+static void check_parent(pid_t pid)
+{
+	/* still parented by the process that forked us: nothing to do */
+	if (getppid() == pid)
+		return;
+
+	die("parent %u exited\n", pid);
+}
+
+/*
+ * put a pattern in the message so the remote side can verify that it's
+ * what was expected.
+ */
+static unsigned char *	msg_pattern;
+
+/*
+ * Allocate and fill the global msg_pattern buffer.  It is as large as the
+ * bigger of the request and ack sizes, so that fill_hdr()/check_hdr() can
+ * use it for either message type.  Dies if the allocation fails.
+ */
+static void init_msg_pattern(struct options *opts)
+{
+	unsigned int max_size = max(opts->req_size, opts->ack_size);
+	unsigned int i, k = 11;
+
+	msg_pattern = malloc(max_size);
+	/* malloc(0) may legitimately return NULL; only a real failure is fatal */
+	if (msg_pattern == NULL && max_size != 0)
+		die_errno("unable to allocate %u byte message pattern", max_size);
+
+	/* k = 41 * (k + 3) is a generator of Z(256). Adding
+	 * (i >> 8) makes sure the pattern is shifted by 1 in
+	 * every successive 256 byte block, so that we can detect
+	 * swapped blocks. */
+	for (i = 0; i < max_size; i++, k = 41 * (k + 3) + (i >> 8))
+		msg_pattern[i] = k;
+}
+
+/* 64-bit counterparts of htonl()/ntohl(): byte-swap on little-endian
+ * hosts, identity otherwise. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x)	bswap_64(x)
+#define ntohll(x)	bswap_64(x)
+#else
+#define htonll(x)	(x)
+#define ntohll(x)	(x)
+#endif
+
+/*
+ * Build the wire form of *hdr in *dst.  dst is zeroed first; multi-byte
+ * counters and RDMA fields are converted to network byte order, while the
+ * address and port fields are copied as they are.
+ */
+static void encode_hdr(struct header *dst, const struct header *hdr)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	/* copied verbatim: single bytes, plus addresses and ports, which
+	 * are always kept in network byte order */
+	dst->op = hdr->op;
+	dst->rdma_op = hdr->rdma_op;
+	dst->from_addr = hdr->from_addr;
+	dst->to_addr = hdr->to_addr;
+	dst->from_port = hdr->from_port;
+	dst->to_port = hdr->to_port;
+
+	/* host -> network, 16 and 32 bit */
+	dst->index = htons(hdr->index);
+	dst->seq = htonl(hdr->seq);
+	dst->rdma_size = htonl(hdr->rdma_size);
+
+	/* host -> network, 64 bit */
+	dst->rdma_addr = htonll(hdr->rdma_addr);
+	dst->rdma_phyaddr = htonll(hdr->rdma_phyaddr);
+	dst->rdma_pattern = htonll(hdr->rdma_pattern);
+	dst->rdma_key = htonll(hdr->rdma_key);
+}
+
+/*
+ * Inverse of encode_hdr(): build the host form of the wire header *hdr in
+ * *dst.  dst is zeroed first; address and port fields stay in network byte
+ * order.
+ */
+static void decode_hdr(struct header *dst, const struct header *hdr)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	/* copied verbatim: single bytes, plus addresses and ports, which
+	 * are always kept in network byte order */
+	dst->op = hdr->op;
+	dst->rdma_op = hdr->rdma_op;
+	dst->from_addr = hdr->from_addr;
+	dst->to_addr = hdr->to_addr;
+	dst->from_port = hdr->from_port;
+	dst->to_port = hdr->to_port;
+
+	/* network -> host, 16 and 32 bit */
+	dst->index = ntohs(hdr->index);
+	dst->seq = ntohl(hdr->seq);
+	dst->rdma_size = ntohl(hdr->rdma_size);
+
+	/* network -> host, 64 bit */
+	dst->rdma_addr = ntohll(hdr->rdma_addr);
+	dst->rdma_phyaddr = ntohll(hdr->rdma_phyaddr);
+	dst->rdma_pattern = ntohll(hdr->rdma_pattern);
+	dst->rdma_key = ntohll(hdr->rdma_key);
+}
+
+/*
+ * Write the encoded header at the start of @message (@bytes long in
+ * total).  With opt.verify set, the rest of the message is filled from
+ * msg_pattern.
+ */
+static void fill_hdr(void *message, uint32_t bytes, struct header *hdr)
+{
+	encode_hdr(message, hdr);
+
+	if (!opt.verify)
+		return;
+
+	memcpy((char *) message + sizeof(*hdr), msg_pattern,
+	       bytes - sizeof(*hdr));
+}
+
+/* inet_ntoa uses a static buffer, so calling it twice in
+ * a single printf as we do below will produce undefined
+ * results. We copy the output to two static buffers,
+ * and switch between them.
+ */
+static char *inet_ntoa_32(uint32_t val)
+{
+	static char buffer[2][64];
+	static unsigned int slot = 0;
+	struct in_addr addr;
+	char *out;
+
+	addr.s_addr = val;
+
+	/* flip to the buffer that the previous call did not use */
+	slot ^= 1;
+	out = buffer[slot];
+
+	/* the last byte is never written, so the result stays terminated */
+	strncpy(out, inet_ntoa(addr), sizeof(buffer[0]) - 1);
+
+	return out;
+}
+
+/*
+ * Compare incoming message header with expected header. All header fields
+ * are in host byte order except for address and port fields.
+ */
+/*
+ * Returns 0 if @message (@bytes long) matches the expected header @hdr and,
+ * with opt.verify set, carries the expected payload pattern.  Returns 1
+ * after printing a report otherwise.
+ *
+ * NOTE(review): "bytes - sizeof(*hdr)" is unsigned arithmetic, so this
+ * assumes bytes >= sizeof(struct header) -- confirm callers guarantee it.
+ */
+static int check_hdr(void *message, uint32_t bytes, const struct header *hdr)
+{
+	struct header msghdr;
+
+	decode_hdr(&msghdr, message);
+	/* only the non-RDMA part of the header is compared */
+	if (memcmp(&msghdr, hdr, BASIC_HEADER_SIZE)) {
+		/* expands to three printf arguments: expected value,
+		 * " =" or "!=", received value */
+#define bleh(var, disp)					\
+		disp(hdr->var),				\
+		msghdr.var == hdr->var ? " =" : "!=",	\
+		disp(msghdr.var)
+
+		/*
+		 * This is printed as one GIANT printf() so that it serializes
+		 * with stdout() and we don't get things stomping on each
+		 * other
+		 */
+		printf( "An incoming message had a header which\n"
+			"didn't contain the fields we expected:\n"
+			"    member        expected eq             got\n"
+			"       seq %15u %s %15u\n"
+			" from_addr %15s %s %15s\n"
+			" from_port %15u %s %15u\n"
+			"   to_addr %15s %s %15s\n"
+			"   to_port %15u %s %15u\n"
+			"     index %15u %s %15u\n"
+			"        op %15u %s %15u\n",
+			bleh(seq, /**/),
+			bleh(from_addr, inet_ntoa_32),
+			bleh(from_port, ntohs),
+			bleh(to_addr, inet_ntoa_32),
+			bleh(to_port, ntohs),
+			bleh(index, /**/),
+			bleh(op, /**/));
+#undef bleh
+
+		return 1;
+	}
+
+	if (opt.verify
+	 && memcmp(message + sizeof(*hdr), msg_pattern, bytes - sizeof(*hdr))) {
+		unsigned char *p = message + sizeof(*hdr);
+		unsigned int i, count = 0, total = bytes - sizeof(*hdr);
+		int offset = -1;
+
+		/* find the first mismatching byte and count all of them */
+		for (i = 0; i < total; ++i) {
+			if (p[i] != msg_pattern[i]) {
+				if (offset < 0)
+					offset = i;
+				count++;
+			}
+		}
+
+		printf("An incoming message has a corrupted payload at offset %u; "
+				"%u out of %u bytes corrupted\n",
+				offset, count, total);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Account one sample @val in @ctr: count, sum, minimum and maximum. */
+void stat_inc(struct counter *ctr, uint64_t val)
+{
+	uint64_t lowest = minz(val, ctr->min);
+	uint64_t highest = max(val, ctr->max);
+
+	ctr->nr++;
+	ctr->sum += val;
+	ctr->min = lowest;
+	ctr->max = highest;
+}
+
+/* Returns a - b in microseconds: negative, zero or positive as a is
+ * earlier than, equal to or later than b. */
+int64_t tv_cmp(const struct timeval *a, const struct timeval *b)
+{
+	int64_t lhs, rhs;
+
+	lhs = ((uint64_t)a->tv_sec * 1000000ULL) + a->tv_usec;
+	rhs = ((uint64_t)b->tv_sec * 1000000ULL) + b->tv_usec;
+
+	return lhs - rhs;
+}
+
+/* returns a - b in usecs */
+uint64_t usec_sub(struct timeval *a, struct timeval *b)
+{
+	/* whole seconds first, then the sub-second parts; unsigned
+	 * arithmetic lets a smaller a->tv_usec borrow from the seconds */
+	uint64_t usecs = (uint64_t)(a->tv_sec - b->tv_sec) * 1000000ULL;
+
+	usecs += a->tv_usec;
+	usecs -= b->tv_usec;
+
+	return usecs;
+}
+
+/*
+ * Create a socket, enable SO_REUSEADDR on it and bind it to @sin.
+ * Returns the descriptor; any failure is fatal.
+ */
+static int bound_socket(int domain, int type, int protocol,
+			struct sockaddr_in *sin)
+{
+	int reuse = 1;
+	int fd = socket(domain, type, protocol);
+
+	if (fd < 0)
+		die_errno("socket(%d, %d, %d) failed", domain, type, protocol);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) != 0)
+		die_errno("setsockopt(SO_REUSEADDR) failed");
+
+	if (bind(fd, (struct sockaddr *)sin, sizeof(struct sockaddr_in)) != 0)
+		die_errno("bind() failed");
+
+	return fd;
+}
+
+/*
+ * Store the local address of @fd in *sin (via getsockname) and return its
+ * IPv4 address in host byte order.  Dies if getsockname fails.
+ */
+static uint32_t get_local_address(int fd, struct sockaddr_in *sin)
+{
+	socklen_t alen = sizeof(*sin);
+	int rc;
+
+	rc = getsockname(fd, (struct sockaddr *) sin, &alen);
+	if (rc != 0)
+		die_errno("getsockname failed");
+
+	return ntohl(sin->sin_addr.s_addr);
+}
+
+/*
+ * Create the RDS socket of one task, bound to @sin.
+ *
+ * The send and receive buffers are sized from the task count, queue depth
+ * and message sizes; a warning goes to stderr if the kernel grants less
+ * (unless opts->suppress_warnings is set).  Congestion monitoring is
+ * enabled on request and switched off in *opts if the kernel does not know
+ * the option.  The socket is returned in non-blocking mode.
+ */
+static int rds_socket(struct options *opts, struct sockaddr_in *sin)
+{
+	socklen_t len;
+	int wanted;
+	int granted;
+	int enable = 1;
+	int fd = bound_socket(PF_RDS, SOCK_SEQPACKET, 0, sin);
+
+	wanted = opts->nr_tasks * opts->req_depth *
+		(opts->req_size + opts->ack_size) * 2;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &wanted, sizeof(wanted)) != 0)
+		die_errno("setsockopt(SNDBUF, %d) failed", wanted);
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &wanted, sizeof(wanted)) != 0)
+		die_errno("setsockopt(RCVBUF, %d) failed", wanted);
+
+	/* read both sizes back and compare half of each with the request */
+	len = sizeof(granted);
+	if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &granted, &len) != 0)
+		die_errno("getsockopt(SNDBUF) failed");
+	if (!opts->suppress_warnings && granted / 2 < wanted)
+		fprintf(stderr,
+			"getsockopt(SNDBUF) returned %d, we wanted %d * 2\n",
+			granted, wanted);
+
+	len = sizeof(granted);
+	if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &granted, &len) != 0)
+		die_errno("getsockopt(RCVBUF) failed");
+	if (!opts->suppress_warnings && granted / 2 < wanted)
+		fprintf(stderr,
+			"getsockopt(RCVBUF) returned %d, we need %d * 2\n",
+			granted, wanted);
+
+	if (opts->use_cong_monitor) {
+		if (setsockopt(fd, SOL_RDS, RDS_CONG_MONITOR,
+			       &enable, sizeof(enable)) != 0) {
+			/* only "option unknown" is tolerated */
+			if (errno != ENOPROTOOPT)
+				die_errno("setsockopt(RDS_CONG_MONITOR) failed");
+			printf("Kernel does not support congestion monitoring; disabled\n");
+			opts->use_cong_monitor = 0;
+		}
+	}
+
+	fcntl(fd, F_SETFL, O_NONBLOCK);
+
+	return fd;
+}
+
+/*
+ * Probe whether the kernel's RDS implementation supports RDMA.
+ *
+ * A throw-away RDS socket is bound to the local address and an all-zero
+ * RDS_FREE_MR request is issued on it.  Returns 1 if the option is
+ * accepted, 0 if the kernel answers ENOPROTOOPT; any other error is fatal.
+ * Also returns 1 without probing when no local address is known yet
+ * (opts->receive_addr == 0).
+ */
+static int check_rdma_support(struct options *opts)
+{
+	struct sockaddr_in sin;
+	struct rds_free_mr_args args;
+	int fd, okay = 0;
+
+	/* We need a local address to bind to. If the user
+	 * didn't specify the -r option, we tell him to go on for
+	 * now - he'll call back once more later. */
+	if (opts->receive_addr == 0)
+		return 1;
+
+	/* zero the whole structure so that bind() never sees
+	 * uninitialized padding (sin_zero) */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(opts->starting_port);
+	sin.sin_addr.s_addr = htonl(opts->receive_addr);
+
+	fd = bound_socket(AF_RDS, SOCK_SEQPACKET, 0, &sin);
+
+	memset(&args, 0, sizeof(args));
+	if (setsockopt(fd, SOL_RDS, RDS_FREE_MR, &args, sizeof(args)) >= 0) {
+		okay = 1;
+	} else if (errno == ENOPROTOOPT) {
+		okay = 0;
+	} else {
+		die_errno("%s: RDS_FREE_MR failed with unexpected error",
+				__FUNCTION__);
+	}
+	close(fd);
+
+	return okay;
+}
+
+/*
+ * Register @size bytes at @addr with RDS_GET_MR on @fd and return the
+ * cookie the kernel stores for the region.  The region is always
+ * registered read/write; opt.rdma_use_once adds RDS_RDMA_USE_ONCE.
+ * Failure is fatal.  Increments mrs_allocated.
+ */
+static uint64_t get_rdma_key(int fd, uint64_t addr, uint32_t size)
+{
+	struct rds_get_mr_args mr_args;
+	uint64_t cookie = 0;
+
+	mr_args.flags = RDS_RDMA_READWRITE; /* for now, always assume r/w */
+	if (opt.rdma_use_once)
+		mr_args.flags |= RDS_RDMA_USE_ONCE;
+	mr_args.cookie_addr = ptr64(&cookie);
+	mr_args.vec.bytes = size;
+	mr_args.vec.addr = addr;
+
+	if (setsockopt(fd, SOL_RDS, RDS_GET_MR, &mr_args, sizeof(mr_args)) != 0)
+		die_errno("setsockopt(RDS_GET_MR) failed (%u allocated)", mrs_allocated);
+
+	trace("RDS get_rdma_key() = %Lx\n",
+				(unsigned long long) cookie);
+
+	++mrs_allocated;
+	return cookie;
+}
+
+/*
+ * Release the memory region identified by @key with RDS_FREE_MR on @fd.
+ * Failure is fatal.  Decrements mrs_allocated.
+ */
+static void free_rdma_key(int fd, uint64_t key)
+{
+	struct rds_free_mr_args mr_args;
+
+	trace("RDS free_rdma_key(%Lx)\n", (unsigned long long) key);
+
+#if 1
+	mr_args.flags = 0;
+#else
+	mr_args.flags = RDS_FREE_MR_ARGS_INVALIDATE;
+#endif
+	mr_args.cookie = key;
+
+	if (setsockopt(fd, SOL_RDS, RDS_FREE_MR, &mr_args, sizeof(mr_args)) != 0)
+		die_errno("setsockopt(RDS_FREE_MR) failed");
+
+	--mrs_allocated;
+}
+
+/*
+ * RDMA key-o-meter. We track how frequently the kernel
+ * re-issues R_Keys
+ *
+ * The key_o_meter data structures are shared between the processes
+ * without any locking. We don't care much for locking here...
+ */
+/* Total number of key stamps available, divided evenly among the tasks
+ * by rdma_key_o_meter_init(). */
+#define RDMA_MAX_TRACKED_KEYS	(32*1024)
+/* One observed R_Key and the time it was recorded. */
+struct rdma_key_stamp {
+	uint32_t	r_key;
+	struct timeval	issued;
+};
+/* The stamps recorded by one task. */
+struct rdma_key_trace {
+	uint32_t	count, max;	/* entries used / entries available */
+	struct rdma_key_stamp *entry;
+};
+/* Two arrays of per-task traces: tasks append to current[], while
+ * rdma_key_o_meter_check() drains idle[] and then swaps the two. */
+struct rdma_key_o_meter {
+	struct rdma_key_trace *current;
+	struct rdma_key_trace *idle;
+};
+/* NULL until rdma_key_o_meter_init() has run; the add/check functions
+ * do nothing while it is NULL. */
+static struct rdma_key_o_meter *rdma_key_o_meter;
+/* index of this process's trace, set by rdma_key_o_meter_set_self() */
+static unsigned int rdma_key_task;
+
+/*
+ * Set up the key-o-meter for @nr_tasks tasks.
+ *
+ * One shared anonymous mapping holds, in this order: the rdma_key_o_meter
+ * itself, the current[] traces, the idle[] traces, and the stamp storage.
+ * The RDMA_MAX_TRACKED_KEYS stamps are divided evenly between the tasks;
+ * each task gets that share once for its current trace and once for its
+ * idle trace.  Dies if @nr_tasks is 0 or the mapping fails.
+ */
+static void rdma_key_o_meter_init(unsigned int nr_tasks)
+{
+	struct rdma_key_trace *kt;
+	struct rdma_key_stamp *ks;
+	uint32_t max;
+	unsigned int i;
+	size_t size;
+	void *base;
+
+	/* the per-task share below divides by nr_tasks */
+	if (nr_tasks == 0)
+		die("rdma_key_o_meter_init: no tasks\n");
+
+	size = sizeof(struct rdma_key_o_meter)
+			+ 2 * (size_t) nr_tasks * sizeof(*kt)
+			+ 2 * (size_t) RDMA_MAX_TRACKED_KEYS * sizeof(*ks);
+	/* portable code passes fd -1 for anonymous mappings */
+	base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+	if (base == MAP_FAILED)
+		die_errno("rdma_key_o_meter_init: mmap failed");
+
+	rdma_key_o_meter = (struct rdma_key_o_meter *) base;
+	base = rdma_key_o_meter + 1;
+
+	rdma_key_o_meter->current = (struct rdma_key_trace *) base;
+	base = rdma_key_o_meter->current + nr_tasks;
+
+	rdma_key_o_meter->idle = (struct rdma_key_trace *) base;
+	base = rdma_key_o_meter->idle + nr_tasks;
+
+	ks = (struct rdma_key_stamp *) base;
+	max = RDMA_MAX_TRACKED_KEYS / nr_tasks;
+	/* current[] and idle[] are adjacent, so one loop initializes both */
+	for (i = 0, kt = rdma_key_o_meter->current; i < 2 * nr_tasks; ++i, ++kt) {
+		kt->count = 0;
+		kt->max = max;
+		kt->entry = ks + i * max;
+	}
+}
+
+/* This is called in the child process to set the index of
+ * the key-o-meter to use */
+static void rdma_key_o_meter_set_self(unsigned int task_idx)
+{
+	/* selects which current[] trace rdma_key_o_meter_add() appends to */
+	rdma_key_task = task_idx;
+}
+
+/*
+ * Record @key together with the current time in this task's current
+ * trace.  Does nothing if the key-o-meter is not set up or the trace is
+ * already full.
+ */
+static void rdma_key_o_meter_add(uint32_t key)
+{
+	struct rdma_key_trace *kt;
+	struct rdma_key_stamp *stamp;
+
+	if (!rdma_key_o_meter)
+		return;
+
+	kt = &rdma_key_o_meter->current[rdma_key_task];
+	if (kt->count >= kt->max)
+		return;
+
+	stamp = &kt->entry[kt->count];
+	stamp->r_key = key;
+	gettimeofday(&stamp->issued, NULL);
+	kt->count++;
+}
+
+/*
+ * qsort() comparator for struct rdma_key_stamp: orders by r_key first and
+ * by issue time second.
+ *
+ * tv_cmp() returns a 64-bit microsecond difference.  It is reduced to
+ * -1/0/1 here, because returning it directly would truncate it to int and
+ * could flip or zero the sign for stamps more than about 35 minutes apart.
+ */
+static int rdma_key_stamp_compare(const void *p1, const void *p2)
+{
+	const struct rdma_key_stamp *ks1 = p1, *ks2 = p2;
+	int64_t delta;
+
+	if (ks1->r_key < ks2->r_key)
+		return -1;
+	if (ks1->r_key > ks2->r_key)
+		return 1;
+
+	delta = tv_cmp(&ks1->issued, &ks2->issued);
+	return (delta > 0) - (delta < 0);
+}
+
+/*
+ * Report how many R_Keys were handed out more than once.
+ *
+ * Collects the stamps from every task's idle trace (resetting each trace),
+ * sorts them by key and time, and counts adjacent stamps with the same
+ * key.  If any are found, prints their number together with the smallest
+ * and the average time between re-issues.  Finally swaps the current and
+ * idle trace arrays, so that the stamps recorded meanwhile are examined by
+ * the next call.  Does nothing if the key-o-meter is not set up.
+ */
+static void rdma_key_o_meter_check(unsigned int nr_tasks)
+{
+	/* note: sorted[] is a large on-stack array */
+	struct rdma_key_stamp *ks, sorted[RDMA_MAX_TRACKED_KEYS];
+	struct rdma_key_trace *kt;
+	unsigned int i, j, count = 0;
+	unsigned int reissued = 0;
+	double min_elapsed = 0, avg_elapsed = 0;
+
+	if (!rdma_key_o_meter)
+		return;
+
+	/* Extract keys from all tasks and sort them. */
+	kt = rdma_key_o_meter->idle;
+	for (i = 0; i < nr_tasks; ++i, ++kt) {
+		ks = kt->entry;
+
+		/* the traces live in unlocked shared memory, so never
+		 * trust their counts beyond the size of sorted[] */
+		for (j = 0; j < kt->count && count < RDMA_MAX_TRACKED_KEYS; ++j)
+			sorted[count++] = *ks++;
+		kt->count = 0;
+	}
+	qsort(sorted, count, sizeof(*sorted), rdma_key_stamp_compare);
+
+	/* Now see how many were reissued */
+	ks = sorted;
+	for (i = 0; i + 1 < count; ++i, ++ks) {
+		double elapsed;
+
+		if (ks[0].r_key != ks[1].r_key)
+			continue;
+		elapsed = 1e-6 * usec_sub(&ks[1].issued, &ks[0].issued);
+		if (reissued == 0 || elapsed < min_elapsed)
+			min_elapsed = elapsed;
+		avg_elapsed += elapsed;
+		/* without this the report below could never be printed */
+		reissued++;
+	}
+
+	if (reissued)
+		printf(" *** %u R_Keys were re-issued; min distance=%f sec, avg distance=%f sec\n",
+				reissued, min_elapsed, avg_elapsed / reissued);
+
+	/* Swap current and idle */
+	kt = rdma_key_o_meter->current;
+	rdma_key_o_meter->current = rdma_key_o_meter->idle;
+	rdma_key_o_meter->idle = kt;
+}
+
+/*
+ * Fill @size bytes at @buf with the 64-bit @pattern, repeated.
+ *
+ * @buf must be suitably aligned for uint64_t stores.  If @size is not a
+ * multiple of 8, the trailing bytes receive the leading bytes of the
+ * pattern, so nothing is ever written beyond @size.
+ */
+static void rds_fill_buffer(void *buf, size_t size, uint64_t pattern)
+{
+	uint64_t *pos = (uint64_t *) buf;
+	size_t words = size / sizeof(pattern);
+	size_t tail = size % sizeof(pattern);
+
+	while (words--)
+		*pos++ = pattern;
+
+	/* partial last word: copy only the bytes that still fit */
+	if (tail)
+		memcpy(pos, &pattern, tail);
+}
+
+#if 0
+/* Debugging aid, compiled out: prints @size bytes at @buf as 64-bit hex
+ * words, four per line, each line prefixed with the word index. */
+static void  rds_dump_buffer(const void *buf, size_t size)
+{
+	const uint64_t *pos;
+	unsigned int i, count;
+
+	pos = (const uint64_t *) buf;
+
+	count = size / sizeof(uint64_t);
+	pos = (const uint64_t *) buf;
+
+	printf("rds_dump_buffer(%p, %u)\n", buf, (int) size);
+	for (i = 0; i < count; ++i) {
+		if ((i % 4) == 0)
+			printf("\n%08x:", i);
+		printf(" %016Lx", (unsigned long long) *pos++);
+	}
+}
+#endif
+
+/*
+ * Check that every whole 64-bit word in the first @size bytes at @addr
+ * equals @pattern.  The outcome is only reported through trace(): one
+ * line per mismatching word, or a single "pass" line.
+ */
+static void rds_compare_buffer(uint64_t *addr, int size, uint64_t pattern)
+{
+	int mismatch = 0;
+	int d;
+
+	for (d = 0; d < size / sizeof(uint64_t); d++) {
+		if (addr[d] != pattern) {
+			mismatch = 1;
+			trace("compare fail pattern offset %u: expected %Lx got %Lx\n",
+					8 * d,
+					(unsigned long long) pattern,
+					(unsigned long long) addr[d]);
+
+#if 0
+			rds_dump_buffer(addr, size);
+			die("compare pass\n");
+#endif
+		}
+	}
+
+	if (mismatch)
+		return;
+
+	trace("compare pass pattern %Lx addr %p\n",
+		(unsigned long long) pattern, addr);
+}
+
+/*
+ * State kept per task.  The per-slot arrays below are indexed by queue
+ * slot (0 .. req_depth-1).
+ */
+struct task {
+	unsigned int		nr;		/* task number; part of the RDMA user token */
+	unsigned int		pending;
+	unsigned int		unacked;
+	struct sockaddr_in	src_addr;	/* same for all tasks */
+	struct sockaddr_in	dst_addr;
+	unsigned char		congested;
+	unsigned char		drain_rdmas;	/* cleared by rdma_mark_completed() */
+	uint32_t		send_seq;
+	uint32_t		recv_seq;
+	uint16_t		send_index;	/* slot used by rdma_build_req() */
+	uint16_t		recv_index;
+	struct timeval *	send_time;
+	struct header *		ack_header;
+
+	/* RDMA related stuff */
+	uint64_t **		local_buf;	/* per slot, set in alloc_rdma_buffers() */
+	uint64_t **		rdma_buf;	/* per slot, set in alloc_rdma_buffers() */
+	uint64_t *		rdma_req_key;	/* per slot; 0 = no key obtained yet */
+	uint8_t *		rdma_inflight;	/* per slot; cleared on completion */
+	uint32_t		buffid;
+	uint8_t			rdma_next_op;	/* RDMA_OP_READ/WRITE for the next request */
+};
+
+/*
+ * Allocate the RDMA buffers for all tasks.
+ *
+ * @t points to the first of opts->nr_tasks consecutive tasks.  Their
+ * rdma_buf, local_buf, rdma_req_key and rdma_inflight arrays must already
+ * have room for opts->req_depth entries.  Each slot receives an rdma_buf
+ * and a local_buf of opts->rdma_size bytes, carved out of one private
+ * anonymous mapping that is pre-filled with 0x2f.  Keys and in-flight
+ * flags are reset to 0.  Dies if the mapping fails.
+ *
+ * NOTE(review): only one page of slack is reserved for the
+ * opts->rdma_alignment offset, so that offset is assumed not to exceed
+ * sys_page_size -- confirm where the option is parsed.
+ */
+static void alloc_rdma_buffers(struct task *t, struct options *opts)
+{
+	unsigned int i, j;
+	size_t len;
+	caddr_t	base;
+
+	/* We use mmap here rather than malloc, because it is always
+	 * page aligned.  The product is computed in size_t so that it is
+	 * not truncated to 32 bits before being widened. */
+	len = (size_t) 2 * opts->nr_tasks * opts->req_depth * opts->rdma_size
+		+ sys_page_size;
+	/* portable code passes fd -1 for anonymous mappings */
+	base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	if (base == MAP_FAILED)
+		die_errno("alloc_rdma_buffers: mmap failed");
+	memset(base, 0x2f, len);
+	base += opts->rdma_alignment;
+
+	for (i = 0; i < opts->nr_tasks; ++i, ++t) {
+		for (j = 0; j < opts->req_depth; ++j) {
+			t->rdma_buf[j] = (uint64_t *) base;
+			base += opts->rdma_size;
+
+			t->local_buf[j] = (uint64_t *) base;
+			base += opts->rdma_size;
+
+			t->rdma_req_key[j] = 0;
+			t->rdma_inflight[j] = 0;
+		}
+	}
+}
+
+/*
+ * Fill the RDMA fields of the request header @hdr for the task's current
+ * send slot.
+ *
+ * With opt.rdma_use_get_mr set, a key is obtained for the slot's RDMA
+ * buffer the first time the slot is used.  Successive requests alternate
+ * between RDMA READ and WRITE.  With opt.verify set, the buffer is filled
+ * with the request's pattern for a READ and with zeroes for a WRITE.
+ * @req_depth is not used.
+ */
+static void rdma_build_req(int fd, struct header *hdr, struct task *t,
+		unsigned int rdma_size, unsigned int req_depth)
+{
+	uint64_t *buf = t->rdma_buf[t->send_index];
+	uint64_t *key = &t->rdma_req_key[t->send_index];
+	int is_read;
+
+	if (opt.rdma_use_get_mr && *key == 0)
+		*key = get_rdma_key(fd, ptr64(buf), rdma_size);
+
+	/* We alternate between RDMA READ and WRITEs */
+	hdr->rdma_op = t->rdma_next_op;
+	t->rdma_next_op = RDMA_OP_TOGGLE(t->rdma_next_op);
+	is_read = (hdr->rdma_op == RDMA_OP_READ);
+
+	hdr->rdma_pattern = (((uint64_t) t->send_seq) << 32) | getpid();
+	hdr->rdma_key = *key;
+	hdr->rdma_size = rdma_size;
+	hdr->rdma_phyaddr = 0;
+	hdr->rdma_addr = ptr64(buf);
+
+	if (opt.verify)
+		rds_fill_buffer(buf, rdma_size,
+				is_read ? hdr->rdma_pattern : 0);
+
+	if (is_read) {
+		trace("Requesting RDMA read for pattern %Lx "
+				"local addr to rdma read %p\n",
+				(unsigned long long) hdr->rdma_pattern,
+				buf);
+	} else {
+		trace("Requesting RDMA write for pattern %Lx "
+				"local addr to rdma write %p\n",
+				(unsigned long long) hdr->rdma_pattern,
+				buf);
+	}
+}
+
+/*
+ * Sanity-check the RDMA fields of a received request header: the size
+ * must equal opts->rdma_size and the operation must be a READ or a WRITE.
+ * Anything else is fatal.  Valid requests are logged through trace().
+ */
+static void rdma_validate(const struct header *in_hdr, struct options *opts)
+{
+	const unsigned long rdma_size = in_hdr->rdma_size;
+	const int is_write = (in_hdr->rdma_op == RDMA_OP_WRITE);
+
+	if (rdma_size != opts->rdma_size)
+		die("Unexpected RDMA size %lu in request\n", rdma_size);
+
+	if (!is_write && in_hdr->rdma_op != RDMA_OP_READ)
+		die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op);
+
+	trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n",
+		is_write ? "write to" : "read from",
+		rdma_size,
+		(unsigned long long) in_hdr->rdma_addr,
+		(unsigned long long) in_hdr->rdma_key,
+		(unsigned long long) in_hdr->rdma_pattern);
+}
+
+/* Copy all RDMA fields of the request header @in_hdr into the ack header
+ * @hdr, unchanged. */
+static void rdma_build_ack(struct header *hdr, const struct header *in_hdr)
+{
+	/* what to do, and with which data */
+	hdr->rdma_op = in_hdr->rdma_op;
+	hdr->rdma_pattern = in_hdr->rdma_pattern;
+
+	/* the remote's buffer to rdma to / from */
+	hdr->rdma_addr = in_hdr->rdma_addr;
+	hdr->rdma_phyaddr = in_hdr->rdma_phyaddr;
+	hdr->rdma_size = in_hdr->rdma_size;
+	hdr->rdma_key = in_hdr->rdma_key;
+}
+
+/*
+ * Build the token that identifies one queue slot of one task:
+ * task number times the request depth, plus the slot index.
+ */
+static inline unsigned int rdma_user_token(struct task *t, unsigned int qindex)
+{
+	unsigned int first_slot = t->nr * opt.req_depth;
+
+	return first_slot + qindex;
+}
+
+/*
+ * Handle an RDMA completion notification.  The token is split back into
+ * a task index (token / req_depth) and a queue slot (token % req_depth).
+ * A non-zero status is reported on stdout; in every case the slot's
+ * in-flight flag and the task's drain_rdmas flag are cleared.
+ */
+static void rdma_mark_completed(struct task *tasks, unsigned int token, int status)
+{
+	struct task *owner;
+	unsigned int slot;
+
+	trace("RDS rdma completion for token %x\n", token);
+
+	owner = &tasks[token / opt.req_depth];
+	slot = token % opt.req_depth;
+
+	if (status != 0) {
+		const char *reason = "unknown error";
+
+		if (status == RDS_RDMA_REMOTE_ERROR)
+			reason = "remote error";
+		else if (status == RDS_RDMA_CANCELED)
+			reason = "operation was cancelled";
+		else if (status == RDS_RDMA_DROPPED)
+			reason = "operation was dropped";
+		else if (status == RDS_RDMA_OTHER_ERROR)
+			reason = "other error";
+
+		printf("%s:%u: RDMA op %u failed: %s\n",
+				inet_ntoa(owner->dst_addr.sin_addr),
+				ntohs(owner->dst_addr.sin_port),
+				slot, reason);
+	}
+
+	owner->rdma_inflight[slot] = 0;
+	owner->drain_rdmas = 0;
+}
+
+#define MSG_MAXIOVLEN 2
+
+/*
+ * Attach a single SOL_RDS control message of the given type to msg.
+ *
+ * The payload (size bytes at ptr) is copied into a static buffer that
+ * msg->msg_control is pointed at, so:
+ *  - only one control message per msghdr is supported; a second call
+ *    replaces the first, and
+ *  - the buffer is shared by all callers and is only valid until the
+ *    next call.
+ *
+ * Dies if the payload does not fit into the static buffer.
+ */
+static void rdma_put_cmsg(struct msghdr *msg, int type,
+			const void *ptr, size_t size)
+{
+	/* The union guarantees the alignment CMSG_FIRSTHDR() relies on. */
+	static union {
+		char		buf[1024];
+		struct cmsghdr	align;
+	} ctl;
+	struct cmsghdr *cmsg;
+
+	if (CMSG_SPACE(size) > sizeof(ctl.buf))
+		die("control message payload too large (%zu bytes)\n", size);
+
+	msg->msg_control = ctl.buf;
+	msg->msg_controllen = CMSG_SPACE(size);
+
+	cmsg = CMSG_FIRSTHDR(msg);
+	cmsg->cmsg_level = SOL_RDS;
+	cmsg->cmsg_type = type;
+	cmsg->cmsg_len = CMSG_LEN(size);
+	memcpy(CMSG_DATA(cmsg), ptr, size);
+}
+
+/*
+ * This sets up all the fields for an RDMA transfer.
+ * The request is passed as a control message along with
+ * the ACK packet.
+ *
+ *  msg		message the RDS_CMSG_RDMA_ARGS cmsg is attached to
+ *  hdr		ACK header carrying the mirrored RDMA request fields
+ *  user_token	token handed back in the completion notification
+ *  local_buf	local buffer to RDMA from (write) or into (read)
+ *
+ * The local iovec is static because the control message only carries
+ * its address; it has to stay valid until sendmsg() has been called.
+ */
+static void rdma_build_cmsg_xfer(struct msghdr *msg, const struct header *hdr,
+		unsigned int user_token, void *local_buf)
+{
+	static struct rds_iovec iov;
+	struct rds_rdma_args args;
+	unsigned int rdma_size;
+
+	rdma_size = hdr->rdma_size;
+
+	trace("RDS issuing rdma for token %x key %Lx len %u local_buf %p\n",
+			user_token,
+			(unsigned long long) hdr->rdma_key,
+			rdma_size, local_buf);
+
+	/* rdma args */
+	memset(&args, 0, sizeof(args));
+
+	/* Set up the iovec pointing to the RDMA buffer.
+	 * Use ptr64() for the pointer conversion, like everywhere else,
+	 * instead of a raw pointer-to-uint64_t cast. */
+	args.local_vec_addr = ptr64(&iov);
+	args.nr_local = 1;
+	iov.addr = ptr64(local_buf);
+	iov.bytes = rdma_size;
+
+	/* The remote could either give us a physical address, or
+	 * an index into a zero-based FMR. Either way, we just copy it.
+	 */
+	args.remote_vec.addr = hdr->rdma_phyaddr;
+	args.remote_vec.bytes = rdma_size;
+	args.cookie = hdr->rdma_key;
+
+	/* read or write */
+	switch (hdr->rdma_op) {
+	case RDMA_OP_WRITE:
+		args.flags = RDS_RDMA_READWRITE;
+
+		/* We are the data source: stamp the pattern the peer
+		 * will verify once it sees the ACK. */
+		if (opt.verify)
+			rds_fill_buffer(local_buf, rdma_size, hdr->rdma_pattern);
+		break;
+
+	case RDMA_OP_READ:
+		args.flags = 0;
+		break;
+	}
+
+	/* Fence off subsequent SENDs - this is the default */
+	if (opt.rdma_use_fence)
+		args.flags |= RDS_RDMA_FENCE;
+
+	/* Always ask for a completion notification carrying our token */
+	args.flags |= RDS_RDMA_NOTIFY_ME;
+	args.user_token = user_token;
+
+	rdma_put_cmsg(msg, RDS_CMSG_RDMA_ARGS, &args, sizeof(args));
+}
+
+/*
+ * Attach an RDS_CMSG_RDMA_DEST control message carrying the given
+ * RDMA cookie to msg.
+ */
+static void rdma_build_cmsg_dest(struct msghdr *msg, rds_rdma_cookie_t rdma_dest)
+{
+	rdma_put_cmsg(msg, RDS_CMSG_RDMA_DEST, &rdma_dest, sizeof(rdma_dest));
+}
+
+/*
+ * Attach an RDS_CMSG_RDMA_MAP control message to msg, describing the
+ * buffer [addr, addr + size).  cookie_addr is set to the address of
+ * *cookie, so the caller must keep *cookie alive across the send.
+ *
+ * The mapping is always requested read/write; RDS_RDMA_USE_ONCE is
+ * added when opt.rdma_use_once is set.
+ */
+static void rdma_build_cmsg_map(struct msghdr *msg, uint64_t addr, uint32_t size,
+			rds_rdma_cookie_t *cookie)
+{
+	struct rds_get_mr_args args;
+
+	/* Zero the whole structure first, as rdma_build_cmsg_xfer() does,
+	 * so no uninitialized stack bytes are handed to the kernel. */
+	memset(&args, 0, sizeof(args));
+
+	args.vec.addr = addr;
+	args.vec.bytes = size;
+	args.cookie_addr = ptr64(cookie);
+	args.flags = RDS_RDMA_READWRITE; /* for now, always assume r/w */
+	if (opt.rdma_use_once)
+		args.flags |= RDS_RDMA_USE_ONCE;
+
+	rdma_put_cmsg(msg, RDS_CMSG_RDMA_MAP, &args, sizeof(args));
+}
+
+/*
+ * Handle the RDMA part of an incoming ACK: release the memory region
+ * unless it was use-once or is being cached, account the transferred
+ * bytes and, for writes with verification on, check the buffer contents.
+ */
+static void rdma_process_ack(int fd, struct header *hdr,
+		struct child_control *ctl)
+{
+	int must_free_mr = !(opt.rdma_use_once || opt.rdma_cache_mrs);
+
+	trace("RDS rcvd rdma %s ACK for request key %Lx len %u local addr %Lx\n",
+		  RDMA_OP_WRITE == hdr->rdma_op ? "write" : "read",
+		  (unsigned long long) hdr->rdma_key,
+		  hdr->rdma_size,
+		  (unsigned long long) hdr->rdma_addr);
+
+	/* MRs that are neither use-once nor cached have to be freed by us */
+	if (must_free_mr)
+		free_rdma_key(fd, hdr->rdma_key);
+
+	/* Accounting is from the local buffer's point of view: an ACKed
+	 * write means the peer put data into our buffer (counted as RDMA
+	 * read bytes), an ACKed read means the peer fetched data from it
+	 * (counted as RDMA write bytes).
+	 */
+	if (hdr->rdma_op == RDMA_OP_WRITE) {
+		stat_inc(&ctl->cur[S_RDMA_READ_BYTES],  hdr->rdma_size);
+
+		if (opt.verify) {
+			/* Going through unsigned long keeps 32bit
+			 * builds free of cast warnings. */
+			void *landed = (void *)(unsigned long) hdr->rdma_addr;
+
+			rds_compare_buffer(landed,
+				hdr->rdma_size,
+				hdr->rdma_pattern);
+		}
+	} else if (hdr->rdma_op == RDMA_OP_READ) {
+		stat_inc(&ctl->cur[S_RDMA_WRITE_BYTES],  hdr->rdma_size);
+	}
+}
+
+/*
+ * Initialize hdr from scratch for a message of type op sent by task t
+ * for queue slot qindex.  All fields not set here (including the RDMA
+ * ones) end up zero.
+ */
+static void build_header(struct task *t, struct header *hdr,
+		unsigned int op, unsigned int qindex)
+{
+	memset(hdr, 0, sizeof(*hdr));
+
+	/* message identity */
+	hdr->op = op;
+	hdr->index = qindex;
+	hdr->seq = t->send_seq;
+
+	/* endpoints, copied as stored in the task's socket addresses */
+	hdr->to_port = t->dst_addr.sin_port;
+	hdr->to_addr = t->dst_addr.sin_addr.s_addr;
+	hdr->from_port = t->src_addr.sin_port;
+	hdr->from_addr = t->src_addr.sin_addr.s_addr;
+}
+
+/*
+ * Send one size-byte packet described by hdr from task t to t->dst_addr.
+ *
+ * For OP_REQ packets with an RDMA operation a DEST or MAP control message
+ * is attached; for OP_ACK packets with an RDMA operation the RDMA transfer
+ * itself is attached as a control message.
+ *
+ * Returns the number of bytes sent, or a negative value with errno set to
+ *  EAGAIN / ENOBUFS  when sendmsg() reported them, or
+ *  EBADSLT           when the ACK's queue slot still has an RDMA in flight.
+ * Any other sendmsg() error, or a short send, is fatal.
+ * On success t->send_seq is advanced.
+ */
+static int send_packet(int fd, struct task *t,
+		struct header *hdr, unsigned int size)
+{
+	unsigned char buf[size], *rdma_flight_recorder = NULL;
+	rds_rdma_cookie_t cookie = 0;
+	struct msghdr msg;
+	struct iovec iov;
+	ssize_t ret;
+
+	/* Make sure we always have the current sequence number.
+	 * When we send ACK packets, the seq that gets filled in is
+	 * stale. */
+	hdr->seq = t->send_seq;
+	fill_hdr(buf, size, hdr);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name  = (struct sockaddr *) &t->dst_addr;
+	msg.msg_namelen = sizeof(t->dst_addr);
+
+	msg.msg_iovlen = 1;
+	msg.msg_iov = &iov;
+	iov.iov_base = buf;
+	iov.iov_len = size;
+
+	/* If this is a REQ packet in which we pass the MR to the
+	 * peer, extract the RDMA cookie and pass it on in the control
+	 * message for now. */
+	if (hdr->op == OP_REQ && hdr->rdma_op != 0) {
+		if (hdr->rdma_key != 0) {
+			/* We used GET_MR to obtain a key */
+			rdma_build_cmsg_dest(&msg, hdr->rdma_key);
+			cookie = hdr->rdma_key;
+			/* NOTE(review): buf was already filled from hdr
+			 * above, so this presumably only clears the key in
+			 * the caller's header, not in the bytes sent -
+			 * confirm against fill_hdr(). */
+			hdr->rdma_key = 0;
+		} else {
+			/* Use the RDMA_MAP cmsg to have sendmsg do the
+			 * mapping on the fly. */
+			rdma_build_cmsg_map(&msg, hdr->rdma_addr,
+					hdr->rdma_size, &cookie);
+		}
+	}
+
+	/* If this is an ACK packet with RDMA, build the cmsg
+	 * header that goes with it. */
+	if (hdr->op == OP_ACK && hdr->rdma_op != 0) {
+		unsigned int qindex = hdr->index;
+
+		if (t->rdma_inflight[qindex] != 0) {
+			/* It is unlikely but (provably) possible for
+			 * new requests to arrive before the RDMA notification.
+			 * That's because RDMA notifications are triggered
+			 * by the RDS ACK processing, which happens after new
+			 * messages were queued on the socket.
+			 *
+			 * We return one of the more obscure error messages,
+			 * which we recognize and handle in the top loop. */
+			trace("Drain RDMA 0x%x\n", rdma_user_token(t, qindex));
+			errno = EBADSLT;
+			return -1;
+		}
+		rdma_build_cmsg_xfer(&msg, hdr,
+				rdma_user_token(t, qindex),
+				t->local_buf[qindex]);
+		/* Only mark the slot busy once sendmsg() has succeeded */
+		rdma_flight_recorder = &t->rdma_inflight[qindex];
+	}
+
+	ret = sendmsg(fd, &msg, 0);
+	if (ret < 0) {
+		/* EAGAIN and ENOBUFS are left for the caller to handle */
+		if (errno != EAGAIN && errno != ENOBUFS)
+			die_errno("sendto() failed");
+		return ret;
+	}
+	if (ret != size)
+		die("sendto() truncated - %zd", ret);
+
+	if (rdma_flight_recorder)
+		*rdma_flight_recorder = 1;
+	if (cookie) {
+		/* We just happen to know that the r_key is in the
+		 * lower 32bit of the cookie */
+		rdma_key_o_meter_add(cookie);
+	}
+	t->send_seq++;
+	return ret;
+}
+
+/*
+ * Send one request (OP_REQ) for task t in queue slot t->send_index.
+ *
+ * Once more than 10 packets have been sent on this task and an RDMA size
+ * is configured, the request also carries an RDMA request built by
+ * rdma_build_req().
+ *
+ * Returns send_packet()'s result.  On failure no task state is changed
+ * here; on success the send time is recorded, statistics are updated,
+ * send_index advances (modulo req_depth) and pending is incremented.
+ */
+static int send_one(int fd, struct task *t,
+		struct options *opts,
+		struct child_control *ctl)
+{
+	struct timeval start;
+	struct timeval stop;
+	struct header hdr;
+	int ret;
+
+	build_header(t, &hdr, OP_REQ, t->send_index);
+	if (opts->rdma_size && t->send_seq > 10)
+		rdma_build_req(fd, &hdr, t,
+				opts->rdma_size,
+				opts->req_depth);
+
+
+	/* Time just the send itself, for the S_SENDMSG_USECS statistic */
+	gettimeofday(&start, NULL);
+	ret = send_packet(fd, t, &hdr, opts->req_size);
+	gettimeofday(&stop, NULL);
+
+	if (ret < 0)
+		return ret;
+
+	/* Remembered so the round trip time can be computed on ACK */
+	t->send_time[t->send_index] = start;
+	if (!opts->rdma_cache_mrs)
+		t->rdma_req_key[t->send_index] = 0; /* we consumed this key */
+	stat_inc(&ctl->cur[S_REQ_TX_BYTES], ret);
+	stat_inc(&ctl->cur[S_SENDMSG_USECS],
+		 usec_sub(&stop, &start));
+
+	t->send_index = (t->send_index + 1) % opts->req_depth;
+	t->pending++;
+	return ret;
+}
+
+/*
+ * Send the ACK that was prepared in t->ack_header[qindex] and account
+ * for it, including the RDMA bytes moved along with it.
+ * Returns the number of bytes sent or a negative value from send_packet().
+ */
+static int send_ack(int fd, struct task *t, unsigned int qindex,
+		struct options *opts,
+		struct child_control *ctl)
+{
+	struct header *ack = &t->ack_header[qindex];
+	ssize_t sent;
+
+	/* answer the request that was stored in this slot */
+	sent = send_packet(fd, t, ack, opts->ack_size);
+	if (sent < 0)
+		return sent;
+	if (sent != opts->ack_size)
+		die_errno("sendto() returned %zd", sent);
+
+	stat_inc(&ctl->cur[S_ACK_TX_BYTES], sent);
+
+	/* RDMA traffic is counted separately per direction */
+	if (ack->rdma_op == RDMA_OP_WRITE) {
+		stat_inc(&ctl->cur[S_RDMA_WRITE_BYTES], opts->rdma_size);
+	} else if (ack->rdma_op == RDMA_OP_READ) {
+		stat_inc(&ctl->cur[S_RDMA_READ_BYTES], opts->rdma_size);
+	}
+
+	return sent;
+}
+
+/*
+ * Send ACKs for all requests of task t that have not been acknowledged
+ * yet, oldest first.
+ * Returns 0 when nothing is left to acknowledge.  Returns -1 with errno
+ * set to EAGAIN if ACKs are outstanding but can_send is zero, or -1 with
+ * send_ack()'s errno if sending failed.
+ */
+static int ack_anything(int fd, struct task *t,
+			struct options *opts,
+			struct child_control *ctl,
+			int can_send)
+{
+	for (;;) {
+		uint16_t slot;
+
+		if (!t->unacked)
+			return 0;
+
+		/* oldest unacknowledged request in the ring */
+		slot = (t->recv_index - t->unacked + opts->req_depth) % opts->req_depth;
+		if (!can_send) {
+			errno = EAGAIN;
+			return -1;
+		}
+		if (send_ack(fd, t, slot, opts, ctl) < 0)
+			return -1;
+		t->unacked -= 1;
+	}
+}
+
+/*
+ * Do all the sending task t currently can: first flush outstanding ACKs,
+ * then issue new requests until req_depth of them are pending.
+ * Returns 0 when the pipeline is full.  Returns -1 with errno set to
+ * EAGAIN if more should be sent but can_send is zero, or -1 with the
+ * errno of the failing send otherwise.
+ */
+static int send_anything(int fd, struct task *t,
+			struct options *opts,
+			struct child_control *ctl,
+			int can_send)
+{
+	if (ack_anything(fd, t, opts, ctl, can_send) < 0)
+		return -1;
+
+	for (;;) {
+		if (!(t->pending < opts->req_depth))
+			return 0;
+		if (!can_send)
+			break;
+		if (send_one(fd, t, opts, ctl) < 0)
+			return -1;
+	}
+
+	/* There is still work, but the socket is not writable */
+	errno = EAGAIN;
+	return -1;
+}
+
+/*
+ * Receive one message without blocking and process its RDS control
+ * messages.
+ *
+ *  buffer/size	where the payload is stored
+ *  cookie	set from an RDS_CMSG_RDMA_DEST cmsg, untouched otherwise
+ *  sin		filled with the sender's address
+ *  tstamp	set to the time right after recvmsg() returned
+ *  tasks	task array; congestion updates and RDMA completions are
+ *		applied to it
+ *
+ * Returns recvmsg()'s result: negative on error (errno set), 0 if only
+ * control messages arrived, else the payload length.  A non-empty
+ * payload shorter than a header, a short source address or an
+ * undersized control message is fatal.
+ */
+static int recv_message(int fd,
+		void *buffer, size_t size,
+		rds_rdma_cookie_t *cookie,
+		struct sockaddr_in *sin,
+		struct timeval *tstamp,
+		struct task *tasks)
+{
+	struct cmsghdr *cmsg;
+	char cmsgbuf[256];
+	struct msghdr msg;
+	struct iovec iov;
+	ssize_t ret;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = (struct sockaddr *) sin;
+	msg.msg_namelen = sizeof(struct sockaddr_in);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+	iov.iov_base = buffer;
+	iov.iov_len = size;
+
+	ret = recvmsg(fd, &msg, MSG_DONTWAIT);
+	gettimeofday(tstamp, NULL);
+
+	if (ret < 0)
+		return ret;
+	if (ret && ret < sizeof(struct header))
+		die("recvmsg() returned short data: %zd", ret);
+	if (msg.msg_namelen < sizeof(struct sockaddr_in))
+		die("socklen = %d < sizeof(sin) (%zu)\n",
+		    msg.msg_namelen, sizeof(struct sockaddr_in));
+
+	/* See if the message comes with a RDMA destination */
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		struct rds_rdma_notify notify;
+
+		if (cmsg->cmsg_level != SOL_RDS)
+			continue;
+		switch (cmsg->cmsg_type) {
+		case RDS_CMSG_CONG_UPDATE:
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(uint64_t)))
+				die("RDS_CMSG_CONG_UPDATE data too small");
+			else {
+				unsigned int i, port;
+				uint64_t mask;
+
+				/* Un-congest every task whose destination
+				 * port is flagged in the update mask */
+				memcpy(&mask, CMSG_DATA(cmsg), sizeof(mask));
+				for (i = 0; i < opt.nr_tasks; ++i) {
+					port = ntohs(tasks[i].dst_addr.sin_port);
+					if (mask & RDS_CONG_MONITOR_MASK(port))
+						tasks[i].congested = 0;
+				}
+			}
+			break;
+		case RDS_CMSG_RDMA_DEST:
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
+				die("RDS_CMSG_RDMA_DEST data too small");
+			memcpy(cookie, CMSG_DATA(cmsg), sizeof(*cookie));
+			break;
+
+		case RDS_CMSG_RDMA_STATUS:
+			/* The message used to name RDS_CMSG_RDMA_DEST here,
+			 * which pointed at the wrong cmsg type. */
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(notify)))
+				die("RDS_CMSG_RDMA_STATUS data too small");
+			memcpy(&notify, CMSG_DATA(cmsg), sizeof(notify));
+			rdma_mark_completed(tasks, notify.user_token, notify.status);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Receive and process one message for any of our tasks.
+ *
+ * The sending task is derived from the source port.  The message must be
+ * the next in-order REQ or ACK for that task and have the size configured
+ * for its type; anything else is fatal.
+ *
+ *  ACK: round trip time is recorded, pending is decremented and the RDMA
+ *       part (if any) is processed.
+ *  REQ: the ACK header is prepared in t->ack_header[] right away
+ *       (mirroring the RDMA fields), unacked is incremented and
+ *       recv_index advances.
+ *
+ * Returns a negative value if nothing could be received (errno set by
+ * recvmsg), 0 if only control messages were received, else the payload
+ * length.
+ */
+static int recv_one(int fd, struct task *tasks,
+			struct options *opts,
+		struct child_control *ctl)
+{
+	char buf[max(opts->req_size, opts->ack_size)];
+	rds_rdma_cookie_t rdma_dest = 0;
+	struct sockaddr_in sin;
+	struct header hdr, in_hdr;
+	struct timeval tstamp;
+	struct task *t;
+	uint16_t expect_index;
+	int task_index;
+	ssize_t ret;
+
+	ret = recv_message(fd, buf, sizeof(buf), &rdma_dest, &sin, &tstamp, tasks);
+	if (ret < 0)
+		return ret;
+
+	/* If we received only RDMA completions or cong updates,
+	 * ret will be 0 */
+	if (ret == 0)
+		return 0;
+
+	/* Map the source port to a task.  The result is negative for any
+	 * port below starting_port + 1, so both bounds need checking
+	 * before it is used as an index. */
+	task_index = ntohs(sin.sin_port) - opts->starting_port - 1;
+	if (task_index < 0 || task_index >= opts->nr_tasks)
+		die("received bad task index %d\n", task_index);
+	t = &tasks[task_index];
+
+	/* make sure the incoming message's size matches its op */
+	decode_hdr(&in_hdr, (struct header *) buf);
+	switch(in_hdr.op) {
+	case OP_REQ:
+		stat_inc(&ctl->cur[S_REQ_RX_BYTES], ret);
+		if (ret != opts->req_size)
+			die("req size %zd, not %u\n", ret,
+			    opts->req_size);
+		expect_index = t->recv_index;
+		break;
+	case OP_ACK:
+		stat_inc(&ctl->cur[S_ACK_RX_BYTES], ret);
+		if (ret != opts->ack_size)
+			die("ack size %zd, not %u\n", ret,
+			    opts->ack_size);
+
+		/* This ACK should be for the oldest outstanding REQ */
+		expect_index = (t->send_index - t->pending + opts->req_depth) % opts->req_depth;
+		break;
+	default:
+		die("unknown op %u\n", in_hdr.op);
+	}
+
+	/*
+	 * Verify that the incoming header indicates that this
+	 * is the next in-order message to us.  We can't predict
+	 * op.
+	 */
+	hdr.op = in_hdr.op;
+	hdr.seq = t->recv_seq;
+	hdr.from_addr = sin.sin_addr.s_addr;
+	hdr.from_port = sin.sin_port;
+	hdr.to_addr = t->src_addr.sin_addr.s_addr;
+	hdr.to_port = t->src_addr.sin_port;
+	hdr.index = expect_index;
+
+	if (check_hdr(buf, ret, &hdr))
+		die("header from %s:%u to id %u bogus\n",
+		    inet_ntoa(sin.sin_addr), ntohs(sin.sin_port),
+		    ntohs(t->src_addr.sin_port));
+
+	if (hdr.op == OP_ACK) {
+		stat_inc(&ctl->cur[S_RTT_USECS],
+			 usec_sub(&tstamp, &t->send_time[expect_index]));
+		t->pending -= 1;
+
+		if (in_hdr.rdma_key)
+			rdma_process_ack(fd, &in_hdr, ctl);
+	} else {
+		struct header *ack_hdr;
+
+		/* Build the ACK header right away */
+		ack_hdr = &t->ack_header[t->recv_index];
+		build_header(t, ack_hdr, OP_ACK, t->recv_index);
+
+		/* The RDMA is performed at the time the ACK
+		 * message is sent. We need to mirror all
+		 * RDMA related header fields in our response
+		 * anyway, so that's a good place for send_ack
+		 * to pick them up from.
+		 */
+		if (rdma_dest)
+			in_hdr.rdma_key = rdma_dest;
+		if (in_hdr.rdma_key) {
+			rdma_validate(&in_hdr, opts);
+			rdma_build_ack(ack_hdr, &in_hdr);
+		}
+
+		t->unacked += 1;
+		t->recv_index = (t->recv_index + 1) % opts->req_depth;
+	}
+	t->recv_seq++;
+
+	return ret;
+}
+
+/*
+ * Body of one worker child.  Never returns: the final loop only ends
+ * when the process is terminated from within one of the called helpers.
+ *
+ *  parent_pid	handed to check_parent() on every iteration
+ *  ctl		this child's control block, shared with the parent
+ *  opts	test parameters
+ *  id		child number; selects the local port
+ *		(starting_port + 1 + id)
+ *
+ * The child keeps one task per remote port (nr_tasks of them), waits for
+ * the parent to publish a start time, and then loops on poll(): receive
+ * whatever is there, then try to keep every task's pipeline full.
+ */
+static void run_child(pid_t parent_pid, struct child_control *ctl,
+		      struct options *opts, uint16_t id)
+{
+	struct sockaddr_in sin;
+	struct pollfd pfd;
+	int fd;
+	uint16_t i;
+	ssize_t ret;
+	struct task tasks[opts->nr_tasks];
+	struct timeval start;
+
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(opts->starting_port + 1 + id);
+	sin.sin_addr.s_addr = htonl(opts->receive_addr);
+
+	/* give main display thread a little edge? */
+	nice(5);
+
+	memset(tasks, 0, sizeof(tasks));
+	for (i = 0; i < opts->nr_tasks; i++) {
+		tasks[i].nr = i;
+		tasks[i].src_addr = sin;
+		tasks[i].dst_addr.sin_family = AF_INET;
+		tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr);
+		tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i);
+		/* Per-slot arrays, req_depth entries each, allocated with
+		 * alloca().
+		 * NOTE(review): alloca() memory is not zeroed; presumably
+		 * alloc_rdma_buffers() initializes rdma_req_key and
+		 * rdma_inflight before they are read - confirm. */
+		tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval));
+		tasks[i].rdma_req_key = alloca(opts->req_depth * sizeof(uint64_t));
+		tasks[i].rdma_inflight = alloca(opts->req_depth * sizeof(uint8_t));
+		tasks[i].rdma_buf = alloca(opts->req_depth * sizeof(uint64_t *));
+		tasks[i].local_buf = alloca(opts->req_depth * sizeof(uint64_t *));
+		tasks[i].ack_header = alloca(opts->req_depth * sizeof(struct header));
+		/* Odd tasks start with a read, even ones with a write */
+		tasks[i].rdma_next_op = (i & 1)? RDMA_OP_READ : RDMA_OP_WRITE;
+	}
+
+	if (opts->rdma_size)
+		alloc_rdma_buffers(tasks, opts);
+
+	fd = rds_socket(opts, &sin);
+
+	/* Tell the parent we are set up, then wait for the start time */
+	ctl->ready = 1;
+
+	while (ctl->start.tv_sec == 0) {
+		check_parent(parent_pid);
+		sleep(1);
+	}
+
+	/* sleep until we're supposed to start */
+	gettimeofday(&start, NULL);
+	if (tv_cmp(&start, &ctl->start) < 0)
+		usleep(usec_sub(&ctl->start, &start));
+
+	sin.sin_family = AF_INET;
+
+	pfd.fd = fd;
+	pfd.events = POLLIN | POLLOUT;
+	while (1) {
+		struct task *t;
+		int can_send;
+
+		check_parent(parent_pid);
+
+		ret = poll(&pfd, 1, -1);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("poll failed");
+		}
+
+		/* POLLOUT is only re-armed below if a send could not
+		 * be completed */
+		pfd.events = POLLIN;
+
+		if (pfd.revents & POLLIN) {
+			/* Drain the socket; the loop ends on the first
+			 * negative return from recv_one() */
+			while (recv_one(fd, tasks, opts, ctl) >= 0)
+				;
+		}
+
+		/* keep the pipeline full */
+		can_send = !!(pfd.revents & POLLOUT);
+		for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) {
+			if (opt.use_cong_monitor && t->congested)
+				continue;
+			if (t->drain_rdmas)
+				continue;
+			if (send_anything(fd, t, opts, ctl, can_send) < 0) {
+				pfd.events |= POLLOUT;
+
+				/* If the send queue is full, we will see EAGAIN.
+				 * If a particular destination is congested, the
+				 * kernel will return ENOBUFS. In the former case,
+				 * there's no point in trying other destinations;
+				 * in the latter case we certainly want to try
+				 * sending to other tasks.
+				 *
+				 * It would be nice if we could map the congestion
+				 * map into user space :-)
+				 */
+				if (errno == ENOBUFS)
+					t->congested = 1;
+				else if (errno == EBADSLT)
+					/* slot still has an RDMA in flight;
+					 * cleared by rdma_mark_completed() */
+					t->drain_rdmas = 1;
+				else
+					break;
+			}
+		}
+	}
+}
+
+/*
+ * Fork one worker child per task and wait until all of them have
+ * reported ready.
+ *
+ * The control blocks live in an anonymous shared mapping so that parent
+ * and children see each other's updates.  Returns the array of
+ * opts->nr_tasks control blocks.  Dies if the mapping or a fork fails,
+ * or if a child exits while we are still waiting for it to become ready.
+ */
+static struct child_control *start_children(struct options *opts)
+{
+	struct child_control *ctl;
+	pid_t parent = getpid();
+	pid_t pid;
+	size_t len;
+	uint32_t i;
+
+	len = opts->nr_tasks * sizeof(*ctl);
+	/* fd must be -1 for anonymous mappings to be portable; Linux
+	 * ignores it, other implementations insist on it. */
+	ctl = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED,
+		   -1, 0);
+	if (ctl == MAP_FAILED)
+		die("mmap of %u child control structs failed", opts->nr_tasks);
+
+	memset(ctl, 0, len);
+
+	init_msg_pattern(opts);
+
+	if (opts->rdma_key_o_meter)
+		rdma_key_o_meter_init(opts->nr_tasks);
+
+	for (i = 0; i < opts->nr_tasks; i++) {
+		pid = fork();
+		if (pid == -1)
+			die_errno("forking child nr %u failed", i);
+		if (pid == 0) {
+			/* Child: only the first one prints warnings */
+			opts->suppress_warnings = (i > 0);
+			if (control_fd >= 0) {
+				close(control_fd);
+				control_fd = -1;
+			}
+			rdma_key_o_meter_set_self(i);
+			run_child(parent, ctl + i, opts, i);
+			exit(0);
+		}
+		ctl[i].pid = pid;
+	}
+
+	/* Wait for each child in turn, polling once a second.  Any child
+	 * exiting (or waitpid failing) during start-up is fatal. */
+	for (i = 0; i < opts->nr_tasks; i++) {
+		while (!ctl[i].ready) {
+			pid = waitpid(-1, NULL, WNOHANG);
+			if (pid)
+				die("child %u (pid %d) exited\n", i, (int) pid);
+			sleep(1);
+		}
+	}
+
+	return ctl;
+}
+
+/* Mean value of a counter (sum / nr), or 0.0 if it has no samples. */
+static double avg(struct counter *ctr)
+{
+	if (!ctr->nr)
+		return 0.0;
+
+	return (double)ctr->sum / (double)ctr->nr;
+}
+
+/* Total message bytes moved: requests and ACKs, both directions. */
+static double throughput(struct counter *disp)
+{
+	const struct counter *req_tx = &disp[S_REQ_TX_BYTES];
+	const struct counter *req_rx = &disp[S_REQ_RX_BYTES];
+	const struct counter *ack_tx = &disp[S_ACK_TX_BYTES];
+	const struct counter *ack_rx = &disp[S_ACK_RX_BYTES];
+
+	return req_tx->sum + req_rx->sum + ack_tx->sum + ack_rx->sum;
+}
+
+/* Total RDMA bytes moved: writes plus reads. */
+static double throughput_rdma(struct counter *disp)
+{
+	const struct counter *written = &disp[S_RDMA_WRITE_BYTES];
+	const struct counter *read_in = &disp[S_RDMA_READ_BYTES];
+
+	return written->sum + read_in->sum;
+}
+
+/*
+ * Compute what changed in all statistics since the previous snapshot.
+ *
+ *  disp	output, NR_STATS counters; nr and sum receive the deltas
+ *		summed over the first nr_tasks children
+ *  ctl		child control blocks; each child's "last" copy is updated
+ *		to the values that were just sampled
+ *
+ * NOTE(review): min and max are assigned, not merged with the value
+ * already in disp, so after the loop they reflect only the last child
+ * looked at - confirm whether that is intended.
+ */
+void stat_snapshot(struct counter *disp, struct child_control *ctl,
+		   uint16_t nr_tasks)
+{
+	struct counter tmp[NR_STATS];
+	uint16_t i;
+	uint16_t s;
+
+	memset(disp, 0, sizeof(tmp));
+
+	for (i = 0; i < nr_tasks; i++) {
+		/* Work on a private copy: the child keeps updating cur */
+		memcpy(tmp, ctl[i].cur, sizeof(tmp));
+
+		for (s = 0; s < NR_STATS; s++) {
+			disp[s].nr += tmp[s].nr - ctl[i].last[s].nr;
+			disp[s].sum += tmp[s].sum - ctl[i].last[s].sum;
+			disp[s].min = minz(tmp[s].min, ctl[i].last[s].min);
+			disp[s].max = max(tmp[s].max, ctl[i].last[s].max);
+		}
+
+		memcpy(ctl[i].last, tmp, sizeof(tmp));
+	}
+}
+
+/*
+ * Fold one set of NR_STATS counters (cur) into a running total (accum):
+ * nr and sum are added, min and max are merged.
+ */
+void stat_accumulate(struct counter *accum, const struct counter *cur)
+{
+	uint16_t idx;
+
+	for (idx = 0; idx < NR_STATS; idx++) {
+		struct counter *total = &accum[idx];
+		const struct counter *sample = &cur[idx];
+
+		total->nr += sample->nr;
+		total->sum += sample->sum;
+		total->min = minz(total->min, sample->min);
+		total->max = max(total->max, sample->max);
+	}
+}
+
+/*
+ * Combine the current counters of the first nr_tasks children into disp
+ * (NR_STATS entries): nr and sum are added up, min and max are merged
+ * across children.
+ */
+void stat_total(struct counter *disp, struct child_control *ctl,
+		uint16_t nr_tasks)
+{
+	uint16_t child;
+	uint16_t stat;
+
+	memset(disp, 0, sizeof(struct counter) * NR_STATS);
+
+	for (child = 0; child < nr_tasks; child++) {
+		struct child_control *cc = &ctl[child];
+
+		for (stat = 0; stat < NR_STATS; stat++) {
+			struct counter *out = &disp[stat];
+
+			out->nr += cc->cur[stat].nr;
+			out->sum += cc->cur[stat].sum;
+			out->min = minz(out->min, cc->cur[stat].min);
+			out->max = max(out->max, cc->cur[stat].max);
+		}
+	}
+}
+
+/*
+ * Estimate CPU usage in percent from the soaker counters.
+ *
+ * Walks soak_arr up to the first entry whose per_sec is zero.  For each
+ * entry, the counter advance since the previous call (capped at per_sec)
+ * counts as soaked capacity; the result is the share of total capacity
+ * that was not soaked.  Each entry's "last" value is updated.
+ *
+ * Returns -1.0 if soak_arr is NULL.
+ */
+static double cpu_use(struct soak_control *soak_arr)
+{
+	uint64_t total_capacity = 0;
+	uint64_t total_soaked = 0;
+	struct soak_control *entry;
+
+	if (!soak_arr)
+		return -1.0;
+
+	entry = soak_arr;
+	while (entry && entry->per_sec) {
+		uint64_t snapshot;
+
+		total_capacity += entry->per_sec;
+		snapshot = entry->counter;
+		total_soaked += min(entry->per_sec, snapshot - entry->last);
+		entry->last = snapshot;
+		entry++;
+	}
+
+	return (double)(total_capacity - total_soaked) * 100 / (double)total_capacity;
+}
+
+/*
+ * Sample system-wide CPU time and interrupt counts from /proc/stat.
+ *
+ * With initialize set, only the CSV column names are printed and the
+ * sample is stored as baseline.  Otherwise the change since the previous
+ * call is printed: user, system, idle and irq time as a percentage of
+ * all CPU time spent in the interval, plus the number of interrupts.
+ *
+ * If /proc/stat cannot be opened, a message is printed once and all
+ * later calls return immediately.
+ */
+static void
+get_stats(int initialize)
+{
+#define NTIMES 8
+	struct sys_stats {
+		/* Where we spent our time */
+		unsigned long long	times[NTIMES];
+		unsigned long long	other;
+
+		/* Interrupt count */
+		unsigned long long	intr;
+	};
+	static struct sys_stats prev, current;
+	static int disable = 0;
+	char buffer[2048];
+	FILE *fp;
+
+	if (disable)
+		return;
+	if ((fp = fopen("/proc/stat", "r")) == NULL) {
+		fprintf(stderr, "Cannot open /proc/stat (%s) - "
+				"not printing cpu stats\n",
+				strerror(errno));
+		disable = 1;
+		return;
+	}
+
+	memset(&current, 0, sizeof(current));
+	while (fgets(buffer, sizeof(buffer), fp)) {
+		if (!strncmp(buffer, "cpu ", 4)) {
+			char	*s = buffer + 4;
+			int	j;
+
+			/* The first NTIMES columns are kept separately,
+			 * any further ones are lumped into "other" */
+			for (j = 0; 1; ++j) {
+				unsigned long long v;
+
+				while (*s == ' ')
+					++s;
+				if (!isdigit((unsigned char) *s))
+					break;
+				v = strtoull(s, &s, 10);
+				if (j < NTIMES)
+					current.times[j] = v;
+				else
+					current.other += v;
+			}
+		} else
+		if (!strncmp(buffer, "intr ", 5)) {
+			sscanf(buffer + 5, "%llu", &current.intr);
+		}
+	}
+	fclose(fp);
+
+	if (initialize) {
+		printf(",user:percent,system:percent,idle:percent"
+		       ",irq:percent,intr:count");
+	} else {
+		struct sys_stats sys;
+		unsigned long long sum;
+		double scale;
+		int j;
+
+		/* The percentages are relative to the time that passed in
+		 * this interval, so the total has to be built from the
+		 * deltas, not from the absolute counters. */
+		sum = sys.other = current.other - prev.other;
+		for (j = 0; j < NTIMES; ++j) {
+			sys.times[j] = current.times[j] - prev.times[j];
+			sum += sys.times[j];
+		}
+		sys.intr = current.intr - prev.intr;
+
+		scale = sum? 100.0 / sum : 0;
+
+		/* Magic procfs offsets
+		 *  0	user
+		 *  1	nice
+		 *  2	system
+		 *  3	idle
+		 *  4	iowait
+		 *  5	irq
+		 *  6	softirq
+		 */
+		printf(",%f,%f,%f,%f,%llu",
+			(sys.times[0] + sys.times[1]) * scale,
+			sys.times[2] * scale,
+			(sys.times[3] + sys.times[4]) * scale,
+			(sys.times[5] + sys.times[6]) * scale,
+			sys.intr);
+	}
+	prev = current;
+}
+
+/*
+ * Sample the RDS counters via getsockopt(RDS_INFO_COUNTERS) and print
+ * them as CSV columns, followed by the system statistics of get_stats().
+ *
+ * With initialize set, the column names are printed ("name:bytes" for
+ * counters whose name contains "_bytes", "name:count" otherwise).
+ * Otherwise each counter's change since the previous call is printed,
+ * scaled to a per-second rate.
+ *
+ * The socket and all buffers are created on first use and kept for the
+ * lifetime of the process.
+ */
+static void
+get_perfdata(int initialize)
+{
+	static struct timeval last_ts, now;
+	static struct rds_info_counter *prev, *ctr;
+	static unsigned char *curr = NULL;
+	static socklen_t buflen = 0;
+	static int sock_fd = -1;
+	static int nr_alloc = 0;
+	int i, count, item_size;
+
+	if (sock_fd < 0) {
+		sock_fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+		if (sock_fd < 0)
+			die_errno("Unable to create socket");
+	}
+
+	/* We should only loop once on the first call; after that the
+	 * buffer requirements for RDS counters should not change. */
+	while ((item_size = getsockopt(sock_fd, SOL_RDS, RDS_INFO_COUNTERS, curr, &buflen)) < 0) {
+		if (errno != ENOSPC)
+			die_errno("getsockopt(RDS_INFO_COUNTERS) failed");
+		curr = realloc(curr, buflen);
+		if (!curr)
+			die_errno("Cannot allocate buffer for stats counters");
+	}
+
+	/* item_size is used as a divisor and as a copy length below, so
+	 * reject zero as well as anything larger than our own struct. */
+	if (item_size == 0 || (size_t) item_size > sizeof(*ctr))
+		die("Bad counter item size in RDS_INFO_COUNTERS (got %d, max %zu)\n",
+				item_size, sizeof(*ctr));
+	count = buflen / item_size;
+
+	if (prev == NULL) {
+		/* First call - allocate buffer */
+		nr_alloc = count > 0 ? count : 1;
+		prev = calloc(nr_alloc, sizeof(*ctr));
+		ctr = calloc(nr_alloc, sizeof(*ctr));
+		if (!prev || !ctr)
+			die_errno("Cannot allocate buffer for stats counters");
+	}
+
+	/* Never go past what was allocated on the first call, should the
+	 * kernel report more counters later on. */
+	if (count > nr_alloc)
+		count = nr_alloc;
+
+	for (i = 0; i < count; ++i)
+		memcpy(ctr + i, curr + i * item_size, item_size);
+
+	gettimeofday(&now, NULL);
+
+	if (initialize) {
+		for (i = 0; i < count; ++i) {
+			printf(",%s", ctr[i].name);
+			if (strstr((char *) ctr[i].name, "_bytes"))
+				printf(":bytes");
+			else
+				printf(":count");
+		}
+	} else {
+		double scale;
+
+		/* Convert deltas into per-second rates */
+		scale = 1e6 / usec_sub(&now, &last_ts);
+		for (i = 0; i < count; ++i) {
+			printf(",%f",
+				(ctr[i].value - prev[i].value) * scale);
+		}
+	}
+
+	memcpy(prev, ctr, count * sizeof(*ctr));
+	last_ts = now;
+
+	get_stats(initialize);
+}
+
+/*
+ * Collect the exit status of one child.
+ *
+ *  wflags	passed to waitpid(); WNOHANG makes the call non-blocking
+ *
+ * Returns 0 if no child has changed state (only possible with WNOHANG),
+ * 1 if a child exited with status 0 or was terminated by SIGTERM.
+ * Every other outcome - a waitpid() failure, a non-zero exit status,
+ * death by another signal - is fatal.
+ */
+static int reap_one_child(int wflags)
+{
+	pid_t pid;
+	int status;
+
+	pid = waitpid(-1, &status, wflags);
+	if (pid < 0)
+		/* pid is just -1 here; the reason is in errno */
+		die_errno("waitpid failed");
+	if (pid == 0)
+		return 0;
+
+	if (WIFEXITED(status)) {
+		if (WEXITSTATUS(status) == 0)
+			return 1;
+		die("child pid %d exited with status %d\n",
+				(int) pid, WEXITSTATUS(status));
+	}
+	if (WIFSIGNALED(status)) {
+		if (WTERMSIG(status) == SIGTERM)
+			return 1;
+		die("child pid %d exited with signal %d\n",
+				(int) pid, WTERMSIG(status));
+	}
+	die("child pid %d wait status %d\n", (int) pid, status);
+}
+
+/*
+ * Start the test run, print statistics once per interval until it ends,
+ * then stop the children and print a summary line.
+ *
+ *  opts	test parameters
+ *  ctl		control blocks of the opts->nr_tasks children
+ *  soak_arr	soaker control blocks used for CPU measurement, or NULL
+ *  active	non-zero: sleep one second per interval and honour
+ *		opts->run_time; zero: wait on control_fd instead and stop
+ *		as soon as poll() reports an event on it
+ *
+ * The run ends when the run time is over, when control_fd becomes
+ * readable/hung up (passive side), or when all children are gone.
+ */
+static void release_children_and_wait(struct options *opts,
+				      struct child_control *ctl,
+				      struct soak_control *soak_arr,
+				      int active)
+{
+	struct counter disp[NR_STATS];
+	struct counter summary[NR_STATS];
+	struct timeval start, end, now, first_ts, last_ts;
+	double cpu_total = 0;
+	uint16_t i, cpu_samples = 0;
+	uint16_t nr_running;
+
+	/* Publishing a start time releases the children, which wait for
+	 * ctl->start to become non-zero */
+	gettimeofday(&start, NULL);
+	start.tv_sec += 2;
+	for (i = 0; i < opts->nr_tasks; i++)
+		ctl[i].start = start;
+
+	/* Allow for a 4 second delay: 2 seconds for the children
+	 * to come up, and 2 more of burn-in time
+	 */
+	printf("Starting up"); fflush(stdout);
+	for (i = 0; i < 4; ++i) {
+		sleep(1);
+		/* Results are discarded; the calls only advance the
+		 * "last" baselines past the start-up phase */
+		stat_snapshot(disp, ctl, opts->nr_tasks);
+		cpu_use(soak_arr);
+		printf(".");
+		fflush(stdout);
+	}
+	printf("\n");
+
+	gettimeofday(&first_ts, NULL);
+	if (opts->run_time && active) {
+		end = first_ts;
+		end.tv_sec += opts->run_time;
+	} else {
+		/* A cleared end time means "no time limit" below */
+		timerclear(&end);
+	}
+
+	nr_running = opts->nr_tasks;
+	memset(summary, 0, sizeof(summary));
+
+	if (opts->rtprio)
+		set_rt_priority();
+
+	/* Prime the perf data counters and display the CSV header line
+	 * You can filter the CSV data from the rds-stress output by
+	 * grepping for the "::" marker.
+	 */
+	if (opt.show_perfdata) {
+		printf("::");
+		printf("nr_tasks:count"
+		       ",req_size:bytes"
+		       ",ack_size:bytes"
+		       ",rdma_size:bytes");
+
+		printf(",req_sent:count"
+		       ",thruput:kB/s"
+		       ",thruput_rdma:kB/s"
+		       ",tx_delay:microseconds"
+		       ",rtt:microseconds"
+		       ",cpu:percent");
+		get_perfdata(1);
+		printf("\n");
+	} else {
+		printf("%4s %6s %10s %10s %7s %8s %5s\n",
+			"tsks", "tx/s", "tx+rx K/s", "rw+rr K/s",
+			"tx us/c", "rtt us", "cpu %");
+	}
+
+	last_ts = first_ts;
+	while (nr_running) {
+		double cpu;
+
+		if (active) {
+			sleep(1);
+		} else {
+			struct pollfd pfd;
+
+			pfd.fd = control_fd;
+			pfd.events = POLLIN|POLLHUP;
+			if (poll(&pfd, 1, 1000) == 1)
+				break;
+		}
+
+		/* XXX big bug, need to mark some ctl elements dead */
+		stat_snapshot(disp, ctl, nr_running);
+		gettimeofday(&now, NULL);
+		/* cpu is -1.0 when there are no soakers */
+		cpu = cpu_use(soak_arr);
+
+		if (!opts->summary_only) {
+			double scale;
+
+			/* Every loop takes a little more than one second;
+			 * and system load can actually introduce latencies.
+			 * So try to measure the actual time elapsed as precise
+			 * as possible, and scale all values by its inverse.
+			 */
+			scale = 1e6 / usec_sub(&now, &last_ts);
+
+			if (!opt.show_perfdata) {
+				printf("%4u %6"PRIu64" %10.2f %10.2f %7.2f %8.2f %5.2f\n",
+					nr_running,
+					disp[S_REQ_TX_BYTES].nr,
+					scale * throughput(disp) / 1024.0,
+					scale * throughput_rdma(disp) / 1024.0,
+					scale * avg(&disp[S_SENDMSG_USECS]),
+					scale * avg(&disp[S_RTT_USECS]),
+					scale * cpu);
+			} else {
+				printf("::");
+				printf("%u,%u,%u,%u,",
+				       opts->nr_tasks, opts->req_size,
+				       opts->ack_size, opts->rdma_size);
+
+				printf("%Lu,%f,%f,%f,%f,%f",
+					(unsigned long long) disp[S_REQ_TX_BYTES].nr,
+					scale * throughput(disp) / 1024.0,
+					scale * throughput_rdma(disp) / 1024.0,
+					scale * avg(&disp[S_SENDMSG_USECS]),
+					scale * avg(&disp[S_RTT_USECS]),
+					cpu >= 0? scale * cpu : 0);
+
+				/* Print RDS perf counters etc */
+				get_perfdata(0);
+				printf("\n");
+			}
+
+			rdma_key_o_meter_check(opts->nr_tasks);
+		}
+
+		stat_accumulate(summary, disp);
+		cpu_total += cpu;
+		cpu_samples++;
+		last_ts = now;
+
+		if (timerisset(&end) && timercmp(&now, &end, >=))
+			break;
+
+		/* see if any children have finished or died.
+		 * This is a bit touchy - we should really be
+		 * able to tell an exited soaker from an exiting
+		 * RDS child. */
+		if (reap_one_child(WNOHANG))
+			nr_running--;
+	}
+
+	close(control_fd);
+	control_fd = -1;
+
+	/* Children still running at this point are told to stop */
+	if (nr_running) {
+		for (i = 0; i < opts->nr_tasks; i++)
+			kill(ctl[i].pid, SIGTERM);
+		stop_soakers(soak_arr);
+	}
+
+	while (nr_running && reap_one_child(0))
+		nr_running--;
+
+	rdma_key_o_meter_check(opts->nr_tasks);
+
+	stat_total(disp, ctl, opts->nr_tasks);
+	if (!opts->summary_only)
+		printf("---------------------------------------------\n");
+	{
+		double scale;
+
+		/* Averages are taken over the whole measured period */
+		scale = 1e6 / usec_sub(&last_ts, &first_ts);
+
+		/* NOTE(review): the RDMA column is computed from disp
+		 * (stat_total) while the other columns use summary -
+		 * confirm that this is intended. */
+		printf("%4u %6lu %10.2f %10.2f %7.2f %8.2f %5.2f  (average)\n",
+			opts->nr_tasks,
+			(long) (scale * summary[S_REQ_TX_BYTES].nr),
+			scale * throughput(summary) / 1024.0,
+			scale * throughput_rdma(disp) / 1024.0,
+			avg(&summary[S_SENDMSG_USECS]),
+			avg(&summary[S_RTT_USECS]),
+			soak_arr? scale * cpu_total : -1.0);
+	}
+}
+
+/*
+ * Connect fd to the peer at *sin, printing progress on stdout.
+ *
+ * ECONNREFUSED, EHOSTUNREACH and ENETUNREACH are retried once per second,
+ * up to opt.connect_retries times.  Any other error, or running out of
+ * retries, is fatal.
+ */
+static void peer_connect(int fd, const struct sockaddr_in *sin)
+{
+	int retries = 0;
+
+	printf("connecting to %s:%u",
+			inet_ntoa(sin->sin_addr),
+			ntohs(sin->sin_port));
+	fflush(stdout);
+
+	while (connect(fd, (struct sockaddr *) sin, sizeof(*sin))) {
+		/* Only the first failure's reason is printed */
+		if (retries == 0)
+			printf(": %s", strerror(errno));
+
+		switch (errno) {
+		case ECONNREFUSED:
+		case EHOSTUNREACH:
+		case ENETUNREACH:
+			/* This break leaves the switch only, so we end up
+			 * at die() below */
+			if (retries >= opt.connect_retries)
+				break;
+			if (retries++ == 0)
+				printf(" - retrying");
+			printf(".");
+			fflush(stdout);
+			sleep(1);
+			/* Applies to the while loop: try connect() again */
+			continue;
+		}
+
+		printf("\n");
+		die("connect(%s) failed", inet_ntoa(sin->sin_addr));
+	}
+	printf("\n");
+}
+
+/*
+ * Write exactly size bytes from ptr to fd, continuing after short
+ * writes and restarting writes that were interrupted by a signal.
+ * Any other write error is fatal.
+ */
+static void peer_send(int fd, const void *ptr, size_t size)
+{
+	/* Arithmetic on void pointers is a GNU extension; step through
+	 * the data with a char pointer instead. */
+	const char *cursor = ptr;
+	ssize_t ret;
+
+	while (size) {
+		ret = write(fd, cursor, size);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("Cannot send to peer");
+		}
+		size -= ret;
+		cursor += ret;
+	}
+}
+
+/*
+ * Read exactly size bytes from fd into ptr, continuing after short
+ * reads and restarting reads that were interrupted by a signal.
+ * A read error or the peer closing the connection early is fatal.
+ */
+static void peer_recv(int fd, void *ptr, size_t size)
+{
+	/* Arithmetic on void pointers is a GNU extension; step through
+	 * the buffer with a char pointer instead. */
+	char *cursor = ptr;
+	ssize_t ret;
+
+	while (size) {
+		ret = read(fd, cursor, size);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("Cannot recv from peer");
+		}
+		if (ret == 0)
+			die("Peer unexpectedly closed connection\n");
+		size -= ret;
+		cursor += ret;
+	}
+}
+
+/*
+ * Copy the options in src to dst, converting every multi-byte field
+ * with htonl()/htons().  Single-byte fields are copied unchanged.
+ * Only the fields listed here are written to dst.
+ */
+static void encode_options(struct options *dst, const struct options *src)
+{
+	dst->req_depth = htonl(src->req_depth);
+	dst->req_size = htonl(src->req_size);
+	dst->ack_size = htonl(src->ack_size);
+	dst->rdma_size = htonl(src->rdma_size);
+	/* The "host byte order" remarks describe the source fields */
+	dst->send_addr = htonl(src->send_addr);		/* host byte order */
+	dst->receive_addr = htonl(src->receive_addr);	/* host byte order */
+	dst->starting_port = htons(src->starting_port);	/* host byte order */
+	dst->nr_tasks = htons(src->nr_tasks);
+	dst->run_time = htonl(src->run_time);
+	dst->summary_only = src->summary_only;		/* byte sized */
+	dst->rtprio = src->rtprio;			/* byte sized */
+	dst->tracing = src->tracing;			/* byte sized */
+	dst->verify = src->verify;			/* byte sized */
+	dst->show_params = src->show_params;		/* byte sized */
+	dst->show_perfdata = src->show_perfdata;	/* byte sized */
+	dst->use_cong_monitor = src->use_cong_monitor;	/* byte sized */
+	dst->rdma_use_once = src->rdma_use_once;	/* byte sized */
+	dst->rdma_use_get_mr = src->rdma_use_get_mr;	/* byte sized */
+	dst->rdma_use_fence = src->rdma_use_fence;	/* byte sized */
+	dst->rdma_cache_mrs = src->rdma_cache_mrs;	/* byte sized */
+	dst->rdma_key_o_meter = src->rdma_key_o_meter;	/* byte sized */
+
+	dst->rdma_alignment = htonl(src->rdma_alignment);
+	dst->connect_retries = htonl(src->connect_retries);
+
+	dst->suppress_warnings = src->suppress_warnings;/* byte sized */
+}
+
+static void decode_options(struct options *dst, const struct options *src)
+{	/* Inverse of encode_options(): convert the wire form in *src back to
+	 * host byte order in *dst using ntohl()/ntohs(); fields marked
+	 * "byte sized" are copied unchanged.  passive_parent() calls this
+	 * with dst == src, which works because every field is read and
+	 * written on its own. */
+	dst->req_depth = ntohl(src->req_depth);
+	dst->req_size = ntohl(src->req_size);
+	dst->ack_size = ntohl(src->ack_size);
+	dst->rdma_size = ntohl(src->rdma_size);
+	dst->send_addr = ntohl(src->send_addr);		/* host byte order in dst */
+	dst->receive_addr = ntohl(src->receive_addr);	/* host byte order in dst */
+	dst->starting_port = ntohs(src->starting_port);	/* host byte order in dst */
+	dst->nr_tasks = ntohs(src->nr_tasks);
+	dst->run_time = ntohl(src->run_time);
+	dst->summary_only = src->summary_only;		/* byte sized */
+	dst->rtprio = src->rtprio;			/* byte sized */
+	dst->tracing = src->tracing;			/* byte sized */
+	dst->verify = src->verify;			/* byte sized */
+	dst->show_params = src->show_params;		/* byte sized */
+	dst->show_perfdata = src->show_perfdata;	/* byte sized */
+	dst->use_cong_monitor = src->use_cong_monitor;	/* byte sized */
+	dst->rdma_use_once = src->rdma_use_once;	/* byte sized */
+	dst->rdma_use_get_mr = src->rdma_use_get_mr;	/* byte sized */
+	dst->rdma_use_fence = src->rdma_use_fence;	/* byte sized */
+	dst->rdma_cache_mrs = src->rdma_cache_mrs;	/* byte sized */
+	dst->rdma_key_o_meter = src->rdma_key_o_meter;	/* byte sized */
+
+	dst->rdma_alignment = ntohl(src->rdma_alignment);
+	dst->connect_retries = ntohl(src->connect_retries);
+
+	dst->suppress_warnings = src->suppress_warnings;/* byte sized */
+}
+
+/*
+ * Self-check for encode_options()/decode_options(): run *opts through an
+ * encode/decode round trip and die() unless the result is byte-for-byte
+ * equal to the input.
+ */
+static void verify_option_encdec(const struct options *opts)
+{
+	struct options wire, decoded;
+	unsigned char *byte = (unsigned char *) &decoded;
+	unsigned char *end = byte + sizeof(*opts);
+
+	/* Seed the result buffer with the bitwise complement of *opts, so
+	 * that any byte the decoder does not overwrite is certain to differ
+	 * from the original. */
+	memcpy(&decoded, opts, sizeof(*opts));
+	for (; byte < end; byte++)
+		*byte = ~*byte;
+
+	encode_options(&wire, opts);
+	decode_options(&decoded, &wire);
+
+	if (memcmp(&decoded, opts, sizeof(*opts)) != 0)
+		die("encode/decode check of options struct failed");
+}
+
+/*
+ * Active (initiating) side of a test run.
+ *
+ * Optionally prints the chosen parameters, opens the TCP control
+ * connection from opts->receive_addr to opts->send_addr on
+ * opts->starting_port, sends the encoded options to the peer, starts the
+ * child tasks, exchanges a one-byte "GO" handshake with the peer and then
+ * runs the test through release_children_and_wait().
+ *
+ * opts->receive_addr is filled in from the connected socket when it was
+ * left as 0.  Always returns 0; every failure is fatal.
+ */
+static int active_parent(struct options *opts, struct soak_control *soak_arr)
+{
+	struct options enc_options;
+	struct child_control *ctl;
+	struct sockaddr_in sin;
+	int fd;
+	/* Neither side inspects the handshake byte, but give it a defined
+	 * value rather than sending an uninitialized stack byte. */
+	uint8_t ok = 1;
+
+	if (opts->show_params) {
+		unsigned int k;
+
+		printf("Options:\n"
+		       "  %-10s %-7u\n"
+		       "  %-10s %-7u\n"
+		       "  %-10s %-7u\n"
+		       "  %-10s %-7u\n",
+		       "Tasks", opts->nr_tasks,
+		       "Req size", opts->req_size,
+		       "ACK size", opts->ack_size,
+		       "RDMA size", opts->rdma_size);
+
+		/* k counts the RDMA flags printed so that "(defaults)" can
+		 * be shown when none is set. */
+		k = 0;
+		printf("  %-10s", "RDMA opts");
+		if (opts->rdma_use_once) {
+			printf(" use_once"); ++k;
+		}
+		if (opts->rdma_use_get_mr) {
+			printf(" use_get_mr"); ++k;
+		}
+		if (opts->rdma_use_fence) {
+			printf(" use_fence"); ++k;
+		}
+		if (opts->rdma_cache_mrs) {
+			printf(" cache_mrs"); ++k;
+		}
+		if (opts->rdma_alignment) {
+			printf(" align=%u", opts->rdma_alignment); ++k;
+		}
+		if (!k)
+			printf(" (defaults)");
+		printf("\n");
+		printf("\n");
+	}
+
+	/* Make sure that when we add new options, we don't forget
+	 * to add them to the encode/decode routines. */
+	verify_option_encdec(opts);
+
+	/* Zero the whole address first so that the sin_zero padding is
+	 * not passed to bind()/connect() uninitialized. */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(opts->starting_port);
+	sin.sin_addr.s_addr = htonl(opts->receive_addr);
+
+	fd = bound_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sin);
+	control_fd = fd;
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(opts->starting_port);
+	sin.sin_addr.s_addr = htonl(opts->send_addr);
+
+	peer_connect(fd, &sin);
+
+	if (opts->receive_addr == 0) {
+		opts->receive_addr = get_local_address(fd, &sin);
+		if (opts->rdma_size && !check_rdma_support(opts))
+			die("RDMA not supported by this kernel\n");
+	}
+
+	/* "negotiation" is overstating things a bit :-)
+	 * We just tell the peer what options to use.
+	 */
+	encode_options(&enc_options, opts);
+	peer_send(fd, &enc_options, sizeof(struct options));
+
+	printf("negotiated options, tasks will start in 2 seconds\n");
+	ctl = start_children(opts);
+
+	/* Tell the peer to start up. This is necessary when testing
+	 * with a large number of tasks, because otherwise the peer
+	 * may start sending before we have all our tasks running.
+	 */
+	peer_send(fd, &ok, sizeof(ok));
+	peer_recv(fd, &ok, sizeof(ok));
+
+	release_children_and_wait(opts, ctl, soak_arr, 1);
+
+	return 0;
+}
+
+/*
+ * Passive (listening) side of a test run.
+ *
+ * Listens on addr:port (both in host byte order), accepts a single control
+ * connection, reads the peer's options off the wire and adopts them (with
+ * send and receive addresses swapped), starts the child tasks, answers the
+ * peer's one-byte "GO" handshake and then runs the test through
+ * release_children_and_wait().
+ *
+ * Always returns 0; every failure is fatal.
+ */
+static int passive_parent(uint32_t addr, uint16_t port,
+			  struct soak_control *soak_arr)
+{
+	struct options remote, *opts;
+	struct child_control *ctl;
+	struct sockaddr_in sin;
+	socklen_t socklen;
+	int lfd, fd;
+	uint8_t ok;
+
+	/* Zero the whole address first so that the sin_zero padding is
+	 * not passed to bind() uninitialized. */
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+	sin.sin_addr.s_addr = htonl(addr);
+
+	lfd = bound_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sin);
+
+	if (listen(lfd, 255))
+		die_errno("listen() failed");
+
+	socklen = sizeof(sin);
+
+	fd = accept(lfd, (struct sockaddr *)&sin, &socklen);
+	if (fd < 0)
+		die_errno("accept() failed");
+	control_fd = fd;
+
+	/* Do not accept any further connections - we don't handle them
+	 * anyway. */
+	close(lfd);
+
+	printf("accepted connection from %s:%u", inet_ntoa(sin.sin_addr),
+		ntohs(sin.sin_port));
+	if (addr == 0) {
+		/* Get our receive address - i.e. the address the peer connected to. */
+		addr = get_local_address(control_fd, &sin);
+		printf(" on %s:%u", inet_ntoa(sin.sin_addr), ntohs(sin.sin_port));
+	}
+	printf("\n");
+
+	/* Decoding in place is fine: decode_options() converts the struct
+	 * one field at a time. */
+	peer_recv(fd, &remote, sizeof(struct options));
+	decode_options(&remote, &remote);
+	opts = &remote;
+
+	/*
+	 * The sender gave us their send and receive addresses, we need
+	 * to swap them.
+	 */
+	opts->send_addr = opts->receive_addr;
+	opts->receive_addr = addr;
+	opt = *opts;
+
+	ctl = start_children(opts);
+
+	/* Wait for "GO" from the initiating peer */
+	peer_recv(fd, &ok, sizeof(ok));
+	peer_send(fd, &ok, sizeof(ok));
+
+	printf("negotiated options, tasks will start in 2 seconds\n");
+	release_children_and_wait(opts, ctl, soak_arr, 0);
+
+	return 0;
+}
+
+/*
+ * The soaker *constantly* spins calling getpid().  It tries to execute a
+ * second's worth of calls before checking that its parent is still alive.  It
+ * uses gettimeofday() to figure out the per-second rate of the series it just
+ * executed.  It always tries to work from the highest rate it ever saw.
+ *
+ * Progress is published through soak->counter and soak->per_sec.  This
+ * function loops forever and does not return on its own.
+ */
+static void run_soaker(pid_t parent_pid, struct soak_control *soak)
+{
+	uint64_t i;
+	uint64_t per_sec;
+	struct timeval start;
+	struct timeval stop;
+	uint64_t usecs;
+
+	nice(20);
+
+	/* Initial guess; raised below as soon as a faster rate is seen. */
+	soak->per_sec = 1000;
+
+	while (1) {
+		gettimeofday(&start, NULL);
+		for (i = 0; i < soak->per_sec; i++) {
+			syscall(SYS_getpid);
+			soak->counter++;
+		}
+		gettimeofday(&stop, NULL);
+
+		usecs = usec_sub(&stop, &start);
+
+		/* A zero interval would turn the division below into a
+		 * division by zero, and converting the resulting infinity
+		 * to an integer is undefined.  Keep the previous rate in
+		 * that case. */
+		if (usecs != 0) {
+			per_sec = (double)soak->per_sec * 1000000.0 / (double)usecs;
+
+			if (per_sec > soak->per_sec)
+				soak->per_sec = per_sec;
+		}
+
+		check_parent(parent_pid);
+	}
+}
+
+/*
+ * Fork one CPU soaker process per online processor.
+ *
+ * The per-soaker control structs live in a shared anonymous mapping, so
+ * what a child writes into its entry is visible to this process.  The
+ * array holds one entry per soaker followed by an all-zero terminating
+ * entry.
+ *
+ * Returns the array; mapping or fork failures are fatal.
+ */
+struct soak_control *start_soakers(void)
+{
+	struct soak_control *soak_arr;
+	pid_t parent = getpid();
+	pid_t pid;
+	size_t len;
+	long nr_soak = sysconf(_SC_NPROCESSORS_ONLN);
+	long i;
+
+	/* an extra terminating entry which will be all 0s */
+	len = (nr_soak + 1) * sizeof(struct soak_control);
+	/* Portable code passes fd -1 with MAP_ANONYMOUS; some systems
+	 * reject any other value. */
+	soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE,
+			MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+	if (soak_arr == MAP_FAILED)
+		die("mmap of %ld soak control structs failed", nr_soak);
+
+	memset(soak_arr, 0, len);
+
+	printf("started %ld cycle soaking processes\n", nr_soak);
+
+	for (i = 0; i < nr_soak; i++) {
+		pid = fork();
+		if (pid == -1)
+			die_errno("forking soaker nr %ld failed", i);
+		if (pid == 0) {
+			/* Child: spin in the soaker loop on its own entry. */
+			run_soaker(parent, soak_arr + i);
+			exit(0);
+		}
+		soak_arr[i].pid = pid;
+	}
+
+	return soak_arr;
+}
+
+/*
+ * Terminate and reap every soaker recorded in `soak_arr', the array
+ * returned by start_soakers().  A NULL array is a no-op.
+ */
+void stop_soakers(struct soak_control *soak_arr)
+{
+	unsigned int i;
+
+	if (!soak_arr)
+		return;
+
+	/* Walk up to the all-zero terminating entry instead of asking for
+	 * the processor count again: should that count have grown since
+	 * start_soakers() ran, the loop would reach the terminator and
+	 * call kill(0, ...), signalling our own process group, and then
+	 * read past the end of the mapping. */
+	for (i = 0; soak_arr[i].pid != 0; ++i) {
+		kill(soak_arr[i].pid, SIGTERM);
+		waitpid(soak_arr[i].pid, NULL, 0);
+	}
+}
+
+/*
+ * Validate a message size option; every violation is fatal.
+ *
+ *   size    value to check
+ *   unspec  marker value meaning "the option was never given"
+ *   max     smallest acceptable size (despite the name, a lower bound)
+ *   desc    human readable name of the option, used in messages
+ *   opt     command line switch that sets it, used in messages
+ */
+void check_size(uint32_t size, uint32_t unspec, uint32_t max, char *desc,
+		char *opt)
+{
+	/* Compare against the caller's marker rather than a hard-coded ~0,
+	 * which silently ignored the `unspec' argument. */
+	if (size == unspec)
+		die("specify %s with %s\n", desc, opt);
+	if (size < max)
+		die("%s must be at least %u bytes\n", desc, max);
+}
+
+enum {	/* getopt_long() codes for options that have no short form */
+	OPT_RDMA_USE_ONCE = 0x100,	/* starts above any single-byte option char */
+	OPT_RDMA_USE_GET_MR,
+	OPT_RDMA_USE_FENCE,
+	OPT_RDMA_USE_NOTIFY,	/* main() parses the argument and ignores it */
+	OPT_RDMA_CACHE_MRS,
+	OPT_RDMA_ALIGNMENT,
+	OPT_RDMA_KEY_O_METER,
+	OPT_SHOW_PARAMS,
+	OPT_CONNECT_RETRIES,
+	OPT_USE_CONG_MONITOR,
+	OPT_PERFDATA,
+};
+
+static struct option long_options[] = {	/* long option table passed to getopt_long() in main() */
+{ "req-bytes",		required_argument,	NULL,	'q'	},
+{ "ack-bytes",		required_argument,	NULL,	'a'	},
+{ "rdma-bytes",		required_argument,	NULL,	'D'	},
+{ "tasks",		required_argument,	NULL,	't'	},
+{ "depth",		required_argument,	NULL,	'd'	},
+{ "recv-addr",		required_argument,	NULL,	'r'	},
+{ "send-addr",		required_argument,	NULL,	's'	},
+{ "port",		required_argument,	NULL,	'p'	},
+{ "time",		required_argument,	NULL,	'T'	},
+{ "report-cpu",		no_argument,		NULL,	'c'	},
+{ "report-summary",	no_argument,		NULL,	'z'	},
+{ "rtprio",		no_argument,		NULL,	'R'	},
+{ "verify",		no_argument,		NULL,	'v'	},
+{ "trace",		no_argument,		NULL,	'V'	},
+
+{ "rdma-use-once",	required_argument,	NULL,	OPT_RDMA_USE_ONCE },	/* long-only options from here on */
+{ "rdma-use-get-mr",	required_argument,	NULL,	OPT_RDMA_USE_GET_MR },
+{ "rdma-use-fence",	required_argument,	NULL,	OPT_RDMA_USE_FENCE },
+{ "rdma-use-notify",	required_argument,	NULL,	OPT_RDMA_USE_NOTIFY },
+{ "rdma-cache-mrs",	required_argument,	NULL,	OPT_RDMA_CACHE_MRS },
+{ "rdma-alignment",	required_argument,	NULL,	OPT_RDMA_ALIGNMENT },
+{ "rdma-key-o-meter",	no_argument,		NULL,	OPT_RDMA_KEY_O_METER },
+{ "show-params",	no_argument,		NULL,	OPT_SHOW_PARAMS },
+{ "show-perfdata",	no_argument,		NULL,	OPT_PERFDATA },
+{ "connect-retries",	required_argument,	NULL,	OPT_CONNECT_RETRIES },
+{ "use-cong-monitor",	required_argument,	NULL,	OPT_USE_CONG_MONITOR },
+
+{ NULL }	/* terminating entry */
+};
+
+/*
+ * Program entry point.
+ *
+ * Fills `opts' with defaults, applies the command line on top, resolves
+ * the interdependent RDMA settings and then runs either as the passive
+ * parent (no send address given; options are read from the peer) or as
+ * the active parent (options are validated here and sent to the peer).
+ */
+int main(int argc, char **argv)
+{
+	struct options opts;
+	struct soak_control *soak_arr = NULL;
+
+#ifdef DYNAMIC_PF_RDS
+	/* Discover PF_RDS/SOL_RDS once, and be done with it */
+	(void) discover_pf_rds();
+	(void) discover_sol_rds();
+#endif
+
+#ifdef _SC_PAGESIZE
+	sys_page_size = sysconf(_SC_PAGESIZE);
+#else
+	sys_page_size = 4096;
+#endif
+
+	/* We really want to see output when we redirect
+	 * stdout to a pipe. */
+	setlinebuf(stdout);
+
+	/* All-ones marks a field as "not given on the command line"; the
+	 * checks after option parsing rely on this for send_addr,
+	 * req_depth, nr_tasks, rdma_use_once and rdma_use_get_mr.
+	 * NOTE(review): suppress_warnings gets no explicit default below
+	 * and therefore keeps the 0xff fill - confirm that is intended. */
+	memset(&opts, 0xff, sizeof(opts));
+
+	opts.receive_addr = 0;
+	opts.starting_port = 4000;
+	opts.ack_size = MIN_MSG_BYTES;
+	opts.req_size = 1024;
+	opts.run_time = 0;
+	opts.summary_only = 0;
+	opts.rtprio = 0;
+	opts.tracing = 0;
+	opts.verify = 0;
+	opts.rdma_size = 0;
+	opts.use_cong_monitor = 1;
+	opts.rdma_use_fence = 1;
+	opts.rdma_cache_mrs = 0;
+	opts.rdma_alignment = 0;
+	opts.rdma_key_o_meter = 0;
+	opts.show_params = 0;
+	opts.connect_retries = 0;
+	opts.show_perfdata = 0;
+
+	while(1) {
+		int c, index;
+
+		c = getopt_long(argc, argv, "+a:cD:d:hp:q:Rr:s:t:T:vVz",
+				long_options, &index);
+		if (c == -1)
+			break;
+
+		switch(c) {
+			case 'a':
+				opts.ack_size = parse_ull(optarg, (uint32_t)~0);
+				break;
+			case 'c':
+				/* Start only one set of soakers even when
+				 * -c is repeated; a second call would leave
+				 * the first set running untracked. */
+				if (!soak_arr)
+					soak_arr = start_soakers();
+				break;
+			case 'D':
+				opts.rdma_size = parse_ull(optarg, (uint32_t)~0);
+				break;
+			case 'd':
+				opts.req_depth = parse_ull(optarg,(uint32_t)~0);
+				break;
+			case 'p':
+				opts.starting_port = parse_ull(optarg,
+							       (uint16_t)~0);
+				break;
+			case 'q':
+				opts.req_size = parse_ull(optarg, (uint32_t)~0);
+				break;
+			case 'R':
+				opts.rtprio = 1;
+				break;
+			case 'r':
+				opts.receive_addr = parse_addr(optarg);
+				break;
+			case 's':
+				opts.send_addr = parse_addr(optarg);
+				break;
+			case 't':
+				opts.nr_tasks = parse_ull(optarg,
+							  (uint16_t)~0);
+				break;
+			case 'T':
+				opts.run_time = parse_ull(optarg, (uint32_t)~0);
+				break;
+			case 'z':
+				opts.summary_only = 1;
+				break;
+			case 'v':
+				opts.verify = 1;
+				break;
+			case 'V':
+				opts.tracing = 1;
+				break;
+			case OPT_USE_CONG_MONITOR:
+				opts.use_cong_monitor = parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_USE_ONCE:
+				opts.rdma_use_once = parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_USE_GET_MR:
+				opts.rdma_use_get_mr = parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_USE_FENCE:
+				opts.rdma_use_fence = parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_CACHE_MRS:
+				opts.rdma_cache_mrs = parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_USE_NOTIFY:
+				/* Argument is parsed, the value is unused. */
+				(void) parse_ull(optarg, 1);
+				break;
+			case OPT_RDMA_ALIGNMENT:
+				opts.rdma_alignment = parse_ull(optarg, sys_page_size);
+				break;
+			case OPT_RDMA_KEY_O_METER:
+				opts.rdma_key_o_meter = 1;
+				break;
+			case OPT_SHOW_PARAMS:
+				opts.show_params = 1;
+				break;
+			case OPT_CONNECT_RETRIES:
+				opts.connect_retries = parse_ull(optarg, (uint32_t)~0);
+				break;
+			case OPT_PERFDATA:
+				opts.show_perfdata = 1;
+				break;
+			case 'h':
+			case '?':
+			default:
+				usage();
+				break;
+		}
+	}
+
+	/* 0xff here means the option was not given: derive it from
+	 * --rdma-cache-mrs.  An explicit value that contradicts
+	 * --rdma-cache-mrs is an error. */
+	if (opts.rdma_use_once == 0xff)
+		opts.rdma_use_once = !opts.rdma_cache_mrs;
+	else if (opts.rdma_cache_mrs && opts.rdma_use_once)
+		die("option --rdma-cache-mrs conflicts with --rdma-use-once\n");
+	if (opts.rdma_use_get_mr == 0xff)
+		opts.rdma_use_get_mr = opts.rdma_cache_mrs;
+	else if (opts.rdma_cache_mrs && !opts.rdma_use_get_mr)
+		die("option --rdma-cache-mrs conflicts with --rdma-use-get-mr=0\n");
+
+	/* the passive parent will read options off the wire */
+	if (opts.send_addr == ~0)
+		return passive_parent(opts.receive_addr, opts.starting_port,
+				      soak_arr);
+
+	/* the active parent verifies and sends its options */
+	check_size(opts.ack_size, ~0, MIN_MSG_BYTES, "ack size", "-a");
+	check_size(opts.req_size, ~0, MIN_MSG_BYTES, "req size", "-q");
+
+	/* defaults */
+	if (opts.req_depth == ~0)
+		opts.req_depth = 1;
+	if (opts.nr_tasks == (uint16_t)~0)
+		opts.nr_tasks = 1;
+
+	if (opts.rdma_size && !check_rdma_support(&opts))
+		die("RDMA not supported by this kernel\n");
+
+	/* We require RDMA to be multiples of the page size for now.
+	 * this is just to simplify debugging, but eventually we
+	 * need to support rdma sizes from 1 to 1meg byte
+	 */
+	if (opts.rdma_size && 0)
+		opts.rdma_size = (opts.rdma_size + 4095) & ~4095;
+
+	opt = opts;
+	return active_parent(&opts, soak_arr);
+}
+
+/*
+ * These are completely stupid.  options.c should be removed.
+ */
+void print_usage(int durr) { }	/* empty stub; the argument is ignored */
+void print_version() { }	/* empty stub */
diff --git a/rds-tools.spec b/rds-tools.spec
new file mode 100644
index 0000000..e49a728
--- /dev/null
+++ b/rds-tools.spec
@@ -0,0 +1,38 @@
+Summary: RDS support tools 
+Name: rds-tools
+Version: 1.4
+Release: 1
+License: GPL/BSD
+Group: Applications/Internet
+URL: http://oss.oracle.com/projects/rds/
+Source: rds-tools-%{version}-%{release}.tar.gz
+BuildRoot: /var/tmp/rds-tools-%{version}-%{release}
+
+%description
+rds-tools is a collection of support tools for the RDS socket API.
+
+%prep
+%setup -n rds-tools-%{version}-%{release}
+ 
+%build
+%configure
+make %{?_smp_mflags}
+
+%install
+rm -rf $RPM_BUILD_ROOT
+make DESTDIR=$RPM_BUILD_ROOT install
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%{_bindir}/*
+%{_mandir}/*
+%{_includedir}/*
+
+%changelog
+* Sun Nov 25 2007 Vladimir Sokolovsky <vlad at mellanox.co.il>
+- Use DESTDIR
+* Mon Oct 27 2006 Zach Brown <zach.brown at oracle.com>
+- initial version
diff --git a/rds-tools.spec.in b/rds-tools.spec.in
new file mode 100644
index 0000000..6dd8f32
--- /dev/null
+++ b/rds-tools.spec.in
@@ -0,0 +1,38 @@
+Summary: RDS support tools 
+Name: rds-tools
+Version: @VERSION@
+Release: @RELEASE@
+License: GPL/BSD
+Group: Applications/Internet
+URL: http://oss.oracle.com/projects/rds/
+Source: rds-tools-%{version}-%{release}.tar.gz
+BuildRoot: /var/tmp/rds-tools-%{version}-%{release}
+
+%description
+rds-tools is a collection of support tools for the RDS socket API.
+
+%prep
+%setup -n rds-tools-%{version}-%{release}
+ 
+%build
+%configure
+make %{?_smp_mflags}
+
+%install
+rm -rf $RPM_BUILD_ROOT
+make DESTDIR=$RPM_BUILD_ROOT install
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%{_bindir}/*
+%{_mandir}/*
+%{_includedir}/*
+
+%changelog
+* Sun Nov 25 2007 Vladimir Sokolovsky <vlad at mellanox.co.il>
+- Use DESTDIR
+* Mon Oct 27 2006 Zach Brown <zach.brown at oracle.com>
+- initial version
diff --git a/rds-tools.txt b/rds-tools.txt
new file mode 100644
index 0000000..2dac8b4
--- /dev/null
+++ b/rds-tools.txt
@@ -0,0 +1,39 @@
+
+
+So, rds-get-stats is easy and I already have it done.  we'd just import
+that.
+
+rds-gen would just send down a socket.  I'm hoping for options like:
+
+ -s addr:port
+	to bind the source address
+ -d addr:port
+	dest to send to, maybe just round-robin between multiple to
+	start?
+ -m units
+	the size of each sent message
+ -b units
+	the size of the socket buffer
+ -5
+	include an md5sum at the tail of each message
+ -f file
+	read from a file until eof
+ -p units
+	send from a memory pool of the given length
+ -S file
+	put the -p pool in this mmaped/mlocked file, use sendfile
+ -l units
+	only send this many bytes total
+ -i timespec
+	output vmstat-like line at this interval
+
+I guess that gives us enough to chew on for now :)  I want this stuff to
+be dirt simple.  trivial arg parser helpers, maybe some list.h from the
+kernel, no glib complexity explosion.  I guess I could send you some
+snippets of code along those lines.
+
+Oh, and I guess we'll need a little helper amongst the tools to get
+pf_rds and sol_rds from /proc/sys/net/rds/.
+
+- z
+
diff --git a/rds.7 b/rds.7
new file mode 100644
index 0000000..1bfc1a2
--- /dev/null
+++ b/rds.7
@@ -0,0 +1,445 @@
+.TH RDS 7
+.SH NAME
+RDS \- Reliable Datagram Sockets
+.SH
+SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netinet/in.h>
+.fi
+.SH DESCRIPTION
+This is an implementation of the RDS socket API. It provides reliable,
+in-order datagram delivery between sockets over a variety of transports.
+.PP
+Currently, RDS can be transported over Infiniband, and loopback.
+RDS over TCP is disabled, but will be re-enabled in the near future.
+.PP
+RDS uses standard
+.B AF_INET
+addresses as described in
+.BR ip (7)
+to identify end points.
+.\"------------------------------------------------------------------
+.SS Socket Creation
+RDS is still in development and as such does not have a reserved protocol
+family constant. Applications must read the string representation of the
+protocol family value from the
+.B pf_rds
+sysctl parameter file described below.
+.PP
+.nf
+.B rds_socket = socket(pf_rds, SOCK_SEQPACKET, 0);
+.fi
+.PP
+.\"------------------------------------------------------------------
+.SS Socket Options
+RDS sockets support a number of socket options through the
+.BR setsockopt (2)
+and
+.BR getsockopt (2)
+calls. The following generic options (with socket level
+.BR SOL_SOCKET )
+are of specific importance:
+.TP
+.B SO_RCVBUF
+Specifies the size of the receive buffer. See section on
+"Congestion Control" below.
+.TP
+.B SO_SNDBUF
+Specifies the size of the send buffer. See "Message Transmission"
+below.
+.TP
+.B SO_SNDTIMEO
+Specifies the send timeout when trying to enqueue a message on a
+socket with a full queue in blocking mode.
+.PP
+In addition to these, RDS supports a number of protocol specific
+options (with socket level
+.BR SOL_RDS ).
+Just as with the RDS protocol family, an official value has not been
+assigned yet, so the kernel will assign a value dynamically.
+The assigned value can be retrieved from the
+.B sol_rds
+sysctl parameter file.
+.PP
+RDS specific socket options will be described in a separate section
+below.
+.\"------------------------------------------------------------------
+.SS Binding
+A new RDS socket has no local address when it is first returned from
+.BR socket (2).
+It must be bound to a local address by calling
+.BR bind (2)
+before any messages can be sent or received. This will also attach the
+socket to a specific transport, based on the type of interface the
+local address is attached to.  From that point on, the socket can only
+reach destinations which are available through this transport.
+.PP
+For instance, when binding to the address of an Infiniband interface
+such as
+.BR ib0 ,
+the socket will use the Infiniband transport. If RDS is not able
+to associate a transport with the given address, it will return
+.BR EADDRNOTAVAIL .
+.PP
+An RDS socket can only be bound to one address and only one socket can
+be bound to a given address/port pair. If no port is specified in the
+binding address then an unbound port is selected at random.
+.PP
+RDS does not allow the application to bind a previously bound socket
+to another address. Binding to the wildcard address
+.B INADDR_ANY
+is not permitted either.
+.\"------------------------------------------------------------------
+.SS Connecting
+The default mode of operation for RDS is to use unconnected sockets,
+and specify a destination address as an argument to
+.BR sendmsg .
+However, RDS allows sockets to be connected to a remote end point using
+.BR connect (2).
+If a socket is connected, calling
+.BR sendmsg
+without specifying a destination address will use the previously given
+remote address.
+.\"------------------------------------------------------------------
+.SS Congestion Control
+RDS does not have explicit congestion control like common
+streaming protocols such as TCP. However, sockets have two queue limits
+associated with them; the send queue size and the receive queue size.
+Messages are accounted based on the number of bytes of payload.
+.PP
+The send queue size limits how much data local processes can queue on
+a local socket (see the following section). If that limit is exceeded,
+the kernel will not accept further messages until the queue is drained
+and messages have been delivered to and acknowledged by the remote host.
+.PP
+The receive queue size limits how much data RDS will put on the receive
+queue of a socket before marking the socket as
+.IR congested .
+When a socket becomes congested, RDS will send a
+.I congestion map update
+to the other participating hosts, who are then expected to stop sending
+more messages to this port.
+.PP
+There is a timing window during which a remote host can still continue
+to send messages to a congested port; RDS solves this by accepting
+these messages even if the socket's receive queue is already over
+the limit.
+.PP
+As the application pulls incoming messages off the receive queue using
+.BR recvmsg (2),
+the number of bytes on the receive queue will eventually
+drop below the receive queue size, at which point the port is then
+marked uncongested, and another congestion update is sent to all
+participating hosts. This tells them to allow applications to send
+additional messages to this port.
+.PP
+A given RDS socket has limited transmit and receive buffer space.
+The default values for the send and receive buffer size are the
+system wide socket send and receive buffer sizes, which are
+controlled by the
+.B wmem_default
+and
+.B rmem_default
+sysctls, respectively. They can be tuned by the application through the
+.B SO_SNDBUF
+and
+.B SO_RCVBUF
+socket options.
+.PP
+.\"------------------------------------------------------------------
+.SS Blocking Behavior
+The
+.BR sendmsg (2)
+and
+.BR recvmsg (2)
+calls can block in a variety of situations.
+Whether a call blocks or returns with an error depends on the non-blocking
+setting of the file descriptor and the
+.B MSG_DONTWAIT
+message flag. If the file descriptor is set to blocking mode (which is the
+default), and the
+.B MSG_DONTWAIT
+flag is
+.I not
+given, the call will block.
+.PP
+In addition, the
+.B SO_SNDTIMEO
+and
+.B SO_RCVTIMEO
+socket options can be used to specify a timeout (in seconds) after
+which the call will abort waiting, and return an error. The default
+timeout is 0, which tells RDS to block indefinitely.
+.\"------------------------------------------------------------------
+.SS Message Transmission
+Messages may be sent using
+.BR sendmsg (2)
+once the RDS socket is bound. Message length cannot exceed 4 gigabytes
+as the wire protocol uses an unsigned 32 bit integer to express the
+message length.
+.PP
+RDS does not support out of band data. Applications are allowed to
+send to unicast addresses only; broadcast or multicast are not
+supported.
+.PP
+A successful
+.BR sendmsg (2)
+call puts the message in the socket's transmit queue where it will
+remain until either the destination acknowledges that the message is
+no longer in the network or the application removes the message from
+the send queue.
+.PP
+Messages can be removed from the send queue with the
+RDS_CANCEL_SENT_TO socket option described below.
+.PP
+While a message is in the transmit queue its payload bytes are accounted for.
+If an attempt is made to send a message while there is not sufficient
+room on the transmit queue, the call will either block or return
+.BR EAGAIN .
+.PP
+When trying to send to a destination that is marked congested (see above),
+the call will either block or return
+.BR ENOBUFS .
+.PP
+A message sent with no payload bytes will not consume any space in the
+destination's send buffer but will result in a message receipt on the
+destination. The receiver will not get any payload data but will be able
+to see the sender's address.
+.PP
+Messages sent to a port to which no socket is bound will be silently
+discarded by the destination host. No error messages are reported
+to the sender.
+.\"------------------------------------------------------------------
+.SS Message Receipt
+Messages may be received with
+.BR recvmsg (2)
+on an RDS socket once it is bound to a source address. RDS will return
+messages in-order, i.e. messages from the same sender will arrive in
+the same order in which they were sent.
+.PP
+The address of the sender will be returned in the
+.B sockaddr_in
+structure pointed to by the
+.B msg_name
+field, if set.
+.PP
+If the
+.B MSG_PEEK
+flag is given, the first message on the receive queue is returned without
+removing it from the queue.
+.PP
+The memory consumed by messages waiting for delivery does not limit
+the number of messages that can be queued for receive. RDS does attempt
+to perform congestion control as described in the section above.
+.PP
+If the length of the message exceeds the size of the buffer provided to
+.BR recvmsg (2),
+then the remainder of the bytes in the message are discarded and the
+.BR MSG_TRUNC
+flag is set in the msg_flags field. In this truncating case
+.BR recvmsg (2)
+will still return the number of bytes copied, not the length of the entire message.
+If
+.BR MSG_TRUNC
+is set in the flags argument to
+.BR recvmsg (2),
+then it will return the number of bytes in the entire message. Thus one
+can examine the size of the next message in the receive queue without incurring
+a copying overhead by providing a zero length buffer and setting
+.BR MSG_PEEK " and " MSG_TRUNC
+in the flags argument.
+.PP
+The sending address of a zero-length message will still be provided in the
+.B msg_name
+field.
+.\"------------------------------------------------------------------
+.SS Control Messages
+RDS uses control messages (a.k.a. ancillary data) through the
+.B msg_control
+and
+.B msg_controllen
+fields in
+.BR sendmsg (2)
+and
+.BR recvmsg (2).
+Control messages generated by RDS have a
+.BR cmsg_level
+value of
+.BR sol_rds .
+Most control messages are related to the zerocopy interface added in
+RDS version 3, and are described in
+.BR rds-rdma (7).
+.PP
+The only exception is the
+.BR RDS_CMSG_CONG_UPDATE
+message, which is described in the following section.
+.\"------------------------------------------------------------------
+.SS Polling
+RDS supports the
+.BR poll (2)
+interface in a limited fashion.
+.B POLLIN
+is returned when there is a message (either a proper RDS message,
+or a control message) waiting in the socket's receive queue.
+.B POLLOUT
+is always returned while there is room on the socket's send queue.
+.PP
+Sending to congested ports requires special handling. When an application
+tries to send to a congested destination, the system call will return
+.BR ENOBUFS .
+However, it cannot poll for
+.BR POLLOUT ,
+as there is probably still room on the transmit queue, so the call to
+.BR poll (2)
+would return immediately, even though the destination is still congested.
+.PP
+There are two ways of dealing with this situation. The first is to
+simply poll for
+.BR POLLIN .
+By default, a process sleeping in
+.BR poll (2)
+is always woken up when the congestion map is updated,
+and thus the application can retry any previously congested
+sends.
+.PP
+The second option is explicit congestion monitoring, which
+gives the application more fine-grained control.
+.PP
+With explicit monitoring, the application polls for
+.B POLLIN
+as before, and additionally uses the
+.BR RDS_CONG_MONITOR
+socket option to install a 64bit mask value in the socket, where each
+bit corresponds to a group of ports. When a congestion update arrives,
+RDS checks the set of ports that became uncongested against the bit mask
+installed in the socket. If they overlap, a control message is
+enqueued on the socket, and the application is woken up. When it calls
+.BR recvmsg (2),
+it will be given the control message containing the bitmap
+that was enqueued on the socket.
+.PP
+The congestion monitor bitmask can be set and queried using
+.BR setsockopt (2)
+with
+.BR RDS_CONG_MONITOR ,
+and a pointer to the 64bit mask variable.
+.PP
+Congestion updates are delivered to the application via
+.B RDS_CMSG_CONG_UPDATE
+control messages. These control messages are always delivered
+by themselves (or possibly with additional control messages), but never
+along with an RDS data message. The
+.BR cmsg_data
+field of the control message is an 8 byte datum containing the
+64bit mask value.
+.PP
+Applications can use the following macros to test for and set bits
+in the bitmask:
+.PP
+.nf
+#define RDS_CONG_MONITOR_SIZE   64
+#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int) port) % RDS_CONG_MONITOR_SIZE)
+#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
+.fi
+.PP
+.\"------------------------------------------------------------------
+.SS Canceling Messages
+An application can cancel (flush) messages from the send queue using
+the
+.BR RDS_CANCEL_SENT_TO
+socket option with
+.BR setsockopt (2).
+This call takes an optional
+.B sockaddr_in
+address structure as argument. If given, only messages to the destination
+specified by this address are discarded. If no address is given, all
+pending messages are discarded.
+.PP
+Note that this affects messages that have not yet been transmitted
+as well as messages that have been transmitted, but for which no
+acknowledgment from the remote host has been received yet.
+.\"------------------------------------------------------------------
+.SS Reliability
+If
+.BR sendmsg (2)
+succeeds, RDS guarantees that the  message  will  be visible  to
+.BR recvmsg (2)
+on a socket bound to the destination address as long as that
+destination socket remains open.
+.PP
+If there is no socket bound on the  destination,  the  message  is
+silently  dropped.   If  the sending RDS can't be sure that there is no
+socket bound then it will try to send the message indefinitely until it
+can be sure or the sent message is canceled.
+.PP
+If  a socket is closed then all pending sent messages on the socket are
+canceled and may or may not be seen by the receiver.
+.PP
+The RDS_CANCEL_SENT_TO socket option can be used to cancel all  pending
+messages to a given destination.
+.PP
+If  a  receiving socket is closed with pending messages then the sender
+considers those messages as  having  left  the  network	and  will  not
+retransmit them.
+.PP
+A  message will only be seen by
+.BR recvmsg (2)
+once, unless
+.B MSG_PEEK
+was specified. Once the message has been delivered it is removed
+from the sending socket's transmit queue.
+.PP
+All messages sent from the same socket to the same destination will
+be delivered in the order they're sent. Messages sent from different
+sockets, or to different destinations, may be delivered in any order.
+.\"------------------------------------------------------------------
+.SH SYSCTL VALUES
+These parameters may only be accessed through their files in
+.BR /proc/sys/net/rds .
+Access through
+.BR sysctl (2)
+is not supported.
+.TP
+.B pf_rds
+This file contains the string representation of the protocol family
+constant passed to
+.BR socket (2)
+to create a new RDS socket.
+.TP
+.B sol_rds
+This file contains the string representation of the socket level parameter
+that is passed to
+.BR getsockopt (2)
+and
+.BR setsockopt (2)
+to manipulate RDS socket options.
+.TP
+.BR max_unacked_bytes " and " max_unacked_packets
+These parameters are used to tune the generation of acknowledgements. By
+default, the system receiving RDS messages does not send back explicit
+acknowledgements unless it transmits a message of its own (in which
+case the ACK is piggybacked onto the outgoing message), or when the sending
+system requests an ACK.
+.IP
+However, the sender needs to see an ACK from time to time so that it
+can purge old messages from the send queue. The unacked bytes and
+packet counters are used to keep track of how much data has been
+sent without requesting an ACK. The default is to request an acknowledgement
+every 16 packets, or every 16 MB, whichever comes first.
+.TP
+.BR reconnect_delay_min_ms " and " reconnect_delay_max_ms
+RDS uses host-to-host connections to transport RDS messages (both for the TCP
+and the Infiniband transport). If this connection breaks, RDS will try to
+re-establish the connection. Because this reconnect may be triggered by
+both hosts at the same time and fail, RDS uses a random backoff before attempting
+a reconnect. These two parameters specify the minimum and maximum delay in
+milliseconds. The default values are 1 and 1000, respectively.
+.SH SEE ALSO
+.BR rds-rdma (7),
+.BR socket (2),
+.BR bind (2),
+.BR sendmsg (2),
+.BR recvmsg (2),
+.BR getsockopt (2),
+.BR setsockopt (2).
diff --git a/rdstool.h b/rdstool.h
new file mode 100644
index 0000000..cc7988f
--- /dev/null
+++ b/rdstool.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * tools header stuff
+ */
+
+#ifndef __RDS_TOOL_H
+#define __RDS_TOOL_H
+
+#include <netinet/in.h>
+
+#include "kernel-list.h"
+#include "pfhack.h"
+
+#ifndef AF_RDS
+# define AF_RDS OFFICIAL_PF_RDS	/* used only when AF_RDS is not already defined */
+#endif
+#ifndef PF_RDS
+# define PF_RDS AF_RDS		/* protocol family is an alias of the address family */
+#endif
+#ifndef SOL_RDS
+# define SOL_RDS OFFICIAL_SOL_RDS	/* used only when SOL_RDS is not already defined */
+#endif
+
+#define RDS_TOOL_BASE_OPTS ":s:m:f:i:-:vqhV"	/* NOTE(review): presumably a getopt(3) spec handed to parse_options() - confirm */
+#define RDS_SINK_OPTS	/* expands to nothing */
+#define RDS_GEN_OPTS "d:b:l:"
+
+#define RDS_DEFAULT_MSG_SIZE 4096
+
+#define verbosef(lvl, f, fmt, a...) do { /* fprintf to f only if the global verbose >= lvl */ \
+	if (verbose >= (lvl)) \
+		fprintf((f), fmt, ##a); \
+} while (0)
+
+struct rds_endpoint {
+	struct list_head re_item;	/* list linkage; NOTE(review): presumably chained on rds_context.rc_daddrs - confirm */
+	char *re_name;
+	struct sockaddr_in re_addr;	/* IPv4 socket address (type from <netinet/in.h>) */
+	int re_fd;
+};
+
+struct rds_context {
+	struct rds_endpoint *rc_saddr;	/* NOTE(review): presumably the local (source) endpoint - confirm */
+	struct list_head rc_daddrs;	/* NOTE(review): presumably a list of destination endpoints - confirm */
+	const char *rc_filename;
+	uint32_t rc_msgsize;		/* NOTE(review): presumably defaults to RDS_DEFAULT_MSG_SIZE - confirm in options.c */
+	uint64_t rc_total;
+};
+
+/* Set by parse_options() */
+extern char *progname;
+extern unsigned int verbose;
+
+extern int parse_options(int argc, char *argv[], const char *opts,
+			 struct rds_context *ctxt);
+extern int rds_bind(struct rds_context *ctxt);
+extern int dup_file(struct rds_context *ctxt, int fd, int flags);
+extern int setup_signals(void);
+extern int runningp(void);
+
+/* stats.c */
+extern int stats_init(int delay);	/* delay: seconds between rows, 0 disables stats; returns 0 or -errno */
+extern void stats_extended(int extendedp);	/* non-zero adds the bytes read/written columns */
+extern void stats_start(void);	/* print the column header and start the first interval */
+extern void stats_print(void);	/* print one row if the interval timer has fired, else do nothing */
+extern void stats_total(void);	/* print the cumulative totals */
+
+extern void stats_add_recv(uint64_t bytes);	/* counts bytes plus one packet */
+extern void stats_add_send(uint64_t bytes);	/* counts bytes plus one packet */
+extern uint64_t stats_get_send(void);	/* cumulative bytes sent */
+extern void stats_add_read(uint64_t bytes);	/* counts bytes only */
+extern void stats_add_write(uint64_t bytes);	/* counts bytes only */
+
+
+/* Provided by C files with main() */
+extern void print_usage(int rc);
+extern void print_version(void);
+#endif  /* __RDS_TOOL_H */
diff --git a/stap/README b/stap/README
new file mode 100644
index 0000000..d74f0fb
--- /dev/null
+++ b/stap/README
@@ -0,0 +1,15 @@
+SystemTap script for RDS
+
+SystemTap: http://sourceware.org/systemtap/
+SystemTap wiki: http://sourceware.org/systemtap/wiki
+
+To use SystemTap for tracing RDS, please ensure you
+have debugging symbols available for both your installed kernel
+and the RDS module. These usually take the form of
+*-debuginfo RPMs, and may be available via your distro's
+update repository, a distro repository disabled by
+default, or via your distro's website.
+
+Please send any comments or improvement patches to
+rds-devel at oss.oracle.com.
+
diff --git a/stap/rds.stp b/stap/rds.stp
new file mode 100644
index 0000000..4baaf0e
--- /dev/null
+++ b/stap/rds.stp
@@ -0,0 +1,35 @@
+/*
+probe module("rds").function("rds_*")
+{
+	printf("RDS %s\n", pp())
+}
+*/
+
+global reads
+
+probe begin
+{
+	reads <<< 0	// NOTE(review): seeds the aggregate with one zero sample, presumably so @hist_log() never sees it empty - confirm
+}
+
+probe module("rds").function("rds_recvmsg").return
+{
+	reads <<< $return	// sample the probed function's return value via the $return target variable
+}
+
+probe module("rds").function("rds_send_pong").return
+{
+	reads <<< $return	// NOTE(review): return value is folded into the same "bytes received" histogram - confirm intent
+	//println(caller())
+}
+
+probe timer.sec(5)
+{
+	println("RDS bytes received")
+	print(@hist_log(reads))	// logarithmic histogram of all samples so far; `reads` is never cleared
+}
+
+probe end
+{
+	printf("end!\n")	// printed once when the script session ends
+}
diff --git a/stats.c b/stats.c
new file mode 100644
index 0000000..daaabc8
--- /dev/null
+++ b/stats.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2006 Oracle.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * stats.c - Print stats at an interval
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <arpa/inet.h>
+
+#include "kernel-list.h"
+#include "rdstool.h"
+
+static int stats_delay = 0;  /* Delay in seconds */
+static int print_extended = 0; /* Print read/write stats? */
+static volatile sig_atomic_t time_to_print = 0; /* Set by the SIGALRM handler */
+
+struct rds_tool_stats {	/* each counter is kept twice: running total + current interval */
+	uint64_t rs_send_bytes;
+	uint64_t rs_send_bytes_interval;	/* all *_interval fields are zeroed by clear_interval() */
+	uint64_t rs_send_packets;		/* incremented by one per stats_add_send() call */
+	uint64_t rs_send_packets_interval;
+	uint64_t rs_recv_bytes;
+	uint64_t rs_recv_bytes_interval;
+	uint64_t rs_recv_packets;		/* incremented by one per stats_add_recv() call */
+	uint64_t rs_recv_packets_interval;
+	uint64_t rs_read_bytes;			/* read/write track bytes only, no packet count */
+	uint64_t rs_read_bytes_interval;
+	uint64_t rs_write_bytes;
+	uint64_t rs_write_bytes_interval;
+} tool_stats;
+
+#define inc_net_stat(type, val)  do { /* add val bytes and one packet to total and interval */ \
+	tool_stats.rs_##type##_bytes += val; \
+	tool_stats.rs_##type##_bytes_interval += val; \
+	tool_stats.rs_##type##_packets += 1; \
+	tool_stats.rs_##type##_packets_interval += 1; \
+} while (0)
+
+#define inc_io_stat(type, val)  do { /* add val bytes to total and interval; no packet count */ \
+	tool_stats.rs_##type##_bytes += val; \
+	tool_stats.rs_##type##_bytes_interval += val; \
+} while (0)
+
+#define clear_interval() do { /* zero every per-interval counter; totals are left alone */ \
+	tool_stats.rs_send_bytes_interval = 0; \
+	tool_stats.rs_recv_bytes_interval = 0; \
+	tool_stats.rs_send_packets_interval = 0; \
+	tool_stats.rs_recv_packets_interval = 0; \
+	tool_stats.rs_read_bytes_interval = 0; \
+	tool_stats.rs_write_bytes_interval = 0; \
+} while (0)
+
+static void handler(int signum)	/* installed for SIGALRM by setup_alarm(); signum is unused */
+{
+	time_to_print = 1;	/* only raise the flag; stats_print() does the actual output */
+}
+
+static int setup_alarm(void)
+{
+	struct sigaction sa;
+	int saved_errno;
+
+	/* Plain handler: empty mask and no flags (so no SA_RESTART). */
+	sa.sa_handler = handler;
+	sa.sa_flags = 0;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(SIGALRM, &sa, NULL) == 0)
+		return 0;
+
+	saved_errno = errno;	/* capture before the message is printed */
+	verbosef(0, stderr,
+		 "%s: Unable to initialize timer: %s\n",
+		 progname, strerror(saved_errno));
+	return -saved_errno;	/* negative errno, as callers of stats_init() receive */
+}
+
+void stats_add_read(uint64_t bytes)
+{
+	inc_io_stat(read, bytes);	/* adds bytes to the read total and the read interval counter */
+}
+
+void stats_add_write(uint64_t bytes)
+{
+	inc_io_stat(write, bytes);	/* adds bytes to the write total and the write interval counter */
+}
+
+void stats_add_send(uint64_t bytes)
+{
+	inc_net_stat(send, bytes);	/* adds bytes and counts one sent packet, total and interval */
+}
+
+uint64_t stats_get_send(void)
+{
+	return tool_stats.rs_send_bytes;	/* cumulative bytes sent, not the per-interval value */
+}
+
+void stats_add_recv(uint64_t bytes)
+{
+	inc_net_stat(recv, bytes);	/* adds bytes and counts one received packet, total and interval */
+}
+
+static void stats_arm(void)
+{
+	time_to_print = 0;	/* drop the "row due" flag first ... */
+	alarm(stats_delay);	/* ... then request a SIGALRM in stats_delay seconds */
+}
+
+int stats_init(int delay)
+{
+	/* A zero delay leaves periodic reporting off, so no handler is needed. */
+	stats_delay = delay;
+	if (!stats_delay)
+		return 0;
+
+	/* Reporting is on: install the SIGALRM handler and pass back its status. */
+	return setup_alarm();
+}
+
+void stats_extended(int extendedp)
+{
+	print_extended = (extendedp != 0);	/* any non-zero request is stored as 1 */
+}
+
+/* Print the column header row (only at verbose >= 1) and arm the first interval. */
+void stats_start(void)
+{
+	if (stats_delay) {
+		/* No "\n" here: the extended titles must land on the same row. */
+		verbosef(1, stderr, "%19s %19s %19s %19s",
+			 "Bytes sent/s", "Packets sent/s",
+			 "Bytes recv/s", "Packets recv/s");
+		if (print_extended)
+			verbosef(1, stderr, " %19s %19s",
+				 "Bytes read/s", "Bytes written/s");
+		verbosef(1, stderr, "\n");
+		stats_arm();
+	}
+}
+
+static void stats_output(void)	/* print one row of per-second rates for the interval just ended */
+{
+	verbosef(0, stderr,
+		 "%19"PRIu64" %19"PRIu64" %19"PRIu64" %19"PRIu64,
+		 tool_stats.rs_send_bytes_interval / stats_delay,	/* safe: stats_print() only calls this when stats_delay != 0 */
+		 tool_stats.rs_send_packets_interval / stats_delay,
+		 tool_stats.rs_recv_bytes_interval / stats_delay,
+		 tool_stats.rs_recv_packets_interval / stats_delay);
+	if (print_extended)	/* optional read/write columns, same row */
+		verbosef(0, stderr, " %19"PRIu64" %19"PRIu64,
+			 tool_stats.rs_read_bytes_interval / stats_delay,
+			 tool_stats.rs_write_bytes_interval / stats_delay);
+	verbosef(0, stderr, "\n");
+}
+
+void stats_print(void)	/* does nothing unless a row is due */
+{
+	/* Stats enabled, and has SIGALRM fired since the last row? */
+	if (stats_delay && time_to_print) {
+		stats_output();
+		clear_interval();	/* next interval starts from zero */
+		stats_arm();		/* clear the flag and schedule the next SIGALRM */
+	}
+}
+
+void stats_total(void)	/* print the cumulative counters under a "Total:" heading */
+{
+	if (!stats_delay)
+		return;	/* stats are disabled: print nothing */
+
+	verbosef(0, stderr,
+		 "Total:\n"
+		 "%19"PRIu64" %19"PRIu64" %19"PRIu64" %19"PRIu64,
+		 tool_stats.rs_send_bytes,	/* raw totals: unlike stats_output(), no division by stats_delay */
+		 tool_stats.rs_send_packets,
+		 tool_stats.rs_recv_bytes,
+		 tool_stats.rs_recv_packets);
+	if (print_extended)	/* optional read/write columns, same row */
+		verbosef(0, stderr, " %19"PRIu64" %19"PRIu64,
+			 tool_stats.rs_read_bytes,
+			 tool_stats.rs_write_bytes);
+
+	verbosef(0, stderr, "\n");
+}
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ofed/rds-tools.git



More information about the Pkg-ofed-commits mailing list