[Debian-ha-commits] [cluster-glue] 24/73: Imported Upstream version 1.0.7+hg2618

Richard Winters devrik-guest at moszumanska.debian.org
Sat Apr 18 20:24:32 UTC 2015


This is an automated email from the git hooks/post-receive script.

devrik-guest pushed a commit to branch master
in repository cluster-glue.

commit 9aefb34ae22931227e22d73eafa09c451d2e6c75
Author: Richard B Winters <rik at mmogp.com>
Date:   Sat Apr 18 07:48:45 2015 -0400

    Imported Upstream version 1.0.7+hg2618
---
 .hg_archival.txt                         |   2 +-
 .hgtags                                  |   1 +
 cluster-glue-fedora.spec                 |   1 +
 cluster-glue-suse.spec                   |   1 +
 configure.ac                             |  15 +-
 doc/stonith/Makefile.am                  |   3 +-
 doc/stonith/README.vcenter               |  90 +++
 hb_report/hb_report.in                   | 299 ++++++----
 hb_report/utillib.sh                     |  59 ++
 include/clplumbing/cl_log.h              |  22 +
 include/glue_config.h.in                 |   6 +
 lib/clplumbing/cl_log.c                  | 111 ++++
 lib/clplumbing/ipcsocket.c               | 113 ++--
 lib/plugins/lrm/raexecocf.c              |  16 +-
 lib/plugins/stonith/external.c           |   9 +-
 lib/plugins/stonith/external/Makefile.am |   7 +-
 lib/plugins/stonith/external/hetzner     | 132 +++++
 lib/plugins/stonith/external/ipmi        |  13 +-
 lib/plugins/stonith/external/libvirt     | 259 +++++++++
 lib/plugins/stonith/external/rackpdu     |   3 +-
 lib/plugins/stonith/external/sbd         |  53 +-
 lib/plugins/stonith/external/vcenter     | 266 +++++++++
 lib/plugins/stonith/meatware.c           |   2 +
 lib/stonith/Makefile.am                  |   2 +-
 lib/stonith/{sbd.c => sbd-common.c}      | 585 ++++++++-----------
 lib/stonith/sbd-md.c                     | 936 +++++++++++++++++++++++++++++++
 lib/stonith/sbd.h                        | 131 +++--
 lrm/admin/Makefile.am                    |   1 +
 lrm/admin/cibsecret.in                   | 347 ++++++++++++
 lrm/lrmd/Makefile.am                     |   2 +-
 lrm/lrmd/cib_secrets.c                   | 205 +++++++
 lrm/lrmd/lrmd.c                          |  92 ++-
 lrm/lrmd/lrmd.h                          |   5 +
 lrm/test/regression.sh.in                |   2 +-
 34 files changed, 3127 insertions(+), 664 deletions(-)

diff --git a/.hg_archival.txt b/.hg_archival.txt
index 76b6b91..0036b6b 100644
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,2 +1,2 @@
 repo: e3ffdd7ae81c596b2be7e1e110d2c1255161340e
-node: 5e06b2ddd24b37ad6c1c25d958d7a9dda7d02f93
+node: 177de02442d54d435eaf1d4a6ac9e1248845c05e
diff --git a/.hgtags b/.hgtags
index baccb9f..c94a7ce 100644
--- a/.hgtags
+++ b/.hgtags
@@ -58,3 +58,4 @@ f6c2cd2593f365f984ce051db61466738ac05dcd Beta-0.4.9f
 3af80b93d9e5d5e441f3f4c3aad16775ea27d2d9 glue-1.0.5
 1c87a0c58c59fc384b93ec11476cefdbb6ddc1e1 glue-1.0.6
 61200fbe18358e420cdc2037d87e803e150c1eac glue-1.0.7-rc1
+5e06b2ddd24b37ad6c1c25d958d7a9dda7d02f93 glue-1.0.7
diff --git a/cluster-glue-fedora.spec b/cluster-glue-fedora.spec
index 4ad2b71..06a62bb 100644
--- a/cluster-glue-fedora.spec
+++ b/cluster-glue-fedora.spec
@@ -121,6 +121,7 @@ standards, and an interface to common STONITH devices.
 %{_sbindir}/ha_logger
 %{_sbindir}/hb_report
 %{_sbindir}/lrmadmin
+%{_sbindir}/cibsecret
 %{_sbindir}/meatclient
 %{_sbindir}/stonith
 %{_sbindir}/sbd
diff --git a/cluster-glue-suse.spec b/cluster-glue-suse.spec
index 3aafd14..c86d609 100644
--- a/cluster-glue-suse.spec
+++ b/cluster-glue-suse.spec
@@ -206,6 +206,7 @@ fi
 %{_sbindir}/ha_logger
 %{_sbindir}/hb_report
 %{_sbindir}/lrmadmin
+%{_sbindir}/cibsecret
 %{_sbindir}/meatclient
 %{_sbindir}/stonith
 %{_sbindir}/sbd
diff --git a/configure.ac b/configure.ac
index 8ef20dc..0896628 100644
--- a/configure.ac
+++ b/configure.ac
@@ -392,6 +392,14 @@ HA_COREDIR="${localstatedir}/lib/heartbeat/cores"
 AC_DEFINE_UNQUOTED(HA_COREDIR,"$HA_COREDIR", top directory of area to drop core files in)
 AC_SUBST(HA_COREDIR)
 
+LRM_VARLIBDIR="${localstatedir}/lib/heartbeat/lrm"
+AC_DEFINE_UNQUOTED(LRM_VARLIBDIR,"$LRM_VARLIBDIR", LRM directory)
+AC_SUBST(LRM_VARLIBDIR)
+
+LRM_CIBSECRETS="${localstatedir}/lib/heartbeat/lrm/secrets"
+AC_DEFINE_UNQUOTED(LRM_CIBSECRETS,"$LRM_CIBSECRETS", CIB secrets location)
+AC_SUBST(LRM_CIBSECRETS)
+
 AC_DEFINE_UNQUOTED(PILS_BASE_PLUGINDIR,"$libdir/heartbeat/plugins", Default plugin search path)
 AC_DEFINE_UNQUOTED(HA_PLUGIN_DIR,"$libdir/heartbeat/plugins", Where to find plugins)
 AC_DEFINE_UNQUOTED(LRM_PLUGIN_DIR,"$libdir/heartbeat/plugins/RAExec", Where to find LRM plugins)
@@ -643,7 +651,11 @@ AC_CHECK_HEADERS(getopt.h)
 AC_CHECK_HEADERS(glib.h)
 AC_CHECK_HEADERS(grp.h)
 AC_CHECK_HEADERS(limits.h)
-AC_CHECK_HEADERS(linux/errqueue.h)
+AC_CHECK_HEADERS(linux/errqueue.h,,,
+	[#ifdef HAVE_LINUX_TYPES_H
+	 # include <linux/types.h>
+	 #endif
+	])
 AC_CHECK_HEADERS(malloc.h)
 AC_CHECK_HEADERS(netdb.h)
 AC_CHECK_HEADERS(netinet/in.h)
@@ -1336,6 +1348,7 @@ lib/Makefile							\
 lrm/Makefile					        	\
    lrm/lrmd/Makefile				        	\
    lrm/admin/Makefile				        	\
+   lrm/admin/cibsecret				        	\
    lrm/test/Makefile				        	\
    lrm/test/regression.sh					\
    lrm/test/lrmregtest						\
diff --git a/doc/stonith/Makefile.am b/doc/stonith/Makefile.am
index a5b93c6..165a743 100644
--- a/doc/stonith/Makefile.am
+++ b/doc/stonith/Makefile.am
@@ -30,4 +30,5 @@ stdoc_DATA 		= README.bladehpi \
 			  README.riloe \
 			  README.vacm \
 			  README.wti_mpc \
-			  README_kdumpcheck.txt
+			  README_kdumpcheck.txt \
+			  README.vcenter
diff --git a/doc/stonith/README.vcenter b/doc/stonith/README.vcenter
new file mode 100644
index 0000000..e6cc9a5
--- /dev/null
+++ b/doc/stonith/README.vcenter
@@ -0,0 +1,90 @@
+VMware vCenter/ESX STONITH Module
+=================================
+
+1. Intro
+--------
+
+VMware vCenter/ESX STONITH Module is intended to provide STONITH support to
+clusters in VMware Virtual Infrastructures. It is able to deal with virtual
+machines running on physically different HostSystems (e.g. ESX/ESXi) by using
+VMware vSphere Web Services SDK http://www.vmware.com/support/developer/vc-sdk/
+and connecting directly to each HostSystem or through a VMware vCenter: in the
+latter case the module locates the specified virtual machine in the Virtual
+Infrastructure and performs actions required by cluster policies.
+
+2. Software requirements
+------------------------
+
+VMware vSphere CLI, which includes both CLI tools and Perl SDK
+http://www.vmware.com/support/developer/vcli/ . The plugin has been tested with
+version 4.1  http://www.vmware.com/download/download.do?downloadGroup=VCLI41
+
+
+3. vCenter/ESX authentication settings
+--------------------------------------
+
+Create the credentials file with credstore_admin.pl:
+
+/usr/lib/vmware-vcli/apps/general/credstore_admin.pl \
+  -s 10.1.1.1 -u myuser -p mypass
+
+This should create $HOME/.vmware/credstore/vicredentials.xml
+Copy it to a system folder, e.g. /etc
+
+cp -p $HOME/.vmware/credstore/vicredentials.xml /etc
+
+
+4. Testing
+----------
+
+The plugin can be invoked directly to perform an initial connection test
+(replace all the provided sample values):
+
+VI_SERVER=10.1.1.1 \
+  VI_CREDSTORE=/etc/vicredentials.xml \
+  HOSTLIST="hostname1=vmname1;hostname2=vmname2" \
+  RESETPOWERON=0 \
+  /usr/lib/stonith/plugins/external/vcenter gethosts
+
+If everything works correctly you should get:
+
+hostname1
+hostname2
+
+When invoked in this way, the plugin connects to VI_SERVER, authenticates with
+credentials stored in VI_CREDSTORE and tries to retrieve the list of virtual
+machines (case insensitive) matching vmname1 and vmname2 (and any other listed).
+When finished, it reports the list back by mapping virtual machine names to
+hostnames as provided in HOSTLIST. If you see the full list of hostnames as a
+result, then everything is going well. If instead you get a partial or
+empty list, check the parameters.
+
+You can even test "reset", "off" and "on" commands, to test (carefully!) the
+full chain. E.g.
+
+VI_SERVER=10.1.1.1 \
+  VI_CREDSTORE=/etc/vicredentials.xml \
+  HOSTLIST="hostname1=vmname1;hostname2=vmname2" \
+  RESETPOWERON=0 \
+  /usr/lib/stonith/plugins/external/vcenter reset hostname2
+
+In the above examples the target infrastructure is a vCenter with several
+ESXi nodes. The server IP and credentials refer to the vCenter.
+
+5. CRM configuration
+--------------------
+
+The following is a sample procedure to set up STONITH for an HA 2-node cluster
+(replace all the provided sample values):
+
+crm configure primitive vfencing stonith::external/vcenter params \
+  VI_SERVER="10.1.1.1" VI_CREDSTORE="/etc/vicredentials.xml" \
+  HOSTLIST="hostname1=vmname1;hostname2=vmname2" RESETPOWERON="0" \
+  op monitor interval="60s"
+
+crm configure clone Fencing vfencing
+
+crm configure property stonith-enabled="true"
+
+
+
diff --git a/hb_report/hb_report.in b/hb_report/hb_report.in
index 048a20d..3094cc6 100755
--- a/hb_report/hb_report.in
+++ b/hb_report/hb_report.in
@@ -214,10 +214,10 @@ logmarks() {
 
 	for n in $NODES; do
 		if [ "$n" = "`uname -n`" ]; then
-			[ "$THIS_IS_NODE" ] && logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
+			is_node && logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
 		else
 			[ "$ssh_good" ] &&
-				echo $c | ssh $ssh_opts $n
+				ssh $ssh_opts $n "$c"
 		fi
 	done
 }
@@ -274,7 +274,7 @@ is_our_log() {
 		return 3 # this is the last good log
 	fi
 	# have to go further back
-	if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
+	if [ $to_time -eq 0 -o $to_time -ge $first_time ]; then
 		return 1 # include this log
 	else
 		return 0 # don't include this log
@@ -360,6 +360,19 @@ print_logseg() {
 	trap "" 0
 }
 #
+# print some log info (important for crm history)
+#
+loginfo() {
+	local logf=$1
+	local fake=$2
+	local nextpos=`python -c "f=open('$logf');f.seek(0,2);print f.tell()+1"`
+	if [ "$fake" ]; then
+		echo "synthetic:$logf $nextpos"
+	else
+		echo "$logf $nextpos"
+	fi
+}
+#
 # find log/set of logs which are interesting for us
 #
 dumplogset() {
@@ -431,6 +444,7 @@ FROM_TIME=$FROM_TIME
 TO_TIME=$TO_TIME
 USER_NODES="$USER_NODES"
 NODES="$NODES"
+MASTER_NODE="$MASTER_NODE"
 HA_LOG=$HA_LOG
 MASTER_IS_HOSTLOG=$MASTER_IS_HOSTLOG
 DESTDIR=$DESTDIR
@@ -447,14 +461,28 @@ CORES_DIRS="$CORES_DIRS"
 VERBOSITY="$VERBOSITY"
 EOF
 }
-start_remote_collectors() {
-	for node in $NODES; do
-		[ "$node" = "$WE" ] && continue
-		dumpenv | ssh $ssh_opts $node \
-			"cat > $DESTDIR/.env; hb_report __slave $DESTDIR" |
-			(cd $DESTDIR && tar xf -) &
-		SLAVEPIDS="$SLAVEPIDS $!"
-	done
+is_collector() {
+	test "$SLAVE"
+}
+is_node() {
+	test "$THIS_IS_NODE"
+}
+is_master() {
+	! is_collector && test "$WE" = "$MASTER_NODE"
+}
+start_slave_collector() {
+	local node=$1
+
+	if [ "$node" = "$WE" ]; then
+		dumpenv > $DESTDIR/.env
+		hb_report __slave $DESTDIR
+	else
+		ssh $ssh_opts $node \
+			"$SUDO hb_report __slave $DESTDIR" |
+			(cd $DESTDIR && tar xf -)
+	fi &
+
+	SLAVEPIDS="$SLAVEPIDS $!"
 }
 
 #
@@ -514,14 +542,12 @@ getbacktraces() {
 		debug "found backtraces: $flist"
 	}
 }
-pe2png() {
+pe2dot() {
 	local pef=`basename $1`
 	local dotf=`basename $pef .bz2`.dot
-	local pngf=`basename $pef .bz2`.png
 	(
 	cd `dirname $1`
 	ptest -D $dotf -x $pef
-	# dot -Tpng -o $pngf $dotf >/dev/null 2>&1
 	)
 }
 getpeinputs() {
@@ -532,17 +558,16 @@ getpeinputs() {
 		test -d $pe_dir ||
 			continue
 		flist=$(
-			find_files $pe_dir $1 $2 | sed "s,`dirname $pe_dir`/,,g"
+			find_files $pe_dir $1 $2 | sed "s,`dirname $pe_dir`/,,g" |
+				grep -v '[.]last$'
 		)
 		[ "$flist" ] && {
 			(cd `dirname $pe_dir` && tar cf - $flist) | (cd $3 && tar xf -)
 			debug "found `echo $flist | wc -w` pengine input files in $pe_dir"
-			which dot >/dev/null 2>&1 ||
-				info "if you had graphviz, we'd also produce png graphics for all PE files"
 		}
 		if [ `echo $flist | wc -w` -le 20 ]; then
 			for f in $flist; do
-				pe2png $3/$f
+				pe2dot $3/$f
 			done
 		else
 			info "too many PE inputs to create dot files"
@@ -577,7 +602,7 @@ getconfigurations() {
 sys_info() {
 	cluster_info
 	hb_report -V # our info
-	echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"
+	echo "resource-agents: `grep 'Build version:' @OCF_ROOT_DIR@/resource.d/heartbeat/.ocf-shellfuncs`"
 	crm_info
 	pkg_ver $PACKAGES
 	echo "Platform: `uname`"
@@ -814,6 +839,7 @@ combine_logs() {
 	test -x $HA_NOARCHBIN/combine-logs.pl ||
 		warning "cannot combine logs: no $HA_NOARCHBIN/combine-logs.pl"
 	$HA_NOARCHBIN/combine-logs.pl $destdir/*/$HALOG_F > $destdir/$HALOG_F
+	loginfo $destdir/$HALOG_F combined > $destdir/$HALOG_F.info
 	$HA_NOARCHBIN/combine-logs.pl $destdir/*/events.txt > $destdir/events.txt
 }
 
@@ -878,6 +904,88 @@ pickcompress() {
 		COMPRESS_EXT=
 	fi
 }
+# get the right part of the log
+getlog() {
+	local outf
+
+	if [ "$HA_LOG" ]; then  # log provided by the user?
+		[ -f "$HA_LOG" ] || {  # not present
+			is_collector ||  # warning if not on slave
+				warning "$HA_LOG not found; we will try to find log ourselves"
+			HA_LOG=""
+		}
+	fi
+	if [ "$HA_LOG" = "" ]; then
+		HA_LOG=`findlog`
+		[ "$HA_LOG" ] &&
+			cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG`
+	fi
+	if [ "$cnt" ] && [ $cnt -eq $NODECNT ]; then
+		MASTER_IS_HOSTLOG=1
+		info "found the central log!"
+	fi
+
+	if is_node; then
+		outf=$DESTDIR/$WE/$HALOG_F
+	else
+		outf=$DESTDIR/$HALOG_F # we are log server, probably
+	fi
+	if [ -f "$HA_LOG" ]; then
+		if [ "$NO_str2time" ]; then
+			warning "a log found; but we cannot slice it"
+			warning "please install the perl Date::Parse module"
+		elif [ "$CTS" ]; then
+			cts_findlogseg $CTS $HA_LOG > $outf
+		else
+			getstampproc=`find_getstampproc < $HA_LOG`
+			if [ "$getstampproc" ]; then
+				export getstampproc # used by linetime
+				dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf
+				loginfo $HA_LOG > $outf.info
+			else
+				warning "could not figure out the log format of $HA_LOG"
+			fi
+		fi
+	elif [ "$CTS" ]; then
+		cts_findlogseg $CTS > $outf
+	else
+		[ "$MASTER_IS_HOSTLOG" ] ||
+			warning "could not find $HA_LOG on $WE"
+	fi
+}
+#
+# get all other info (config, stats, etc)
+#
+collect_info() {
+	getconfig $DESTDIR/$WE
+	getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE
+	getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/$BT_F
+	getconfigurations $DESTDIR/$WE
+	touch_DC_if_dc $DESTDIR/$WE
+	sanitize $DESTDIR/$WE
+	crmconfig $DESTDIR/$WE
+	check_perms > $DESTDIR/$WE/$PERMISSIONS_F 2>&1
+	sys_info > $DESTDIR/$WE/$SYSINFO_F 2>&1
+	dlm_dump > $DESTDIR/$WE/$DLM_DUMP_F 2>&1
+	sys_stats > $DESTDIR/$WE/$SYSSTATS_F 2>&1
+
+	for l in $EXTRA_LOGS; do
+		[ "$NO_str2time" ] && break
+		[ ! -f "$l" ] && continue
+		if [ "$l" = "$HA_LOG" -a "$l" != "$HALOG_F" ]; then
+			ln -s $HALOG_F $DESTDIR/$WE/`basename $l`
+			continue
+		fi
+		getstampproc=`find_getstampproc < $l`
+		if [ "$getstampproc" ]; then
+			export getstampproc # used by linetime
+			dumplogset $l $FROM_TIME $TO_TIME > $DESTDIR/$WE/`basename $l`
+			loginfo $l > $DESTDIR/$WE/`basename $l`.info
+		else
+			warning "could not figure out the log format of $l"
+		fi
+	done
+}
 finalword() {
 	if [ "$COMPRESS" = "1" ]; then
 		echo "The report is saved in $DESTDIR.tar$COMPRESS_EXT"
@@ -897,14 +1005,14 @@ NO_str2time=""
 t=`str2time "12:00"`
 if [ "$t" = "" ]; then
 	NO_str2time=1
-	[ "$SLAVE" ] ||
+	is_collector ||
 		fatal "please install the perl Date::Parse module"
 fi
 
 #
 # part 1: get and check options; and the destination
 #
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
 	setvarsanddefaults
 	userargs="$@"
 	DESTDIR="$HOME/hb_report-"`date +"%a-%d-%b-%Y"`
@@ -1016,10 +1124,10 @@ else
 	fatal "no stack specific support: $CF_SUPPORT"
 fi
 
-if [ "x$CTS" = "x" -o "x$SLAVE" != "x" ]; then
+if [ "x$CTS" = "x" ] || is_collector; then
 	getlogvars
 	debug "log settings: facility=$HA_LOGFACILITY logfile=$HA_LOGFILE debugfile=$HA_DEBUGFILE"
-elif [ "x$SLAVE" = "x" ]; then
+elif ! is_collector; then
 	ctslog=`findmsg "CTS: Stack:" | awk '{print $1}'`
 	debug "Using CTS control file: $ctslog"
 	USER_NODES=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u  | tr '\\n' ' '`
@@ -1027,7 +1135,7 @@ elif [ "x$SLAVE" = "x" ]; then
 	NODES_SOURCE=user
 fi
 
-if [ "$SLAVE" -a "$3" = logmark ]; then
+if is_collector && [ "$3" = logmark ]; then
 	msg="$4"
 	logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
 	exit
@@ -1035,7 +1143,8 @@ fi
 
 WE=`uname -n`  # who am i?
 THIS_IS_NODE=""
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
+	MASTER_NODE=$WE
 	NODES=`getnodes`
 	debug "nodes: `echo $NODES`"
 fi
@@ -1043,7 +1152,7 @@ NODECNT=`echo $NODES | wc -w`
 if [ "$NODECNT" = 0 ]; then
 	fatal "could not figure out a list of nodes; is this a cluster node?"
 fi
-if echo $NODES | grep -wqs $WE || [ "$SLAVE" ]; then # are we a node?
+if echo $NODES | grep -wqs $WE; then # are we a node?
 	THIS_IS_NODE=1
 fi
 
@@ -1069,11 +1178,11 @@ CONFIGURATIONS="/etc/drbd.conf /etc/drbd.d"
 export CONFIGURATIONS
 
 # this only on master
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
 
 	# if this is not a node, then some things afterwards might
 	# make no sense (not work)
-	if [ -z "$THIS_IS_NODE" -a "$NODES_SOURCE" != user ]; then
+	if ! is_node && [ "$NODES_SOURCE" != user ]; then
 		warning "this is not a node and you didn't specify a list of nodes using -n"
 	fi
 #
@@ -1103,11 +1212,11 @@ if [ "$SLAVE" = "" ]; then
 	}
 	mkdir -p $DESTDIR
 	[ -d $DESTDIR ] || nodistdirectory
-	[ "$ssh_good" ] &&
+	if [ "$ssh_good" ]; then
 		for node in $NODES; do
 			[ "$node" = "$WE" ] && continue
 			ssh $ssh_opts $node "test -d $DESTDIR" && {
-				if [ "$CTS" ]; then # relax a bit for CTS
+				if [ "$FORCE_REMOVE_DEST" -o "$CTS" ]; then # relax a bit for CTS
 					ssh $ssh_opts $node "rm -r $DESTDIR"
 				else
 					test -d $DESTDIR && rmdir $DESTDIR
@@ -1117,9 +1226,23 @@ if [ "$SLAVE" = "" ]; then
 			dumpenv |
 			ssh $ssh_opts $node "mkdir -p $DESTDIR && cat > $DESTDIR/.env"
 		done
+	else
+		if [ -z "$NO_SSH" -a $NODECNT -gt 1 ]; then
+			warning "ssh does not work to all nodes"
+			warning "please use the -u option if you want to supply a password"
+		fi
+	fi
+fi
+
+# only cluster nodes need their own directories
+is_node && mkdir -p $DESTDIR/$WE
+
+if is_collector && [ $VERBOSITY -gt 1 ]; then
+	echo the debug information for node $WE is in $DESTDIR/$WE/debug.out
+	exec 2>>$DESTDIR/$WE/debug.out
 fi
 
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
 #
 # part 3: log marks to be searched for later
 #         important to do this now on _all_ nodes
@@ -1129,111 +1252,36 @@ if [ "$SLAVE" = "" ]; then
 	fi
 fi
 
-# only cluster nodes need their own directories
-[ "$THIS_IS_NODE" ] && mkdir -p $DESTDIR/$WE
-
 #
 # part 4: find the logs and cut out the segment for the period
 #
-if [ "$HA_LOG" ]; then  # log provided by the user?
-	[ -f "$HA_LOG" ] || {  # not present
-		[ "$SLAVE" ] ||  # warning if not on slave
-			warning "$HA_LOG not found; we will try to find log ourselves"
-		HA_LOG=""
-	}
-fi
-if [ "$HA_LOG" = "" ]; then
-	HA_LOG=`findlog`
-	[ "$HA_LOG" ] &&
-		cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG`
-fi
-if [ "$cnt" ] && [ $cnt -eq $NODECNT ]; then
-	MASTER_IS_HOSTLOG=1
-	info "found the central log!"
-fi
 
-if [ "$THIS_IS_NODE" ]; then
-	outf=$DESTDIR/$WE/$HALOG_F
-else
-	outf=$DESTDIR/$HALOG_F # we are log server, probably
-fi
-if [ -f "$HA_LOG" ]; then
-	if [ "$NO_str2time" ]; then
-		warning "a log found; but we cannot slice it"
-		warning "please install the perl Date::Parse module"
-	elif [ "$CTS" ]; then
-		cts_findlogseg $CTS $HA_LOG > $outf
-	else
-		getstampproc=`find_getstampproc < $HA_LOG`
-		if [ "$getstampproc" ]; then
-			export getstampproc # used by linetime
-			dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf
-		else
-			warning "could not figure out the log format of $HA_LOG"
-		fi
-	fi
-elif [ "$CTS" ]; then
-	cts_findlogseg $CTS > $outf
-else
-	[ "$MASTER_IS_HOSTLOG" ] ||
-		warning "could not find $HA_LOG on $WE"
-fi
+# if the master is also a node, getlog is going to be invoked
+# from the collector
+(is_master && is_node) ||
+	getlog
 
-#
-# part 5: start this program on other nodes
-#
-if [ ! "$SLAVE" ]; then
-	if [ "$ssh_good" ]; then
-		start_remote_collectors
-	else
-		if [ -z "$NO_SSH" -a $NODECNT -gt 1 ]; then
-			warning "ssh does not work to all nodes"
-			warning "please use the -u option if you want to supply a password"
-		fi
+if ! is_collector; then
+	# assume that only root can collect data
+	SUDO=""
+	if [ -z "$SSH_USER" -a `id -u` != 0 ] || [ "$SSH_USER" != root ]; then
+		SUDO="sudo -u root -E"
 	fi
-fi
-
-#
-# part 6: get all other info (config, stats, etc)
-#
-if [ "$THIS_IS_NODE" ]; then
-	getconfig $DESTDIR/$WE
-	getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE
-	getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/$BT_F
-	getconfigurations $DESTDIR/$WE
-	touch_DC_if_dc $DESTDIR/$WE
-	sanitize $DESTDIR/$WE
-	crmconfig $DESTDIR/$WE
-	check_perms > $DESTDIR/$WE/$PERMISSIONS_F 2>&1
-	sys_info > $DESTDIR/$WE/$SYSINFO_F 2>&1
-	dlm_dump > $DESTDIR/$WE/$DLM_DUMP_F 2>&1
-	sys_stats > $DESTDIR/$WE/$SYSSTATS_F 2>&1
-
-	for l in $EXTRA_LOGS; do
-		[ "$NO_str2time" ] && break
-		[ ! -f "$l" ] && continue
-		if [ "$l" = "$HA_LOG" -a "$l" != "$HALOG_F" ]; then
-			ln -s $HALOG_F $DESTDIR/$WE/`basename $l`
-			continue
-		fi
-		getstampproc=`find_getstampproc < $l`
-		if [ "$getstampproc" ]; then
-			export getstampproc # used by linetime
-			dumplogset $l $FROM_TIME $TO_TIME > $DESTDIR/$WE/`basename $l`
-		else
-			warning "could not figure out the log format of $l"
-		fi
+	for node in $NODES; do
+		start_slave_collector $node
 	done
 fi
 
 #
-# part 7: endgame:
+# part 5: endgame:
 #         slaves tar their results to stdout, the master waits
 #         for them, analyses results, asks the user to edit the
 #         problem description template, and prints final notes
 #
-if [ "$SLAVE" ]; then
-	(cd $DESTDIR && tar cf - $WE)
+if is_collector; then
+	collect_info
+	[ "$WE" != "$MASTER_NODE" ] &&
+		(cd $DESTDIR && tar cf - $WE)
 else
 	wait $SLAVEPIDS
 	analyze $DESTDIR > $DESTDIR/$ANALYSIS_F
@@ -1254,5 +1302,8 @@ else
 	finalword
 fi
 
-[ "$REMOVE_DEST" = "1" ] &&
-	rm -r $DESTDIR
+if [ "$REMOVE_DEST" = "1" ]; then
+	if is_master || [ "$WE" != "$MASTER_NODE" ]; then
+		rm -r $DESTDIR
+	fi
+fi
diff --git a/hb_report/utillib.sh b/hb_report/utillib.sh
index 96c3c43..6dd99eb 100644
--- a/hb_report/utillib.sh
+++ b/hb_report/utillib.sh
@@ -115,7 +115,9 @@ findmsg() {
 	for d in $syslogdirs; do
 		[ -d $d ] || continue
 		log=`grep -l -e "$mark" $d/$favourites` && break
+		test "$log" && break
 		log=`grep -l -e "$mark" $d/*` && break
+		test "$log" && break
 	done 2>/dev/null
 	[ "$log" ] &&
 		ls -t $log | tr '\n' ' '
@@ -308,6 +310,54 @@ check_perms() {
 #
 # coredumps
 #
+pkg_mgr_list() {
+# list of:
+# regex pkg_mgr
+# no spaces allowed in regex
+	cat<<EOF
+Try:.zypper.install zypper
+EOF
+}
+MYBINARIES="crmd|pengine|lrmd|attrd|cib|mgmtd|stonithd|corosync|libplumb|libpils"
+listpkg_zypper() {
+	local binary=$1 core=$2
+	gdb $binary $core </dev/null 2>&1 |
+	awk -v bins="$MYBINARIES" '
+	n>0 && /^Try: zypper install/ {gsub("\"",""); print $NF}
+	n>0 {n=0}
+	/Missing separate debuginfo/ && match($NF, bins) {n=1}
+	' | sort -u
+}
+fetchpkg_zypper() {
+	debug "get debuginfo packages using zypper: $@"
+	zypper -qn install -C $@ >/dev/null
+}
+find_pkgmgr() {
+	local binary=$1 core=$2
+	pkg_mgr_list |
+	while read regex pkg_mgr; do
+		if gdb $binary $core </dev/null 2>&1 |
+				grep "$regex" > /dev/null; then
+			echo $pkg_mgr
+			break
+		fi
+	done
+}
+get_debuginfo() {
+	local binary=$1 core=$2
+	local pkg_mgr pkgs
+	gdb $binary $core </dev/null 2>/dev/null |
+		grep 'no debugging symbols found' > /dev/null ||
+		return  # no missing debuginfo
+	pkg_mgr=`find_pkgmgr $binary $core`
+	if [ -z "$pkg_mgr" ]; then
+		warning "found core for $binary but there is no debuginfo and we don't know how to get it on this platform"
+		return
+	fi
+	pkgs=`listpkg_$pkg_mgr $binary $core`
+	[ -n "$pkgs" ] &&
+		fetchpkg_$pkg_mgr $pkgs
+}
 findbinary() {
 	random_binary=`which cat 2>/dev/null` # suppose we are lucky
 	binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
@@ -353,6 +403,7 @@ getbt() {
 	for corefile; do
 		absbinpath=`findbinary $corefile`
 		[ x = x"$absbinpath" ] && continue
+		get_debuginfo $absbinpath $corefile
 		echo "====================== start backtrace ======================"
 		ls -l $corefile
 		gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \
@@ -423,6 +474,14 @@ get_crm_nodes() {
 	}
 	'
 }
+get_live_nodes() {
+	if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then
+		fping -a $@ 2>/dev/null
+	else
+		local h
+		for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done
+	fi
+}
 
 #
 # remove values of sensitive attributes
diff --git a/include/clplumbing/cl_log.h b/include/clplumbing/cl_log.h
index edaae93..aa30fcd 100644
--- a/include/clplumbing/cl_log.h
+++ b/include/clplumbing/cl_log.h
@@ -24,6 +24,24 @@
 #define	HA_OK		1
 #define	MAXLINE		(512*10)
 
+/* this is defined by the caller */
+struct logspam {
+	const char *id; /* identifier */
+	int max; /* maximum number of messages ... */
+	time_t window; /* ... within this timeframe */
+	time_t reset_time; /* log new messages after this time */
+	const char *advice; /* what to log in case messages get suppressed */
+};
+
+/* this is internal (opaque to the caller) */
+struct msg_ctrl {
+	struct logspam *lspam; /* caller-defined limits (see struct logspam) */
+	time_t *msg_slots; /* msg slot root (space for lspam->max) */
+	int last; /* last used msg slot [0..lspam->max-1]; -1 on init */
+	int cnt; /* current msg count [0..lspam->max] */
+	time_t suppress_t; /* messages blocked since this time */
+};
+
 struct IPC_CHANNEL;
 
 extern int		debug_level;
@@ -36,6 +54,10 @@ extern int		debug_level;
 
 void		cl_direct_log(int priority, const char* buf, gboolean, const char*, int, TIME_T);
 void            cl_log(int priority, const char * fmt, ...) G_GNUC_PRINTF(2,3);
+void            cl_limit_log(struct msg_ctrl *ml, int priority, const char * fmt, ...) G_GNUC_PRINTF(3,4);
+struct msg_ctrl *cl_limit_log_new(struct logspam *lspam);
+void            cl_limit_log_destroy(struct msg_ctrl *ml);
+void            cl_limit_log_reset(struct msg_ctrl *ml);
 void            cl_perror(const char * fmt, ...) G_GNUC_PRINTF(1,2);
 void		cl_log_enable_stderr(int truefalse);
 void		cl_log_enable_stdout(int truefalse);
diff --git a/include/glue_config.h.in b/include/glue_config.h.in
index f2ff3f8..0850a63 100644
--- a/include/glue_config.h.in
+++ b/include/glue_config.h.in
@@ -27,6 +27,12 @@
 /* top directory of area to drop core files in */
 #undef HA_COREDIR
 
+/* top directory for LRM related files */
+#undef LRM_VARLIBDIR
+
+/* CIB secrets */
+#undef LRM_CIBSECRETS
+
 /* Logging Daemon IPC socket name */
 #undef HA_LOGDAEMON_IPC
 
diff --git a/lib/clplumbing/cl_log.c b/lib/clplumbing/cl_log.c
index a179e40..13c3322 100644
--- a/lib/clplumbing/cl_log.c
+++ b/lib/clplumbing/cl_log.c
@@ -501,6 +501,7 @@ prio2str(int priority)
 		}
 
 static char * syslog_timestamp(TIME_T t);
+static void cl_limit_log_update(struct msg_ctrl *ml, time_t ts);
 
 static void
 append_log(FILE * fp, const char * entity, int entity_pid
@@ -738,6 +739,116 @@ cl_log(int priority, const char * fmt, ...)
 	return;
 }
 
+/*
+ * Log a message only if there were not too many messages of this
+ * kind recently. This is to prevent log spamming in case a
+ * condition persists over a long period of time. The maximum
+ * number of messages for the timeframe and other details are
+ * provided in struct logspam (see cl_log.h).
+ *
+ * Implementation details:
+ * - max number of time_t slots is allocated; slots keep time
+ *   stamps of previous max number of messages
+ * - we check if the difference between now (i.e. new message just
+ *   arrived) and the oldest message is _less_ than the window
+ *   timeframe
+ * - it's up to the user to do cl_limit_log_new and afterwards
+ *   cl_limit_log_destroy, though the latter is usually not
+ *   necessary; the memory allocated with cl_limit_log_new stays
+ *   constant during the lifetime of the process
+ *
+ * NB on Thu Aug  4 15:26:49 CEST 2011:
+ * This interface is very new, use with caution and report bugs.
+ */
+
+struct msg_ctrl *
+cl_limit_log_new(struct logspam *lspam)
+{
+	struct msg_ctrl *ml;
+
+	ml = (struct msg_ctrl *)malloc(sizeof(struct msg_ctrl));
+	if (!ml) {
+		cl_log(LOG_ERR, "%s:%d: out of memory"
+			, __FUNCTION__, __LINE__);
+		return NULL;
+	}
+	ml->msg_slots = (time_t *)calloc(lspam->max, sizeof(time_t));
+	if (!ml->msg_slots) {
+		cl_log(LOG_ERR, "%s:%d: out of memory"
+			, __FUNCTION__, __LINE__);
+		return NULL;
+	}
+	ml->lspam = lspam;
+	cl_limit_log_reset(ml);
+	return ml; /* to be passed later to cl_limit_log() */
+}
+
+void
+cl_limit_log_destroy(struct msg_ctrl *ml)
+{
+	if (!ml)
+		return;
+	g_free(ml->msg_slots);
+	g_free(ml);
+}
+
+void
+cl_limit_log_reset(struct msg_ctrl *ml)
+{
+	ml->last = -1;
+	ml->cnt = 0;
+	ml->suppress_t = (time_t)0;
+	memset(ml->msg_slots, 0, ml->lspam->max * sizeof(time_t));
+}
+
+static void
+cl_limit_log_update(struct msg_ctrl *ml, time_t ts)
+{
+	ml->last = (ml->last + 1) % ml->lspam->max;
+	*(ml->msg_slots + ml->last) = ts;
+	if (ml->cnt < ml->lspam->max)
+		ml->cnt++;
+}
+
+void
+cl_limit_log(struct msg_ctrl *ml, int priority, const char * fmt, ...)
+{
+	va_list ap;
+	char buf[MAXLINE];
+	time_t last_ts, now = time(NULL);
+
+	if (!ml)
+		goto log_msg;
+	if (ml->suppress_t) {
+		if ((now - ml->suppress_t) < ml->lspam->reset_time)
+			return;
+		/* message blocking expired */
+		cl_limit_log_reset(ml);
+	}
+	last_ts = ml->last != -1 ? *(ml->msg_slots + ml->last) : (time_t)0;
+	if (
+		ml->cnt < ml->lspam->max || /* not so many messages logged */
+		(now - last_ts) > ml->lspam->window /* messages far apart */
+	) {
+		cl_limit_log_update(ml, now);
+		goto log_msg;
+	} else {
+		cl_log(LOG_INFO
+			, "'%s' messages logged too often, "
+			"suppressing messages of this kind for %ld seconds"
+			, ml->lspam->id, ml->lspam->reset_time);
+		cl_log(priority, "%s", ml->lspam->advice);
+		ml->suppress_t = now;
+		return;
+	}
+
+log_msg:
+	va_start(ap, fmt);
+	vsnprintf(buf, MAXLINE, fmt, ap);
+	va_end(ap);
+	cl_log(priority, "%s", buf);
+}
+
 void
 cl_perror(const char * fmt, ...)
 {
diff --git a/lib/clplumbing/ipcsocket.c b/lib/clplumbing/ipcsocket.c
index b712dd9..9297c6a 100644
--- a/lib/clplumbing/ipcsocket.c
+++ b/lib/clplumbing/ipcsocket.c
@@ -232,6 +232,7 @@ static struct IPC_CHANNEL* socket_server_channel_new(int sockfd);
 
 static struct IPC_CHANNEL * channel_new(int sockfd, int conntype, const char *pathname);
 static int client_channel_new_auth(int sockfd);
+static int verify_creds(struct IPC_AUTH *auth_info, uid_t uid, gid_t gid);
 
 typedef void (*DelProc)(IPC_Message*);
 
@@ -2359,6 +2360,26 @@ socket_message_new(struct IPC_CHANNEL *ch, int msg_len)
  *
  ***********************************************************************/
 
+static int
+verify_creds(struct IPC_AUTH *auth_info, uid_t uid, gid_t gid)
+{
+	int ret = IPC_FAIL;
+
+	if (!auth_info || (!auth_info->uid && !auth_info->gid)) {
+		return IPC_OK;
+	}
+	if (	auth_info->uid
+	&&	(g_hash_table_lookup(auth_info->uid
+		,	GUINT_TO_POINTER((guint)uid)) != NULL)) {
+		ret = IPC_OK;
+	}else if (auth_info->gid
+	&&	(g_hash_table_lookup(auth_info->gid
+		,	GUINT_TO_POINTER((guint)gid)) != NULL)) {
+		ret = IPC_OK;
+  	}
+	return ret;
+}
+
 
 /***********************************************************************
  * SO_PEERCRED VERSION... (Linux)
@@ -2406,16 +2427,7 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 
   
 	/* verify the credential information. */
-	if (	auth_info->uid
-	&&	(g_hash_table_lookup(auth_info->uid
-		,	GUINT_TO_POINTER((guint)cred.uid)) != NULL)) {
-		ret = IPC_OK;
-	}else if (auth_info->gid
-	&&	(g_hash_table_lookup(auth_info->gid
-		,	GUINT_TO_POINTER((guint)cred.gid)) != NULL)) {
-		ret = IPC_OK;
-  	}
-	return ret;
+	return verify_creds(auth_info, cred.uid, cred.gid);
 }
 
 /* get farside pid for our peer process */
@@ -2474,22 +2486,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 
 	ch->farside_uid = euid;
 	ch->farside_gid = egid;
-	if (ret == IPC_OK) {
-		return ret;
-	}
-
-	/* Check credentials against authorization information */
 
-	if (	auth_info->uid
-	&&	(g_hash_table_lookup(auth_info->uid
-		,	GUINT_TO_POINTER((guint)euid)) != NULL)) {
-		ret = IPC_OK;
-	}else if (auth_info->gid
-	&&	(g_hash_table_lookup(auth_info->gid
-		,	GUINT_TO_POINTER((guint)egid)) != NULL)) {
-		ret = IPC_OK;
-  	}
-	return ret;
+	/* verify the credential information. */
+	return verify_creds(auth_info, euid, egid);
 }
 
 static
@@ -2628,18 +2627,8 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
       return ret;
   }
 
-  ret = IPC_OK;
-
-  if (	auth_info->uid
-  &&	g_hash_table_lookup(auth_info->uid, &(cred.crEuid)) == NULL) {
-		ret = IPC_FAIL;
-  }
-  if (	auth_info->gid
-  &&	g_hash_table_lookup(auth_info->gid, &(cred.crEgid)) == NULL) {
-		ret = IPC_FAIL;
-  }
-
-  return ret;
+  /* verify the credential information. */
+  return verify_creds(auth_info, cred.crEuid, cred.crEgid);
 }
 
 /*
@@ -2721,8 +2710,6 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 		return ret;
 	}
 
-	ret = IPC_OK;
-
 	if ((auth_info->uid == NULL || g_hash_table_size(auth_info->uid) == 0)
 	    && auth_info->gid != NULL
 	    && g_hash_table_size(auth_info->gid) != 0) {
@@ -2731,20 +2718,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 		       " on this platform.");
 		return IPC_BROKEN;
 	}
-	
-	if (auth_info->uid != NULL && g_hash_table_size(auth_info->uid) > 0
-	    && g_hash_table_lookup(
-		    auth_info->uid, GUINT_TO_POINTER(stat_buf.st_uid))==NULL) {
-		ret = IPC_FAIL;
-		
-	}
-	if (auth_info->gid != NULL && g_hash_table_size(auth_info->gid) > 0
-	    && g_hash_table_lookup(
-		    auth_info->gid, GUINT_TO_POINTER(stat_buf.st_gid))==NULL) {
-		ret = IPC_FAIL;
-	}
 
-	return ret;
+	/* verify the credential information. */
+	return verify_creds(auth_info, stat_buf.st_uid, stat_buf.st_gid);
 }
 
 
@@ -2774,22 +2750,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 	ch->farside_uid = conn_info->farside_uid;
 	ch->farside_gid = conn_info->farside_gid;
 
-	if (auth_info == NULL
-	  || (auth_info->uid == NULL && auth_info->gid == NULL)) {
-		return IPC_OK;	/* no restriction for authentication */
-	}
-
 	/* verify the credential information. */
-	if (	auth_info->uid
-	&&	(g_hash_table_lookup(auth_info->uid,
-		  GUINT_TO_POINTER((guint)conn_info->farside_uid)) != NULL)) {
-		return IPC_OK;
-	}else if (auth_info->gid
-	&&	(g_hash_table_lookup(auth_info->gid,
-		  GUINT_TO_POINTER((guint)conn_info->farside_gid)) != NULL)) {
-		return IPC_OK;
-	}
-	return IPC_FAIL;
+	return verify_creds(auth_info,
+		conn_info->farside_uid, conn_info->farside_gid);
 }
 
 static
@@ -2835,20 +2798,10 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
 		return rc;
 	}
 
-	/* Check credentials against authorization information */
-
-	if (auth_info->uid
-	  && (g_hash_table_lookup(auth_info->uid,
-		  GUINT_TO_POINTER((guint)ucred_geteuid(ucred))) != NULL)) {
-		rc = IPC_OK;
-	}else if (auth_info->gid
-	  && (g_hash_table_lookup(auth_info->gid,
-		  GUINT_TO_POINTER((guint)ucred_getegid(ucred))) != NULL)) {
-		rc = IPC_OK;
-  	}
-
+	/* verify the credential information. */
+	rc = verify_creds(auth_info,
+		ucred_geteuid(ucred), ucred_getegid(ucred));
 	ucred_free(ucred);
-
 	return rc;
 }
 
diff --git a/lib/plugins/lrm/raexecocf.c b/lib/plugins/lrm/raexecocf.c
index 7e8ef36..f7cd7ed 100644
--- a/lib/plugins/lrm/raexecocf.c
+++ b/lib/plugins/lrm/raexecocf.c
@@ -230,16 +230,20 @@ get_resource_list(GList ** rsc_info)
 			free(namelist[file_num]);
 			continue;
 		}
-		
-		stat(namelist[file_num]->d_name, &prop);
-		if (S_ISDIR(prop.st_mode)) {
+
+		snprintf(subdir,FILENAME_MAX,"%s/%s",
+			 RA_PATH, namelist[file_num]->d_name);
+
+		if (stat(subdir, &prop) == -1) {
+			cl_perror("%s:%s:%d: stat failed for %s" 
+				  , __FILE__, __FUNCTION__, __LINE__, subdir);
+			free(namelist[file_num]);
+			continue;
+		} else if (!S_ISDIR(prop.st_mode)) {
 			free(namelist[file_num]);
 			continue;
 		}
 
-		snprintf(subdir,FILENAME_MAX,"%s/%s",
-			 RA_PATH, namelist[file_num]->d_name);
-			 
 		get_runnable_list(subdir,&ra_subdir);
 
 		merge_string_list(rsc_info,ra_subdir);
diff --git a/lib/plugins/stonith/external.c b/lib/plugins/stonith/external.c
index 683dd84..e991976 100644
--- a/lib/plugins/stonith/external.c
+++ b/lib/plugins/stonith/external.c
@@ -141,7 +141,7 @@ external_status(StonithPlugin  *s)
 	
 	rc = external_run_cmd(sd, op, NULL);
 	if (rc != 0) {
-		LOG(PIL_CRIT, "%s: '%s %s' failed with rc %d",
+		LOG(PIL_WARN, "%s: '%s %s' failed with rc %d",
 			__FUNCTION__, sd->subplugin, op, rc);
 	}
 	else {
@@ -337,10 +337,11 @@ external_parse_config_info(struct pluginDevice* sd, StonithNVpair * info)
 	/* TODO: Maybe treat "" as delimeters too so
 	 * whitespace can be passed to the plugins... */
 	for (nv = info; nv->s_name; nv++) {
-		key = STRDUP(nv->s_name);
 		if (!nv->s_name || !nv->s_value) {
 			continue;
 		}
+
+		key = STRDUP(nv->s_name);
 		if (!key) {
 			goto err_mem;
 		}
@@ -819,8 +820,8 @@ external_run_cmd(struct pluginDevice *sd, const char *op, char **output)
 	status = pclose(file);
 	if (WIFEXITED(status)) {
 		rc = WEXITSTATUS(status);
-		if (rc != 0) {
-			LOG(PIL_CRIT,
+		if (rc != 0 && Debug) {
+			LOG(PIL_DEBUG,
 				"%s: Calling '%s' returned %d", __FUNCTION__, cmd, rc);
 		}
 	} else {
diff --git a/lib/plugins/stonith/external/Makefile.am b/lib/plugins/stonith/external/Makefile.am
index 0ca1440..5006513 100644
--- a/lib/plugins/stonith/external/Makefile.am
+++ b/lib/plugins/stonith/external/Makefile.am
@@ -19,14 +19,15 @@
 #
 MAINTAINERCLEANFILES = Makefile.in
 
-EXTRA_DIST           = drac5 dracmc-telnet ibmrsa-telnet ipmi rackpdu vmware xen0 \
+EXTRA_DIST           = drac5 dracmc-telnet ibmrsa-telnet ipmi rackpdu vmware vcenter xen0 \
 			xen0-ha-dom0-stonith-helper sbd kdumpcheck nut
 
 extdir		     = $(stonith_ext_plugindir)
 
 helperdir	     = $(stonith_plugindir)
 
-ext_SCRIPTS	     = drac5 dracmc-telnet ibmrsa ibmrsa-telnet ipmi riloe ssh vmware rackpdu xen0 hmchttp \
-			xen0-ha sbd kdumpcheck ippower9258 nut
+ext_SCRIPTS	     = drac5 dracmc-telnet ibmrsa ibmrsa-telnet ipmi riloe ssh vmware vcenter rackpdu xen0 hmchttp \
+			xen0-ha sbd kdumpcheck ippower9258 nut libvirt \
+			hetzner
 
 helper_SCRIPTS	     = xen0-ha-dom0-stonith-helper
diff --git a/lib/plugins/stonith/external/hetzner b/lib/plugins/stonith/external/hetzner
new file mode 100755
index 0000000..8846270
--- /dev/null
+++ b/lib/plugins/stonith/external/hetzner
@@ -0,0 +1,132 @@
+#!/bin/sh
+#
+# External STONITH module for Hetzner.
+#
+# Copyright (c) 2011 MMUL S.a.S. - Raoul Scarazzini <rasca at mmul.it>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+# Read parameters from config file, format is based upon the hetzner OCF resource agent
+# developed by Kumina: http://blog.kumina.nl/2011/02/hetzner-failover-ip-ocf-script/
+conf_file="/etc/hetzner.cfg"
+user=`sed -n 's/^user.*=\ *//p' "$conf_file"`
+pass=`sed -n 's/^pass.*=\ *//p' "$conf_file"`
+hetzner_server="https://robot-ws.your-server.de"
+
+check_http_response() {
+     # $1 is an HTTP status code: return 0 for 200, otherwise log a description and return 1
+     if [ "$1" = 200 ]
+      then
+       return 0
+      else
+       case "$1" in
+        400) ha_log.sh err  "INVALID_INPUT - Invalid input parameters"
+               ;;
+        404) ha_log.sh err  "SERVER_NOT_FOUND - Server with ip $remote_ip not found"
+               ;;
+        409) ha_log.sh err  "RESET_MANUAL_ACTIVE - There is already a running manual reset"
+               ;;
+        500) ha_log.sh err "RESET_FAILED - Resetting failed due to an internal error"
+               ;;
+        *)   ha_log.sh err "Unexpected HTTP response code '$1'" ;; # e.g. 000 when curl could not connect
+       esac
+       return 1
+     fi
+}
+
+case $1 in
+gethosts)
+        echo $hostname
+	exit 0
+	;;
+on)
+	# Can't really be implemented because Hetzner's webservice cannot power on a system
+	ha_log.sh err "Power on is not available since Hetzner's webservice can't do this operation."
+	exit 1
+	;;
+off)
+	# Can't really be implemented because Hetzner's webservice cannot power off a system
+	ha_log.sh err "Power off is not available since Hetzner's webservice can't do this operation."
+	exit 1
+	;;
+reset)
+        # Launch the reset action via the webservice; arguments are quoted so credentials survive word splitting
+        check_http_response "$(curl --silent -o /dev/null -w '%{http_code}' -u "$user:$pass" "$hetzner_server/reset/$remote_ip" -d type=hw)"
+        exit $?
+	;;
+status)
+        # Check if we can contact the webservice
+        check_http_response "$(curl --silent -o /dev/null -w '%{http_code}' -u "$user:$pass" "$hetzner_server/server/$remote_ip")"
+        exit $?
+	;;
+getconfignames)
+	echo "hostname"
+	echo "remote_ip"
+	exit 0
+	;;
+getinfo-devid)
+	echo "Hetzner STONITH device"
+	exit 0
+	;;
+getinfo-devname)
+	echo "Hetzner STONITH external device"
+	exit 0
+	;;
+getinfo-devdescr)
+	echo "Hetzner host reset"
+	echo "Manages the remote webservice for reset a remote server."
+	exit 0
+	;;
+getinfo-devurl)
+	echo "http://wiki.hetzner.de/index.php/Robot_Webservice_en"
+	exit 0
+	;;
+getinfo-xml)
+	cat << HETZNERXML
+<parameters>
+<parameter name="hostname" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Hostname
+</shortdesc>
+<longdesc lang="en">
+The name of the host to be managed by this STONITH device.
+</longdesc>
+</parameter>
+
+<parameter name="remote_ip" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Remote IP
+</shortdesc>
+<longdesc lang="en">
+The address of the remote IP that manages this server.
+</longdesc>
+</parameter>
+</parameters>
+HETZNERXML
+	exit 0
+	;;
+*)
+	ha_log.sh err "Don't know what to do for '$remote_ip'"
+	exit 1
+	;;
+esac
diff --git a/lib/plugins/stonith/external/ipmi b/lib/plugins/stonith/external/ipmi
index 53b5a9b..b7832f3 100644
--- a/lib/plugins/stonith/external/ipmi
+++ b/lib/plugins/stonith/external/ipmi
@@ -23,8 +23,8 @@
 # other software, or any other product whatsoever.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# along with this program; if not, write the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 
 # Initialization -- fix locale settings so we can parse output from
@@ -115,7 +115,6 @@ ipmi_is_power_on() {
 	esac
 }
 
-# Rewrite the hostname to accept "," as a delimeter for hostnames too.
 
 case ${1} in
 gethosts)
@@ -195,7 +194,7 @@ The IP address of the STONITH device.
 </longdesc>
 </parameter>
 
-<parameter name="userid" unique="1">
+<parameter name="userid" unique="0">
 <content type="string" />
 <shortdesc lang="en">
 Login
@@ -205,7 +204,7 @@ The username used for logging in to the STONITH device.
 </longdesc>
 </parameter>
 
-<parameter name="passwd" unique="1">
+<parameter name="passwd" unique="0">
 <content type="string" />
 <shortdesc lang="en">
 Password
@@ -215,7 +214,7 @@ The password used for logging in to the STONITH device.
 </longdesc>
 </parameter>
 
-<parameter name="passwd_method" unique="1">
+<parameter name="passwd_method" unique="0">
 <content type="string" default="param"/>
 <shortdesc lang="en">
 Method for passing passwd parameter
@@ -228,7 +227,7 @@ Method for passing the passwd parameter to ipmitool
 </longdesc>
 </parameter>
 
-<parameter name="interface" unique="1">
+<parameter name="interface" unique="0">
 <content type="string" default="lan"/>
 <shortdesc lang="en">
 IPMI interface
diff --git a/lib/plugins/stonith/external/libvirt b/lib/plugins/stonith/external/libvirt
new file mode 100644
index 0000000..8923565
--- /dev/null
+++ b/lib/plugins/stonith/external/libvirt
@@ -0,0 +1,259 @@
+#!/bin/sh
+#
+# External STONITH module for a libvirt managed hypervisor (kvm/Xen).
+# Uses libvirt as a STONITH device to control guest.
+#
+# Copyright (c) 2010 Holger Teutsch <holger.teutsch at web.de>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+# start a domain
+libvirt_start() {
+    out=$($VIRSH -c $hypervisor_uri start $domain_id 2>&1)
+    if [ $? -eq 0 ]
+    then
+        ha_log.sh notice "Domain $domain_id was started"
+        return 0
+    fi
+
+    if echo "$out" | grep -i 'Domain is already active' > /dev/null 2>&1
+    then
+        ha_log.sh notice "Domain $domain_id is already active"
+        return 0
+    fi
+
+    ha_log.sh err "Failed to start domain $domain_id"
+    ha_log.sh err "$out"
+    return 1
+}
+
+# stop a domain
+# return
+#   0: success
+#   1: error
+#   2: was already stopped
+libvirt_stop() {
+    out=$($VIRSH -c $hypervisor_uri destroy $domain_id 2>&1)
+    if [ $? -eq 0 ]
+    then
+        ha_log.sh notice "Domain $domain_id was stopped"
+        return 0
+    fi
+
+    if echo "$out" | grep -i 'domain is not running' > /dev/null 2>&1
+    then
+        ha_log.sh notice "Domain $domain_id is already stopped"
+        return 2
+    fi
+
+    ha_log.sh err "Failed to stop domain $domain_id"
+    ha_log.sh err "$out"
+    return 1
+}
+
+# get status of stonith device (*NOT* of the domain).
+# If we can retrieve some info from the hypervisor
+# the stonith device is OK.
+libvirt_status() {
+    out=$($VIRSH -c $hypervisor_uri version 2>&1)
+    if [ $? -eq 0 ]
+    then
+        out=`echo "$out" | tail -1`
+        ha_log.sh notice "$hypervisor_uri: $out"
+        return 0
+    fi
+
+    ha_log.sh err "Failed to get status for $hypervisor_uri"
+    ha_log.sh err "$out"
+    return 1
+}
+
+# check config and set variables
+# does not return on error
+libvirt_check_config() {
+    VIRSH=`which virsh 2>/dev/null`
+
+    if [ ! -x "$VIRSH" ]
+    then
+        ha_log.sh err "virsh not installed"
+        exit 1
+    fi
+
+    if [ -z "$hostlist" -o -z "$hypervisor_uri" ]
+    then
+        ha_log.sh err "hostlist or hypervisor_uri missing; check configuration"
+        exit 1
+    fi
+}
+
+# set variable domain_id for the host specified as arg
+libvirt_set_domain_id ()
+{
+    for h in $hostlist
+    do
+        case $h in
+            $1:*)
+            domain_id=`expr $h : '.*:\(.*\)'`
+            return
+            ;;
+
+            $1)
+            domain_id=$1
+            return
+        esac
+    done
+
+    ha_log.sh err "Should never happen: Called for host $1 but $1 is not in $hostlist."
+    exit 1
+}
+
+libvirt_info() {
+cat << LVIRTXML
+<parameters>
+<parameter name="hostlist" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+List of hostname[:domain_id]..
+</shortdesc>
+<longdesc lang="en">
+List of controlled hosts: hostname[:domain_id]..
+The optional domain_id defaults to the hostname. 
+</longdesc>
+</parameter>
+
+<parameter name="hypervisor_uri" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Hypervisor URI
+</shortdesc>
+<longdesc lang="en">
+URI for connection to the hypervisor.
+driver[+transport]://[username@][hostlist][:port]/[path][?extraparameters]
+e.g.
+qemu+ssh://my_kvm_server.mydomain.my/system   (uses ssh for root)
+xen://my_kvm_server.mydomain.my/              (uses TLS for client)
+
+virsh must be installed (e.g. libvir-client package) and access control must
+be configured for your selected URI.
+</longdesc>
+</parameter>
+</parameters>
+LVIRTXML
+exit 0
+}
+
+#############
+# Main code #
+#############
+
+# don't fool yourself when testing with stonith(8)
+# and transport ssh
+unset SSH_AUTH_SOCK
+
+# support , as a separator as well
+hostlist=`echo $hostlist| sed -e 's/,/ /g'`
+
+case $1 in
+    gethosts)
+    hostnames=`echo $hostlist|sed -e 's/:[^ ]*//g'`
+    for h in $hostnames
+    do
+        echo $h
+    done
+    exit 0
+    ;;
+
+    on)
+    libvirt_check_config
+    libvirt_set_domain_id $2
+
+    libvirt_start
+    exit $?
+    ;;
+
+    off)
+    libvirt_check_config
+    libvirt_set_domain_id $2
+
+    libvirt_stop
+    [ $? = 1 ] && exit 1
+    exit 0
+    ;;
+
+    reset)
+    # libvirt has no reset so we do a power cycle
+    libvirt_check_config
+    libvirt_set_domain_id $2
+
+    libvirt_stop
+    rc=$?
+    [ $rc = 1 ] && exit 1
+
+    # stonith reset seems to require a power on even if it was off
+    # before so the next line is commented out
+    # [ $rc = 2 ] && exit 0
+
+    sleep 2
+    libvirt_start
+    exit $?
+    ;;
+
+    status)
+    libvirt_check_config
+    libvirt_status
+    exit $?
+    ;;
+
+    getconfignames)
+    echo "hostlist hypervisor_uri"
+    exit 0
+    ;;
+
+    getinfo-devid)
+    echo "libvirt STONITH device"
+    exit 0
+    ;;
+
+    getinfo-devname)
+    echo "libvirt STONITH external device"
+    exit 0
+    ;;
+
+    getinfo-devdescr)
+    echo "libvirt-based Linux host reset for Xen/KVM guest domain through hypervisor"
+    exit 0
+    ;;
+
+    getinfo-devurl)
+    echo "http://libvirt.org/uri.html http://linux-ha.org/wiki"
+    exit 0
+    ;;
+
+    getinfo-xml)
+    libvirt_info
+    exit 0    # normally not reached: libvirt_info exits itself
+    ;;
+
+    *)
+    exit 1
+    ;;
+esac
+
diff --git a/lib/plugins/stonith/external/rackpdu b/lib/plugins/stonith/external/rackpdu
index b53fd03..7d0e20b 100644
--- a/lib/plugins/stonith/external/rackpdu
+++ b/lib/plugins/stonith/external/rackpdu
@@ -75,7 +75,8 @@ GetOutletNumber() {
 	    return 0
 	fi
 
-	local names=`echo "$snmp_result" | cut -f2 -d'"' | tr ' ' '_' | tr '\012' ' '`
+	local names
+	names=`echo "$snmp_result" | cut -f2 -d'"' | tr ' ' '_' | tr '\012' ' '`
 	for name in $names; do
 	    if [ "$name" != "$nodename" ]; then
 		local outlet_num=`expr $outlet_num + 1`
diff --git a/lib/plugins/stonith/external/sbd b/lib/plugins/stonith/external/sbd
index 6b4eec1..baa7b3b 100644
--- a/lib/plugins/stonith/external/sbd
+++ b/lib/plugins/stonith/external/sbd
@@ -9,13 +9,28 @@
 
 # Main code
 
+if [ x$sbd_device = x ]; then
+	if [ -f /etc/sysconfig/sbd ]; then
+		source /etc/sysconfig/sbd
+		sbd_device=$SBD_DEVICE
+	fi
+fi
+
+SBD_DEVS=${sbd_device%;}
+
+sbd_device=${SBD_DEVS//;/ -d }
+
 case $1 in
 gethosts)
-    echo `sbd -d $sbd_device list | cut -f2`
+    echo `sbd -d $sbd_device list | cut -f2 | sort | uniq`
     exit 0
     ;;
 off|reset)
-    sbd -d $sbd_device message $2 $1
+    message=$1
+    case "$crashdump" in
+	    yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
+    esac
+    sbd -d $sbd_device message $2 $message
     exit $?
     ;;
 status)
@@ -29,7 +44,7 @@ on)
     exit 1
     ;;
 getconfignames)
-    echo "sbd_device"
+    echo "sbd_device crashdump"
     exit 0
     ;;
 getinfo-devid)
@@ -47,8 +62,9 @@ fencing requests. This allows clusters without network power
 switches; the downside is that access to the shared storage
 device becomes a Single Point of Failure. 
 
-It requires sbd to be configured.  Please read
-http://linux-ha.org/wiki/SBD_Fencing!
+It requires sbd to be configured on all nodes.
+
+Please read http://linux-ha.org/wiki/SBD_Fencing!
 
 DESC
     exit 0
@@ -60,13 +76,34 @@ getinfo-devurl)
 getinfo-xml)
     cat << SSHXML
 <parameters>
-<parameter name="sbd_device" unique="1" required="1">
+
+<parameter name="crashdump">
 <content type="string" />
 <shortdesc lang="en">
-SBD device
+Crashdump instead of regular fence
 </shortdesc>
 <longdesc lang="en">
-The block device used for the SBD partition.
+If SBD is given a fence command, this option will instead perform a
+kernel crash of a reboot or power-off, which on a properly configured
+system can lead to a crashdump for analysis.
+
+This is less safe for production environments. Please use with caution
+and for debugging purposes only.
+</longdesc>
+</parameter>
+
+<parameter name="sbd_device" unique="1">
+<content type="string" />
+<shortdesc lang="en">
+SBD device(s)
+</shortdesc>
+<longdesc lang="en">
+The block device used for the SBD partition. Up to three
+can be specified if separated by a semicolon. (Please check
+the documentation if specifying two.)
+
+If not specified, will default to the value from /etc/sysconfig/sbd.
+
 </longdesc>
 </parameter>
 </parameters>
diff --git a/lib/plugins/stonith/external/vcenter b/lib/plugins/stonith/external/vcenter
new file mode 100755
index 0000000..5c1afb9
--- /dev/null
+++ b/lib/plugins/stonith/external/vcenter
@@ -0,0 +1,266 @@
+#!/usr/bin/env perl
+#
+# External STONITH module for VMWare vCenter/ESX
+#
+# Author:  Nhan Ngo Dinh
+# License: GNU General Public License (GPL)
+#
+
+require 5.010;
+
+use strict;
+use warnings;
+
+sub dielog {
+	my $msg = "[";
+	$msg .= "$ARGV[0]" if defined($ARGV[0]);
+	$msg .= " $ARGV[1]" if defined($ARGV[1]);
+	$msg .= "]";
+	( $_ ) = @_;
+	$msg .= " $_";
+	system("ha_log.sh", "err", "$msg");
+	die();
+}
+
+# Define command groups
+my @configCommands = qw{getconfignames getinfo-devid getinfo-devname getinfo-devdescr getinfo-devurl getinfo-xml};
+my @actionCommands = qw{reset on off};
+my @netCommands = (@actionCommands, qw{status gethosts});
+
+# Process command line arguments
+my $command = $ARGV[0] || dielog("No command specified\n");
+
+# Command belongs to the group of commands that do not require any connection to VMware vCenter
+if ($command ~~ @configCommands) {
+	if ($command eq "getconfignames") {
+		print "VI_SERVER\nVI_PORTNUMBER\nVI_PROTOCOL\nVI_SERVICEPATH\nVI_CREDSTORE\nHOSTLIST\nRESETPOWERON\n";
+	}
+	elsif ($command eq "getinfo-devid") {
+		print "VMware vCenter STONITH device\n";
+	}
+	elsif ($command eq "getinfo-devname") {
+		print "VMware vCenter STONITH device\n";
+	}
+	elsif ($command eq "getinfo-devdescr") {
+		print "VMWare vCenter STONITH device\n";
+	}
+	elsif ($command eq "getinfo-devurl") {
+		print "http://www.vmware.com/\n";
+	}
+	elsif ($command eq "getinfo-xml") {
+		print q{<parameters>
+<parameter name="HOSTLIST" required="1">
+<content type="string"/>
+<shortdesc lang="en">List of hosts and virtual machines (required)</shortdesc>
+<longdesc lang="en">
+The list of hosts that the VMware vCenter STONITH device controls.
+Syntax is:
+  hostname1[=VirtualMachineName1];hostname2[=VirtualMachineName2]
+
+NOTE: omit =VirtualMachineName if hostname and virtual machine names are identical
+
+Example:
+  cluster1=VMCL1;cluster2=VMCL2
+</longdesc>
+</parameter>
+<parameter name="VI_SERVER">
+<content type="string" default="localhost"/>
+<shortdesc lang="en">VMware vCenter address</shortdesc>
+<longdesc lang="en">
+The VMware vCenter address
+</longdesc>
+</parameter>
+<parameter name="VI_PROTOCOL">
+<content type="string" default="https"/>
+<shortdesc lang="en">VMware vCenter protocol</shortdesc>
+<longdesc lang="en">
+The VMware vCenter protocol
+</longdesc>
+</parameter>
+<parameter name="VI_PORTNUMBER">
+<content type="string" default="443"/>
+<shortdesc lang="en">VMware vCenter port number</shortdesc>
+<longdesc lang="en">
+The VMware vCenter port number
+</longdesc>
+</parameter>
+<parameter name="VI_SERVICEPATH">
+<content type="string" default="/sdk"/>
+<shortdesc lang="en">VMware vCenter service path</shortdesc>
+<longdesc lang="en">
+The VMware vCenter services path
+</longdesc>
+</parameter>
+<parameter name="VI_CREDSTORE" required="1">
+<content type="string"/>
+<shortdesc lang="en">VMware vCenter credentials store file</shortdesc>
+<longdesc lang="en">
+VMware vCenter credentials store file
+</longdesc>
+</parameter>
+<parameter name="RESETPOWERON">
+<content type="string" default="1"/>
+<shortdesc lang="en">PowerOnVM on reset</shortdesc>
+<longdesc lang="en">
+Enable/disable a PowerOnVM on reset when the target virtual machine is off
+Allowed values: 0, 1
+</longdesc>
+</parameter>
+</parameters>} . "\n";
+	}
+	else { dielog("Invalid command specified: $command\n"); }
+}
+
+# Command belongs to the group of commands that require connecting to VMware vCenter
+elsif ($command ~~ @netCommands) {
+
+	use VMware::VIRuntime;
+
+	# A valid VI_CREDSTORE is required to avoid interactive prompt
+	( exists $ENV{'VI_CREDSTORE'} ) || dielog("VI_CREDSTORE not specified\n");
+
+	# HOSTLIST is mandatory
+	exists $ENV{'HOSTLIST'} || dielog("HOSTLIST not specified\n");
+
+	# Parse HOSTLIST to %host_to_vm and %vm_to_host
+	my @hostlist = split(';', $ENV{'HOSTLIST'});
+	my %host_to_vm = ();
+	my %vm_to_host = ();
+	foreach my $host (@hostlist) {
+		my @config = split(/=/, $host);
+		my $key = $config[0]; my $value = $config[1];
+		if (!defined($value)) { $value = $config[0]; }
+		$host_to_vm{$key} = $value;
+		$vm_to_host{(lc $value)} = $key;
+	}
+
+	eval {
+		# VI API: reads options from the environment variables into appropriate data structures for validation.
+		Opts::parse();
+		# VI API: ensures that input values from environment variable are complete, consistent and valid.
+		Opts::validate();
+		# VI API: establishes a session with the VirtualCenter Management Server or ESX Server Web service
+		Util::connect();
+	};
+	if ($@) {
+		# This is just a placeholder for any error handling procedure
+		dielog($@);
+	}
+
+	# Command belongs to the group of commands that performs actions on Virtual Machines
+	if ($command ~~ @actionCommands) {
+
+		my $targetHost = $ARGV[1] || dielog("No target specified\n");
+
+		# Require that specified target host exists in the specified HOSTLIST
+		if (exists $host_to_vm{$targetHost}) {
+
+			my $vm;
+			my $esx;
+			eval {
+				# VI API: searches the inventory tree for a VirtualMachine managed entity whose name matches
+				# the name of the virtual machine assigned to the target host in HOSTLIST
+				$vm = Vim::find_entity_view(view_type => "VirtualMachine", filter => { name => qr/\Q$host_to_vm{$targetHost}\E/i });
+
+				# VI API: retrieves the properties of the managed object reference runtime.host of the VirtualMachine
+				# managed entity obtained by the previous command
+				# NOTE: This is essentially a workaround to vSphere Perl SDK
+				# to allow pointing to the right HostSystem. This is probably
+				# done by changing the current HostSystem in the Web Service
+				# session context. WARNING: Do not use the same session for any
+				# other concurrent operation.
+				$esx = Vim::get_view(mo_ref => $vm->{"runtime"}{"host"})->name;
+			};
+			if ($@) {
+				# Method calls are not interpolated inside strings, so call detail outside the quotes
+				if (ref($@) eq "SoapFault") { dielog($@->detail . "\n"); }
+			dielog($@); }
+
+			my $powerState = $vm->get_property('runtime.powerState')->val;
+			if ($powerState eq "suspended") {
+				# This implementation assumes that suspending a cluster node can cause
+				# severe failures on shared resources, thus any failover operation should
+				# be blocked.
+				dielog("Machine $esx:$vm->{'name'} is in a suspended state\n");
+			}
+
+			eval {
+				if ($command eq "reset") {
+					if ($powerState eq "poweredOn") {
+						$vm->ResetVM();
+						system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been reset");
+					} else {
+						system("ha_log.sh", "warn", "Tried to ResetVM $esx:$vm->{'name'} that was $powerState");
+						# Start a virtual machine on reset only if explicitly allowed by RESETPOWERON
+						if ($powerState eq "poweredOff" && (! exists $ENV{'RESETPOWERON'} || $ENV{'RESETPOWERON'} ne 0)) {
+							$vm->PowerOnVM();
+							system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered on");
+						} else {
+							dielog("Could not complete $esx:$vm->{'name'} power cycle");
+						}
+					}
+				}
+				elsif ($command eq "off") {
+					if ($powerState eq "poweredOn") {
+						$vm->PowerOffVM();
+						system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered off");
+					} else {
+						system("ha_log.sh", "warn", "Tried to PowerOffVM $esx:$vm->{'name'} that was $powerState");
+
+					}
+				}
+				elsif ($command eq "on") {
+					if ($powerState eq "poweredOff") {
+						$vm->PowerOnVM();
+						system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered on");
+					} else {
+						system("ha_log.sh", "warn", "Tried to PowerOnVM $esx:$vm->{'name'} that was $powerState");
+					}
+				}
+				else { dielog("Invalid command specified: $command\n"); }
+			};
+			if ($@) {
+				# Method calls are not interpolated inside strings, so call detail outside the quotes
+				if (ref($@) eq "SoapFault") { dielog($@->detail . "\n"); }
+			dielog($@); }
+
+		} else { dielog("Invalid target specified\n"); }
+	} else {
+	# Command belongs to the group of commands that lookup the status of VMware vCenter and/or virtual machines
+		if ($command eq "status") {
+			eval {
+				# VI API: Searches the inventory tree for all VirtualMachine managed objects
+				my $vms = Vim::find_entity_views(view_type => "VirtualMachine");
+			};
+			if ($@) {
+				# Method calls are not interpolated inside strings, so call detail outside the quotes
+				if (ref($@) eq "SoapFault") { dielog($@->detail . "\n"); }
+			dielog($@); }
+		}
+		elsif ($command eq "gethosts") {
+			# Create a regular expression to make vCenter find all the virtual machines matching
+			# virtual machine names specified in HOSTLIST
+			# NOTE: this implementation makes "gethosts" check that entries in HOSTLIST are consistent with VMware vCenter VM directory
+			my $regex = join "|", map { qr/\Q$_\E/i } values %host_to_vm;
+			eval {
+				my $vms = Vim::find_entity_views(view_type => "VirtualMachine", filter => { name => qr/^($regex)$/ });
+				foreach my $vm (@$vms) { print "$vm_to_host{(lc $vm->name)}\n" if exists $vm_to_host{(lc $vm->name)}; }
+			};
+			if ($@) {
+				# Method calls are not interpolated inside strings, so call detail outside the quotes
+				if (ref($@) eq "SoapFault") { dielog($@->detail . "\n"); }
+			dielog($@); }
+		}
+		else { dielog("Invalid command specified: $command\n"); }
+	}
+	eval {
+		Util::disconnect();
+	};
+	if ($@) {
+		# This is just a placeholder for any error handling procedure
+		dielog($@);
+	}
+}
+else { dielog("Invalid command specified: $command\n"); }
+
+exit(0);
diff --git a/lib/plugins/stonith/meatware.c b/lib/plugins/stonith/meatware.c
index fbc0742..8547541 100644
--- a/lib/plugins/stonith/meatware.c
+++ b/lib/plugins/stonith/meatware.c
@@ -202,8 +202,10 @@ meatware_reset_req(StonithPlugin * s, int request, const char * host)
 		return S_OOPS;
 	}
 
+	alarm(600);
 	memset(line, 0, 256);
 	rc = read(fd, line, 256);
+	alarm(0);
 
 	if (rc < 0) {
 		LOG(PIL_CRIT, "read error on FIFO for Meatware_reset_host");
diff --git a/lib/stonith/Makefile.am b/lib/stonith/Makefile.am
index 614ed16..a3ffbab 100644
--- a/lib/stonith/Makefile.am
+++ b/lib/stonith/Makefile.am
@@ -41,7 +41,7 @@ stonith_LDFLAGS		=  @LIBADD_DL@ @LIBLTDL@ -export-dynamic @DLOPEN_FORCE_FLAGS@ @
 meatclient_SOURCES	= meatclient.c 
 meatclient_LDADD	= $(GLIBLIB)
 
-sbd_SOURCES		= sbd.c
+sbd_SOURCES		= sbd-md.c sbd-common.c
 sbd_CFLAGS		= -D_GNU_SOURCE
 sbd_LDADD		= $(GLIBLIB)					\
 			$(top_builddir)/lib/clplumbing/libplumb.la	\
diff --git a/lib/stonith/sbd.c b/lib/stonith/sbd-common.c
similarity index 61%
rename from lib/stonith/sbd.c
rename to lib/stonith/sbd-common.c
index d8fc6b0..f9f16ac 100644
--- a/lib/stonith/sbd.c
+++ b/lib/stonith/sbd-common.c
@@ -1,21 +1,3 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- * 
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -38,6 +20,7 @@
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/fs.h>
+
 #include "sbd.h"
 
 /* These have to match the values in the header of the partition */
@@ -45,27 +28,27 @@ static char		sbd_magic[8] = "SBD_SBD_";
 static char		sbd_version  = 0x02;
 
 /* Tunable defaults: */
-static unsigned long	timeout_watchdog 	= 5;
-static unsigned long	timeout_watchdog_warn 	= 3;
-static int		timeout_allocate 	= 2;
-static int		timeout_loop	    	= 1;
-static int		timeout_msgwait		= 10;
-
-static int	watchdog_use		= 0;
-static int	go_daemon		= 0;
-static int	skip_rt			= 0;
-static int	debug			= 0;
-static const char *watchdogdev		= "/dev/watchdog";
-static char *	local_uname;
+unsigned long	timeout_watchdog 	= 5;
+unsigned long	timeout_watchdog_warn 	= 3;
+int		timeout_allocate 	= 2;
+int		timeout_loop	    	= 1;
+int		timeout_msgwait		= 10;
+
+int	watchdog_use		= 0;
+int	watchdog_set_timeout	= 1;
+int	skip_rt			= 0;
+int	debug			= 0;
+const char *watchdogdev		= "/dev/watchdog";
+char *	local_uname;
 
 /* Global, non-tunable variables: */
-static int	sector_size		= 0;
-static int	watchdogfd 		= -1;
-static int	devfd;
-static char	*devname;
-static char	*cmdname;
+int	sector_size		= 0;
+int	watchdogfd 		= -1;
+
+/*const char	*devname;*/
+const char	*cmdname;
 
-static void
+void
 usage(void)
 {
 	fprintf(stderr,
@@ -73,22 +56,24 @@ usage(void)
 "Syntax:\n"
 "	%s <options> <command> <cmdarguments>\n"
 "Options:\n"
-"-d <devname>	Block device to use (mandatory)\n"
+"-d <devname>	Block device to use (mandatory; can be specified up to 3 times)\n"
 "-h		Display this help.\n"
 "-n <node>	Set local node name; defaults to uname -n (optional)\n"
 "\n"
 "-R		Do NOT enable realtime priority (debugging only)\n"
 "-W		Use watchdog (recommended) (watch only)\n"
 "-w <dev>	Specify watchdog device (optional) (watch only)\n"
-"-D		Run as background daemon (optional) (watch only)\n"
+"-T		Do NOT initialize the watchdog timeout (watch only)\n"
 "-v		Enable some verbose debug logging (optional)\n"
 "\n"
-"-1 <N>		Set watchdog timeout to N seconds (optional) (create only)\n"
-"-2 <N>		Set slot allocation timeout to N seconds (optional) (create only)\n"
-"-3 <N>		Set daemon loop timeout to N seconds (optional) (create only)\n"
-"-4 <N>		Set msgwait timeout to N seconds (optional) (create only)\n"
-"-5 <N>		Warn if loop latency exceeds threshold (optional) (watch only)\n"
+"-1 <N>		Set watchdog timeout to N seconds (optional, create only)\n"
+"-2 <N>		Set slot allocation timeout to N seconds (optional, create only)\n"
+"-3 <N>		Set daemon loop timeout to N seconds (optional, create only)\n"
+"-4 <N>		Set msgwait timeout to N seconds (optional, create only)\n"
+"-5 <N>		Warn if loop latency exceeds threshold (optional, watch only)\n"
 "			(default is 3, set to 0 to disable)\n"
+"-t <N>		Interval in seconds for automatic child restarts (optional)\n"
+"			(default is 3600, set to 0 to disable)\n"
 "Commands:\n"
 "create		initialize N slots on <dev> - OVERWRITES DEVICE!\n"
 "list		List all allocated slots on device, and messages.\n"
@@ -101,58 +86,70 @@ usage(void)
 , cmdname);
 }
 
-static void
+int
 watchdog_init_interval(void)
 {
+	int     timeout = timeout_watchdog;
+
 	if (watchdogfd < 0) {
-		return;
+		return 0;
+	}
+
+
+	if (watchdog_set_timeout == 0) {
+		cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
+		return 0;
 	}
 
-	if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout_watchdog) < 0) {
+	if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
 		cl_perror( "WDIOC_SETTIMEOUT"
-		": Failed to set watchdog timer to %lu seconds.",
-		timeout_watchdog);
+				": Failed to set watchdog timer to %u seconds.",
+				timeout);
+		cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
+		cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to silence this check if you are sure.");
+		/* return -1; */
 	} else {
-		cl_log(LOG_INFO, "Set watchdog timeout to %lu seconds.",
-			timeout_watchdog);
+		cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.",
+				timeout);
 	}
+	return 0;
 }
 
-static void
+int
 watchdog_tickle(void)
 {
 	if (watchdogfd >= 0) {
 		if (write(watchdogfd, "", 1) != 1) {
 			cl_perror("Watchdog write failure: %s!",
 					watchdogdev);
-			/* TODO: Should we force the crash, or wait for
-			 * the watchdog to time us out? */
+			return -1;
 		}
 	}
+	return 0;
 }
 
-static void
+int
 watchdog_init(void)
 {
 	if (watchdogfd < 0 && watchdogdev != NULL) {
 		watchdogfd = open(watchdogdev, O_WRONLY);
 		if (watchdogfd >= 0) {
-			if (fcntl(watchdogfd, F_SETFD, FD_CLOEXEC)) {
-				cl_perror("Error setting the "
-				"close-on-exec flag for watchdog");
-			}
 			cl_log(LOG_NOTICE, "Using watchdog device: %s",
 					watchdogdev);
-			watchdog_init_interval();
-			watchdog_tickle();
+			if ((watchdog_init_interval() < 0)
+					|| (watchdog_tickle() < 0)) {
+				return -1;
+			}
 		}else{
 			cl_perror("Cannot open watchdog device: %s",
 					watchdogdev);
+			return -1;
 		}
 	}
+	return 0;
 }
 
-static void
+void
 watchdog_close(void)
 {
 	if (watchdogfd >= 0) {
@@ -172,8 +169,8 @@ watchdog_close(void)
  * even in linux-kernel-headers. Sucks. See also
  * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
 extern int sys_ioprio_set(int, int, int);
-static int ioprio_set(int which, int who, int ioprio);
-static inline int ioprio_set(int which, int who, int ioprio)
+int ioprio_set(int which, int who, int ioprio);
+inline int ioprio_set(int which, int who, int ioprio)
 {
         return syscall(__NR_ioprio_set, which, who, ioprio);
 }
@@ -199,7 +196,7 @@ enum {
 #define IOPRIO_PRIO_DATA(mask)  ((mask) & IOPRIO_PRIO_MASK)
 #define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | data)
 
-static void
+void
 maximize_priority(void)
 {
 	if (skip_rt) {
@@ -215,9 +212,10 @@ maximize_priority(void)
 	}
 }
 
-static int
+int
 open_device(const char* devname)
 {
+	int devfd;
 	if (!devname)
 		return -1;
 
@@ -234,10 +232,10 @@ open_device(const char* devname)
 		cl_perror("Get sector size failed.\n");
 		return -1;
 	}
-	return 0;
+	return devfd;
 }
 
-static signed char
+signed char
 cmd2char(const char *cmd)
 {
 	if (strcmp("clear", cmd) == 0) {
@@ -250,11 +248,13 @@ cmd2char(const char *cmd)
 		return SBD_MSG_OFF;
 	} else if (strcmp("exit", cmd) == 0) {
 		return SBD_MSG_EXIT;
+	} else if (strcmp("crashdump", cmd) == 0) {
+		return SBD_MSG_CRASHDUMP;
 	}
 	return -1;
 }
 
-static void *
+void *
 sector_alloc(void)
 {
 	void *x;
@@ -268,7 +268,7 @@ sector_alloc(void)
 	return x;
 }
 
-static const char*
+const char*
 char2cmd(const char cmd)
 {
 	switch (cmd) {
@@ -287,14 +287,17 @@ char2cmd(const char cmd)
 		case SBD_MSG_EXIT:
 			return "exit";
 			break;
+		case SBD_MSG_CRASHDUMP:
+			return "crashdump";
+			break;
 		default:
 			return "undefined";
 			break;
 	}
 }
 
-static int
-sector_write(int sector, const void *data)
+int
+sector_write(int devfd, int sector, const void *data)
 {
 	if (lseek(devfd, sector_size*sector, 0) < 0) {
 		cl_perror("sector_write: lseek() failed");
@@ -308,8 +311,8 @@ sector_write(int sector, const void *data)
 	return(0);
 }
 
-static int
-sector_read(int sector, void *data)
+int
+sector_read(int devfd, int sector, void *data)
 {
 	if (lseek(devfd, sector_size*sector, 0) < 0) {
 		cl_perror("sector_read: lseek() failed");
@@ -323,67 +326,73 @@ sector_read(int sector, void *data)
 	return(0);
 }
 
-static int
-slot_read(int slot, struct sector_node_s *s_node)
+int
+slot_read(int devfd, int slot, struct sector_node_s *s_node)
 {
-	return sector_read(SLOT_TO_SECTOR(slot), s_node);
+	return sector_read(devfd, SLOT_TO_SECTOR(slot), s_node);
 }
 
-static int
-slot_write(int slot, const struct sector_node_s *s_node)
+int
+slot_write(int devfd, int slot, const struct sector_node_s *s_node)
 {
-	return sector_write(SLOT_TO_SECTOR(slot), s_node);
+	return sector_write(devfd, SLOT_TO_SECTOR(slot), s_node);
 }
 
-static int
-mbox_write(int mbox, const struct sector_mbox_s *s_mbox)
+int
+mbox_write(int devfd, int mbox, const struct sector_mbox_s *s_mbox)
 {
-	return sector_write(MBOX_TO_SECTOR(mbox), s_mbox);
+	return sector_write(devfd, MBOX_TO_SECTOR(mbox), s_mbox);
 }
 
-static int
-mbox_read(int mbox, struct sector_mbox_s *s_mbox)
+int
+mbox_read(int devfd, int mbox, struct sector_mbox_s *s_mbox)
 {
-	return sector_read(MBOX_TO_SECTOR(mbox), s_mbox);
+	return sector_read(devfd, MBOX_TO_SECTOR(mbox), s_mbox);
 }
 
-static int
-mbox_write_verify(int mbox, const struct sector_mbox_s *s_mbox)
+int
+mbox_write_verify(int devfd, int mbox, const struct sector_mbox_s *s_mbox)
 {
 	void *data;
+	int rc = 0;
 
-	if (sector_write(MBOX_TO_SECTOR(mbox), s_mbox) < 0)
+	if (sector_write(devfd, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
 		return -1;
 
 	data = sector_alloc();
-	if (sector_read(MBOX_TO_SECTOR(mbox), data) < 0)
-		return -1;
+	if (sector_read(devfd, MBOX_TO_SECTOR(mbox), data) < 0) {
+		rc = -1;
+		goto out;
+	}
+
 
 	if (memcmp(s_mbox, data, sector_size) != 0) {
 		cl_log(LOG_ERR, "Write verification failed!");
-		return -1;
+		rc = -1;
+		goto out;
 	}
-
-	return 0;
+	rc = 0;
+out:
+	free(data);
+	return rc;
 }
 
-static int
-header_write(struct sector_header_s *s_header)
+int header_write(int devfd, struct sector_header_s *s_header)
 {
 	s_header->sector_size = htonl(s_header->sector_size);
 	s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
 	s_header->timeout_allocate = htonl(s_header->timeout_allocate);
 	s_header->timeout_loop = htonl(s_header->timeout_loop);
 	s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
-	return sector_write(0, s_header);
+	return sector_write(devfd, 0, s_header);
 }
 
-static int
-header_read(struct sector_header_s *s_header)
+int
+header_read(int devfd, struct sector_header_s *s_header)
 {
-	if (sector_read(0, s_header) < 0)
+	if (sector_read(devfd, 0, s_header) < 0)
 		return -1;
-	
+
 	s_header->sector_size = ntohl(s_header->sector_size);
 	s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
 	s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
@@ -398,7 +407,7 @@ header_read(struct sector_header_s *s_header)
 	return 0;
 }
 
-static int
+int
 valid_header(const struct sector_header_s *s_header)
 {
 	if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
@@ -416,36 +425,36 @@ valid_header(const struct sector_header_s *s_header)
 	return 0;
 }
 
-static struct sector_header_s *
-header_get(void)
+struct sector_header_s *
+header_get(int devfd)
 {
 	struct sector_header_s *s_header;
 	s_header = sector_alloc();
-	
-	if (header_read(s_header) < 0) {
-		cl_log(LOG_ERR, "Unable to read header from %s", devname);
+
+	if (header_read(devfd, s_header) < 0) {
+		cl_log(LOG_ERR, "Unable to read header from device %d", devfd);
 		return NULL;
 	}
 
 	if (valid_header(s_header) < 0) {
-		cl_log(LOG_ERR, "%s is not valid.", devname);
+		cl_log(LOG_ERR, "header on device %d is not valid.", devfd);
 		return NULL;
 	}
-	
+
 	/* cl_log(LOG_INFO, "Found version %d header with %d slots",
 			s_header->version, s_header->slots); */
 
 	return s_header;
 }
 
-static int
-init_device(void)
+int
+init_device(int devfd)
 {
 	struct sector_header_s	*s_header;
 	struct sector_node_s	*s_node;
 	struct sector_mbox_s	*s_mbox;
 	struct stat 		s;
-	int			i;	
+	int			i;
 	int			rc = 0;
 
 	s_header = sector_alloc();
@@ -463,21 +472,27 @@ init_device(void)
 	fstat(devfd, &s);
 	/* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
 			s.st_size, s.st_blksize, s.st_blocks); */
-	
-	cl_log(LOG_INFO, "Creating version %d header on %s",
+
+	cl_log(LOG_INFO, "Creating version %d header on device %d",
 			s_header->version,
-			devname);
-	if (header_write(s_header) < 0) {
+			devfd);
+	fprintf(stdout, "Creating version %d header on device %d\n",
+			s_header->version,
+			devfd);
+	if (header_write(devfd, s_header) < 0) {
 		rc = -1; goto out;
 	}
-	cl_log(LOG_INFO, "Initializing %d slots on %s",
+	cl_log(LOG_INFO, "Initializing %d slots on device %d",
+			s_header->slots,
+			devfd);
+	fprintf(stdout, "Initializing %d slots on device %d\n",
 			s_header->slots,
-			devname);
+			devfd);
 	for (i=0;i < s_header->slots;i++) {
-		if (slot_write(i, s_node) < 0) {
+		if (slot_write(devfd, i, s_node) < 0) {
 			rc = -1; goto out;
 		}
-		if (mbox_write(i, s_mbox) < 0) {
+		if (mbox_write(devfd, i, s_mbox) < 0) {
 			rc = -1; goto out;
 		}
 	}
@@ -491,8 +506,8 @@ out:	free(s_node);
 /* Check if there already is a slot allocated to said name; returns the
  * slot number. If not found, returns -1.
  * This is necessary because slots might not be continuous. */
-static int
-slot_lookup(const struct sector_header_s *s_header, const char *name)
+int
+slot_lookup(int devfd, const struct sector_header_s *s_header, const char *name)
 {
 	struct sector_node_s	*s_node = NULL;
 	int 			i;
@@ -506,11 +521,11 @@ slot_lookup(const struct sector_header_s *s_header, const char *name)
 	s_node = sector_alloc();
 
 	for (i=0; i < s_header->slots; i++) {
-		if (slot_read(i, s_node) < 0) {
+		if (slot_read(devfd, i, s_node) < 0) {
 			rc = -1; goto out;
 		}
 		if (s_node->in_use != 0) {
-			if (strncasecmp(s_node->name, name, 
+			if (strncasecmp(s_node->name, name,
 						sizeof(s_node->name)) == 0) {
 				cl_log(LOG_INFO, "%s owns slot %d", name, i);
 				rc = i; goto out;
@@ -522,8 +537,8 @@ out:	free(s_node);
 	return rc;
 }
 
-static int
-slot_unused(const struct sector_header_s *s_header)
+int
+slot_unused(int devfd, const struct sector_header_s *s_header)
 {
 	struct sector_node_s	*s_node;
 	int 			i;
@@ -532,7 +547,7 @@ slot_unused(const struct sector_header_s *s_header)
 	s_node = sector_alloc();
 
 	for (i=0; i < s_header->slots; i++) {
-		if (slot_read(i, s_node) < 0) {
+		if (slot_read(devfd, i, s_node) < 0) {
 			rc = -1; goto out;
 		}
 		if (s_node->in_use == 0) {
@@ -545,21 +560,22 @@ out:	free(s_node);
 }
 
 
-static int
-slot_allocate(const char *name)
+int
+slot_allocate(int devfd, const char *name)
 {
 	struct sector_header_s	*s_header = NULL;
 	struct sector_node_s	*s_node = NULL;
 	struct sector_mbox_s	*s_mbox = NULL;
-	int			i;	
+	int			i;
 	int			rc = 0;
-	
+
 	if (!name) {
 		cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
+		fprintf(stderr, "slot_allocate(): No name specified.\n");
 		rc = -1; goto out;
 	}
 
-	s_header = header_get();
+	s_header = header_get(devfd);
 	if (!s_header) {
 		rc = -1; goto out;
 	}
@@ -568,35 +584,37 @@ slot_allocate(const char *name)
 	s_mbox = sector_alloc();
 
 	while (1) {
-		i = slot_lookup(s_header, name);
+		i = slot_lookup(devfd, s_header, name);
 		if (i >= 0) {
 			rc = i; goto out;
 		}
 
-		i = slot_unused(s_header);
+		i = slot_unused(devfd, s_header);
 		if (i >= 0) {
 			cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
+			fprintf(stdout, "slot %d is unused - trying to own\n", i);
 			memset(s_node, 0, sizeof(*s_node));
 			s_node->in_use = 1;
 			strncpy(s_node->name, name, sizeof(s_node->name));
-			if (slot_write(i, s_node) < 0) {
+			if (slot_write(devfd, i, s_node) < 0) {
 				rc = -1; goto out;
 			}
 			sleep(timeout_allocate);
 		} else {
 			cl_log(LOG_ERR, "No more free slots.");
+			fprintf(stderr, "No more free slots.\n");
 			rc = -1; goto out;
 		}
 	}
-	
+
 out:	free(s_node);
 	free(s_header);
 	free(s_mbox);
 	return(rc);
 }
 
-static int
-slot_list(void)
+int
+slot_list(int devfd)
 {
 	struct sector_header_s	*s_header = NULL;
 	struct sector_node_s	*s_node = NULL;
@@ -604,7 +622,7 @@ slot_list(void)
 	int 			i;
 	int			rc = 0;
 
-	s_header = header_get();
+	s_header = header_get(devfd);
 	if (!s_header) {
 		rc = -1; goto out;
 	}
@@ -613,11 +631,11 @@ slot_list(void)
 	s_mbox = sector_alloc();
 
 	for (i=0; i < s_header->slots; i++) {
-		if (slot_read(i, s_node) < 0) {
+		if (slot_read(devfd, i, s_node) < 0) {
 			rc = -1; goto out;
 		}
 		if (s_node->in_use > 0) {
-			if (mbox_read(i, s_mbox) < 0) {
+			if (mbox_read(devfd, i, s_mbox) < 0) {
 				rc = -1; goto out;
 			}
 			printf("%d\t%s\t%s\t%s\n",
@@ -632,8 +650,8 @@ out:	free(s_node);
 	return rc;
 }
 
-static int
-slot_msg(const char *name, const char *cmd)
+int
+slot_msg(int devfd, const char *name, const char *cmd)
 {
 	struct sector_header_s	*s_header = NULL;
 	struct sector_mbox_s	*s_mbox = NULL;
@@ -645,7 +663,7 @@ slot_msg(const char *name, const char *cmd)
 		rc = -1; goto out;
 	}
 
-	s_header = header_get();
+	s_header = header_get(devfd);
 	if (!s_header) {
 		rc = -1; goto out;
 	}
@@ -654,14 +672,14 @@ slot_msg(const char *name, const char *cmd)
 		name = local_uname;
 	}
 
-	mbox = slot_lookup(s_header, name);
+	mbox = slot_lookup(devfd, s_header, name);
 	if (mbox < 0) {
 		cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
 		rc = -1; goto out;
 	}
 
 	s_mbox = sector_alloc();
-	
+
 	s_mbox->cmd = cmd2char(cmd);
 	if (s_mbox->cmd < 0) {
 		cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
@@ -672,7 +690,7 @@ slot_msg(const char *name, const char *cmd)
 
 	cl_log(LOG_INFO, "Writing %s to node slot %s",
 			cmd, name);
-	if (mbox_write_verify(mbox, s_mbox) < -1) {
+	if (mbox_write_verify(devfd, mbox, s_mbox) < -1) {
 		rc = -1; goto out;
 	}
 	if (strcasecmp(cmd, "exit") != 0) {
@@ -686,8 +704,8 @@ out:	free(s_mbox);
 	return rc;
 }
 
-static int
-slot_ping(const char *name)
+int
+slot_ping(int devfd, const char *name)
 {
 	struct sector_header_s	*s_header = NULL;
 	struct sector_mbox_s	*s_mbox = NULL;
@@ -700,7 +718,7 @@ slot_ping(const char *name)
 		rc = -1; goto out;
 	}
 
-	s_header = header_get();
+	s_header = header_get(devfd);
 	if (!s_header) {
 		rc = -1; goto out;
 	}
@@ -709,7 +727,7 @@ slot_ping(const char *name)
 		name = local_uname;
 	}
 
-	mbox = slot_lookup(s_header, name);
+	mbox = slot_lookup(devfd, s_header, name);
 	if (mbox < 0) {
 		cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
 		rc = -1; goto out;
@@ -721,13 +739,13 @@ slot_ping(const char *name)
 	strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
 
 	cl_log(LOG_DEBUG, "Pinging node %s", name);
-	if (mbox_write(mbox, s_mbox) < -1) {
+	if (mbox_write(devfd, mbox, s_mbox) < -1) {
 		rc = -1; goto out;
 	}
 
 	rc = -1;
 	while (waited <= timeout_msgwait) {
-		if (mbox_read(mbox, s_mbox) < 0)
+		if (mbox_read(devfd, mbox, s_mbox) < 0)
 			break;
 		if (s_mbox->cmd != SBD_MSG_TEST) {
 			rc = 0;
@@ -748,7 +766,34 @@ out:	free(s_mbox);
 	return rc;
 }
 
-static void
+void
+sysrq_init(void)	/* Ensures /proc/sys/kernel/sysrq has bits 8 and 128 set, unless it already reads 1. */
+{
+	FILE* procf;
+	int c = 0;	/* stays 0 when the current setting cannot be parsed */
+	procf = fopen("/proc/sys/kernel/sysrq", "r");
+	if (!procf) {
+		cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
+		return;
+	}
+	if (fscanf(procf, "%d", &c) != 1) c = 0;	/* a failed parse used to leave c uninitialized */
+	fclose(procf);
+	if (c == 1)	/* left untouched; presumably 1 already enables every sysrq function - see kernel docs */
+		return;
+	/* 8 for debugging dumps of processes, 
+	   128 for reboot/poweroff */
+	c |= 136; 
+	procf = fopen("/proc/sys/kernel/sysrq", "w");
+	if (!procf) {
+		printf("cannot open /proc/sys/kernel/sysrq for write\n");
+		return;
+	}
+	fprintf(procf, "%d", c);
+	fclose(procf);
+	return;
+}
+
+void
 sysrq_trigger(char t)
 {
 	FILE *procf;
@@ -764,7 +809,16 @@ sysrq_trigger(char t)
 	return;
 }
 
-static void
+void
+do_crashdump(void)	/* Triggers sysrq 'c', then falls back to cl_reboot() and exit(1). */
+{
+	sysrq_trigger('c');
+	/* NOTE(review): presumably sysrq 'c' crashes the kernel at once, so the lines below may never run - confirm. */
+	cl_reboot(5, "sbd is triggering crashdumping");
+	exit(1);
+}
+
+void
 do_reset(void)
 {
 	sysrq_trigger('b');
@@ -773,7 +827,7 @@ do_reset(void)
 	exit(1);
 }
 
-static void
+void
 do_off(void)
 {
 	sysrq_trigger('o');
@@ -782,22 +836,20 @@ do_off(void)
 	exit(1);
 }
 
-static void
+pid_t
 make_daemon(void)
 {
-	long			pid;
+	pid_t			pid;
 	const char *		devnull = "/dev/null";
 
-	if (go_daemon > 0) {
-		pid = fork();
-		if (pid < 0) {
-			cl_log(LOG_ERR, "%s: could not start daemon\n",
-					cmdname);
-			cl_perror("fork");
-			exit(1);
-		}else if (pid > 0) {
-			exit(0);
-		}
+	pid = fork();
+	if (pid < 0) {
+		cl_log(LOG_ERR, "%s: could not start daemon\n",
+				cmdname);
+		cl_perror("fork");
+		exit(1);
+	}else if (pid > 0) {
+		return pid;
 	}
 
 	cl_log_enable_stderr(FALSE);
@@ -813,97 +865,14 @@ make_daemon(void)
 	close(2);
 	(void)open(devnull, O_WRONLY);
 	cl_cdtocoredir();
+	return 0;
 }
 
-
-static int
-daemonize(void)
-{
-	struct sector_mbox_s	*s_mbox = NULL;
-	int			mbox;
-	int			rc = 0;
-	time_t			t0, t1, latency;
-
-	mbox = slot_allocate(local_uname);
-	if (mbox < 0) {
-		cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed.");
-		rc = -1; goto out;
-	}
-	cl_log(LOG_INFO, "Monitoring slot %d", mbox);
-
-	/* Clear mbox once on start */
-	s_mbox = sector_alloc();
-	if (mbox_write(mbox, s_mbox) < 0) {
-		rc = -1; goto out;
-	}
-
-	make_daemon();
-
-	if (watchdog_use != 0)
-		watchdog_init();
-
-	while (1) {
-		t0 = time(NULL);
-		sleep(timeout_loop);
-
-		if (mbox_read(mbox, s_mbox) < 0) {
-			cl_log(LOG_ERR, "mbox read failed.");
-			do_reset();
-		}
-
-		if (s_mbox->cmd > 0) {
-			cl_log(LOG_INFO, "Received command %s from %s",
-					char2cmd(s_mbox->cmd), s_mbox->from);
-
-			switch (s_mbox->cmd) {
-			case SBD_MSG_TEST:
-				memset(s_mbox, 0, sizeof(*s_mbox));
-				mbox_write(mbox, s_mbox);
-				break;
-			case SBD_MSG_RESET:
-				do_reset();
-				break;
-			case SBD_MSG_OFF:
-				do_off();
-				break;
-			case SBD_MSG_EXIT:
-				watchdog_close();
-				goto out;
-				break;
-			default:
-				/* TODO: Should we do something on
-				 * unknown messages? */
-				cl_log(LOG_ERR, "Unknown message; suicide!");
-				do_reset();
-				break;
-			}
-		}
-		watchdog_tickle();
-
-		t1 = time(NULL);
-		latency = t1 - t0;
-
-		if (timeout_watchdog_warn 
-				&& (latency > timeout_watchdog_warn)) {
-			cl_log(LOG_WARNING, "Latency: %d exceeded threshold %d",
-				(int)latency, (int)timeout_watchdog_warn);
-		} else if (debug) {
-			cl_log(LOG_INFO, "Latency: %d",
-				(int)latency);
-		}
-
-	}
-
-out:
-	free(s_mbox);
-	return rc;
-}
-
-static int
-header_dump(void)
+int
+header_dump(int devfd)
 {
 	struct sector_header_s *s_header;
-	s_header = header_get();
+	s_header = header_get(devfd);
 	if (s_header == NULL)
 		return -1;
 
@@ -922,7 +891,7 @@ header_dump(void)
 	return 0;
 }
 
-static void
+void
 get_uname(void)
 {
 	struct utsname		uname_buf;
@@ -932,114 +901,10 @@ get_uname(void)
 		cl_perror("uname() failed?");
 		exit(1);
 	}
-	
+
 	local_uname = strdup(uname_buf.nodename);
 
 	for (i = 0; i < strlen(local_uname); i++)
 		local_uname[i] = tolower(local_uname[i]);
 }
 
-int
-main(int argc, char** argv)
-{
-	int		exit_status = 0;
-	int		c;
-
-	if ((cmdname = strrchr(argv[0], '/')) == NULL) {
-		cmdname = argv[0];
-	}else{
-		++cmdname;
-	}
-
-	cl_log_set_entity(cmdname);
-	cl_log_enable_stderr(0);
-	cl_log_set_facility(LOG_DAEMON);
-	
-	get_uname();
-
-	while ((c = getopt (argc, argv, "DRWhvw:d:n:1:2:3:4:5:")) != -1) {
-		switch (c) {
-		case 'D':
-			go_daemon = 1;
-			break;
-		case 'R':
-			skip_rt = 1;
-			break;
-		case 'v':
-			debug = 1;
-			break;
-		case 'W':
-			watchdog_use = 1;
-			break;
-		case 'w':
-			watchdogdev = optarg;
-			break;
-		case 'd':
-			devname = optarg;
-			break;
-		case 'n':
-			local_uname = optarg;
-			break;
-		case '1':
-			timeout_watchdog = atoi(optarg);
-			break;
-		case '2':
-			timeout_allocate = atoi(optarg);
-			break;
-		case '3':
-			timeout_loop = atoi(optarg);
-			break;
-		case '4':
-			timeout_msgwait = atoi(optarg);
-			break;
-		case '5':
-			timeout_watchdog_warn = atoi(optarg);
-			break;
-		case 'h':
-			usage();
-			return(0);
-		default:
-			exit_status = -1;
-			goto out;
-			break;
-		}
-	}
-	
-	/* There must at least be one command following the options: */
-	if ( (argc - optind) < 1) {
-		fprintf(stderr, "Not enough arguments.\n");
-		exit_status = -1;
-		goto out;
-	}
-
-	maximize_priority();
-	if (open_device(devname) < 0) {
-		exit_status = -1;
-		goto out;
-	}
-
-	if (strcmp(argv[optind],"create") == 0) {
-		exit_status = init_device();
-	} else if (strcmp(argv[optind],"dump") == 0) {
-		exit_status = header_dump();
-	} else if (strcmp(argv[optind],"allocate") == 0) {
-		exit_status = slot_allocate(argv[optind+1]);
-	} else if (strcmp(argv[optind],"list") == 0) {
-		exit_status = slot_list();
-	} else if (strcmp(argv[optind],"message") == 0) {
-		exit_status = slot_msg(argv[optind+1], argv[optind+2]);
-	} else if (strcmp(argv[optind],"ping") == 0) {
-		exit_status = slot_ping(argv[optind+1]);
-	} else if (strcmp(argv[optind],"watch") == 0) {
-		exit_status = daemonize();
-	} else {
-		exit_status = -1;
-	}
-
-out:
-	if (exit_status < 0) {
-		usage();
-		return(1);
-	}
-	return(0);
-}
diff --git a/lib/stonith/sbd-md.c b/lib/stonith/sbd-md.c
new file mode 100644
index 0000000..7e856e6
--- /dev/null
+++ b/lib/stonith/sbd-md.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ptrace.h>
+#include <fcntl.h>
+#include <time.h>
+#include <clplumbing/cl_log.h>
+#include <clplumbing/coredumps.h>
+#include <clplumbing/realtime.h>
+#include <clplumbing/cl_reboot.h>
+#include <clplumbing/setproctitle.h>
+#include <malloc.h>
+#include <time.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/fs.h>
+
+#include "sbd.h"
+
+struct servants_list_item *servants_leader = NULL;
+
+static int	servant_count	= 0;
+static int	servant_restart_interval = 3600;
+
+/* signals reserved for multi-disk sbd */
+#define SIG_LIVENESS (SIGRTMIN + 1)	/* report liveness of the disk */
+#define SIG_EXITREQ  (SIGRTMIN + 2)	/* exit request to inquisitor */
+#define SIG_TEST     (SIGRTMIN + 3)	/* trigger self test */
+#define SIG_RESTART  (SIGRTMIN + 4)	/* trigger restart of all failed disks */
+/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
+
+/* Debug Helper */
+#if 0
+#define DBGPRINT(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define DBGPRINT(...) do {} while (0)
+#endif
+
+int quorum_write(int good_servants)	/* Returns 1 when good_servants is a strict majority of servant_count, else 0. */
+{
+	return (good_servants > servant_count/2);	/* integer division: 2 of 3 passes, 1 of 2 does not */
+}
+
+int quorum_read(int good_servants)	/* Returns 1 when good_servants meets the read threshold described below, else 0. */
+{
+	if (servant_count >= 3) 	/* three or more servants: require a strict majority */
+		return (good_servants > servant_count/2);
+	else	/* fewer than three servants: a single good one is enough */
+		return (good_servants >= 1);
+}
+
+int assign_servant(const char* devname, functionp_t functionp, const void* argp)	/* Forks a child that runs functionp(devname, argp); the parent gets the child's pid back. */
+{
+	pid_t pid = 0;
+	int rc = 0;
+
+	DBGPRINT("fork servant for %s\n", devname);
+	pid = fork();
+	if (pid == 0) {		/* child */
+		maximize_priority();
+		rc = (*functionp)(devname, argp);
+		if (rc == -1)	/* callback result becomes the child's exit status: 1 for -1, otherwise 0 */
+			exit(1);
+		else
+			exit(0);
+	} else if (pid != -1) {		/* parent */
+		return pid;
+	} else {	/* fork() failed: log it and terminate the calling process */
+		cl_log(LOG_ERR,"Failed to fork servant");
+		exit(1);
+	}
+}
+
+int init_devices()	/* Runs init_device() on every device in the servants_leader list; returns 0, or -1 at the first failure. */
+{
+	int rc = 0;
+	int devfd;
+	struct servants_list_item *s;
+
+	for (s = servants_leader; s; s = s->next) {
+		fprintf(stdout, "Initializing device %s\n",
+				s->devname);
+		devfd = open_device(s->devname);
+		if (devfd == -1) {	/* remaining devices in the list are not attempted */
+			return -1;
+		}
+		rc = init_device(devfd);
+		close(devfd);	/* descriptor is closed before rc is examined, so it is released on both paths */
+		if (rc == -1) {
+			fprintf(stderr, "Failed to init device %s\n", s->devname);
+			return rc;
+		}
+		fprintf(stdout, "Device %s is initialized.\n", s->devname);
+	}
+	return 0;
+}
+
+int slot_msg_wrapper(const char* devname, const void* argp)	/* Opens devname, calls slot_msg() with the name/msg carried in argp, closes the device; returns slot_msg()'s result or -1 if the open fails. */
+{
+	int rc = 0;
+	int devfd;
+	const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;	/* argp is treated as a struct slot_msg_arg_t */
+
+        devfd = open_device(devname);
+        if (devfd == -1) 
+		return -1;
+	rc = slot_msg(devfd, arg->name, arg->msg);
+	close(devfd);
+	return rc;
+}
+
+int slot_ping_wrapper(const char* devname, const void* argp)	/* Opens devname, calls slot_ping() for the node name in argp, closes the device; returns slot_ping()'s result or -1 if the open fails. */
+{
+	int rc = 0;
+	const char* name = (const char*)argp;	/* argp is treated as a C string holding the node name */
+	int devfd;
+
+	devfd = open_device(devname);
+	if (devfd == -1)
+		return -1;
+	rc = slot_ping(devfd, name);
+	close(devfd);
+	return rc;
+}
+
+int allocate_slots(const char *name)	/* Calls slot_allocate() for name on every device in the servants_leader list; returns 0, or -1 at the first failure. */
+{
+	int rc = 0;
+	int devfd;
+	struct servants_list_item *s;
+
+	for (s = servants_leader; s; s = s->next) {
+		fprintf(stdout, "Trying to allocate slot for %s on device %s.\n", 
+				name,
+				s->devname);
+		devfd = open_device(s->devname);
+		if (devfd == -1) {	/* remaining devices in the list are not attempted */
+			return -1;
+		}
+		rc = slot_allocate(devfd, name);
+		close(devfd);
+		if (rc == -1)	/* any other rc is treated as success; its value is not returned to the caller */
+			return rc;
+		fprintf(stdout, "Slot for %s has been allocated on %s.\n",
+				name,
+				s->devname);
+	}
+	return 0;
+}
+
+int list_slots()	/* Calls slot_list() on every device in the servants_leader list; returns 0, or -1 at the first failure. */
+{
+	int rc = 0;
+	struct servants_list_item *s;
+	int devfd;
+
+	for (s = servants_leader; s; s = s->next) {
+		DBGPRINT("list slots on device %s\n", s->devname);
+		devfd = open_device(s->devname);
+		if (devfd == -1)	/* remaining devices in the list are not attempted */
+			return -1;
+		rc = slot_list(devfd);
+		close(devfd);
+		if (rc == -1)
+			return rc;
+	}
+	return 0;
+}
+
+int ping_via_slots(const char *name)	/* Forks one slot_ping_wrapper servant per device and waits until servant_count of them have been reaped; always returns 0. */
+{
+	int sig = 0;
+	pid_t pid = 0;
+	int status = 0;
+	int servants_finished = 0;
+	sigset_t procmask;
+	siginfo_t sinfo;
+	struct servants_list_item *s;
+
+	DBGPRINT("you shall know no fear\n");
+	sigemptyset(&procmask);
+	sigaddset(&procmask, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &procmask, NULL);	/* SIGCHLD is blocked so sigwaitinfo() below can collect it synchronously */
+
+	for (s = servants_leader; s; s = s->next) {
+		s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name);	/* pid is stored for lookup_servant_by_pid() below */
+	}
+
+	while (servants_finished < servant_count) {
+		sig = sigwaitinfo(&procmask, &sinfo);
+		DBGPRINT("get signal %d\n", sig);
+		if (sig == SIGCHLD) {
+			while ((pid = wait(&status))) {	/* keep reaping until wait() reports that no children remain */
+				if (pid == -1 && errno == ECHILD) {
+					break;
+				} else {
+					s = lookup_servant_by_pid(pid);
+					if (s) {	/* only pids that belong to a known servant are counted */
+						DBGPRINT
+						    ("A ping is delivered to %s via %s. ",
+						     name, s->devname);
+						if (!status)	/* the wait status is only reported through DBGPRINT, never returned */
+							DBGPRINT
+							    ("They responed to the emporer\n");
+						else
+							DBGPRINT
+							    ("There's no response\n");
+						servants_finished++;
+					}
+				}
+			}
+		}
+		DBGPRINT("signal %d handled\n", sig);
+	}
+	return 0;
+}
+
+/*
+ * Watcher process body for a single SBD device.
+ *
+ * Opens "diskname", obtains the slot of the local node (local_uname) through
+ * slot_allocate(), writes a freshly allocated mailbox sector and then loops:
+ * every timeout_loop seconds it reads the mailbox, acts on any command found
+ * there and sends SIG_LIVENESS to the parent process.
+ *
+ * diskname: device to watch; must not be NULL.
+ * argp:     unused; present so the function matches functionp_t.
+ *
+ * Returns -1 when setup fails. The loop itself has no normal exit; a mailbox
+ * read error terminates the process with exit(1).
+ */
+int servant(const char *diskname, const void* argp)
+{
+	struct sector_mbox_s *s_mbox = NULL;
+	int mbox;
+	int rc = 0;
+	time_t t0, t1, latency;
+	union sigval signal_value;
+	sigset_t servant_masks;
+	int devfd;
+	pid_t ppid;
+
+	if (!diskname) {
+		/* Do not hand the NULL pointer to a %s conversion. */
+		cl_log(LOG_ERR, "Empty disk name.");
+		return -1;
+	}
+
+	/* Block most of the signals */
+	sigfillset(&servant_masks);
+	sigdelset(&servant_masks, SIGKILL);
+	sigdelset(&servant_masks, SIGFPE);
+	sigdelset(&servant_masks, SIGILL);
+	sigdelset(&servant_masks, SIGSEGV);
+	sigdelset(&servant_masks, SIGBUS);
+	sigdelset(&servant_masks, SIGALRM);
+	/* FIXME: check error */
+	sigprocmask(SIG_SETMASK, &servant_masks, NULL);
+
+	devfd = open_device(diskname);
+	if (devfd == -1) {
+		return -1;
+	}
+
+	mbox = slot_allocate(devfd, local_uname);
+	if (mbox < 0) {
+		cl_log(LOG_ERR,
+		       "No slot allocated, and automatic allocation failed for disk %s.",
+		       diskname);
+		rc = -1;
+		goto out;
+	}
+	cl_log(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
+	set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
+
+	s_mbox = sector_alloc();
+	if (mbox_write(devfd, mbox, s_mbox) < 0) {
+		rc = -1;
+		goto out;
+	}
+
+	/* The payload of the signals sent to the parent carries no data. */
+	memset(&signal_value, 0, sizeof(signal_value));
+
+	while (1) {
+		t0 = time(NULL);
+		sleep(timeout_loop);
+
+		ppid = getppid();
+
+		if (ppid == 1) {
+			/* Our parent died unexpectedly. Triggering
+			 * self-fence. */
+			do_reset();
+		}
+
+		if (mbox_read(devfd, mbox, s_mbox) < 0) {
+			cl_log(LOG_ERR, "mbox read failed in servant.");
+			exit(1);
+		}
+
+		if (s_mbox->cmd > 0) {
+			cl_log(LOG_INFO,
+			       "Received command %s from %s on disk %s",
+			       char2cmd(s_mbox->cmd), s_mbox->from, diskname);
+
+			switch (s_mbox->cmd) {
+			case SBD_MSG_TEST:
+				/* Clear the slot, then tell the parent. */
+				memset(s_mbox, 0, sizeof(*s_mbox));
+				mbox_write(devfd, mbox, s_mbox);
+				sigqueue(ppid, SIG_TEST, signal_value);
+				break;
+			case SBD_MSG_RESET:
+				do_reset();
+				break;
+			case SBD_MSG_OFF:
+				do_off();
+				break;
+			case SBD_MSG_EXIT:
+				sigqueue(ppid, SIG_EXITREQ, signal_value);
+				break;
+			case SBD_MSG_CRASHDUMP:
+				do_crashdump();
+				break;
+			default:
+				/* FIXME:
+				   An "unknown" message might result
+				   from a partial write.
+				   log it and clear the slot.
+				 */
+				cl_log(LOG_ERR, "Unknown message on disk %s",
+				       diskname);
+				memset(s_mbox, 0, sizeof(*s_mbox));
+				mbox_write(devfd, mbox, s_mbox);
+				break;
+			}
+		}
+		sigqueue(ppid, SIG_LIVENESS, signal_value);
+
+		/* Latency covers the sleep plus the mailbox round trip. */
+		t1 = time(NULL);
+		latency = t1 - t0;
+		if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
+			cl_log(LOG_WARNING,
+			       "Latency: %d exceeded threshold %d on disk %s",
+			       (int)latency, (int)timeout_watchdog_warn,
+			       diskname);
+		} else if (debug) {
+			cl_log(LOG_INFO, "Latency: %d on disk %s", (int)latency,
+			       diskname);
+		}
+	}
+out:
+	free(s_mbox);
+	close(devfd);
+	devfd = -1;
+	return rc;
+}
+
+/*
+ * Append a new entry for device "devname" to the end of the servant list
+ * and increment servant_count.
+ *
+ * devname: device path; a private copy is stored in the entry.
+ * pid:     initial pid recorded for the entry.
+ *
+ * Terminates the process with exit(1) when memory cannot be allocated.
+ */
+void recruit_servant(const char *devname, pid_t pid)
+{
+	struct servants_list_item *s = servants_leader;
+	struct servants_list_item *newbie;
+
+	newbie = malloc(sizeof(*newbie));
+	if (!newbie) {
+		fprintf(stderr, "malloc failed in recruit_servant.");
+		exit(1);
+	}
+	memset(newbie, 0, sizeof(*newbie));
+	newbie->devname = strdup(devname);
+	if (!newbie->devname) {
+		/* A NULL device name would only fail later, far away from
+		 * the real cause. */
+		fprintf(stderr, "strdup failed in recruit_servant.\n");
+		exit(1);
+	}
+	newbie->pid = pid;
+
+	if (!s) {
+		servants_leader = newbie;
+	} else {
+		/* Keep the devices in the order they were given. */
+		while (s->next)
+			s = s->next;
+		s->next = newbie;
+	}
+
+	servant_count++;
+}
+
+/*
+ * Find the servant list entry whose device name equals "devname"
+ * (compared case-insensitively over the whole string).
+ *
+ * Returns the matching entry, or NULL when no entry matches.
+ */
+struct servants_list_item *lookup_servant_by_dev(const char *devname)
+{
+	struct servants_list_item *s;
+
+	for (s = servants_leader; s; s = s->next) {
+		/* The comparison returns 0 on a match. Comparing the whole
+		 * string keeps "/dev/sda" from matching "/dev/sda1". */
+		if (strcasecmp(s->devname, devname) == 0)
+			break;
+	}
+	return s;
+}
+
+/*
+ * Find the servant list entry whose recorded pid equals "pid".
+ *
+ * Returns the first matching entry, or NULL when the list holds none.
+ */
+struct servants_list_item *lookup_servant_by_pid(pid_t pid)
+{
+	struct servants_list_item *cur = servants_leader;
+
+	while (cur != NULL && cur->pid != pid)
+		cur = cur->next;
+
+	return cur;
+}
+
+/*
+ * Check whether every servant with a recorded pid is gone.
+ *
+ * Each non-zero pid is probed with signal 0. An entry counts as dead only
+ * when the probe fails with ESRCH.
+ *
+ * Returns 1 when no servant is left, 0 as soon as one probe does not fail
+ * with ESRCH.
+ */
+int check_all_dead(void)
+{
+	struct servants_list_item *s;
+	int r = 0;
+	union sigval svalue;
+
+	/* The value is passed to sigqueue() by value; give it defined
+	 * contents. */
+	memset(&svalue, 0, sizeof(svalue));
+
+	for (s = servants_leader; s; s = s->next) {
+		if (s->pid != 0) {
+			r = sigqueue(s->pid, 0, svalue);
+			if (r == -1 && errno == ESRCH)
+				continue;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+
+/*
+ * Start a servant for every list entry that has none running.
+ *
+ * An entry is skipped when it has a non-zero pid and probing that pid with
+ * signal 0 does not fail with ESRCH. For every other entry the restart
+ * counter is reset and a new servant is started with assign_servant().
+ */
+void servants_start(void)
+{
+	struct servants_list_item *s;
+	int r = 0;
+	union sigval svalue;
+
+	/* The value is passed to sigqueue() by value; give it defined
+	 * contents. */
+	memset(&svalue, 0, sizeof(svalue));
+
+	for (s = servants_leader; s; s = s->next) {
+		if (s->pid != 0) {
+			r = sigqueue(s->pid, 0, svalue);
+			if ((r != -1 || errno != ESRCH))
+				continue;
+		}
+		s->restarts = 0;
+		s->pid = assign_servant(s->devname, servant, NULL);
+	}
+}
+
+/*
+ * Send SIGKILL to every servant that has a non-zero pid recorded.
+ * The result of the individual sigqueue() calls is not checked.
+ */
+void servants_kill(void)
+{
+	struct servants_list_item *s;
+	union sigval svalue;
+
+	/* The value is passed to sigqueue() by value; give it defined
+	 * contents. */
+	memset(&svalue, 0, sizeof(svalue));
+
+	for (s = servants_leader; s; s = s->next) {
+		if (s->pid != 0)
+			sigqueue(s->pid, SIGKILL, svalue);
+	}
+}
+
+/*
+ * Compare the timeout settings stored in the headers of all devices.
+ *
+ * Devices that cannot be opened, or for which header_get() returns NULL,
+ * are skipped. As a side effect the global timeout_watchdog,
+ * timeout_allocate, timeout_loop and timeout_msgwait are set from the last
+ * header that could be read.
+ *
+ * Returns 1 when two successively read headers differ in any of the four
+ * timeouts, 0 otherwise. Terminates the process with exit(1) when no header
+ * could be read at all.
+ */
+int check_timeout_inconsistent(void)
+{
+	int devfd;
+	struct sector_header_s *hdr_cur = 0, *hdr_last = 0;
+	struct servants_list_item* s;
+	int inconsistent = 0;
+
+	for (s = servants_leader; s; s = s->next) {
+		devfd = open_device(s->devname);
+		if (devfd < 0)
+			continue;
+		hdr_cur = header_get(devfd);
+		close(devfd);
+		if (!hdr_cur)
+			continue;
+		if (hdr_last) {
+			if (hdr_last->timeout_watchdog != hdr_cur->timeout_watchdog
+			    || hdr_last->timeout_allocate != hdr_cur->timeout_allocate
+			    || hdr_last->timeout_loop != hdr_cur->timeout_loop
+			    || hdr_last->timeout_msgwait != hdr_cur->timeout_msgwait)
+				inconsistent = 1;
+			/* The previous header is no longer needed once it
+			 * has been compared. */
+			free(hdr_last);
+		}
+		hdr_last = hdr_cur;
+	}
+
+	if (hdr_last) {
+		/* Adopt the on-disk settings of the last readable device. */
+		timeout_watchdog = hdr_last->timeout_watchdog;
+		timeout_allocate = hdr_last->timeout_allocate;
+		timeout_loop = hdr_last->timeout_loop;
+		timeout_msgwait = hdr_last->timeout_msgwait;
+	} else { 
+		cl_log(LOG_ERR, "No devices were available at start-up.");
+		exit(1);
+	}
+
+	free(hdr_last);
+	return inconsistent;
+}
+
+/*
+ * Mark the servant that ran as "pid" as no longer running by resetting its
+ * recorded pid to 0. An unknown pid is only logged.
+ */
+inline void cleanup_servant_by_pid(pid_t pid)
+{
+	struct servants_list_item *entry = lookup_servant_by_pid(pid);
+
+	if (entry == NULL) {
+		/* TODO: This points to an inconsistency in our internal
+		 * data - how to recover? */
+		cl_log(LOG_ERR, "Cannot cleanup after unknown pid %i", pid);
+		return;
+	}
+
+	entry->pid = 0;
+}
+
+/*
+ * Start a replacement for the servant that ran as "pid".
+ *
+ * The servant is restarted on the same device unless its restart counter
+ * has already reached 10, in which case only a warning is logged. An
+ * unknown pid is logged as an error.
+ */
+void restart_servant_by_pid(pid_t pid)
+{
+	struct servants_list_item *entry = lookup_servant_by_pid(pid);
+
+	if (entry == NULL) {
+		/* TODO: This points to an inconsistency in our internal
+		 * data - how to recover? */
+		cl_log(LOG_ERR, "Cannot restart unknown pid %i", pid);
+		return;
+	}
+
+	if (entry->restarts >= 10) {
+		cl_log(LOG_WARNING,
+		       "Max retry count reached: not restarting servant for %s",
+		       entry->devname);
+		return;
+	}
+
+	entry->pid = assign_servant(entry->devname, servant, NULL);
+	entry->restarts++;
+}
+
+/*
+ * Finish start-up of the inquisitor: initialise the watchdog (when
+ * watchdog_use is set) and send SIG_LIVENESS to the parent process if the
+ * parent pid is greater than 1.
+ *
+ * Returns 0 on success, -1 when watchdog_init() fails.
+ */
+int inquisitor_decouple(void)
+{
+	pid_t ppid = getppid();
+	union sigval signal_value;
+
+	/* The value is passed to sigqueue() by value; give it defined
+	 * contents. */
+	memset(&signal_value, 0, sizeof(signal_value));
+
+	/* During start-up, we only arm the watchdog once we've got
+	 * quorum at least once. */
+	if (watchdog_use) {
+		if (watchdog_init() < 0) {
+			return -1;
+		}
+	}
+
+	if (ppid > 1) {
+		sigqueue(ppid, SIG_LIVENESS, signal_value);
+	}
+	return 0;
+}
+
+/*
+ * Main loop of the inquisitor process; never returns.
+ *
+ * Starts the servants and then waits (for at most timeout_loop seconds per
+ * iteration) for one of the blocked signals:
+ *   SIG_EXITREQ  - kill the servants, close the watchdog, start exiting
+ *   SIGCHLD      - reap children; restart them unless exiting
+ *   SIG_LIVENESS - count the sending pid once per round
+ *   SIGUSR1      - start servants that are not running
+ * When quorum_read() accepts the number of live servants the watchdog is
+ * tickled and the round is reset. If no tickle happened for more than
+ * timeout_watchdog seconds, start-up is aborted (before decoupling) or
+ * do_reset() is called (after decoupling).
+ */
+void inquisitor_child(void)
+{
+	int sig, pid, i;
+	sigset_t procmask;
+	siginfo_t sinfo;
+	int *reports;
+	int status;
+	struct timespec timeout;
+	int good_servants = 0;
+	int exiting = 0;
+	int decoupled = 0;
+	time_t latency;
+	struct timespec t_last_tickle, t_now, t_last_restarted;
+
+	set_proc_title("sbd: inquisitor");
+
+	/* reports[] holds the pids that signalled liveness in the current
+	 * round; 0 marks a free entry. */
+	reports = malloc(sizeof(int) * servant_count);
+	if (!reports) {
+		cl_log(LOG_ERR, "malloc failed");
+		exit(1);
+	}
+	memset(reports, 0, sizeof(int) * servant_count);
+
+	/* Block the signals handled below so that they are only delivered
+	 * through sigtimedwait(). */
+	sigemptyset(&procmask);
+	sigaddset(&procmask, SIGCHLD);
+	sigaddset(&procmask, SIG_LIVENESS);
+	sigaddset(&procmask, SIG_EXITREQ);
+	sigaddset(&procmask, SIG_TEST);
+	sigaddset(&procmask, SIGUSR1);
+	sigaddset(&procmask, SIGUSR2);
+	sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+	servants_start();
+
+	timeout.tv_sec = timeout_loop;
+	timeout.tv_nsec = 0;
+	good_servants = 0;
+	clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+	clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+
+	while (1) {
+		sig = sigtimedwait(&procmask, &sinfo, &timeout);
+		DBGPRINT("got signal %d\n", sig);
+
+		if (sig == SIG_EXITREQ) {
+			servants_kill();
+			watchdog_close();
+			exiting = 1;
+		} else if (sig == SIGCHLD) {
+			/* Reap every child that has already terminated. */
+			while ((pid = waitpid(-1, &status, WNOHANG))) {
+				if (pid == -1 && errno == ECHILD) {
+					break;
+				} else if (exiting) {
+					cleanup_servant_by_pid(pid);
+				} else {
+					restart_servant_by_pid(pid);
+				}
+			}
+		} else if (sig == SIG_LIVENESS) {
+			/* Count each reporting pid at most once per round. */
+			for (i = 0; i < servant_count; i++) {
+				if (reports[i] == sinfo.si_pid) {
+					break;
+				} else if (reports[i] == 0) {
+					reports[i] = sinfo.si_pid;
+					good_servants++;
+					break;
+				}
+			}
+		} else if (sig == SIG_TEST) {
+			/* Nothing to do for a test message. */
+		} else if (sig == SIGUSR1) {
+			if (exiting)
+				continue;
+			clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+			servants_start();
+		}
+
+		/* While exiting, only wait for the servants to disappear. */
+		if (exiting) {
+			if (check_all_dead())
+				exit(0);
+			else
+				continue;
+		}
+
+		if (quorum_read(good_servants)) {
+			DBGPRINT("Enough liveness messages\n");
+			if (!decoupled) {
+				if (inquisitor_decouple() < 0) {
+					servants_kill();
+					exiting = 1;
+					continue;
+				} else {
+					decoupled = 1;
+				}
+			}
+
+			/* Successful round: feed the watchdog and start
+			 * collecting reports from scratch. */
+			watchdog_tickle();
+			clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+			memset(reports, 0, sizeof(int) * servant_count);
+			good_servants = 0;
+		}
+
+		/* Whole seconds since the watchdog was last tickled. */
+		clock_gettime(CLOCK_MONOTONIC, &t_now);
+		latency = t_now.tv_sec - t_last_tickle.tv_sec;
+		if (timeout_watchdog && (latency > timeout_watchdog)) {
+			if (!decoupled) {
+				/* We're still being watched by our
+				 * parent. We don't fence, but exit. */
+				cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
+				servants_kill();
+				exiting = 1;
+				continue;
+			}
+			do_reset();
+		}
+		if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
+			cl_log(LOG_WARNING,
+			       "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)",
+			       (int)latency, (int)timeout_watchdog_warn, good_servants);
+		}
+		
+		latency = t_now.tv_sec - t_last_restarted.tv_sec;
+		if (servant_restart_interval > 0 
+				&& latency > servant_restart_interval) {
+			/* Every servant_restart_interval seconds, start the
+			 * servants that are not running. */
+			clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+			servants_start();
+		}
+	}
+	/* not reached */
+	exit(0);
+}
+
+/*
+ * Entry point of the "watch" command.
+ *
+ * Verifies that all devices carry the same timeouts, then calls
+ * make_daemon(); the process in which make_daemon() returns 0 runs
+ * inquisitor_child(). The other process waits for the outcome.
+ *
+ * Returns 0 once SIG_LIVENESS has been received, -1 when the timeouts are
+ * inconsistent or a child was reaped before that.
+ */
+int inquisitor(void)
+{
+	int sig, pid, inquisitor_pid;
+	int status;
+	sigset_t procmask;
+	siginfo_t sinfo;
+
+	DBGPRINT("inquisitor starting\n");
+
+	/* Where's the best place for sysrq init ?*/
+	sysrq_init();
+
+	/* Block the two signals we wait for so that they are only
+	 * delivered through sigwaitinfo(). */
+	sigemptyset(&procmask);
+	sigaddset(&procmask, SIGCHLD);
+	sigaddset(&procmask, SIG_LIVENESS);
+	sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+	if (check_timeout_inconsistent() == 1) {
+		fprintf(stderr, "Timeout settings are different across SBD devices!\n");
+		fprintf(stderr, "You have to correct them and re-start SBD again.\n");
+		return -1;
+	}
+
+	inquisitor_pid = make_daemon();
+	if (inquisitor_pid == 0) {
+		inquisitor_child();
+	} 
+	
+	/* We're the parent. Wait for a happy signal from our child
+	 * before we proceed - we either get "SIG_LIVENESS" when the
+	 * inquisitor has completed the first successful round, or
+	 * SIGCHLD when it exits with an error. */
+
+	while (1) {
+		sig = sigwaitinfo(&procmask, &sinfo);
+		DBGPRINT("get signal %d\n", sig);
+		if (sig == SIGCHLD) {
+			while ((pid = waitpid(-1, &status, WNOHANG))) {
+				if (pid == -1 && errno == ECHILD) {
+					break;
+				}
+				/* We got here because the inquisitor
+				 * did not succeed. */
+				return -1;
+			}
+		} else if (sig == SIG_LIVENESS) {
+			/* Inquisitor started up properly. */
+			return 0;
+		} else {
+			fprintf(stderr, "Nobody expected the spanish inquisition!\n");
+			continue;
+		}
+	}
+	/* not reached */
+	return -1;
+}
+
+/*
+ * Deliver message "msg" to node "name" through all configured devices.
+ *
+ * One worker per device is started with assign_servant(), running
+ * slot_msg_wrapper() with both strings as its argument. The function then
+ * reaps the workers until either quorum_write() accepts the number of
+ * successful deliveries or every worker has finished.
+ *
+ * Returns 0 when quorum_write() accepts the number of workers that exited
+ * with status 0, -1 otherwise.
+ */
+int messenger(const char *name, const char *msg)
+{
+	int sig = 0;
+	pid_t pid = 0;
+	int status = 0;
+	int servants_finished = 0;
+	int successful_delivery = 0;
+	sigset_t procmask;
+	siginfo_t sinfo;
+	struct servants_list_item *s;
+	struct slot_msg_arg_t slot_msg_arg = {name, msg};
+
+	/* Block SIGCHLD so it can be collected synchronously below. */
+	sigemptyset(&procmask);
+	sigaddset(&procmask, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+	for (s = servants_leader; s; s = s->next) {
+		s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg);
+	}
+	
+	while (!(quorum_write(successful_delivery) || 
+		(servants_finished == servant_count))) {
+		sig = sigwaitinfo(&procmask, &sinfo);
+		DBGPRINT("get signal %d\n", sig);
+		if (sig == SIGCHLD) {
+			/* Reap every child that has already terminated. */
+			while ((pid = waitpid(-1, &status, WNOHANG))) {
+				if (pid == -1 && errno == ECHILD) {
+					break;
+				} else {
+					DBGPRINT("process %d finished\n", pid);
+					servants_finished++;
+					/* Only a normal exit with status 0
+					 * counts as a delivery. */
+					if (WIFEXITED(status)
+						&& WEXITSTATUS(status) == 0) {
+						DBGPRINT("exit with %d\n",
+								WEXITSTATUS(status));
+						successful_delivery++;
+					}
+				}
+			}
+		}
+		DBGPRINT("signal %d handled\n", sig);
+	}
+	if (quorum_write(successful_delivery)) {
+		return 0;
+	} else {
+		fprintf(stderr, "Message is not delivered via more then a half of devices\n");
+		return -1;
+	}
+}
+
+/*
+ * Dump the header of every configured device with header_dump().
+ *
+ * Stops at the first device that cannot be opened or whose header_dump()
+ * call returns -1.
+ *
+ * Returns -1 on such a failure; otherwise the result of the last
+ * header_dump() call, or 0 when the device list is empty.
+ */
+int dump_headers(void)
+{
+	struct servants_list_item *dev;
+	int result = 0;
+
+	for (dev = servants_leader; dev != NULL; dev = dev->next) {
+		int fd;
+
+		fprintf(stdout, "==Dumping header on disk %s\n", dev->devname);
+
+		fd = open_device(dev->devname);
+		if (fd == -1)
+			return -1;
+
+		result = header_dump(fd);
+		close(fd);
+		if (result == -1)
+			return -1;
+
+		fprintf(stdout, "==Header on disk %s is dumped\n", dev->devname);
+	}
+
+	return result;
+}
+
+/*
+ * Command-line entry point.
+ *
+ * Parses the options, requires one to three devices (-d) and a command
+ * word, then dispatches to the handler of that command:
+ *   create | dump | list | watch     - no further arguments
+ *   allocate <node> | ping <node>    - one argument
+ *   message <node> <command>         - two arguments
+ *
+ * Returns 0 on success. On any failure the usage text is printed and 1 is
+ * returned.
+ */
+int main(int argc, char **argv, char **envp)
+{
+	int exit_status = 0;
+	int c;
+	int nargs;
+	int required = 0;
+	const char *command;
+
+	if ((cmdname = strrchr(argv[0], '/')) == NULL) {
+		cmdname = argv[0];
+	} else {
+		++cmdname;
+	}
+
+	cl_log_set_entity(cmdname);
+	cl_log_enable_stderr(0);
+	cl_log_set_facility(LOG_DAEMON);
+
+	get_uname();
+
+	/* 'T' has to be listed here; otherwise getopt() rejects -T and the
+	 * matching case below can never run. */
+	while ((c = getopt(argc, argv, "DRTWhvw:d:n:1:2:3:4:5:t:")) != -1) {
+		switch (c) {
+		case 'D':
+			/* Ignore for historical reasons */
+			break;
+		case 'R':
+			skip_rt = 1;
+			break;
+		case 'v':
+			debug = 1;
+			break;
+		case 'T':
+			watchdog_set_timeout = 0;
+			break;
+		case 'W':
+			watchdog_use = 1;
+			break;
+		case 'w':
+			watchdogdev = optarg;
+			break;
+		case 'd':
+			recruit_servant(optarg, 0);
+			break;
+		case 'n':
+			local_uname = optarg;
+			break;
+		case '1':
+			timeout_watchdog = atoi(optarg);
+			break;
+		case '2':
+			timeout_allocate = atoi(optarg);
+			break;
+		case '3':
+			timeout_loop = atoi(optarg);
+			break;
+		case '4':
+			timeout_msgwait = atoi(optarg);
+			break;
+		case '5':
+			timeout_watchdog_warn = atoi(optarg);
+			break;
+		case 't':
+			servant_restart_interval = atoi(optarg);
+			break;
+		case 'h':
+			usage();
+			return (0);
+		default:
+			exit_status = -1;
+			goto out;
+			break;
+		}
+	}
+
+	if (servant_count < 1 || servant_count > 3) {
+		fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n");
+		exit_status = -1;
+		goto out;
+	}
+
+	/* There must at least be one command following the options: */
+	if ((argc - optind) < 1) {
+		fprintf(stderr, "Not enough arguments.\n");
+		exit_status = -1;
+		goto out;
+	}
+
+	command = argv[optind];
+	/* Number of arguments that follow the command word. */
+	nargs = argc - optind - 1;
+
+	/* Reject a command that lacks its arguments before any handler
+	 * reads argv[optind + 1] or argv[optind + 2]; the latter may lie
+	 * beyond the terminating NULL of argv. */
+	if (strcmp(command, "allocate") == 0 || strcmp(command, "ping") == 0) {
+		required = 1;
+	} else if (strcmp(command, "message") == 0) {
+		required = 2;
+	}
+	if (nargs < required) {
+		fprintf(stderr, "Not enough arguments.\n");
+		exit_status = -1;
+		goto out;
+	}
+
+	if (init_set_proc_title(argc, argv, envp) < 0) {
+		fprintf(stderr, "Allocation of proc title failed.");
+		exit(1);
+	}
+
+	maximize_priority();
+
+	if (strcmp(command, "create") == 0) {
+		exit_status = init_devices();
+	} else if (strcmp(command, "dump") == 0) {
+		exit_status = dump_headers();
+	} else if (strcmp(command, "allocate") == 0) {
+		exit_status = allocate_slots(argv[optind + 1]);
+	} else if (strcmp(command, "list") == 0) {
+		exit_status = list_slots();
+	} else if (strcmp(command, "message") == 0) {
+		exit_status = messenger(argv[optind + 1], argv[optind + 2]);
+	} else if (strcmp(command, "ping") == 0) {
+		exit_status = ping_via_slots(argv[optind + 1]);
+	} else if (strcmp(command, "watch") == 0) {
+		exit_status = inquisitor();
+	} else {
+		exit_status = -1;
+	}
+
+out:
+	if (exit_status < 0) {
+		usage();
+		return (1);
+	}
+	return (0);
+}
diff --git a/lib/stonith/sbd.h b/lib/stonith/sbd.h
index af2c124..001824b 100644
--- a/lib/stonith/sbd.h
+++ b/lib/stonith/sbd.h
@@ -16,7 +16,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include <arpa/inet.h>
-
+#include <sys/types.h>
 
 /* Sector data types */
 struct sector_header_s {
@@ -42,51 +42,110 @@ struct sector_node_s {
 	char 	name[64];
 };
 
+/* One entry of the singly linked list of configured devices and the
+ * process working on each of them. */
+struct servants_list_item {
+	/* Device path of this entry. */
+	const char* devname;
+	/* Pid recorded for the process handling this device; 0 means that
+	 * none is recorded. */
+	pid_t pid;
+	/* Number of restarts counted for this device. */
+	int restarts;
+	/* Next entry, or NULL at the end of the list. */
+	struct servants_list_item *next;
+};
+
 #define SBD_MSG_EMPTY	0x00
 #define SBD_MSG_TEST	0x01
 #define SBD_MSG_RESET	0x02
 #define SBD_MSG_OFF	0x03
 #define SBD_MSG_EXIT	0x04
+#define SBD_MSG_CRASHDUMP	0x05
 			
 #define SLOT_TO_SECTOR(slot) (1+slot*2)
 #define MBOX_TO_SECTOR(mbox) (2+mbox*2)
 
-static void usage(void);
-static void watchdog_init_interval(void);
-static void watchdog_tickle(void);
-static void watchdog_init(void);
-static void watchdog_close(void);
-static int open_device(const char* devname);
-static signed char cmd2char(const char *cmd);
-static void * sector_alloc(void);
-static const char* char2cmd(const char cmd);
-static int sector_write(int sector, const void *data);
-static int sector_read(int sector, void *data);
-static int slot_read(int slot, struct sector_node_s *s_node);
-static int slot_write(int slot, const struct sector_node_s *s_node);
-static int mbox_write(int mbox, const struct sector_mbox_s *s_mbox);
-static int mbox_read(int mbox, struct sector_mbox_s *s_mbox);
-static int mbox_write_verify(int mbox, const struct sector_mbox_s *s_mbox);
+void usage(void);
+int watchdog_init_interval(void);
+int watchdog_tickle(void);
+int watchdog_init(void);
+void sysrq_init(void);
+void watchdog_close(void);
+int open_device(const char* devname);
+signed char cmd2char(const char *cmd);
+void * sector_alloc(void);
+const char* char2cmd(const char cmd);
+int sector_write(int devfd, int sector, const void *data);
+int sector_read(int devfd, int sector, void *data);
+int slot_read(int devfd, int slot, struct sector_node_s *s_node);
+int slot_write(int devfd, int slot, const struct sector_node_s *s_node);
+int mbox_write(int devfd, int mbox, const struct sector_mbox_s *s_mbox);
+int mbox_read(int devfd, int mbox, struct sector_mbox_s *s_mbox);
+int mbox_write_verify(int devfd, int mbox, const struct sector_mbox_s *s_mbox);
 /* After a call to header_write(), certain data fields will have been
  * converted to on-disk byte-order; the header should not be accessed
  * afterwards anymore! */
-static int header_write(struct sector_header_s *s_header);
-static int header_read(struct sector_header_s *s_header);
-static int valid_header(const struct sector_header_s *s_header);
-static struct sector_header_s * header_get(void);
-static int init_device(void);
-static int slot_lookup(const struct sector_header_s *s_header, const char *name);
-static int slot_unused(const struct sector_header_s *s_header);
-static int slot_allocate(const char *name);
-static int slot_list(void);
-static int slot_ping(const char *name);
-static int slot_msg(const char *name, const char *cmd);
-static int header_dump(void);
-static void sysrq_trigger(char t);
-static void do_reset(void);
-static void do_off(void);
-static void make_daemon(void);
-static int daemonize(void);
-static void maximize_priority(void);
-static void get_uname(void);
+int header_write(int devfd, struct sector_header_s *s_header);
+int header_read(int devfd, struct sector_header_s *s_header);
+int valid_header(const struct sector_header_s *s_header);
+struct sector_header_s * header_get(int devfd);
+int init_device(int devfd);
+int slot_lookup(int devfd, const struct sector_header_s *s_header, const char *name);
+int slot_unused(int devfd, const struct sector_header_s *s_header);
+int slot_allocate(int devfd, const char *name);
+int slot_list(int devfd);
+int slot_ping(int devfd, const char *name);
+int slot_msg(int devfd, const char *name, const char *cmd);
+int header_dump(int devfd);
+void sysrq_trigger(char t);
+void do_crashdump(void);
+void do_reset(void);
+void do_off(void);
+pid_t make_daemon(void);
+void maximize_priority(void);
+void get_uname(void);
+
+/* Tunable defaults: */
+extern unsigned long    timeout_watchdog;
+extern unsigned long    timeout_watchdog_warn;
+extern int      timeout_allocate;
+extern int      timeout_loop;
+extern int      timeout_msgwait;
+extern int  watchdog_use;
+extern int  watchdog_set_timeout;
+extern int  skip_rt;
+extern int  debug;
+extern const char *watchdogdev;
+extern char*  local_uname;
+
+/* Global, non-tunable variables: */
+extern int  sector_size;
+extern int  watchdogfd;
+extern const char* cmdname;
+
+typedef int (*functionp_t)(const char* devname, const void* argp);
+
+int assign_servant(const char* devname, functionp_t functionp, const void* argp);
+int init_devices(void);
+/* Argument bundle handed to slot_msg_wrapper() through its argp pointer. */
+struct slot_msg_arg_t {
+	/* Name of the node the message is addressed to. */
+	const char* name;
+	/* Message (command) to deliver. */
+	const char* msg;
+};
+int slot_msg_wrapper(const char* devname, const void* argp);
+int slot_ping_wrapper(const char* devname, const void* argp);
+int allocate_slots(const char *name);
+int list_slots(void);
+int ping_via_slots(const char *name);
+int dump_headers(void);
+
+int check_all_dead(void);
+int servant(const char *diskname, const void* argp);
+void recruit_servant(const char *devname, pid_t pid);
+struct servants_list_item *lookup_servant_by_dev(const char *devname);
+struct servants_list_item *lookup_servant_by_pid(pid_t pid);
+void servants_kill(void);
+void servants_start(void);
+void inquisitor_child(void);
+int inquisitor(void);
+int inquisitor_decouple(void);
+int messenger(const char *name, const char *msg);
+int check_timeout_inconsistent(void);
+void restart_servant_by_pid(pid_t pid);
+void cleanup_servant_by_pid(pid_t pid);
+int quorum_write(int good_servants);
+int quorum_read(int good_servants);
 
diff --git a/lrm/admin/Makefile.am b/lrm/admin/Makefile.am
index c503ccf..a92cd72 100644
--- a/lrm/admin/Makefile.am
+++ b/lrm/admin/Makefile.am
@@ -25,6 +25,7 @@ halibdir			=	$(libdir)/@HB_PKG@
 COMMONLIBS			=	$(top_builddir)/lib/clplumbing/libplumb.la $(GLIBLIB)
 LRM_DIR			= 	lrm
 sbin_PROGRAMS 		= 	lrmadmin
+sbin_SCRIPTS 		= 	cibsecret
 lrmadmin_SOURCES  	= 	lrmadmin.c
 lrmadmin_LDFLAGS 	= 	$(COMMONLIBS)
 lrmadmin_LDADD = $(top_builddir)/lib/$(LRM_DIR)/liblrm.la
diff --git a/lrm/admin/cibsecret.in b/lrm/admin/cibsecret.in
new file mode 100755
index 0000000..8994667
--- /dev/null
+++ b/lrm/admin/cibsecret.in
@@ -0,0 +1,347 @@
+#!/bin/sh
+
+# Copyright (C) 2011 Dejan Muhamedagic <dmuhamedagic at suse.de>
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# WARNING:
+#
+# The CIB secrets interface and implementation is still being
+# discussed, it may change
+
+#
+# cibsecret: manage the secrets directory /var/lib/heartbeat/lrm/secrets
+#
+# secrets are ascii files, holding just one value per file:
+# /var/lib/heartbeat/lrm/secrets/<rsc>/<param>
+#
+# NB: this program depends on utillib.sh
+#
+
+. @OCF_ROOT_DIR@/resource.d/heartbeat/.ocf-shellfuncs
+
+HA_NOARCHBIN=@datadir@/@PACKAGE_NAME@
+
+. $HA_NOARCHBIN/utillib.sh
+
+LRM_CIBSECRETS=$HA_VARLIB/lrm/secrets
+
+PROG=`basename $0`
+
+# usage <exit-code>
+# Print the help text to stdout and exit with the given status.
+usage() {
+	cat<<EOF
+usage: $PROG [-C] <command> <parameters>
+
+-C: don't read/write the CIB
+
+command: set | delete | stash | unstash | get | check | sync
+
+	set <rsc> <param> <value>
+	get <rsc> <param>
+	check <rsc> <param>
+	stash <rsc> <param>		(if not -C)
+	unstash <rsc> <param>	(if not -C)
+	delete <rsc> <param>
+	sync
+
+stash/unstash: move the parameter from/to the CIB (if you already
+	have the parameter set in the CIB).
+
+set/delete: add/remove a parameter from the local file.
+
+get: display the parameter from the local file.
+
+check: verify MD5 hash of the parameter from the local file and the CIB.
+
+sync: copy $LRM_CIBSECRETS to other nodes.
+
+Examples:
+
+	$PROG set ipmi_node1 passwd SecreT_PASS
+	$PROG stash ipmi_node1 passwd
+	$PROG get ipmi_node1 passwd
+	$PROG check ipmi_node1 passwd
+	$PROG sync
+EOF
+	exit $1
+}
+# Message helpers. All three write to stdout, so their output is captured
+# when the caller runs inside a command substitution.
+
+# fatal <message...>: print an error and exit with status 1.
+fatal() {
+	echo "ERROR: $*"
+	exit 1
+}
+# warn <message...>: print a warning.
+warn() {
+	echo "WARNING: $*"
+}
+# info <message...>: print an informational message.
+info() {
+	echo "INFO: $*"
+}
+
+check_env() {
+	which md5sum >/dev/null 2>&1 ||
+		fatal "please install md5sum to run $PROG"
+	if which pssh >/dev/null 2>&1; then
+		rsh=pssh_fun
+		rcp=pscp_fun
+	elif which pdsh >/dev/null 2>&1; then
+		rsh=pdsh_fun
+		rcp=pdcp_fun
+	elif which ssh >/dev/null 2>&1; then
+		rsh=ssh_fun
+		rcp=scp_fun
+	else
+		fatal "please install pssh, pdsh, or ssh to run $PROG"
+	fi
+	ps -ef | grep '[c]rmd' >/dev/null ||
+		fatal "pacemaker not running? $PROG needs pacemaker"
+}
+
+# Print the second field of every "crm_node -l" line, leaving out the line
+# that equals the local node name.
+# The local name is matched as a fixed string against the whole line, so
+# that e.g. "node1" does not also hide "node10" and dots in the name are not
+# treated as regular expression wildcards.
+get_other_nodes() {
+	crm_node -l | awk '{print $2}' | grep -v -x -F -- "`uname -n`"
+}
+# check_down_nodes <live nodes...> <all other nodes...>
+# Warn about every name that occurs exactly once in the argument list.
+# The callers pass the live nodes followed by the complete node list, so a
+# name that occurs only once belongs to a node that is not live.
+check_down_nodes() {
+	local n down_nodes
+	# "uniq -u" keeps only the lines that are not repeated.
+	down_nodes=`(for n; do echo $n; done) | sort | uniq -u`
+	if [ -n "$down_nodes" ]; then
+		if [ `echo $down_nodes | wc -w` = 1 ]; then
+			warn "node $down_nodes is down"
+			warn "you'll need to update it using $PROG sync later"
+		else
+			warn "nodes `echo $down_nodes` are down"
+			warn "you'll need to update them using $PROG sync later"
+		fi
+	fi
+}
+
+# Remote execution and copy helpers. check_env stores the name of one
+# "*sh_fun" in $rsh and of the matching copy function in $rcp.
+# All of them read the target hosts from $nodes, which the caller sets.
+
+# Run the given command on $nodes with pssh.
+pssh_fun() {
+	pssh -q -H "$nodes" $*
+}
+# Copy <src...> <dest> to $nodes with pscp.
+pscp_fun() {
+	pscp -q -H "$nodes" -x "-pr" $*
+}
+# Run the given command on $nodes with pdsh, which takes the hosts as a
+# comma-separated list.
+pdsh_fun() {
+	local pdsh_nodes=`echo $nodes | tr ' ' ','`
+	pdsh -w $pdsh_nodes $*
+}
+# Copy <src...> <dest> to $nodes with pdcp.
+pdcp_fun() {
+	local pdsh_nodes=`echo $nodes | tr ' ' ','`
+	pdcp -pr -w $pdsh_nodes $*
+}
+# Run the given command on each of $nodes in turn with ssh.
+ssh_fun() {
+	local h
+	for h in $nodes; do
+		ssh $h $*
+	done
+}
+# Copy <src> to <dest> on each of $nodes in turn with scp.
+# $src is expanded unquoted on purpose: sync_one passes two file names in
+# a single argument.
+scp_fun() {
+	local h src="$1" dest=$2
+	for h in $nodes; do
+		scp -pr -q $src $h:$dest
+	done
+}
+# TODO: this procedure should be replaced with csync2
+# provided that csync2 has already been configured
+#
+# Replace the secrets directory on all live nodes with the local one:
+# the remote $LRM_CIBSECRETS is removed, its parent directory is created and
+# the local directory is copied into it. Nodes that are down are reported
+# by check_down_nodes.
+sync_files() {
+	local crm_nodes=`get_other_nodes`
+	local nodes=`get_live_nodes $crm_nodes`
+	check_down_nodes $nodes $crm_nodes
+	[ "$nodes" = "" ] && {
+		info "no other nodes live"
+		return
+	}
+	info "syncing $LRM_CIBSECRETS to `echo $nodes` ..."
+	$rsh rm -rf $LRM_CIBSECRETS
+	$rsh mkdir -p `dirname $LRM_CIBSECRETS`
+	$rcp $LRM_CIBSECRETS `dirname $LRM_CIBSECRETS`
+}
+# sync_one <file>
+# Propagate one secret file and its ".sign" companion to all live nodes.
+# If <file> exists locally both files are copied; otherwise both are removed
+# on the remote nodes.
+sync_one() {
+	local f=$1 f_all="$1 $1.sign"
+	local crm_nodes=`get_other_nodes`
+	local nodes=`get_live_nodes $crm_nodes`
+	check_down_nodes $nodes $crm_nodes
+	[ "$nodes" = "" ] && {
+		info "no other nodes live"
+		return
+	}
+	info "syncing $f to `echo $nodes` ..."
+	$rsh mkdir -p `dirname $f`
+	if [ -f "$f" ]; then
+		# Both names travel in one argument; the copy helpers split
+		# it again.
+		$rcp "$f_all" `dirname $f`
+	else
+		$rsh rm -f $f_all
+	fi
+}
+
+is_secret() {
+	# assume that the secret is in the CIB if we cannot talk to
+	# cib
+	[ "$NO_CRM" ] ||
+	test "$1" = "$MAGIC"
+}
+check_cib_rsc() {
+	local rsc=$1 output
+	output=`$NO_CRM crm_resource -r $rsc -W >/dev/null 2>&1` ||
+		fatal "resource $rsc doesn't exist: $output"
+}
+get_cib_param() {
+	local rsc=$1 param=$2
+	check_cib_rsc $rsc
+	$NO_CRM crm_resource -r $rsc -g $param 2>/dev/null
+}
+set_cib_param() {
+	local rsc=$1 param=$2 value=$3
+	check_cib_rsc $rsc
+	$NO_CRM crm_resource -r $rsc -p $param -v "$value" 2>/dev/null
+}
+remove_cib_param() {
+	local rsc=$1 param=$2
+	check_cib_rsc $rsc
+	$NO_CRM crm_resource -r $rsc -d $param 2>/dev/null
+}
+
+localfiles() {
+	local cmd=$1
+	local rsc=$2 param=$3 value=$4
+	local local_file=$LRM_CIBSECRETS/$rsc/$param
+	case $cmd in
+	"get")
+		cat $local_file 2>/dev/null
+		true
+		;;
+	"getsum")
+		cat $local_file.sign 2>/dev/null
+		true
+		;;
+	"set")
+		local md5sum
+		md5sum=`printf $value | md5sum` ||
+			fatal "md5sum failed to produce hash for resource $rsc parameter $param"
+		md5sum=`echo $md5sum | awk '{print $1}'`
+		mkdir -p `dirname $local_file` &&
+			echo $value > $local_file &&
+			echo $md5sum > $local_file.sign &&
+			sync_one $local_file
+		;;
+	"remove")
+		rm -f $local_file
+		sync_one $local_file
+	;;
+	*)
+		# not reached, this is local interface
+	;;
+	esac
+}
+get_local_param() {
+	local rsc=$1 param=$2
+	localfiles get $rsc $param
+}
+set_local_param() {
+	local rsc=$1 param=$2 value=$3
+	localfiles set $rsc $param $value
+}
+remove_local_param() {
+	local rsc=$1 param=$2
+	localfiles remove $rsc $param
+}
+
+cibsecret_set() {
+	local value=$1
+
+	if [ -z "$NO_CRM" ]; then
+		[ "$current" -a "$current" != "$value" ] &&
+			fatal "CIB value <$current> different for $rsc parameter $param; please delete it first"
+	fi
+	set_local_param $rsc $param $value &&
+	set_cib_param $rsc $param "$MAGIC"
+}
+
+cibsecret_check() {
+	local md5sum local_md5sum
+	is_secret "$current" ||
+		fatal "no magic in CIB for resource $rsc parameter $param"
+	local_md5sum=`localfiles getsum $rsc $param`
+	[ "$local_md5sum" ] ||
+		fatal "no MD5 hash for resource $rsc parameter $param"
+	md5sum=`printf "$current_local" | md5sum | awk '{print $1}'`
+	[ "$md5sum" = "$local_md5sum" ] ||
+		fatal "MD5 hash mismatch for resource $rsc parameter $param"
+}
+
+# cibsecret_get
+# Print the locally stored value of $rsc/$param after cibsecret_check has
+# verified it.
+cibsecret_get() {
+	cibsecret_check
+	echo "$current_local"
+}
+
+# cibsecret_delete
+# Remove $rsc/$param from the local files and, if that succeeded, from the
+# CIB as well.
+cibsecret_delete() {
+	remove_local_param $rsc $param &&
+	remove_cib_param $rsc $param
+}
+
+# cibsecret_stash
+# Move the value of $rsc/$param from the CIB into the local file.
+# Aborts when the CIB is not used (-C), when the CIB holds no value, or when
+# the CIB value already is the magic marker.
+cibsecret_stash() {
+	[ "$NO_CRM" ] &&
+		fatal "no access to Pacemaker, stash not supported"
+	[ "$current" = "" ] &&
+		fatal "nothing to stash for resource $rsc parameter $param"
+	is_secret "$current" &&
+		fatal "CIB value for resource $rsc parameter $param already MD5 hash"
+	cibsecret_set "$current"
+}
+
+cibsecret_unstash() {
+	[ "$NO_CRM" ] &&
+		fatal "no access to Pacemaker, unstash not supported"
+	[ "$current_local" = "" ] &&
+		fatal "nothing to unstash for resource $rsc parameter $param"
+	is_secret "$current" ||
+		warn "no MD5 hash in CIB for resource $rsc parameter $param, proceeding anyway"
+	remove_local_param $rsc $param &&
+	set_cib_param $rsc $param $current_local
+}
+
+# cibsecret_sync
+# Copy the whole secrets directory to the other nodes (see sync_files).
+cibsecret_sync() {
+	sync_files
+}
+
+# Main script: check the environment, parse the command line and dispatch
+# to the matching cibsecret_<command> function.
+check_env
+
+MAGIC="lrm://"
+# Secret files must not be readable by other users.
+umask 0077
+
+# -C: do not talk to the CIB. NO_CRM is used as a command prefix, so ':'
+# turns every crm_resource call into a no-op.
+if [ "$1" = "-C" ]; then
+	NO_CRM=':'
+	shift 1
+fi
+
+cmd="$1"
+rsc="$2"
+param="$3"
+value="$4"
+
+# Every command takes a fixed number of arguments (command word included).
+case "$cmd" in
+	set) [ $# -ne 4 ] && usage 1;;
+	get) [ $# -ne 3 ] && usage 1;;
+	check) [ $# -ne 3 ] && usage 1;;
+	stash) [ $# -ne 3 ] && usage 1;;
+	unstash) [ $# -ne 3 ] && usage 1;;
+	delete) [ $# -ne 3 ] && usage 1;;
+	sync) [ $# -ne 1 ] && usage 1;;
+	*) usage 1;
+esac
+
+# we'll need these two often
+current=`get_cib_param "$rsc" "$param"`
+current_local=`get_local_param "$rsc" "$param"`
+
+# Quote the value so that a secret containing whitespace reaches the
+# command function as a single argument.
+cibsecret_$cmd "$value"
diff --git a/lrm/lrmd/Makefile.am b/lrm/lrmd/Makefile.am
index 4578f9a..3680928 100644
--- a/lrm/lrmd/Makefile.am
+++ b/lrm/lrmd/Makefile.am
@@ -31,7 +31,7 @@ COMMONLIBS	=  $(top_builddir)/lib/clplumbing/libplumb.la   \
 
 halib_PROGRAMS 	=  lrmd
 
-lrmd_SOURCES 	=  lrmd.c audit.c lrmd_fdecl.h lrmd.h
+lrmd_SOURCES 	=  lrmd.c audit.c cib_secrets.c lrmd_fdecl.h lrmd.h
 
 lrmd_LDFLAGS 	=  $(top_builddir)/lib/lrm/liblrm.la 		\
 		   $(COMMONLIBS) @LIBLTDL@			\
diff --git a/lrm/lrmd/cib_secrets.c b/lrm/lrmd/cib_secrets.c
new file mode 100644
index 0000000..612ffdb
--- /dev/null
+++ b/lrm/lrmd/cib_secrets.c
@@ -0,0 +1,205 @@
+/*
+ * cib_secrets.c
+ *
+ * Author: Dejan Muhamedagic <dejan at suse.de>
+ * Copyright (c) 2011 SUSE, Attachmate
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <lha_internal.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <glib.h>
+#include <pils/plugin.h>
+#include <pils/generic.h>
+#include <clplumbing/GSource.h>
+#include <clplumbing/lsb_exitcodes.h>
+#include <clplumbing/cl_signal.h>
+#include <clplumbing/proctrack.h>
+#include <clplumbing/coredumps.h>
+#include <clplumbing/uids.h>
+#include <clplumbing/Gmain_timeout.h>
+#include <clplumbing/cl_pidfile.h>
+#include <clplumbing/realtime.h>
+#include <clplumbing/md5.h>
+#include <ha_msg.h>
+
+#include <lrm/lrm_api.h>
+#include <lrm/lrm_msg.h>
+
+#include <lrmd.h>
+
+int replace_secret_params(char *rsc_id, GHashTable* params);
+static int is_magic_value(char *p);
+static int check_md5_hash(char *hash, char *value);
+static void add_secret_params(gpointer key, gpointer value, gpointer user_data);
+static char *read_local_file(char *local_file);
+
+#define MAGIC "lrm://"
+
+static int	/* non-zero iff p is exactly the MAGIC placeholder string */
+is_magic_value(char *p)
+{
+	return strcmp(p, MAGIC) == 0;
+}
+
+#define MD5LEN 16
+static int	/* non-zero iff hash is the lowercase hex MD5 of value; NULL never matches */
+check_md5_hash(char *hash, char *value)
+{
+	int i;
+	char hash2[2*MD5LEN+1];
+	unsigned char binary[MD5LEN+1];
+
+	if (!hash || !value)	/* e.g. the .sign file could not be read */
+		return 0;
+	MD5((unsigned char *)value, strlen(value), binary);
+	for (i = 0; i < MD5LEN; i++)	/* snprintf() also terminates hash2 */
+		snprintf(hash2+2*i, sizeof(hash2)-2*i, "%02x", binary[i]);
+	lrmd_debug2(LOG_DEBUG, "%s:%d: hash: %s, calculated hash: %s"
+		, __FUNCTION__, __LINE__, hash, hash2);
+	return !strcmp(hash, hash2);
+}
+
+/* Return a g_strdup()ed copy of the first line of local_file with trailing
+ * white space removed, or NULL if it cannot be opened or read (caller frees). */
+static char *
+read_local_file(char *local_file)
+{
+	FILE *fp = fopen(local_file, "r");
+	char buf[MAX_VALUE_LEN+1], *line;
+	size_t len;
+
+	if (!fp) {
+		if (errno != ENOENT) /* a missing file is not logged here */
+			cl_perror("%s:%d: cannot open %s", __FUNCTION__, __LINE__, local_file);
+		return NULL;
+	}
+	line = fgets(buf, MAX_VALUE_LEN, fp);
+	if (!line) /* log before fclose() so that errno is still the one from fgets() */
+		cl_perror("%s:%d: cannot read %s", __FUNCTION__, __LINE__, local_file);
+	fclose(fp); /* the stream used to be left open on every path */
+	if (!line)
+		return NULL;
+	for (len = strlen(buf); len > 0 && isspace((unsigned char)buf[len-1]); len--)
+		; /* strip trailing white space */
+	buf[len] = '\0';
+	return g_strdup(buf);
+}
+
+/*
+ * Replace every parameter of rsc_id whose value is the MAGIC placeholder by
+ * the secret kept in LRM_CIBSECRETS/<rsc_id>/<name>; the secret is used only
+ * if its MD5 sum matches the hash stored in <name>.sign.
+ * returns 0 on success or no replacements necessary
+ * returns -1 if replacement failed for whatever reason
+ */
+int
+replace_secret_params(char *rsc_id, GHashTable* params)
+{
+	char local_file[FILENAME_MAX+1], *start_pname;
+	char hash_file[FILENAME_MAX+1], *hash;
+	GList *secret_params = NULL, *l;
+	char *key, *secret_value;
+	int rc = 0, dir_len, n;
+
+	/* secret_params could be cached with the resource;
+	 * there are also parameters sent with operations
+	 * which cannot be cached
+	*/
+	g_hash_table_foreach(params, add_secret_params, &secret_params);
+	if (!secret_params) /* none found? */
+		return 0;
+
+	lrmd_debug(LOG_DEBUG, "%s:%d: replace secret parameters for resource %s"
+		, __FUNCTION__, __LINE__, rsc_id);
+	dir_len = snprintf(local_file, FILENAME_MAX, LRM_CIBSECRETS "/%s/", rsc_id);
+	if (dir_len < 0 || dir_len >= FILENAME_MAX) { /* == size is truncated, too */
+		lrmd_log(LOG_ERR, "%s:%d: filename size exceeded for resource %s"
+			, __FUNCTION__, __LINE__, rsc_id);
+		g_list_free(secret_params); /* the list used to leak on this path */
+		return -1;
+	}
+	start_pname = local_file + dir_len;
+
+	for (l = g_list_first(secret_params); l; l = g_list_next(l)) {
+		key = (char *)(l->data);
+		if (!g_hash_table_lookup(params, key)) { /* this cannot really happen */
+			lrmd_log(LOG_ERR, "%s:%d: odd, no parameter %s for rsc %s found now"
+				, __FUNCTION__, __LINE__, key, rsc_id);
+			continue;
+		}
+		/* local_file still ends in the previous name here: use dir_len */
+		if ((strlen(key) + dir_len) >= FILENAME_MAX-2) {
+			lrmd_log(LOG_ERR, "%s:%d: parameter name %s too big"
+				, __FUNCTION__, __LINE__, key);
+			rc = -1;
+			continue;
+		}
+		strcpy(start_pname, key);
+		secret_value = read_local_file(local_file);
+		if (!secret_value) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: secret for rsc %s parameter %s "
+				"not found in " LRM_CIBSECRETS
+				, __FUNCTION__, __LINE__, rsc_id, key);
+			rc = -1;
+			continue;
+		}
+		n = snprintf(hash_file, sizeof(hash_file), "%s.sign", local_file);
+		if (n < 0 || n > FILENAME_MAX) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: cannot build such a long name "
+				"for the sign file: %s.sign"
+				, __FUNCTION__, __LINE__, local_file);
+			hash = NULL; /* cannot verify: handled like a mismatch below */
+		} else {
+			hash = read_local_file(hash_file);
+		}
+		/* a missing or unreadable .sign file must never reach strcmp() */
+		if (!hash || !check_md5_hash(hash, secret_value)) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: md5 sum for rsc %s parameter %s "
+				"does not match"
+				, __FUNCTION__, __LINE__, rsc_id, key);
+			g_free(secret_value);
+			g_free(hash);
+			rc = -1;
+			continue;
+		}
+		g_free(hash);
+		g_hash_table_replace(params, g_strdup(key), secret_value);
+	}
+	g_list_free(secret_params);
+	return rc;
+}
+
+static void	/* g_hash_table_foreach() callback: collect keys of secret params */
+add_secret_params(gpointer key, gpointer value, gpointer user_data)
+{
+	GList **found = user_data;	/* list head supplied by the caller */
+	if (!is_magic_value(value))
+		return;
+	*found = g_list_append(*found, key);	/* the key pointer is stored as is */
+}
diff --git a/lrm/lrmd/lrmd.c b/lrm/lrmd/lrmd.c
index 4c7295c..3df0cf4 100644
--- a/lrm/lrmd/lrmd.c
+++ b/lrm/lrmd/lrmd.c
@@ -130,6 +130,17 @@ static struct {
 	int	rsccount;
 }lrm_objectstats;
 
+/* define indexes into logmsg_ctrl_defs */
+#define OP_STAYED_TOO_LONG 0
+static struct logspam logmsg_ctrl_defs[] = {
+	{ "operation stayed too long in the queue",
+		10, 60, 120, /* max 10 messages in 60s, then delay for 120s */
+		"configuration advice: reduce operation contention "
+		"either by increasing lrmd max_children or by increasing intervals "
+		"of monitor operations"
+	},
+};
+
 #define set_fd_opts(fd,opts) do { \
 	int flag; \
 	if ((flag = fcntl(fd, F_GETFL)) >= 0) { \
@@ -3076,11 +3087,6 @@ perform_ra_op(lrmd_op_t* op)
 	}
 
 	op_type = ha_msg_value(op->msg, F_LRM_OP);
-	if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
-		lrmd_log(LOG_INFO,"rsc:%s:%d: %s",rsc->id,op->call_id,probe_str(op,op_type));
-	} else {
-		lrmd_debug(LOG_DEBUG,"rsc:%s:%d: %s",rsc->id,op->call_id,op_type);
-	}
 	op_params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
 	params = merge_str_tables(rsc->params,op_params);
 	ha_msg_mod_str_table(op->msg, F_LRM_PARAM, params);
@@ -3125,8 +3131,12 @@ perform_ra_op(lrmd_op_t* op)
 				((op->interval && !is_logmsg_due(op)) ? PT_LOGNORMAL : PT_LOGVERBOSE) : PT_LOGNONE
 			,	op, &ManagedChildTrackOps);
 
-			if (op->interval && is_logmsg_due(op)) {
-				op->t_lastlogmsg = time_longclock();
+			if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
+				lrmd_log(LOG_INFO,"rsc:%s %s[%d] (pid %d)",
+					rsc->id,probe_str(op,op_type),op->call_id,pid);
+			} else {
+				lrmd_debug(LOG_DEBUG,"rsc:%s %s[%d] (pid %d)",
+					rsc->id,op_type,op->call_id,pid);
 			}
 			close(stdout_fd[1]);
 			close(stderr_fd[1]);
@@ -3218,6 +3228,21 @@ perform_ra_op(lrmd_op_t* op)
 			,	"perform_ra_op:calling RA plugin to perform %s, pid: [%d]"
 			,	op_info(op), getpid());		
 			params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
+			if (replace_secret_params(rsc->id, params) < 0) {
+				/* replacing secrets failed! */
+				if (!strcmp(op_type,"stop")) {
+					/* don't fail on stop! */
+					lrmd_log(LOG_INFO
+					, "%s:%d: proceeding with the stop operation for %s"
+					, __FUNCTION__, __LINE__, rsc->id);
+				} else {
+					lrmd_log(LOG_ERR
+					, "%s:%d: failed to get secrets for %s, "
+					"considering resource not configured"
+					, __FUNCTION__, __LINE__, rsc->id);
+					exit(EXECRA_NOT_CONFIGURED);
+				}
+			}
 			RAExec->execra (rsc->id,
 					rsc->type,
 					rsc->provider,
@@ -3315,8 +3340,8 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
 
 	if( signo ) {
 		if( proctrack_timedout(p) ) {
-			lrmd_log(LOG_WARNING,	"%s: pid [%d] timed out"
-			, op_info(op), proctrack_pid(p));
+			lrmd_log(LOG_WARNING,	"%s: pid %d timed out"
+			, small_op_info(op), proctrack_pid(p));
 			op_status = LRM_OP_TIMEOUT;
 		} else {
 			op_status = LRM_OP_ERROR;
@@ -3324,20 +3349,16 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
 	} else {
 		rc = RAExec->map_ra_retvalue(exitcode, op_type
 						 , op->first_line_ra_stdout);
-		if (rc != EXECRA_OK || debug_level > 0) {
+		if (!op->interval || is_logmsg_due(op) || debug_level > 0) { /* log non-repeating ops */
 			if (rc == exitcode) {
-				lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
-				,	"%s: pid [%d] exited with"
-				" return code %d", op_info(op), proctrack_pid(p), rc);
+				lrmd_log(LOG_INFO
+				,	"%s: pid %d exited with"
+				" return code %d", small_op_info(op), proctrack_pid(p), rc);
 			}else{
-				lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
-				,	"%s: pid [%d] exited with"
+				lrmd_log(LOG_INFO
+				,	"%s: pid %d exited with"
 				" return code %d (mapped from %d)"
-				,	op_info(op), proctrack_pid(p), rc, exitcode);
-			}
-			if (rc != EXECRA_OK || debug_level > 1) {
-				lrmd_debug2(LOG_INFO, "Resource Agent output: [%s]"
-				,	op->first_line_ra_stdout);
+				,	small_op_info(op), proctrack_pid(p), rc, exitcode);
 			}
 		}
 		if (EXECRA_EXEC_UNKNOWN_ERROR == rc || EXECRA_NO_RA == rc) {
@@ -3348,6 +3369,9 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
 			op_status = LRM_OP_DONE;
 		}
 	}
+	if (op->interval && is_logmsg_due(op)) {
+		op->t_lastlogmsg = time_longclock();
+	}
 	if (HA_OK !=
 			ha_msg_mod_int(op->msg, F_LRM_OPSTATUS, op_status)) {
 		LOG_FAILED_TO_ADD_FIELD("opstatus");
@@ -3921,11 +3945,17 @@ gen_op_info(const lrmd_op_t* op, gboolean add_params)
 		,op->call_id ,op->client_id);
 
 	}else{
-		snprintf(info, sizeof(info)
-		,"operation %s[%d] on %s::%s::%s for client %d"
-		,lrm_str(op_type), op->call_id
-		,lrm_str(rsc->class), lrm_str(rsc->type), lrm_str(rsc->id)
-		,op->client_id);
+		if (op->exec_pid > 1) {
+			snprintf(info, sizeof(info)
+			,"operation %s[%d] with pid %d on %s for client %d"
+			,lrm_str(op_type), op->call_id, op->exec_pid, lrm_str(rsc->id)
+			,op->client_id);
+		} else {
+			snprintf(info, sizeof(info)
+			,"operation %s[%d] on %s for client %d"
+			,lrm_str(op_type), op->call_id, lrm_str(rsc->id)
+			,op->client_id);
+		}
 
 		if( add_params ) {
 			param_gstr = g_string_new("");
@@ -3968,14 +3998,18 @@ static void
 check_queue_duration(lrmd_op_t* op)
 {
 	unsigned long t_stay_in_list = 0;
+	static struct msg_ctrl *ml;
+
 	CHECK_ALLOCATED(op, "op", );
 	t_stay_in_list = longclockto_ms(op->t_perform - op->t_addtolist);
-	if ( t_stay_in_list > WARNINGTIME_IN_LIST) 
+	if ( t_stay_in_list > WARNINGTIME_IN_LIST)
 	{
-		lrmd_log(LOG_WARNING
-		,	"perform_ra_op: the operation %s stayed in operation "
+		if (!ml)
+			ml = cl_limit_log_new(logmsg_ctrl_defs + OP_STAYED_TOO_LONG);
+		cl_limit_log(ml, LOG_WARNING
+		,	"perform_ra_op: the %s stayed in operation "
 			"list for %lu ms (longer than %d ms)"
-		,	op_info(op), t_stay_in_list
+		,	small_op_info(op), t_stay_in_list
 		,	WARNINGTIME_IN_LIST
 		);
 		if (debug_level >= 2) {
diff --git a/lrm/lrmd/lrmd.h b/lrm/lrmd/lrmd.h
index 17cc6bf..8e11964 100644
--- a/lrm/lrmd/lrmd.h
+++ b/lrm/lrmd/lrmd.h
@@ -266,3 +266,8 @@ const char *gen_op_info(const lrmd_op_t* op, gboolean add_params);
 #	define LRMAUDIT() /*nothing*/
 #	define MEGALRMAUDIT() /*nothing*/
 #endif
+
+/*
+ * replace secret parameter placeholders with the locally stored values (cib_secrets.c)
+ */
+int replace_secret_params(char* rsc_id, GHashTable* params);
diff --git a/lrm/test/regression.sh.in b/lrm/test/regression.sh.in
index 8233cee..523bd40 100755
--- a/lrm/test/regression.sh.in
+++ b/lrm/test/regression.sh.in
@@ -93,7 +93,7 @@ HA_logfacility=""
 export HA_logfile HA_debugfile HA_use_logd HA_logfacility
 
 mkdir -p $OUTDIR
-. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
 
 args=`getopt hq $*`
 [ $? -ne 0 ] && usage

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-ha/cluster-glue.git



More information about the Debian-HA-Commits mailing list