[Debian-ha-commits] [cluster-glue] 22/73: Imported Upstream version 1.0.7+hg2618
Richard Winters
devrik-guest at moszumanska.debian.org
Mon Apr 20 01:41:52 UTC 2015
This is an automated email from the git hooks/post-receive script.
devrik-guest pushed a commit to branch master
in repository cluster-glue.
commit 51d08dd1af1e2904c4c15a9a9000376c987e5020
Author: Richard B Winters <rik at mmogp.com>
Date: Sun Apr 19 20:51:47 2015 -0400
Imported Upstream version 1.0.7+hg2618
---
.hg_archival.txt | 2 +-
.hgtags | 1 +
cluster-glue-fedora.spec | 1 +
cluster-glue-suse.spec | 1 +
configure.ac | 15 +-
doc/stonith/Makefile.am | 3 +-
doc/stonith/README.vcenter | 90 +++
hb_report/hb_report.in | 299 ++++++----
hb_report/utillib.sh | 59 ++
include/clplumbing/cl_log.h | 22 +
include/glue_config.h.in | 6 +
lib/clplumbing/cl_log.c | 111 ++++
lib/clplumbing/ipcsocket.c | 113 ++--
lib/plugins/lrm/raexecocf.c | 16 +-
lib/plugins/stonith/external.c | 9 +-
lib/plugins/stonith/external/Makefile.am | 7 +-
lib/plugins/stonith/external/hetzner | 132 +++++
lib/plugins/stonith/external/ipmi | 13 +-
lib/plugins/stonith/external/libvirt | 259 +++++++++
lib/plugins/stonith/external/rackpdu | 3 +-
lib/plugins/stonith/external/sbd | 53 +-
lib/plugins/stonith/external/vcenter | 266 +++++++++
lib/plugins/stonith/meatware.c | 2 +
lib/stonith/Makefile.am | 2 +-
lib/stonith/{sbd.c => sbd-common.c} | 585 ++++++++-----------
lib/stonith/sbd-md.c | 936 +++++++++++++++++++++++++++++++
lib/stonith/sbd.h | 131 +++--
lrm/admin/Makefile.am | 1 +
lrm/admin/cibsecret.in | 347 ++++++++++++
lrm/lrmd/Makefile.am | 2 +-
lrm/lrmd/cib_secrets.c | 205 +++++++
lrm/lrmd/lrmd.c | 92 ++-
lrm/lrmd/lrmd.h | 5 +
lrm/test/regression.sh.in | 2 +-
34 files changed, 3127 insertions(+), 664 deletions(-)
diff --git a/.hg_archival.txt b/.hg_archival.txt
index 76b6b91..0036b6b 100644
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,2 +1,2 @@
repo: e3ffdd7ae81c596b2be7e1e110d2c1255161340e
-node: 5e06b2ddd24b37ad6c1c25d958d7a9dda7d02f93
+node: 177de02442d54d435eaf1d4a6ac9e1248845c05e
diff --git a/.hgtags b/.hgtags
index baccb9f..c94a7ce 100644
--- a/.hgtags
+++ b/.hgtags
@@ -58,3 +58,4 @@ f6c2cd2593f365f984ce051db61466738ac05dcd Beta-0.4.9f
3af80b93d9e5d5e441f3f4c3aad16775ea27d2d9 glue-1.0.5
1c87a0c58c59fc384b93ec11476cefdbb6ddc1e1 glue-1.0.6
61200fbe18358e420cdc2037d87e803e150c1eac glue-1.0.7-rc1
+5e06b2ddd24b37ad6c1c25d958d7a9dda7d02f93 glue-1.0.7
diff --git a/cluster-glue-fedora.spec b/cluster-glue-fedora.spec
index 4ad2b71..06a62bb 100644
--- a/cluster-glue-fedora.spec
+++ b/cluster-glue-fedora.spec
@@ -121,6 +121,7 @@ standards, and an interface to common STONITH devices.
%{_sbindir}/ha_logger
%{_sbindir}/hb_report
%{_sbindir}/lrmadmin
+%{_sbindir}/cibsecret
%{_sbindir}/meatclient
%{_sbindir}/stonith
%{_sbindir}/sbd
diff --git a/cluster-glue-suse.spec b/cluster-glue-suse.spec
index 3aafd14..c86d609 100644
--- a/cluster-glue-suse.spec
+++ b/cluster-glue-suse.spec
@@ -206,6 +206,7 @@ fi
%{_sbindir}/ha_logger
%{_sbindir}/hb_report
%{_sbindir}/lrmadmin
+%{_sbindir}/cibsecret
%{_sbindir}/meatclient
%{_sbindir}/stonith
%{_sbindir}/sbd
diff --git a/configure.ac b/configure.ac
index 8ef20dc..0896628 100644
--- a/configure.ac
+++ b/configure.ac
@@ -392,6 +392,14 @@ HA_COREDIR="${localstatedir}/lib/heartbeat/cores"
AC_DEFINE_UNQUOTED(HA_COREDIR,"$HA_COREDIR", top directory of area to drop core files in)
AC_SUBST(HA_COREDIR)
+LRM_VARLIBDIR="${localstatedir}/lib/heartbeat/lrm"
+AC_DEFINE_UNQUOTED(LRM_VARLIBDIR,"$LRM_VARLIBDIR", LRM directory)
+AC_SUBST(LRM_VARLIBDIR)
+
+LRM_CIBSECRETS="${localstatedir}/lib/heartbeat/lrm/secrets"
+AC_DEFINE_UNQUOTED(LRM_CIBSECRETS,"$LRM_CIBSECRETS", CIB secrets location)
+AC_SUBST(LRM_CIBSECRETS)
+
AC_DEFINE_UNQUOTED(PILS_BASE_PLUGINDIR,"$libdir/heartbeat/plugins", Default plugin search path)
AC_DEFINE_UNQUOTED(HA_PLUGIN_DIR,"$libdir/heartbeat/plugins", Where to find plugins)
AC_DEFINE_UNQUOTED(LRM_PLUGIN_DIR,"$libdir/heartbeat/plugins/RAExec", Where to find LRM plugins)
@@ -643,7 +651,11 @@ AC_CHECK_HEADERS(getopt.h)
AC_CHECK_HEADERS(glib.h)
AC_CHECK_HEADERS(grp.h)
AC_CHECK_HEADERS(limits.h)
-AC_CHECK_HEADERS(linux/errqueue.h)
+AC_CHECK_HEADERS(linux/errqueue.h,,,
+ [#ifdef HAVE_LINUX_TYPES_H
+ # include <linux/types.h>
+ #endif
+ ])
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(netdb.h)
AC_CHECK_HEADERS(netinet/in.h)
@@ -1336,6 +1348,7 @@ lib/Makefile \
lrm/Makefile \
lrm/lrmd/Makefile \
lrm/admin/Makefile \
+ lrm/admin/cibsecret \
lrm/test/Makefile \
lrm/test/regression.sh \
lrm/test/lrmregtest \
diff --git a/doc/stonith/Makefile.am b/doc/stonith/Makefile.am
index a5b93c6..165a743 100644
--- a/doc/stonith/Makefile.am
+++ b/doc/stonith/Makefile.am
@@ -30,4 +30,5 @@ stdoc_DATA = README.bladehpi \
README.riloe \
README.vacm \
README.wti_mpc \
- README_kdumpcheck.txt
+ README_kdumpcheck.txt \
+ README.vcenter
diff --git a/doc/stonith/README.vcenter b/doc/stonith/README.vcenter
new file mode 100644
index 0000000..e6cc9a5
--- /dev/null
+++ b/doc/stonith/README.vcenter
@@ -0,0 +1,90 @@
+VMware vCenter/ESX STONITH Module
+=================================
+
+1. Intro
+--------
+
+VMware vCenter/ESX STONITH Module is intended to provide STONITH support to
+clusters in VMware Virtual Infrastructures. It is able to deal with virtual
+machines running on physically different HostSystems (e.g. ESX/ESXi) by using
+VMware vSphere Web Services SDK http://www.vmware.com/support/developer/vc-sdk/
+and connecting directly to each HostSystem or through a VMware vCenter; in the
+latter case the module locates the specified virtual machine in the Virtual
+Infrastructure and performs actions required by cluster policies.
+
+2. Software requirements
+------------------------
+
+VMware vSphere CLI, which includes both CLI tools and Perl SDK
+http://www.vmware.com/support/developer/vcli/ . The plugin has been tested with
+version 4.1 http://www.vmware.com/download/download.do?downloadGroup=VCLI41
+
+
+3. vCenter/ESX authentication settings
+--------------------------------------
+
+Create the credentials file with credstore_admin.pl:
+
+/usr/lib/vmware-vcli/apps/general/credstore_admin.pl \
+ -s 10.1.1.1 -u myuser -p mypass
+
+This should create $HOME/.vmware/credstore/vicredentials.xml
+Copy it to a system folder, e.g. /etc
+
+cp -p $HOME/.vmware/credstore/vicredentials.xml /etc
+
+
+4. Testing
+----------
+
+The plugin can be invoked directly to perform an initial connection test
+(replace all the provided sample values):
+
+VI_SERVER=10.1.1.1 \
+ VI_CREDSTORE=/etc/vicredentials.xml \
+ HOSTLIST="hostname1=vmname1;hostname2=vmname2" \
+ RESETPOWERON=0 \
+ /usr/lib/stonith/plugins/external/vcenter gethosts
+
+If everything works correctly you should get:
+
+hostname1
+hostname2
+
+When invoked in this way, the plugin connects to VI_SERVER, authenticates with
+credentials stored in VI_CREDSTORE and tries to retrieve the list of virtual
+machines (case insensitive) matching vmname1 and vmname2 (and any other listed).
+When finished, it reports the list back by mapping virtual machine names to
+hostnames as provided in HOSTLIST. If you see the full list of hostnames as a
+result, then everything is working. If instead you get a partial or empty
+list, check the parameters.
+
+You can also run the "reset", "off" and "on" commands to test (carefully!) the
+full chain. E.g.
+
+VI_SERVER=10.1.1.1 \
+ VI_CREDSTORE=/etc/vicredentials.xml \
+ HOSTLIST="hostname1=vmname1;hostname2=vmname2" \
+ RESETPOWERON=0 \
+ /usr/lib/stonith/plugins/external/vcenter reset hostname2
+
+In the above examples the reference infrastructure is a vCenter with several
+ESXi nodes. The server IP and credentials are those of the vCenter.
+
+5. CRM configuration
+--------------------
+
+The following is a sample procedure to set up STONITH for an HA 2-node cluster
+(replace all the provided sample values):
+
+crm configure primitive vfencing stonith::external/vcenter params \
+ VI_SERVER="10.1.1.1" VI_CREDSTORE="/etc/vicredentials.xml" \
+ HOSTLIST="hostname1=vmname1;hostname2=vmname2" RESETPOWERON="0" \
+ op monitor interval="60s"
+
+crm configure clone Fencing vfencing
+
+crm configure property stonith-enabled="true"
+
+
+
diff --git a/hb_report/hb_report.in b/hb_report/hb_report.in
index 048a20d..3094cc6 100755
--- a/hb_report/hb_report.in
+++ b/hb_report/hb_report.in
@@ -214,10 +214,10 @@ logmarks() {
for n in $NODES; do
if [ "$n" = "`uname -n`" ]; then
- [ "$THIS_IS_NODE" ] && logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
+ is_node && logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
else
[ "$ssh_good" ] &&
- echo $c | ssh $ssh_opts $n
+ ssh $ssh_opts $n "$c"
fi
done
}
@@ -274,7 +274,7 @@ is_our_log() {
return 3 # this is the last good log
fi
# have to go further back
- if [ x = "x$to_time" -o $to_time -ge $first_time ]; then
+ if [ $to_time -eq 0 -o $to_time -ge $first_time ]; then
return 1 # include this log
else
return 0 # don't include this log
@@ -360,6 +360,19 @@ print_logseg() {
trap "" 0
}
#
+# print some log info (important for crm history)
+#
+loginfo() {
+	# $1: log file
+	# $2: if non-empty, the log is reported as "synthetic:<file>"
+	#     (combine_logs passes it for the combined log)
+	# prints: "<file> <offset just past the current end of file>"
+	local logf=$1
+	local fake=$2
+	# print() as a function call works with both python 2 and python 3;
+	# the file name goes through sys.argv so that quotes or other special
+	# characters in it cannot break the python source
+	local nextpos=`python -c "import sys; f=open(sys.argv[1],'rb'); f.seek(0,2); print(f.tell()+1)" "$logf"`
+	if [ "$fake" ]; then
+		echo "synthetic:$logf $nextpos"
+	else
+		echo "$logf $nextpos"
+	fi
+}
+#
# find log/set of logs which are interesting for us
#
dumplogset() {
@@ -431,6 +444,7 @@ FROM_TIME=$FROM_TIME
TO_TIME=$TO_TIME
USER_NODES="$USER_NODES"
NODES="$NODES"
+MASTER_NODE="$MASTER_NODE"
HA_LOG=$HA_LOG
MASTER_IS_HOSTLOG=$MASTER_IS_HOSTLOG
DESTDIR=$DESTDIR
@@ -447,14 +461,28 @@ CORES_DIRS="$CORES_DIRS"
VERBOSITY="$VERBOSITY"
EOF
}
-start_remote_collectors() {
- for node in $NODES; do
- [ "$node" = "$WE" ] && continue
- dumpenv | ssh $ssh_opts $node \
- "cat > $DESTDIR/.env; hb_report __slave $DESTDIR" |
- (cd $DESTDIR && tar xf -) &
- SLAVEPIDS="$SLAVEPIDS $!"
- done
+is_collector() {
+	# succeeds when $SLAVE is non-empty
+	[ -n "$SLAVE" ]
+}
+is_node() {
+	# succeeds when $THIS_IS_NODE is non-empty
+	[ -n "$THIS_IS_NODE" ]
+}
+is_master() {
+ ! is_collector && test "$WE" = "$MASTER_NODE"
+}
+start_slave_collector() { # start a collector for node $1 in the background
+ local node=$1
+
+ if [ "$node" = "$WE" ]; then # our own node: run the collector directly, no ssh
+ dumpenv > $DESTDIR/.env
+ hb_report __slave $DESTDIR
+ else
+ ssh $ssh_opts $node \
+ "$SUDO hb_report __slave $DESTDIR" |
+ (cd $DESTDIR && tar xf -) # unpack the tar stream the remote collector writes to stdout
+ fi & # the whole if/else runs as a single background job
+
+ SLAVEPIDS="$SLAVEPIDS $!" # PID of that job; the master later waits on $SLAVEPIDS
+}
#
@@ -514,14 +542,12 @@ getbacktraces() {
debug "found backtraces: $flist"
}
}
-pe2png() {
+pe2dot() {
local pef=`basename $1`
local dotf=`basename $pef .bz2`.dot
- local pngf=`basename $pef .bz2`.png
(
cd `dirname $1`
ptest -D $dotf -x $pef
- # dot -Tpng -o $pngf $dotf >/dev/null 2>&1
)
}
getpeinputs() {
@@ -532,17 +558,16 @@ getpeinputs() {
test -d $pe_dir ||
continue
flist=$(
- find_files $pe_dir $1 $2 | sed "s,`dirname $pe_dir`/,,g"
+ find_files $pe_dir $1 $2 | sed "s,`dirname $pe_dir`/,,g" |
+ grep -v '[.]last$'
)
[ "$flist" ] && {
(cd `dirname $pe_dir` && tar cf - $flist) | (cd $3 && tar xf -)
debug "found `echo $flist | wc -w` pengine input files in $pe_dir"
- which dot >/dev/null 2>&1 ||
- info "if you had graphviz, we'd also produce png graphics for all PE files"
}
if [ `echo $flist | wc -w` -le 20 ]; then
for f in $flist; do
- pe2png $3/$f
+ pe2dot $3/$f
done
else
info "too many PE inputs to create dot files"
@@ -577,7 +602,7 @@ getconfigurations() {
sys_info() {
cluster_info
hb_report -V # our info
- echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`"
+ echo "resource-agents: `grep 'Build version:' @OCF_ROOT_DIR@/resource.d/heartbeat/.ocf-shellfuncs`"
crm_info
pkg_ver $PACKAGES
echo "Platform: `uname`"
@@ -814,6 +839,7 @@ combine_logs() {
test -x $HA_NOARCHBIN/combine-logs.pl ||
warning "cannot combine logs: no $HA_NOARCHBIN/combine-logs.pl"
$HA_NOARCHBIN/combine-logs.pl $destdir/*/$HALOG_F > $destdir/$HALOG_F
+ loginfo $destdir/$HALOG_F combined > $destdir/$HALOG_F.info
$HA_NOARCHBIN/combine-logs.pl $destdir/*/events.txt > $destdir/events.txt
}
@@ -878,6 +904,88 @@ pickcompress() {
COMPRESS_EXT=
fi
}
+# get the right part of the log
+getlog() { # locate the cluster log and write the requested time slice to the report directory
+ local outf
+
+ if [ "$HA_LOG" ]; then # log provided by the user?
+ [ -f "$HA_LOG" ] || { # not present
+ is_collector || # warning if not on slave
+ warning "$HA_LOG not found; we will try to find log ourselves"
+ HA_LOG=""
+ }
+ fi
+ if [ "$HA_LOG" = "" ]; then # nothing usable given: search for the log
+ HA_LOG=`findlog`
+ [ "$HA_LOG" ] &&
+ cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG` # number of lines carrying $UNIQUE_MSG (cnt is not local)
+ fi
+ if [ "$cnt" ] && [ $cnt -eq $NODECNT ]; then # as many such lines as nodes: treated as the central log
+ MASTER_IS_HOSTLOG=1
+ info "found the central log!"
+ fi
+
+ if is_node; then
+ outf=$DESTDIR/$WE/$HALOG_F
+ else
+ outf=$DESTDIR/$HALOG_F # we are log server, probably
+ fi
+ if [ -f "$HA_LOG" ]; then
+ if [ "$NO_str2time" ]; then # no time parsing available, nothing is written
+ warning "a log found; but we cannot slice it"
+ warning "please install the perl Date::Parse module"
+ elif [ "$CTS" ]; then
+ cts_findlogseg $CTS $HA_LOG > $outf
+ else
+ getstampproc=`find_getstampproc < $HA_LOG`
+ if [ "$getstampproc" ]; then
+ export getstampproc # used by linetime
+ dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf
+ loginfo $HA_LOG > $outf.info # file name and next offset, see loginfo
+ else
+ warning "could not figure out the log format of $HA_LOG"
+ fi
+ fi
+ elif [ "$CTS" ]; then
+ cts_findlogseg $CTS > $outf
+ else
+ [ "$MASTER_IS_HOSTLOG" ] || # stay quiet if the central log was already found
+ warning "could not find $HA_LOG on $WE"
+ fi
+}
+#
+# get all other info (config, stats, etc)
+#
+collect_info() { # gather everything except the main log into $DESTDIR/$WE
+ getconfig $DESTDIR/$WE
+ getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE
+ getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/$BT_F
+ getconfigurations $DESTDIR/$WE
+ touch_DC_if_dc $DESTDIR/$WE
+ sanitize $DESTDIR/$WE
+ crmconfig $DESTDIR/$WE
+ check_perms > $DESTDIR/$WE/$PERMISSIONS_F 2>&1 # stdout and stderr both go into the report file
+ sys_info > $DESTDIR/$WE/$SYSINFO_F 2>&1
+ dlm_dump > $DESTDIR/$WE/$DLM_DUMP_F 2>&1
+ sys_stats > $DESTDIR/$WE/$SYSSTATS_F 2>&1
+
+ for l in $EXTRA_LOGS; do # slice each additional log for the report period
+ [ "$NO_str2time" ] && break # cannot slice logs without time parsing
+ [ ! -f "$l" ] && continue
+ if [ "$l" = "$HA_LOG" -a "$l" != "$HALOG_F" ]; then # same file as the main log: link instead of copying
+ ln -s $HALOG_F $DESTDIR/$WE/`basename $l`
+ continue
+ fi
+ getstampproc=`find_getstampproc < $l`
+ if [ "$getstampproc" ]; then
+ export getstampproc # used by linetime
+ dumplogset $l $FROM_TIME $TO_TIME > $DESTDIR/$WE/`basename $l`
+ loginfo $l > $DESTDIR/$WE/`basename $l`.info # file name and next offset, see loginfo
+ else
+ warning "could not figure out the log format of $l"
+ fi
+ done
+}
finalword() {
if [ "$COMPRESS" = "1" ]; then
echo "The report is saved in $DESTDIR.tar$COMPRESS_EXT"
@@ -897,14 +1005,14 @@ NO_str2time=""
t=`str2time "12:00"`
if [ "$t" = "" ]; then
NO_str2time=1
- [ "$SLAVE" ] ||
+ is_collector ||
fatal "please install the perl Date::Parse module"
fi
#
# part 1: get and check options; and the destination
#
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
setvarsanddefaults
userargs="$@"
DESTDIR="$HOME/hb_report-"`date +"%a-%d-%b-%Y"`
@@ -1016,10 +1124,10 @@ else
fatal "no stack specific support: $CF_SUPPORT"
fi
-if [ "x$CTS" = "x" -o "x$SLAVE" != "x" ]; then
+if [ "x$CTS" = "x" ] || is_collector; then
getlogvars
debug "log settings: facility=$HA_LOGFACILITY logfile=$HA_LOGFILE debugfile=$HA_DEBUGFILE"
-elif [ "x$SLAVE" = "x" ]; then
+elif ! is_collector; then
ctslog=`findmsg "CTS: Stack:" | awk '{print $1}'`
debug "Using CTS control file: $ctslog"
USER_NODES=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '`
@@ -1027,7 +1135,7 @@ elif [ "x$SLAVE" = "x" ]; then
NODES_SOURCE=user
fi
-if [ "$SLAVE" -a "$3" = logmark ]; then
+if is_collector && [ "$3" = logmark ]; then
msg="$4"
logmark $HA_LOGFACILITY.$HA_LOGLEVEL $msg
exit
@@ -1035,7 +1143,8 @@ fi
WE=`uname -n` # who am i?
THIS_IS_NODE=""
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
+ MASTER_NODE=$WE
NODES=`getnodes`
debug "nodes: `echo $NODES`"
fi
@@ -1043,7 +1152,7 @@ NODECNT=`echo $NODES | wc -w`
if [ "$NODECNT" = 0 ]; then
fatal "could not figure out a list of nodes; is this a cluster node?"
fi
-if echo $NODES | grep -wqs $WE || [ "$SLAVE" ]; then # are we a node?
+if echo $NODES | grep -wqs $WE; then # are we a node?
THIS_IS_NODE=1
fi
@@ -1069,11 +1178,11 @@ CONFIGURATIONS="/etc/drbd.conf /etc/drbd.d"
export CONFIGURATIONS
# this only on master
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
# if this is not a node, then some things afterwards might
# make no sense (not work)
- if [ -z "$THIS_IS_NODE" -a "$NODES_SOURCE" != user ]; then
+ if ! is_node && [ "$NODES_SOURCE" != user ]; then
warning "this is not a node and you didn't specify a list of nodes using -n"
fi
#
@@ -1103,11 +1212,11 @@ if [ "$SLAVE" = "" ]; then
}
mkdir -p $DESTDIR
[ -d $DESTDIR ] || nodistdirectory
- [ "$ssh_good" ] &&
+ if [ "$ssh_good" ]; then
for node in $NODES; do
[ "$node" = "$WE" ] && continue
ssh $ssh_opts $node "test -d $DESTDIR" && {
- if [ "$CTS" ]; then # relax a bit for CTS
+ if [ "$FORCE_REMOVE_DEST" -o "$CTS" ]; then # relax a bit for CTS
ssh $ssh_opts $node "rm -r $DESTDIR"
else
test -d $DESTDIR && rmdir $DESTDIR
@@ -1117,9 +1226,23 @@ if [ "$SLAVE" = "" ]; then
dumpenv |
ssh $ssh_opts $node "mkdir -p $DESTDIR && cat > $DESTDIR/.env"
done
+ else
+ if [ -z "$NO_SSH" -a $NODECNT -gt 1 ]; then
+ warning "ssh does not work to all nodes"
+ warning "please use the -u option if you want to supply a password"
+ fi
+ fi
+fi
+
+# only cluster nodes need their own directories
+is_node && mkdir -p $DESTDIR/$WE
+
+if is_collector && [ $VERBOSITY -gt 1 ]; then
+ echo the debug information for node $WE is in $DESTDIR/$WE/debug.out
+ exec 2>>$DESTDIR/$WE/debug.out
fi
-if [ "$SLAVE" = "" ]; then
+if ! is_collector; then
#
# part 3: log marks to be searched for later
# important to do this now on _all_ nodes
@@ -1129,111 +1252,36 @@ if [ "$SLAVE" = "" ]; then
fi
fi
-# only cluster nodes need their own directories
-[ "$THIS_IS_NODE" ] && mkdir -p $DESTDIR/$WE
-
#
# part 4: find the logs and cut out the segment for the period
#
-if [ "$HA_LOG" ]; then # log provided by the user?
- [ -f "$HA_LOG" ] || { # not present
- [ "$SLAVE" ] || # warning if not on slave
- warning "$HA_LOG not found; we will try to find log ourselves"
- HA_LOG=""
- }
-fi
-if [ "$HA_LOG" = "" ]; then
- HA_LOG=`findlog`
- [ "$HA_LOG" ] &&
- cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG`
-fi
-if [ "$cnt" ] && [ $cnt -eq $NODECNT ]; then
- MASTER_IS_HOSTLOG=1
- info "found the central log!"
-fi
-if [ "$THIS_IS_NODE" ]; then
- outf=$DESTDIR/$WE/$HALOG_F
-else
- outf=$DESTDIR/$HALOG_F # we are log server, probably
-fi
-if [ -f "$HA_LOG" ]; then
- if [ "$NO_str2time" ]; then
- warning "a log found; but we cannot slice it"
- warning "please install the perl Date::Parse module"
- elif [ "$CTS" ]; then
- cts_findlogseg $CTS $HA_LOG > $outf
- else
- getstampproc=`find_getstampproc < $HA_LOG`
- if [ "$getstampproc" ]; then
- export getstampproc # used by linetime
- dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf
- else
- warning "could not figure out the log format of $HA_LOG"
- fi
- fi
-elif [ "$CTS" ]; then
- cts_findlogseg $CTS > $outf
-else
- [ "$MASTER_IS_HOSTLOG" ] ||
- warning "could not find $HA_LOG on $WE"
-fi
+# if the master is also a node, getlog is going to be invoked
+# from the collector
+(is_master && is_node) ||
+ getlog
-#
-# part 5: start this program on other nodes
-#
-if [ ! "$SLAVE" ]; then
- if [ "$ssh_good" ]; then
- start_remote_collectors
- else
- if [ -z "$NO_SSH" -a $NODECNT -gt 1 ]; then
- warning "ssh does not work to all nodes"
- warning "please use the -u option if you want to supply a password"
- fi
+if ! is_collector; then
+	# assume that only root can collect data: prefix the remote command
+	# with sudo if no SSH_USER was given and we are not root ourselves,
+	# or if SSH_USER names an account other than root.
+	# The second test must require a non-empty SSH_USER, otherwise an
+	# empty SSH_USER alone would always select sudo.
+	SUDO=""
+	if [ -z "$SSH_USER" -a `id -u` != 0 ] ||
+		[ -n "$SSH_USER" -a "$SSH_USER" != root ]; then
+		SUDO="sudo -u root -E"
+	fi
-fi
-
-#
-# part 6: get all other info (config, stats, etc)
-#
-if [ "$THIS_IS_NODE" ]; then
- getconfig $DESTDIR/$WE
- getpeinputs $FROM_TIME $TO_TIME $DESTDIR/$WE
- getbacktraces $FROM_TIME $TO_TIME $DESTDIR/$WE/$BT_F
- getconfigurations $DESTDIR/$WE
- touch_DC_if_dc $DESTDIR/$WE
- sanitize $DESTDIR/$WE
- crmconfig $DESTDIR/$WE
- check_perms > $DESTDIR/$WE/$PERMISSIONS_F 2>&1
- sys_info > $DESTDIR/$WE/$SYSINFO_F 2>&1
- dlm_dump > $DESTDIR/$WE/$DLM_DUMP_F 2>&1
- sys_stats > $DESTDIR/$WE/$SYSSTATS_F 2>&1
-
- for l in $EXTRA_LOGS; do
- [ "$NO_str2time" ] && break
- [ ! -f "$l" ] && continue
- if [ "$l" = "$HA_LOG" -a "$l" != "$HALOG_F" ]; then
- ln -s $HALOG_F $DESTDIR/$WE/`basename $l`
- continue
- fi
- getstampproc=`find_getstampproc < $l`
- if [ "$getstampproc" ]; then
- export getstampproc # used by linetime
- dumplogset $l $FROM_TIME $TO_TIME > $DESTDIR/$WE/`basename $l`
- else
- warning "could not figure out the log format of $l"
- fi
+ for node in $NODES; do
+ start_slave_collector $node
done
fi
#
-# part 7: endgame:
+# part 5: endgame:
# slaves tar their results to stdout, the master waits
# for them, analyses results, asks the user to edit the
# problem description template, and prints final notes
#
-if [ "$SLAVE" ]; then
- (cd $DESTDIR && tar cf - $WE)
+if is_collector; then
+ collect_info
+ [ "$WE" != "$MASTER_NODE" ] &&
+ (cd $DESTDIR && tar cf - $WE)
else
wait $SLAVEPIDS
analyze $DESTDIR > $DESTDIR/$ANALYSIS_F
@@ -1254,5 +1302,8 @@ else
finalword
fi
-[ "$REMOVE_DEST" = "1" ] &&
- rm -r $DESTDIR
+if [ "$REMOVE_DEST" = "1" ]; then
+ if is_master || [ "$WE" != "$MASTER_NODE" ]; then
+ rm -r $DESTDIR
+ fi
+fi
diff --git a/hb_report/utillib.sh b/hb_report/utillib.sh
index 96c3c43..6dd99eb 100644
--- a/hb_report/utillib.sh
+++ b/hb_report/utillib.sh
@@ -115,7 +115,9 @@ findmsg() {
for d in $syslogdirs; do
[ -d $d ] || continue
log=`grep -l -e "$mark" $d/$favourites` && break
+ test "$log" && break
log=`grep -l -e "$mark" $d/*` && break
+ test "$log" && break
done 2>/dev/null
[ "$log" ] &&
ls -t $log | tr '\n' ' '
@@ -308,6 +310,54 @@ check_perms() {
#
# coredumps
#
+pkg_mgr_list() {
+# one entry per line, read by find_pkgmgr:
+# regex pkg_mgr (regex is grepped for in the gdb output)
+# no spaces allowed in regex
+ cat<<EOF
+Try:.zypper.install zypper
+EOF
+}
+MYBINARIES="crmd|pengine|lrmd|attrd|cib|mgmtd|stonithd|corosync|libplumb|libpils"
+listpkg_zypper() { # print the debuginfo packages gdb suggests for binary $1 / core $2
+ local binary=$1 core=$2
+ gdb $binary $core </dev/null 2>&1 | # awk: a "Missing separate debuginfo" line whose last field matches $MYBINARIES arms n for the next line only
+ awk -v bins="$MYBINARIES" '
+ n>0 && /^Try: zypper install/ {gsub("\"",""); print $NF}
+ n>0 {n=0}
+ /Missing separate debuginfo/ && match($NF, bins) {n=1}
+ ' | sort -u # each package name once
+}
+fetchpkg_zypper() { # install the packages given as arguments using zypper
+ debug "get debuginfo packages using zypper: $@"
+ zypper -qn install -C $@ >/dev/null # zypper's stdout is discarded
+}
+find_pkgmgr() { # print the pkg_mgr of the first pkg_mgr_list entry whose regex matches the gdb output for binary $1 / core $2
+ local binary=$1 core=$2
+ pkg_mgr_list |
+ while read regex pkg_mgr; do # one "regex pkg_mgr" pair per line
+ if gdb $binary $core </dev/null 2>&1 |
+ grep "$regex" > /dev/null; then
+ echo $pkg_mgr
+ break
+ fi
+ done
+}
+get_debuginfo() { # try to install the debuginfo packages gdb reports as missing for binary $1 / core $2
+ local binary=$1 core=$2
+ local pkg_mgr pkgs
+ gdb $binary $core </dev/null 2>/dev/null |
+ grep 'no debugging symbols found' > /dev/null ||
+ return # no missing debuginfo
+ pkg_mgr=`find_pkgmgr $binary $core`
+ if [ -z "$pkg_mgr" ]; then
+ warning "found core for $binary but there is no debuginfo and we don't know how to get it on this platform"
+ return
+ fi
+ pkgs=`listpkg_$pkg_mgr $binary $core` # helper chosen by package manager name
+ [ -n "$pkgs" ] &&
+ fetchpkg_$pkg_mgr $pkgs
+}
findbinary() {
random_binary=`which cat 2>/dev/null` # suppose we are lucky
binary=`gdb $random_binary $1 < /dev/null 2>/dev/null |
@@ -353,6 +403,7 @@ getbt() {
for corefile; do
absbinpath=`findbinary $corefile`
[ x = x"$absbinpath" ] && continue
+ get_debuginfo $absbinpath $corefile
echo "====================== start backtrace ======================"
ls -l $corefile
gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \
@@ -423,6 +474,14 @@ get_crm_nodes() {
}
'
}
+get_live_nodes() { # print those of the hosts given as arguments which answer a ping
+ if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then # fping is used only as root and only if installed
+ fping -a $@ 2>/dev/null
+ else
+ local h
+ for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done # fallback: ping the hosts one by one
+ fi
+}
#
# remove values of sensitive attributes
diff --git a/include/clplumbing/cl_log.h b/include/clplumbing/cl_log.h
index edaae93..aa30fcd 100644
--- a/include/clplumbing/cl_log.h
+++ b/include/clplumbing/cl_log.h
@@ -24,6 +24,24 @@
#define HA_OK 1
#define MAXLINE (512*10)
+/* rate-limit settings for one kind of message; defined by the caller */
+struct logspam {
+ const char *id; /* identifier, quoted in the "logged too often" notice */
+ int max; /* maximum number of messages ... */
+ time_t window; /* ... within this timeframe */
+ time_t reset_time; /* once suppressed, log new messages only after this time has passed */
+ const char *advice; /* what to log in case messages get suppressed */
+};
+
+/* this is internal (opaque to the caller); created by cl_limit_log_new() */
+struct msg_ctrl {
+ struct logspam *lspam; /* the caller's settings (pointer is stored, not copied) */
+ time_t *msg_slots; /* msg slot root (space for lspam->max) */
+ int last; /* last used msg slot [0..lspam->max-1]; -1 on init */
+ int cnt; /* current msg count [0..lspam->max] */
+ time_t suppress_t; /* messages blocked since this time; 0 if not blocked */
+};
+
struct IPC_CHANNEL;
extern int debug_level;
@@ -36,6 +54,10 @@ extern int debug_level;
void cl_direct_log(int priority, const char* buf, gboolean, const char*, int, TIME_T);
void cl_log(int priority, const char * fmt, ...) G_GNUC_PRINTF(2,3);
+void cl_limit_log(struct msg_ctrl *ml, int priority, const char * fmt, ...) G_GNUC_PRINTF(3,4);
+struct msg_ctrl *cl_limit_log_new(struct logspam *lspam);
+void cl_limit_log_destroy(struct msg_ctrl *ml);
+void cl_limit_log_reset(struct msg_ctrl *ml);
void cl_perror(const char * fmt, ...) G_GNUC_PRINTF(1,2);
void cl_log_enable_stderr(int truefalse);
void cl_log_enable_stdout(int truefalse);
diff --git a/include/glue_config.h.in b/include/glue_config.h.in
index f2ff3f8..0850a63 100644
--- a/include/glue_config.h.in
+++ b/include/glue_config.h.in
@@ -27,6 +27,12 @@
/* top directory of area to drop core files in */
#undef HA_COREDIR
+/* top directory for LRM related files */
+#undef LRM_VARLIBDIR
+
+/* CIB secrets */
+#undef LRM_CIBSECRETS
+
/* Logging Daemon IPC socket name */
#undef HA_LOGDAEMON_IPC
diff --git a/lib/clplumbing/cl_log.c b/lib/clplumbing/cl_log.c
index a179e40..13c3322 100644
--- a/lib/clplumbing/cl_log.c
+++ b/lib/clplumbing/cl_log.c
@@ -501,6 +501,7 @@ prio2str(int priority)
}
static char * syslog_timestamp(TIME_T t);
+static void cl_limit_log_update(struct msg_ctrl *ml, time_t ts);
static void
append_log(FILE * fp, const char * entity, int entity_pid
@@ -738,6 +739,116 @@ cl_log(int priority, const char * fmt, ...)
return;
}
+/*
+ * Log a message only if there were not too many messages of this
+ * kind recently. This is to prevent log spamming in case a
+ * condition persists over a long period of time. The maximum
+ * number of messages for the timeframe and other details are
+ * provided in struct logspam (see cl_log.h).
+ *
+ * Implementation details:
+ * - max number of time_t slots is allocated; slots keep time
+ * stamps of previous max number of messages
+ * - we check if the difference between now (i.e. new message just
+ * arrived) and the oldest message is _less_ than the window
+ * timeframe
+ * - it's up to the user to do cl_limit_log_new and afterwards
+ * cl_limit_log_destroy, though the latter is usually not
+ * necessary; the memory allocated with cl_limit_log_new stays
+ * constant during the lifetime of the process
+ *
+ * NB on Thu Aug 4 15:26:49 CEST 2011:
+ * This interface is very new, use with caution and report bugs.
+ */
+
+/*
+ * Allocate and initialize the rate-limit state for one kind of message.
+ *
+ * lspam: the caller's settings; only the pointer is stored, so the
+ *        structure must stay valid for as long as the result is used.
+ *        lspam->max must be positive.
+ *
+ * Returns the control block to be passed to cl_limit_log(), or NULL on
+ * invalid settings or if memory cannot be allocated. Release it with
+ * cl_limit_log_destroy().
+ */
+struct msg_ctrl *
+cl_limit_log_new(struct logspam *lspam)
+{
+	struct msg_ctrl *ml;
+
+	/* max serves as slot count and as modulus, so it must be > 0 */
+	if (!lspam || lspam->max <= 0) {
+		cl_log(LOG_ERR, "%s:%d: invalid logspam settings"
+		,	__FUNCTION__, __LINE__);
+		return NULL;
+	}
+	ml = malloc(sizeof(*ml));
+	if (!ml) {
+		cl_log(LOG_ERR, "%s:%d: out of memory"
+		,	__FUNCTION__, __LINE__);
+		return NULL;
+	}
+	ml->msg_slots = calloc(lspam->max, sizeof(*ml->msg_slots));
+	if (!ml->msg_slots) {
+		cl_log(LOG_ERR, "%s:%d: out of memory"
+		,	__FUNCTION__, __LINE__);
+		free(ml);	/* do not leak the half-built object */
+		return NULL;
+	}
+	ml->lspam = lspam;
+	cl_limit_log_reset(ml);
+	return ml; /* to be passed later to cl_limit_log() */
+}
+
+/*
+ * Release a control block obtained from cl_limit_log_new().
+ * Passing NULL is allowed and does nothing.
+ */
+void
+cl_limit_log_destroy(struct msg_ctrl *ml)
+{
+	if (!ml)
+		return;
+	/*
+	 * Both pieces come from malloc()/calloc() in cl_limit_log_new(),
+	 * so they have to go back through free(), not g_free().
+	 */
+	free(ml->msg_slots);
+	free(ml);
+}
+
+/*
+ * Return the control block to its initial state: no time stamps
+ * recorded, no slot in use, messages not suppressed.
+ */
+void
+cl_limit_log_reset(struct msg_ctrl *ml)
+{
+	memset(ml->msg_slots, 0, ml->lspam->max * sizeof(time_t));
+	ml->suppress_t = (time_t)0;
+	ml->cnt = 0;
+	ml->last = -1;
+}
+
+/*
+ * Record the time stamp of a message which is about to be logged:
+ * store it in the slot following the last used one (wrapping around
+ * at lspam->max) and count it, up to lspam->max.
+ */
+static void
+cl_limit_log_update(struct msg_ctrl *ml, time_t ts)
+{
+	int nslots = ml->lspam->max;
+	int slot = (ml->last + 1) % nslots;
+
+	ml->msg_slots[slot] = ts;
+	ml->last = slot;
+	if (ml->cnt < nslots) {
+		ml->cnt++;
+	}
+}
+
+/*
+ * Log a message unless too many messages of this kind were logged
+ * recently.
+ *
+ * ml:       control block from cl_limit_log_new(); with NULL (or with
+ *           a non-positive lspam->max) the message is always logged
+ * priority: passed on to cl_log()
+ * fmt, ...: printf style message
+ *
+ * A message is logged if fewer than lspam->max messages are on record,
+ * or if the oldest recorded message is more than lspam->window old.
+ * Otherwise a notice and lspam->advice (if set) are logged once and
+ * all further messages are dropped until lspam->reset_time has passed.
+ */
+void
+cl_limit_log(struct msg_ctrl *ml, int priority, const char * fmt, ...)
+{
+	va_list ap;
+	char buf[MAXLINE];
+	time_t oldest_ts, now = time(NULL);
+
+	if (!ml || ml->lspam->max <= 0)
+		goto log_msg;
+	if (ml->suppress_t) {
+		if ((now - ml->suppress_t) < ml->lspam->reset_time)
+			return;
+		/* message blocking expired */
+		cl_limit_log_reset(ml);
+	}
+	if (ml->cnt >= ml->lspam->max) {
+		/*
+		 * All slots are in use, hence the slot following the
+		 * last written one holds the oldest time stamp. If even
+		 * that one is within the window, then max messages were
+		 * logged within the window.
+		 */
+		oldest_ts = ml->msg_slots[(ml->last + 1) % ml->lspam->max];
+		if ((now - oldest_ts) <= ml->lspam->window) {
+			/* time_t has no printf conversion of its own */
+			cl_log(LOG_INFO
+			,	"'%s' messages logged too often, "
+				"suppressing messages of this kind for %ld seconds"
+			,	ml->lspam->id, (long)ml->lspam->reset_time);
+			if (ml->lspam->advice)
+				cl_log(priority, "%s", ml->lspam->advice);
+			ml->suppress_t = now;
+			return;
+		}
+	}
+	cl_limit_log_update(ml, now);
+
+log_msg:
+	va_start(ap, fmt);
+	vsnprintf(buf, MAXLINE, fmt, ap);
+	va_end(ap);
+	cl_log(priority, "%s", buf);
+}
+
void
cl_perror(const char * fmt, ...)
{
diff --git a/lib/clplumbing/ipcsocket.c b/lib/clplumbing/ipcsocket.c
index b712dd9..9297c6a 100644
--- a/lib/clplumbing/ipcsocket.c
+++ b/lib/clplumbing/ipcsocket.c
@@ -232,6 +232,7 @@ static struct IPC_CHANNEL* socket_server_channel_new(int sockfd);
static struct IPC_CHANNEL * channel_new(int sockfd, int conntype, const char *pathname);
static int client_channel_new_auth(int sockfd);
+static int verify_creds(struct IPC_AUTH *auth_info, uid_t uid, gid_t gid);
typedef void (*DelProc)(IPC_Message*);
@@ -2359,6 +2360,26 @@ socket_message_new(struct IPC_CHANNEL *ch, int msg_len)
*
***********************************************************************/
+/*
+ * Check the peer's uid/gid against the authorization tables.
+ *
+ * Returns IPC_OK if there are no restrictions at all (auth_info is
+ * NULL or has neither a uid nor a gid table), or if the uid is found
+ * in the uid table, or the gid in the gid table; IPC_FAIL otherwise.
+ */
+static int
+verify_creds(struct IPC_AUTH *auth_info, uid_t uid, gid_t gid)
+{
+	if (auth_info == NULL) {
+		return IPC_OK;
+	}
+	if (auth_info->uid == NULL && auth_info->gid == NULL) {
+		return IPC_OK;
+	}
+	/* the uid table is consulted first */
+	if (auth_info->uid != NULL
+	&&	g_hash_table_lookup(auth_info->uid
+		,	GUINT_TO_POINTER((guint)uid)) != NULL) {
+		return IPC_OK;
+	}
+	if (auth_info->gid != NULL
+	&&	g_hash_table_lookup(auth_info->gid
+		,	GUINT_TO_POINTER((guint)gid)) != NULL) {
+		return IPC_OK;
+	}
+	return IPC_FAIL;
+}
+
/***********************************************************************
* SO_PEERCRED VERSION... (Linux)
@@ -2406,16 +2427,7 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
/* verify the credential information. */
- if ( auth_info->uid
- && (g_hash_table_lookup(auth_info->uid
- , GUINT_TO_POINTER((guint)cred.uid)) != NULL)) {
- ret = IPC_OK;
- }else if (auth_info->gid
- && (g_hash_table_lookup(auth_info->gid
- , GUINT_TO_POINTER((guint)cred.gid)) != NULL)) {
- ret = IPC_OK;
- }
- return ret;
+ return verify_creds(auth_info, cred.uid, cred.gid);
}
/* get farside pid for our peer process */
@@ -2474,22 +2486,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
ch->farside_uid = euid;
ch->farside_gid = egid;
- if (ret == IPC_OK) {
- return ret;
- }
-
- /* Check credentials against authorization information */
- if ( auth_info->uid
- && (g_hash_table_lookup(auth_info->uid
- , GUINT_TO_POINTER((guint)euid)) != NULL)) {
- ret = IPC_OK;
- }else if (auth_info->gid
- && (g_hash_table_lookup(auth_info->gid
- , GUINT_TO_POINTER((guint)egid)) != NULL)) {
- ret = IPC_OK;
- }
- return ret;
+ /* verify the credential information. */
+ return verify_creds(auth_info, euid, egid);
}
static
@@ -2628,18 +2627,8 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
return ret;
}
- ret = IPC_OK;
-
- if ( auth_info->uid
- && g_hash_table_lookup(auth_info->uid, &(cred.crEuid)) == NULL) {
- ret = IPC_FAIL;
- }
- if ( auth_info->gid
- && g_hash_table_lookup(auth_info->gid, &(cred.crEgid)) == NULL) {
- ret = IPC_FAIL;
- }
-
- return ret;
+ /* verify the credential information. */
+ return verify_creds(auth_info, cred.crEuid, cred.crEgid);
}
/*
@@ -2721,8 +2710,6 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
return ret;
}
- ret = IPC_OK;
-
if ((auth_info->uid == NULL || g_hash_table_size(auth_info->uid) == 0)
&& auth_info->gid != NULL
&& g_hash_table_size(auth_info->gid) != 0) {
@@ -2731,20 +2718,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
" on this platform.");
return IPC_BROKEN;
}
-
- if (auth_info->uid != NULL && g_hash_table_size(auth_info->uid) > 0
- && g_hash_table_lookup(
- auth_info->uid, GUINT_TO_POINTER(stat_buf.st_uid))==NULL) {
- ret = IPC_FAIL;
-
- }
- if (auth_info->gid != NULL && g_hash_table_size(auth_info->gid) > 0
- && g_hash_table_lookup(
- auth_info->gid, GUINT_TO_POINTER(stat_buf.st_gid))==NULL) {
- ret = IPC_FAIL;
- }
- return ret;
+ /* verify the credential information. */
+ return verify_creds(auth_info, stat_buf.st_uid, stat_buf.st_gid);
}
@@ -2774,22 +2750,9 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
ch->farside_uid = conn_info->farside_uid;
ch->farside_gid = conn_info->farside_gid;
- if (auth_info == NULL
- || (auth_info->uid == NULL && auth_info->gid == NULL)) {
- return IPC_OK; /* no restriction for authentication */
- }
-
/* verify the credential information. */
- if ( auth_info->uid
- && (g_hash_table_lookup(auth_info->uid,
- GUINT_TO_POINTER((guint)conn_info->farside_uid)) != NULL)) {
- return IPC_OK;
- }else if (auth_info->gid
- && (g_hash_table_lookup(auth_info->gid,
- GUINT_TO_POINTER((guint)conn_info->farside_gid)) != NULL)) {
- return IPC_OK;
- }
- return IPC_FAIL;
+ return verify_creds(auth_info,
+ conn_info->farside_uid, conn_info->farside_gid);
}
static
@@ -2835,20 +2798,10 @@ socket_verify_auth(struct IPC_CHANNEL* ch, struct IPC_AUTH * auth_info)
return rc;
}
- /* Check credentials against authorization information */
-
- if (auth_info->uid
- && (g_hash_table_lookup(auth_info->uid,
- GUINT_TO_POINTER((guint)ucred_geteuid(ucred))) != NULL)) {
- rc = IPC_OK;
- }else if (auth_info->gid
- && (g_hash_table_lookup(auth_info->gid,
- GUINT_TO_POINTER((guint)ucred_getegid(ucred))) != NULL)) {
- rc = IPC_OK;
- }
-
+ /* verify the credential information. */
+ rc = verify_creds(auth_info,
+ ucred_geteuid(ucred), ucred_getegid(ucred));
ucred_free(ucred);
-
return rc;
}
diff --git a/lib/plugins/lrm/raexecocf.c b/lib/plugins/lrm/raexecocf.c
index 7e8ef36..f7cd7ed 100644
--- a/lib/plugins/lrm/raexecocf.c
+++ b/lib/plugins/lrm/raexecocf.c
@@ -230,16 +230,20 @@ get_resource_list(GList ** rsc_info)
free(namelist[file_num]);
continue;
}
-
- stat(namelist[file_num]->d_name, &prop);
- if (S_ISDIR(prop.st_mode)) {
+
+ snprintf(subdir,FILENAME_MAX,"%s/%s",
+ RA_PATH, namelist[file_num]->d_name);
+
+ if (stat(subdir, &prop) == -1) {
+ cl_perror("%s:%s:%d: stat failed for %s"
+ , __FILE__, __FUNCTION__, __LINE__, subdir);
+ free(namelist[file_num]);
+ continue;
+ } else if (!S_ISDIR(prop.st_mode)) {
free(namelist[file_num]);
continue;
}
- snprintf(subdir,FILENAME_MAX,"%s/%s",
- RA_PATH, namelist[file_num]->d_name);
-
get_runnable_list(subdir,&ra_subdir);
merge_string_list(rsc_info,ra_subdir);
diff --git a/lib/plugins/stonith/external.c b/lib/plugins/stonith/external.c
index 683dd84..e991976 100644
--- a/lib/plugins/stonith/external.c
+++ b/lib/plugins/stonith/external.c
@@ -141,7 +141,7 @@ external_status(StonithPlugin *s)
rc = external_run_cmd(sd, op, NULL);
if (rc != 0) {
- LOG(PIL_CRIT, "%s: '%s %s' failed with rc %d",
+ LOG(PIL_WARN, "%s: '%s %s' failed with rc %d",
__FUNCTION__, sd->subplugin, op, rc);
}
else {
@@ -337,10 +337,11 @@ external_parse_config_info(struct pluginDevice* sd, StonithNVpair * info)
/* TODO: Maybe treat "" as delimeters too so
* whitespace can be passed to the plugins... */
for (nv = info; nv->s_name; nv++) {
- key = STRDUP(nv->s_name);
if (!nv->s_name || !nv->s_value) {
continue;
}
+
+ key = STRDUP(nv->s_name);
if (!key) {
goto err_mem;
}
@@ -819,8 +820,8 @@ external_run_cmd(struct pluginDevice *sd, const char *op, char **output)
status = pclose(file);
if (WIFEXITED(status)) {
rc = WEXITSTATUS(status);
- if (rc != 0) {
- LOG(PIL_CRIT,
+ if (rc != 0 && Debug) {
+ LOG(PIL_DEBUG,
"%s: Calling '%s' returned %d", __FUNCTION__, cmd, rc);
}
} else {
diff --git a/lib/plugins/stonith/external/Makefile.am b/lib/plugins/stonith/external/Makefile.am
index 0ca1440..5006513 100644
--- a/lib/plugins/stonith/external/Makefile.am
+++ b/lib/plugins/stonith/external/Makefile.am
@@ -19,14 +19,15 @@
#
MAINTAINERCLEANFILES = Makefile.in
-EXTRA_DIST = drac5 dracmc-telnet ibmrsa-telnet ipmi rackpdu vmware xen0 \
+EXTRA_DIST = drac5 dracmc-telnet ibmrsa-telnet ipmi rackpdu vmware vcenter xen0 \
xen0-ha-dom0-stonith-helper sbd kdumpcheck nut
extdir = $(stonith_ext_plugindir)
helperdir = $(stonith_plugindir)
-ext_SCRIPTS = drac5 dracmc-telnet ibmrsa ibmrsa-telnet ipmi riloe ssh vmware rackpdu xen0 hmchttp \
- xen0-ha sbd kdumpcheck ippower9258 nut
+ext_SCRIPTS = drac5 dracmc-telnet ibmrsa ibmrsa-telnet ipmi riloe ssh vmware vcenter rackpdu xen0 hmchttp \
+ xen0-ha sbd kdumpcheck ippower9258 nut libvirt \
+ hetzner
helper_SCRIPTS = xen0-ha-dom0-stonith-helper
diff --git a/lib/plugins/stonith/external/hetzner b/lib/plugins/stonith/external/hetzner
new file mode 100755
index 0000000..8846270
--- /dev/null
+++ b/lib/plugins/stonith/external/hetzner
@@ -0,0 +1,132 @@
+#!/bin/sh
+#
+# External STONITH module for Hetzner.
+#
+# Copyright (c) 2011 MMUL S.a.S. - Raoul Scarazzini <rasca at mmul.it>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+# Read the webservice credentials ("user = ..." / "pass = ..." lines) from $conf_file; the format is based
+# upon the hetzner OCF resource agent developed by Kumina: http://blog.kumina.nl/2011/02/hetzner-failover-ip-ocf-script/
+conf_file="/etc/hetzner.cfg"
+user=`sed -n 's/^user.*=\ *//p' "$conf_file"`
+pass=`sed -n 's/^pass.*=\ *//p' "$conf_file"`
+hetzner_server="https://robot-ws.your-server.de"
+
+check_http_response() {
+ # $1 is the HTTP status code printed by curl (may be empty); 200 means success, so return 0
+ if [ "$1" = 200 ]; then
+ return 0
+ else
+ # Any other response is a failure: log a description of the problem and return 1
+ case "$1" in
+ 400) ha_log.sh err "INVALID_INPUT - Invalid input parameters"
+ ;;
+ 404) ha_log.sh err "SERVER_NOT_FOUND - Server with ip $remote_ip not found"
+ ;;
+ 409) ha_log.sh err "RESET_MANUAL_ACTIVE - There is already a running manual reset"
+ ;;
+ 500) ha_log.sh err "RESET_FAILED - Resetting failed due to an internal error"
+ ;;
+ *) ha_log.sh err "Unexpected HTTP response code '$1' from $hetzner_server" ;;
+ esac
+ return 1
+ fi
+}
+
+case $1 in
+gethosts)
+ echo $hostname
+ exit 0
+ ;;
+on)
+ # Can't really be implemented because Hetzner's webservice cannot power on a system
+ ha_log.sh err "Power on is not available since Hetzner's webservice can't do this operation."
+ exit 1
+ ;;
+off)
+ # Can't really be implemented because Hetzner's webservice cannot power off a system
+ ha_log.sh err "Power off is not available since Hetzner's webservice can't do this operation."
+ exit 1
+ ;;
+reset)
+ # Launch a hardware reset via the webservice; the exit code is that of check_http_response
+ check_http_response "$(curl --silent -o /dev/null -w '%{http_code}' -u "$user:$pass" "$hetzner_server/reset/$remote_ip" -d type=hw)"
+ exit $?
+ ;;
+status)
+ # Check if we can contact the webservice (queries the server entry for $remote_ip)
+ check_http_response "$(curl --silent -o /dev/null -w '%{http_code}' -u "$user:$pass" "$hetzner_server/server/$remote_ip")"
+ exit $?
+ ;;
+getconfignames)
+ echo "hostname"
+ echo "remote_ip"
+ exit 0
+ ;;
+getinfo-devid)
+ echo "Hetzner STONITH device"
+ exit 0
+ ;;
+getinfo-devname)
+ echo "Hetzner STONITH external device"
+ exit 0
+ ;;
+getinfo-devdescr)
+ echo "Hetzner host reset"
+ echo "Manages the remote webservice for reset a remote server."
+ exit 0
+ ;;
+getinfo-devurl)
+ echo "http://wiki.hetzner.de/index.php/Robot_Webservice_en"
+ exit 0
+ ;;
+getinfo-xml)
+ cat << HETZNERXML
+<parameters>
+<parameter name="hostname" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Hostname
+</shortdesc>
+<longdesc lang="en">
+The name of the host to be managed by this STONITH device.
+</longdesc>
+</parameter>
+
+<parameter name="remote_ip" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Remote IP
+</shortdesc>
+<longdesc lang="en">
+The address of the remote IP that manages this server.
+</longdesc>
+</parameter>
+</parameters>
+HETZNERXML
+ exit 0
+ ;;
+*)
+ ha_log.sh err "Don't know what to do for '$remote_ip'"
+ exit 1
+ ;;
+esac
diff --git a/lib/plugins/stonith/external/ipmi b/lib/plugins/stonith/external/ipmi
index 53b5a9b..b7832f3 100644
--- a/lib/plugins/stonith/external/ipmi
+++ b/lib/plugins/stonith/external/ipmi
@@ -23,8 +23,8 @@
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# along with this program; if not, write the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Initialization -- fix locale settings so we can parse output from
@@ -115,7 +115,6 @@ ipmi_is_power_on() {
esac
}
-# Rewrite the hostname to accept "," as a delimeter for hostnames too.
case ${1} in
gethosts)
@@ -195,7 +194,7 @@ The IP address of the STONITH device.
</longdesc>
</parameter>
-<parameter name="userid" unique="1">
+<parameter name="userid" unique="0">
<content type="string" />
<shortdesc lang="en">
Login
@@ -205,7 +204,7 @@ The username used for logging in to the STONITH device.
</longdesc>
</parameter>
-<parameter name="passwd" unique="1">
+<parameter name="passwd" unique="0">
<content type="string" />
<shortdesc lang="en">
Password
@@ -215,7 +214,7 @@ The password used for logging in to the STONITH device.
</longdesc>
</parameter>
-<parameter name="passwd_method" unique="1">
+<parameter name="passwd_method" unique="0">
<content type="string" default="param"/>
<shortdesc lang="en">
Method for passing passwd parameter
@@ -228,7 +227,7 @@ Method for passing the passwd parameter to ipmitool
</longdesc>
</parameter>
-<parameter name="interface" unique="1">
+<parameter name="interface" unique="0">
<content type="string" default="lan"/>
<shortdesc lang="en">
IPMI interface
diff --git a/lib/plugins/stonith/external/libvirt b/lib/plugins/stonith/external/libvirt
new file mode 100644
index 0000000..8923565
--- /dev/null
+++ b/lib/plugins/stonith/external/libvirt
@@ -0,0 +1,259 @@
+#!/bin/sh
+#
+# External STONITH module for a libvirt managed hypervisor (kvm/Xen).
+# Uses libvirt as a STONITH device to control guest.
+#
+# Copyright (c) 2010 Holger Teutsch <holger.teutsch at web.de>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+# start a domain
+libvirt_start() {
+ out=$($VIRSH -c $hypervisor_uri start $domain_id 2>&1)
+ if [ $? -eq 0 ]
+ then
+ ha_log.sh notice "Domain $domain_id was started"
+ return 0
+ fi
+
+ if echo "$out" | grep -i 'Domain is already active' > /dev/null 2>&1
+ then
+ ha_log.sh notice "Domain $domain_id is already active"
+ return 0
+ fi
+
+ ha_log.sh err "Failed to start domain $domain_id"
+ ha_log.sh err "$out"
+ return 1
+}
+
+# stop a domain
+# return
+# 0: success
+# 1: error
+# 2: was already stopped
+libvirt_stop() {
+ out=$($VIRSH -c $hypervisor_uri destroy $domain_id 2>&1)
+ if [ $? -eq 0 ]
+ then
+ ha_log.sh notice "Domain $domain_id was stopped"
+ return 0
+ fi
+
+ if echo "$out" | grep -i 'domain is not running' > /dev/null 2>&1
+ then
+ ha_log.sh notice "Domain $domain_id is already stopped"
+ return 2
+ fi
+
+ ha_log.sh err "Failed to stop domain $domain_id"
+ ha_log.sh err "$out"
+ return 1
+}
+
+# get status of stonith device (*NOT* of the domain).
+# If we can retrieve some info from the hypervisor
+# the stonith device is OK.
+libvirt_status() {
+ out=$($VIRSH -c $hypervisor_uri version 2>&1)
+ if [ $? -eq 0 ]
+ then
+ out=`echo "$out" | tail -1`
+ ha_log.sh notice "$hypervisor_uri: $out"
+ return 0
+ fi
+
+ ha_log.sh err "Failed to get status for $hypervisor_uri"
+ ha_log.sh err "$out"
+ return 1
+}
+
+# check config and set variables
+# does not return on error
+libvirt_check_config() {
+ VIRSH=`which virsh 2>/dev/null`
+
+ if [ ! -x "$VIRSH" ]
+ then
+ ha_log.sh err "virsh not installed"
+ exit 1
+ fi
+
+ if [ -z "$hostlist" -o -z "$hypervisor_uri" ]
+ then
+ ha_log.sh err "hostlist or hypervisor_uri missing; check configuration"
+ exit 1
+ fi
+}
+
+# set variable domain_id for the host specified as arg
+libvirt_set_domain_id ()
+{
+ for h in $hostlist
+ do
+ case $h in
+ $1:*)
+ domain_id=`expr $h : '.*:\(.*\)'`
+ return
+ ;;
+
+ $1)
+ domain_id=$1
+ return
+ esac
+ done
+
+ ha_log.sh err "Should never happen: Called for host $1 but $1 is not in $hostlist."
+ exit 1
+}
+
+libvirt_info() {
+cat << LVIRTXML
+<parameters>
+<parameter name="hostlist" unique="1" required="1">
+<content type="string" />
+<shortdesc lang="en">
+List of hostname[:domain_id]..
+</shortdesc>
+<longdesc lang="en">
+List of controlled hosts: hostname[:domain_id]..
+The optional domain_id defaults to the hostname.
+</longdesc>
+</parameter>
+
+<parameter name="hypervisor_uri" required="1">
+<content type="string" />
+<shortdesc lang="en">
+Hypervisor URI
+</shortdesc>
+<longdesc lang="en">
+URI for connection to the hypervisor.
+driver[+transport]://[username@][hostlist][:port]/[path][?extraparameters]
+e.g.
+qemu+ssh://my_kvm_server.mydomain.my/system (uses ssh for root)
+xen://my_kvm_server.mydomain.my/ (uses TLS for client)
+
+virsh must be installed (e.g. libvir-client package) and access control must
+be configured for your selected URI.
+</longdesc>
+</parameter>
+</parameters>
+LVIRTXML
+exit 0
+}
+
+#############
+# Main code #
+#############
+
+# don't fool yourself when testing with stonith(8)
+# and transport ssh
+unset SSH_AUTH_SOCK
+
+# support , as a separator as well
+hostlist=`echo $hostlist| sed -e 's/,/ /g'`
+
+case $1 in
+ gethosts)
+ hostnames=`echo $hostlist|sed -e 's/:[^ ]*//g'`
+ for h in $hostnames
+ do
+ echo $h
+ done
+ exit 0
+ ;;
+
+ on)
+ libvirt_check_config
+ libvirt_set_domain_id $2
+
+ libvirt_start
+ exit $?
+ ;;
+
+ off)
+ libvirt_check_config
+ libvirt_set_domain_id $2
+
+ libvirt_stop
+ [ $? = 1 ] && exit 1
+ exit 0
+ ;;
+
+ reset)
+ # libvirt has no reset so we do a power cycle
+ libvirt_check_config
+ libvirt_set_domain_id $2
+
+ libvirt_stop
+ rc=$?
+ [ $rc = 1 ] && exit 1
+
+ # stonith reset seems to require a power on even if it was off
+ # before so the next line is commented out
+ # [ $rc = 2 ] && exit 0
+
+ sleep 2
+ libvirt_start
+ exit $?
+ ;;
+
+ status)
+ libvirt_check_config
+ libvirt_status
+ exit $?
+ ;;
+
+ getconfignames)
+ echo "hostlist hypervisor_uri"
+ exit 0
+ ;;
+
+ getinfo-devid)
+ echo "libvirt STONITH device"
+ exit 0
+ ;;
+
+ getinfo-devname)
+ echo "libvirt STONITH external device"
+ exit 0
+ ;;
+
+ getinfo-devdescr)
+ echo "libvirt-based Linux host reset for Xen/KVM guest domain through hypervisor"
+ exit 0
+ ;;
+
+ getinfo-devurl)
+ echo "http://libvirt.org/uri.html http://linux-ha.org/wiki"
+ exit 0
+ ;;
+
+ getinfo-xml)
+ libvirt_info
+ exit 0 # not reached today (libvirt_info exits itself); never print a stray "0" after the XML
+ ;;
+
+ *)
+ exit 1
+ ;;
+esac
+
diff --git a/lib/plugins/stonith/external/rackpdu b/lib/plugins/stonith/external/rackpdu
index b53fd03..7d0e20b 100644
--- a/lib/plugins/stonith/external/rackpdu
+++ b/lib/plugins/stonith/external/rackpdu
@@ -75,7 +75,8 @@ GetOutletNumber() {
return 0
fi
- local names=`echo "$snmp_result" | cut -f2 -d'"' | tr ' ' '_' | tr '\012' ' '`
+ local names
+ names=`echo "$snmp_result" | cut -f2 -d'"' | tr ' ' '_' | tr '\012' ' '`
for name in $names; do
if [ "$name" != "$nodename" ]; then
local outlet_num=`expr $outlet_num + 1`
diff --git a/lib/plugins/stonith/external/sbd b/lib/plugins/stonith/external/sbd
index 6b4eec1..baa7b3b 100644
--- a/lib/plugins/stonith/external/sbd
+++ b/lib/plugins/stonith/external/sbd
@@ -9,13 +9,28 @@
# Main code
+if [ x$sbd_device = x ]; then
+ if [ -f /etc/sysconfig/sbd ]; then
+ source /etc/sysconfig/sbd
+ sbd_device=$SBD_DEVICE
+ fi
+fi
+
+SBD_DEVS=${sbd_device%;}
+
+sbd_device=${SBD_DEVS//;/ -d }
+
case $1 in
gethosts)
- echo `sbd -d $sbd_device list | cut -f2`
+ echo `sbd -d $sbd_device list | cut -f2 | sort | uniq`
exit 0
;;
off|reset)
- sbd -d $sbd_device message $2 $1
+ message=$1
+ case "$crashdump" in
+ yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
+ esac
+ sbd -d $sbd_device message $2 $message
exit $?
;;
status)
@@ -29,7 +44,7 @@ on)
exit 1
;;
getconfignames)
- echo "sbd_device"
+ echo "sbd_device crashdump"
exit 0
;;
getinfo-devid)
@@ -47,8 +62,9 @@ fencing requests. This allows clusters without network power
switches; the downside is that access to the shared storage
device becomes a Single Point of Failure.
-It requires sbd to be configured. Please read
-http://linux-ha.org/wiki/SBD_Fencing!
+It requires sbd to be configured on all nodes.
+
+Please read http://linux-ha.org/wiki/SBD_Fencing!
DESC
exit 0
@@ -60,13 +76,34 @@ getinfo-devurl)
getinfo-xml)
cat << SSHXML
<parameters>
-<parameter name="sbd_device" unique="1" required="1">
+
+<parameter name="crashdump">
<content type="string" />
<shortdesc lang="en">
-SBD device
+Crashdump instead of regular fence
</shortdesc>
<longdesc lang="en">
-The block device used for the SBD partition.
+If SBD is given a fence command, this option will instead perform a
+kernel crash of a reboot or power-off, which on a properly configured
+system can lead to a crashdump for analysis.
+
+This is less safe for production environments. Please use with caution
+and for debugging purposes only.
+</longdesc>
+</parameter>
+
+<parameter name="sbd_device" unique="1">
+<content type="string" />
+<shortdesc lang="en">
+SBD device(s)
+</shortdesc>
+<longdesc lang="en">
+The block device used for the SBD partition. Up to three
+can be specified if separated by a semicolon. (Please check
+the documentation if specifying two.)
+
+If not specified, will default to the value from /etc/sysconfig/sbd.
+
</longdesc>
</parameter>
</parameters>
diff --git a/lib/plugins/stonith/external/vcenter b/lib/plugins/stonith/external/vcenter
new file mode 100755
index 0000000..5c1afb9
--- /dev/null
+++ b/lib/plugins/stonith/external/vcenter
@@ -0,0 +1,266 @@
+#!/usr/bin/env perl
+#
+# External STONITH module for VMWare vCenter/ESX
+#
+# Author: Nhan Ngo Dinh
+# License: GNU General Public License (GPL)
+#
+
+require 5.010;
+
+use strict;
+use warnings;
+
+sub dielog {
+ # Log "[<command> <target>] <message>" through ha_log.sh at level err, then die.
+ my ($text) = @_; # lexical copy: do not overwrite the caller's global $_
+ my $msg = "[";
+ $msg .= "$ARGV[0]" if defined($ARGV[0]);
+ $msg .= " $ARGV[1]" if defined($ARGV[1]);
+ $msg .= "] $text";
+ system("ha_log.sh", "err", "$msg");
+ die();
+}
+
+# Define command groups
+my @configCommands = qw{getconfignames getinfo-devid getinfo-devname getinfo-devdescr getinfo-devurl getinfo-xml};
+my @actionCommands = qw{reset on off};
+my @netCommands = (@actionCommands, qw{status gethosts});
+
+# Process command line arguments
+my $command = $ARGV[0] || dielog("No command specified\n");
+
+# Command belongs to the group of commands that do not require any connection to VMware vCenter
+if ($command ~~ @configCommands) {
+ if ($command eq "getconfignames") {
+ print "VI_SERVER\nVI_PORTNUMBER\nVI_PROTOCOL\nVI_SERVICEPATH\nVI_CREDSTORE\nHOSTLIST\nRESETPOWERON\n";
+ }
+ elsif ($command eq "getinfo-devid") {
+ print "VMware vCenter STONITH device\n";
+ }
+ elsif ($command eq "getinfo-devname") {
+ print "VMware vCenter STONITH device\n";
+ }
+ elsif ($command eq "getinfo-devdescr") {
+ print "VMWare vCenter STONITH device\n";
+ }
+ elsif ($command eq "getinfo-devurl") {
+ print "http://www.vmware.com/\n";
+ }
+ elsif ($command eq "getinfo-xml") {
+ print q{<parameters>
+<parameter name="HOSTLIST" required="1">
+<content type="string"/>
+<shortdesc lang="en">List of hosts and virtual machines (required)</shortdesc>
+<longdesc lang="en">
+The list of hosts that the VMware vCenter STONITH device controls.
+Syntax is:
+ hostname1[=VirtualMachineName1];hostname2[=VirtualMachineName2]
+
+NOTE: omit =VirtualMachineName if hostname and virtual machine names are identical
+
+Example:
+ cluster1=VMCL1;cluster2=VMCL2
+</longdesc>
+</parameter>
+<parameter name="VI_SERVER">
+<content type="string" default="localhost"/>
+<shortdesc lang="en">VMware vCenter address</shortdesc>
+<longdesc lang="en">
+The VMware vCenter address
+</longdesc>
+</parameter>
+<parameter name="VI_PROTOCOL">
+<content type="string" default="https"/>
+<shortdesc lang="en">VMware vCenter protocol</shortdesc>
+<longdesc lang="en">
+The VMware vCenter protocol
+</longdesc>
+</parameter>
+<parameter name="VI_PORTNUMBER">
+<content type="string" default="443"/>
+<shortdesc lang="en">VMware vCenter port number</shortdesc>
+<longdesc lang="en">
+The VMware vCenter port number
+</longdesc>
+</parameter>
+<parameter name="VI_SERVICEPATH">
+<content type="string" default="/sdk"/>
+<shortdesc lang="en">VMware vCenter service path</shortdesc>
+<longdesc lang="en">
+The VMware vCenter services path
+</longdesc>
+</parameter>
+<parameter name="VI_CREDSTORE" required="1">
+<content type="string"/>
+<shortdesc lang="en">VMware vCenter credentials store file</shortdesc>
+<longdesc lang="en">
+VMware vCenter credentials store file
+</longdesc>
+</parameter>
+<parameter name="RESETPOWERON">
+<content type="string" default="1"/>
+<shortdesc lang="en">PowerOnVM on reset</shortdesc>
+<longdesc lang="en">
+Enable/disable a PowerOnVM on reset when the target virtual machine is off
+Allowed values: 0, 1
+</longdesc>
+</parameter>
+</parameters>} . "\n";
+ }
+ else { dielog("Invalid command specified: $command\n"); }
+}
+
+# Command belongs to the group of commands that require connecting to VMware vCenter
+elsif ($command ~~ @netCommands) {
+
+ use VMware::VIRuntime;
+
+ # A valid VI_CREDSTORE is required to avoid interactive prompt
+ ( exists $ENV{'VI_CREDSTORE'} ) || dielog("VI_CREDSTORE not specified\n");
+
+ # HOSTLIST is mandatory
+ exists $ENV{'HOSTLIST'} || dielog("HOSTLIST not specified\n");
+
+ # Parse HOSTLIST to %host_to_vm and %vm_to_host
+ my @hostlist = split(';', $ENV{'HOSTLIST'});
+ my %host_to_vm = ();
+ my %vm_to_host = ();
+ foreach my $host (@hostlist) {
+ my @config = split(/=/, $host);
+ my $key = $config[0]; my $value = $config[1];
+ if (!defined($value)) { $value = $config[0]; }
+ $host_to_vm{$key} = $value;
+ $vm_to_host{(lc $value)} = $key;
+ }
+
+ eval {
+ # VI API: reads options from the environment variables into appropriate data structures for validation.
+ Opts::parse();
+ # VI API: ensures that input values from environment variable are complete, consistent and valid.
+ Opts::validate();
+ # VI API: establishes a session with the VirtualCenter Management Server or ESX Server Web service
+ Util::connect();
+ };
+ if ($@) {
+ # This is just a placeholder for any error handling procedure
+ dielog($@);
+ }
+
+ # Command belongs to the group of commands that performs actions on Virtual Machines
+ if ($command ~~ @actionCommands) {
+
+ my $targetHost = $ARGV[1] || dielog("No target specified\n");
+
+ # Require that specified target host exists in the specified HOSTLIST
+ if (exists $host_to_vm{$targetHost}) {
+
+ my $vm;
+ my $esx;
+ eval {
+ # VI API: searches the inventory tree for the VirtualMachine managed entity whose name is exactly
+ # (ignoring case) the name of the virtual machine assigned to the target host in HOSTLIST
+ $vm = Vim::find_entity_view(view_type => "VirtualMachine", filter => { name => qr/^\Q$host_to_vm{$targetHost}\E$/i });
+ defined($vm) || die("Virtual machine $host_to_vm{$targetHost} not found\n");
+ # VI API: retrieves the properties of the managed object reference runtime.host of the VirtualMachine
+ # managed entity obtained by the previous command
+ # NOTE: This is essentially a workaround to vSphere Perl SDK
+ # to allow pointing to the right HostSystem. This is probably
+ # done by changing the current HostSystem in the Web Service
+ # session context. WARNING: Do not use the same session for any
+ # other concurrent operation.
+ $esx = Vim::get_view(mo_ref => $vm->{"runtime"}{"host"})->name;
+ };
+ if ($@) {
+ if (ref($@) eq "SoapFault") { dielog("$@->detail\n"); } # NOTE(review): Perl does not interpolate method calls, so "->detail" is logged literally - confirm intent
+ dielog($@);
+ }
+
+ my $powerState = $vm->get_property('runtime.powerState')->val;
+ if ($powerState eq "suspended") {
+ # This implementation assumes that suspending a cluster node can cause
+ # severe failures on shared resources, thus any failover operation should
+ # be blocked.
+ dielog("Machine $esx:$vm->{'name'} is in a suspended state\n");
+ }
+
+ eval {
+ if ($command eq "reset") {
+ if ($powerState eq "poweredOn") {
+ $vm->ResetVM();
+ system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been reset");
+ } else {
+ system("ha_log.sh", "warn", "Tried to ResetVM $esx:$vm->{'name'} that was $powerState");
+ # Start a virtual machine on reset only if explicitly allowed by RESETPOWERON
+ if ($powerState eq "poweredOff" && (! exists $ENV{'RESETPOWERON'} || $ENV{'RESETPOWERON'} ne 0)) {
+ $vm->PowerOnVM();
+ system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered on");
+ } else {
+ dielog("Could not complete $esx:$vm->{'name'} power cycle");
+ }
+ }
+ }
+ elsif ($command eq "off") {
+ if ($powerState eq "poweredOn") {
+ $vm->PowerOffVM();
+ system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered off");
+ } else {
+ system("ha_log.sh", "warn", "Tried to PowerOffVM $esx:$vm->{'name'} that was $powerState");
+
+ }
+ }
+ elsif ($command eq "on") {
+ if ($powerState eq "poweredOff") {
+ $vm->PowerOnVM();
+ system("ha_log.sh", "info", "Machine $esx:$vm->{'name'} has been powered on");
+ } else {
+ system("ha_log.sh", "warn", "Tried to PowerOnVM $esx:$vm->{'name'} that was $powerState");
+ }
+ }
+ else { dielog("Invalid command specified: $command\n"); }
+ };
+ if ($@) {
+ if (ref($@) eq "SoapFault") { dielog("$@->detail\n"); }
+ dielog($@);
+ }
+
+ } else { dielog("Invalid target specified\n"); }
+ } else {
+ # Command belongs to the group of commands that lookup the status of VMware vCenter and/or virtual machines
+ if ($command eq "status") {
+ eval {
+ # VI API: Searches the inventory tree for all VirtualMachine managed objects
+ my $vms = Vim::find_entity_views(view_type => "VirtualMachine");
+ };
+ if ($@) {
+ if (ref($@) eq "SoapFault") { dielog("$@->detail\n"); }
+ dielog($@);
+ }
+ }
+ elsif ($command eq "gethosts") {
+ # Create a regular expression to make vCenter find all the virtual machines matching
+ # the virtual machine names specified in HOSTLIST
+ # NOTE: this implementation makes "gethosts" check that entries in HOSTLIST are consistent with the VMware vCenter VM directory
+ my $regex = join "|", map { qr/\Q$_\E/i } values %host_to_vm;
+ eval {
+ my $vms = Vim::find_entity_views(view_type => "VirtualMachine", filter => { name => qr/^($regex)$/ });
+ foreach my $vm (@$vms) { print "$vm_to_host{(lc $vm->name)}\n" if exists $vm_to_host{(lc $vm->name)}; }
+ };
+ if ($@) {
+ if (ref($@) eq "SoapFault") { dielog("$@->detail\n"); }
+ dielog($@);
+ }
+ }
+ else { dielog("Invalid command specified: $command\n"); }
+ }
+ eval {
+ Util::disconnect();
+ };
+ if ($@) {
+ # This is just a placeholder for any error handling procedure
+ dielog($@);
+ }
+}
+else { dielog("Invalid command specified: $command\n"); }
+
+exit(0);
diff --git a/lib/plugins/stonith/meatware.c b/lib/plugins/stonith/meatware.c
index fbc0742..8547541 100644
--- a/lib/plugins/stonith/meatware.c
+++ b/lib/plugins/stonith/meatware.c
@@ -202,8 +202,10 @@ meatware_reset_req(StonithPlugin * s, int request, const char * host)
return S_OOPS;
}
+ alarm(600);
memset(line, 0, 256);
rc = read(fd, line, 256);
+ alarm(0);
if (rc < 0) {
LOG(PIL_CRIT, "read error on FIFO for Meatware_reset_host");
diff --git a/lib/stonith/Makefile.am b/lib/stonith/Makefile.am
index 614ed16..a3ffbab 100644
--- a/lib/stonith/Makefile.am
+++ b/lib/stonith/Makefile.am
@@ -41,7 +41,7 @@ stonith_LDFLAGS = @LIBADD_DL@ @LIBLTDL@ -export-dynamic @DLOPEN_FORCE_FLAGS@ @
meatclient_SOURCES = meatclient.c
meatclient_LDADD = $(GLIBLIB)
-sbd_SOURCES = sbd.c
+sbd_SOURCES = sbd-md.c sbd-common.c
sbd_CFLAGS = -D_GNU_SOURCE
sbd_LDADD = $(GLIBLIB) \
$(top_builddir)/lib/clplumbing/libplumb.la \
diff --git a/lib/stonith/sbd.c b/lib/stonith/sbd-common.c
similarity index 61%
rename from lib/stonith/sbd.c
rename to lib/stonith/sbd-common.c
index d8fc6b0..f9f16ac 100644
--- a/lib/stonith/sbd.c
+++ b/lib/stonith/sbd-common.c
@@ -1,21 +1,3 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -38,6 +20,7 @@
#include <linux/types.h>
#include <linux/watchdog.h>
#include <linux/fs.h>
+
#include "sbd.h"
/* These have to match the values in the header of the partition */
@@ -45,27 +28,27 @@ static char sbd_magic[8] = "SBD_SBD_";
static char sbd_version = 0x02;
/* Tunable defaults: */
-static unsigned long timeout_watchdog = 5;
-static unsigned long timeout_watchdog_warn = 3;
-static int timeout_allocate = 2;
-static int timeout_loop = 1;
-static int timeout_msgwait = 10;
-
-static int watchdog_use = 0;
-static int go_daemon = 0;
-static int skip_rt = 0;
-static int debug = 0;
-static const char *watchdogdev = "/dev/watchdog";
-static char * local_uname;
+unsigned long timeout_watchdog = 5;
+unsigned long timeout_watchdog_warn = 3;
+int timeout_allocate = 2;
+int timeout_loop = 1;
+int timeout_msgwait = 10;
+
+int watchdog_use = 0;
+int watchdog_set_timeout = 1;
+int skip_rt = 0;
+int debug = 0;
+const char *watchdogdev = "/dev/watchdog";
+char * local_uname;
/* Global, non-tunable variables: */
-static int sector_size = 0;
-static int watchdogfd = -1;
-static int devfd;
-static char *devname;
-static char *cmdname;
+int sector_size = 0;
+int watchdogfd = -1;
+
+/*const char *devname;*/
+const char *cmdname;
-static void
+void
usage(void)
{
fprintf(stderr,
@@ -73,22 +56,24 @@ usage(void)
"Syntax:\n"
" %s <options> <command> <cmdarguments>\n"
"Options:\n"
-"-d <devname> Block device to use (mandatory)\n"
+"-d <devname> Block device to use (mandatory; can be specified up to 3 times)\n"
"-h Display this help.\n"
"-n <node> Set local node name; defaults to uname -n (optional)\n"
"\n"
"-R Do NOT enable realtime priority (debugging only)\n"
"-W Use watchdog (recommended) (watch only)\n"
"-w <dev> Specify watchdog device (optional) (watch only)\n"
-"-D Run as background daemon (optional) (watch only)\n"
+"-T Do NOT initialize the watchdog timeout (watch only)\n"
"-v Enable some verbose debug logging (optional)\n"
"\n"
-"-1 <N> Set watchdog timeout to N seconds (optional) (create only)\n"
-"-2 <N> Set slot allocation timeout to N seconds (optional) (create only)\n"
-"-3 <N> Set daemon loop timeout to N seconds (optional) (create only)\n"
-"-4 <N> Set msgwait timeout to N seconds (optional) (create only)\n"
-"-5 <N> Warn if loop latency exceeds threshold (optional) (watch only)\n"
+"-1 <N> Set watchdog timeout to N seconds (optional, create only)\n"
+"-2 <N> Set slot allocation timeout to N seconds (optional, create only)\n"
+"-3 <N> Set daemon loop timeout to N seconds (optional, create only)\n"
+"-4 <N> Set msgwait timeout to N seconds (optional, create only)\n"
+"-5 <N> Warn if loop latency exceeds threshold (optional, watch only)\n"
" (default is 3, set to 0 to disable)\n"
+"-t <N> Interval in seconds for automatic child restarts (optional)\n"
+" (default is 3600, set to 0 to disable)\n"
"Commands:\n"
"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
"list List all allocated slots on device, and messages.\n"
@@ -101,58 +86,70 @@ usage(void)
, cmdname);
}
-static void
+int
watchdog_init_interval(void)
{
+ int timeout = timeout_watchdog;
+
if (watchdogfd < 0) {
- return;
+ return 0;
+ }
+
+
+ if (watchdog_set_timeout == 0) {
+ cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
+ return 0;
}
- if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout_watchdog) < 0) {
+ if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
cl_perror( "WDIOC_SETTIMEOUT"
- ": Failed to set watchdog timer to %lu seconds.",
- timeout_watchdog);
+ ": Failed to set watchdog timer to %u seconds.",
+ timeout);
+ cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
+ cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to silence this check if you are sure.");
+ /* return -1; */
} else {
- cl_log(LOG_INFO, "Set watchdog timeout to %lu seconds.",
- timeout_watchdog);
+ cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.",
+ timeout);
}
+ return 0;
}
-static void
+int
watchdog_tickle(void)
{
if (watchdogfd >= 0) {
if (write(watchdogfd, "", 1) != 1) {
cl_perror("Watchdog write failure: %s!",
watchdogdev);
- /* TODO: Should we force the crash, or wait for
- * the watchdog to time us out? */
+ return -1;
}
}
+ return 0;
}
-static void
+int
watchdog_init(void)
{
if (watchdogfd < 0 && watchdogdev != NULL) {
watchdogfd = open(watchdogdev, O_WRONLY);
if (watchdogfd >= 0) {
- if (fcntl(watchdogfd, F_SETFD, FD_CLOEXEC)) {
- cl_perror("Error setting the "
- "close-on-exec flag for watchdog");
- }
cl_log(LOG_NOTICE, "Using watchdog device: %s",
watchdogdev);
- watchdog_init_interval();
- watchdog_tickle();
+ if ((watchdog_init_interval() < 0)
+ || (watchdog_tickle() < 0)) {
+ return -1;
+ }
}else{
cl_perror("Cannot open watchdog device: %s",
watchdogdev);
+ return -1;
}
}
+ return 0;
}
-static void
+void
watchdog_close(void)
{
if (watchdogfd >= 0) {
@@ -172,8 +169,8 @@ watchdog_close(void)
* even in linux-kernel-headers. Sucks. See also
* /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
extern int sys_ioprio_set(int, int, int);
-static int ioprio_set(int which, int who, int ioprio);
-static inline int ioprio_set(int which, int who, int ioprio)
+int ioprio_set(int which, int who, int ioprio);
+inline int ioprio_set(int which, int who, int ioprio)
{
return syscall(__NR_ioprio_set, which, who, ioprio);
}
@@ -199,7 +196,7 @@ enum {
#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-static void
+void
maximize_priority(void)
{
if (skip_rt) {
@@ -215,9 +212,10 @@ maximize_priority(void)
}
}
-static int
+int
open_device(const char* devname)
{
+ int devfd;
if (!devname)
return -1;
@@ -234,10 +232,10 @@ open_device(const char* devname)
cl_perror("Get sector size failed.\n");
return -1;
}
- return 0;
+ return devfd;
}
-static signed char
+signed char
cmd2char(const char *cmd)
{
if (strcmp("clear", cmd) == 0) {
@@ -250,11 +248,13 @@ cmd2char(const char *cmd)
return SBD_MSG_OFF;
} else if (strcmp("exit", cmd) == 0) {
return SBD_MSG_EXIT;
+ } else if (strcmp("crashdump", cmd) == 0) {
+ return SBD_MSG_CRASHDUMP;
}
return -1;
}
-static void *
+void *
sector_alloc(void)
{
void *x;
@@ -268,7 +268,7 @@ sector_alloc(void)
return x;
}
-static const char*
+const char*
char2cmd(const char cmd)
{
switch (cmd) {
@@ -287,14 +287,17 @@ char2cmd(const char cmd)
case SBD_MSG_EXIT:
return "exit";
break;
+ case SBD_MSG_CRASHDUMP:
+ return "crashdump";
+ break;
default:
return "undefined";
break;
}
}
-static int
-sector_write(int sector, const void *data)
+int
+sector_write(int devfd, int sector, const void *data)
{
if (lseek(devfd, sector_size*sector, 0) < 0) {
cl_perror("sector_write: lseek() failed");
@@ -308,8 +311,8 @@ sector_write(int sector, const void *data)
return(0);
}
-static int
-sector_read(int sector, void *data)
+int
+sector_read(int devfd, int sector, void *data)
{
if (lseek(devfd, sector_size*sector, 0) < 0) {
cl_perror("sector_read: lseek() failed");
@@ -323,67 +326,73 @@ sector_read(int sector, void *data)
return(0);
}
-static int
-slot_read(int slot, struct sector_node_s *s_node)
+int
+slot_read(int devfd, int slot, struct sector_node_s *s_node)
{
- return sector_read(SLOT_TO_SECTOR(slot), s_node);
+ return sector_read(devfd, SLOT_TO_SECTOR(slot), s_node);
}
-static int
-slot_write(int slot, const struct sector_node_s *s_node)
+int
+slot_write(int devfd, int slot, const struct sector_node_s *s_node)
{
- return sector_write(SLOT_TO_SECTOR(slot), s_node);
+ return sector_write(devfd, SLOT_TO_SECTOR(slot), s_node);
}
-static int
-mbox_write(int mbox, const struct sector_mbox_s *s_mbox)
+int
+mbox_write(int devfd, int mbox, const struct sector_mbox_s *s_mbox)
{
- return sector_write(MBOX_TO_SECTOR(mbox), s_mbox);
+ return sector_write(devfd, MBOX_TO_SECTOR(mbox), s_mbox);
}
-static int
-mbox_read(int mbox, struct sector_mbox_s *s_mbox)
+int
+mbox_read(int devfd, int mbox, struct sector_mbox_s *s_mbox)
{
- return sector_read(MBOX_TO_SECTOR(mbox), s_mbox);
+ return sector_read(devfd, MBOX_TO_SECTOR(mbox), s_mbox);
}
-static int
-mbox_write_verify(int mbox, const struct sector_mbox_s *s_mbox)
+int
+mbox_write_verify(int devfd, int mbox, const struct sector_mbox_s *s_mbox)
{
void *data;
+ int rc = 0;
- if (sector_write(MBOX_TO_SECTOR(mbox), s_mbox) < 0)
+ if (sector_write(devfd, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
return -1;
data = sector_alloc();
- if (sector_read(MBOX_TO_SECTOR(mbox), data) < 0)
- return -1;
+ if (sector_read(devfd, MBOX_TO_SECTOR(mbox), data) < 0) {
+ rc = -1;
+ goto out;
+ }
+
if (memcmp(s_mbox, data, sector_size) != 0) {
cl_log(LOG_ERR, "Write verification failed!");
- return -1;
+ rc = -1;
+ goto out;
}
-
- return 0;
+ rc = 0;
+out:
+ free(data);
+ return rc;
}
-static int
-header_write(struct sector_header_s *s_header)
+int header_write(int devfd, struct sector_header_s *s_header)
{
s_header->sector_size = htonl(s_header->sector_size);
s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
s_header->timeout_allocate = htonl(s_header->timeout_allocate);
s_header->timeout_loop = htonl(s_header->timeout_loop);
s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
- return sector_write(0, s_header);
+ return sector_write(devfd, 0, s_header);
}
-static int
-header_read(struct sector_header_s *s_header)
+int
+header_read(int devfd, struct sector_header_s *s_header)
{
- if (sector_read(0, s_header) < 0)
+ if (sector_read(devfd, 0, s_header) < 0)
return -1;
-
+
s_header->sector_size = ntohl(s_header->sector_size);
s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
@@ -398,7 +407,7 @@ header_read(struct sector_header_s *s_header)
return 0;
}
-static int
+int
valid_header(const struct sector_header_s *s_header)
{
if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
@@ -416,36 +425,36 @@ valid_header(const struct sector_header_s *s_header)
return 0;
}
-static struct sector_header_s *
-header_get(void)
+struct sector_header_s *
+header_get(int devfd)
{
struct sector_header_s *s_header;
s_header = sector_alloc();
-
- if (header_read(s_header) < 0) {
- cl_log(LOG_ERR, "Unable to read header from %s", devname);
+
+ if (header_read(devfd, s_header) < 0) {
+ cl_log(LOG_ERR, "Unable to read header from device %d", devfd);
return NULL;
}
if (valid_header(s_header) < 0) {
- cl_log(LOG_ERR, "%s is not valid.", devname);
+ cl_log(LOG_ERR, "header on device %d is not valid.", devfd);
return NULL;
}
-
+
/* cl_log(LOG_INFO, "Found version %d header with %d slots",
s_header->version, s_header->slots); */
return s_header;
}
-static int
-init_device(void)
+int
+init_device(int devfd)
{
struct sector_header_s *s_header;
struct sector_node_s *s_node;
struct sector_mbox_s *s_mbox;
struct stat s;
- int i;
+ int i;
int rc = 0;
s_header = sector_alloc();
@@ -463,21 +472,27 @@ init_device(void)
fstat(devfd, &s);
/* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
s.st_size, s.st_blksize, s.st_blocks); */
-
- cl_log(LOG_INFO, "Creating version %d header on %s",
+
+ cl_log(LOG_INFO, "Creating version %d header on device %d",
s_header->version,
- devname);
- if (header_write(s_header) < 0) {
+ devfd);
+ fprintf(stdout, "Creating version %d header on device %d\n",
+ s_header->version,
+ devfd);
+ if (header_write(devfd, s_header) < 0) {
rc = -1; goto out;
}
- cl_log(LOG_INFO, "Initializing %d slots on %s",
+ cl_log(LOG_INFO, "Initializing %d slots on device %d",
+ s_header->slots,
+ devfd);
+ fprintf(stdout, "Initializing %d slots on device %d\n",
s_header->slots,
- devname);
+ devfd);
for (i=0;i < s_header->slots;i++) {
- if (slot_write(i, s_node) < 0) {
+ if (slot_write(devfd, i, s_node) < 0) {
rc = -1; goto out;
}
- if (mbox_write(i, s_mbox) < 0) {
+ if (mbox_write(devfd, i, s_mbox) < 0) {
rc = -1; goto out;
}
}
@@ -491,8 +506,8 @@ out: free(s_node);
/* Check if there already is a slot allocated to said name; returns the
* slot number. If not found, returns -1.
* This is necessary because slots might not be continuous. */
-static int
-slot_lookup(const struct sector_header_s *s_header, const char *name)
+int
+slot_lookup(int devfd, const struct sector_header_s *s_header, const char *name)
{
struct sector_node_s *s_node = NULL;
int i;
@@ -506,11 +521,11 @@ slot_lookup(const struct sector_header_s *s_header, const char *name)
s_node = sector_alloc();
for (i=0; i < s_header->slots; i++) {
- if (slot_read(i, s_node) < 0) {
+ if (slot_read(devfd, i, s_node) < 0) {
rc = -1; goto out;
}
if (s_node->in_use != 0) {
- if (strncasecmp(s_node->name, name,
+ if (strncasecmp(s_node->name, name,
sizeof(s_node->name)) == 0) {
cl_log(LOG_INFO, "%s owns slot %d", name, i);
rc = i; goto out;
@@ -522,8 +537,8 @@ out: free(s_node);
return rc;
}
-static int
-slot_unused(const struct sector_header_s *s_header)
+int
+slot_unused(int devfd, const struct sector_header_s *s_header)
{
struct sector_node_s *s_node;
int i;
@@ -532,7 +547,7 @@ slot_unused(const struct sector_header_s *s_header)
s_node = sector_alloc();
for (i=0; i < s_header->slots; i++) {
- if (slot_read(i, s_node) < 0) {
+ if (slot_read(devfd, i, s_node) < 0) {
rc = -1; goto out;
}
if (s_node->in_use == 0) {
@@ -545,21 +560,22 @@ out: free(s_node);
}
-static int
-slot_allocate(const char *name)
+int
+slot_allocate(int devfd, const char *name)
{
struct sector_header_s *s_header = NULL;
struct sector_node_s *s_node = NULL;
struct sector_mbox_s *s_mbox = NULL;
- int i;
+ int i;
int rc = 0;
-
+
if (!name) {
cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
+ fprintf(stderr, "slot_allocate(): No name specified.\n");
rc = -1; goto out;
}
- s_header = header_get();
+ s_header = header_get(devfd);
if (!s_header) {
rc = -1; goto out;
}
@@ -568,35 +584,37 @@ slot_allocate(const char *name)
s_mbox = sector_alloc();
while (1) {
- i = slot_lookup(s_header, name);
+ i = slot_lookup(devfd, s_header, name);
if (i >= 0) {
rc = i; goto out;
}
- i = slot_unused(s_header);
+ i = slot_unused(devfd, s_header);
if (i >= 0) {
cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
+ fprintf(stdout, "slot %d is unused - trying to own\n", i);
memset(s_node, 0, sizeof(*s_node));
s_node->in_use = 1;
strncpy(s_node->name, name, sizeof(s_node->name));
- if (slot_write(i, s_node) < 0) {
+ if (slot_write(devfd, i, s_node) < 0) {
rc = -1; goto out;
}
sleep(timeout_allocate);
} else {
cl_log(LOG_ERR, "No more free slots.");
+ fprintf(stderr, "No more free slots.\n");
rc = -1; goto out;
}
}
-
+
out: free(s_node);
free(s_header);
free(s_mbox);
return(rc);
}
-static int
-slot_list(void)
+int
+slot_list(int devfd)
{
struct sector_header_s *s_header = NULL;
struct sector_node_s *s_node = NULL;
@@ -604,7 +622,7 @@ slot_list(void)
int i;
int rc = 0;
- s_header = header_get();
+ s_header = header_get(devfd);
if (!s_header) {
rc = -1; goto out;
}
@@ -613,11 +631,11 @@ slot_list(void)
s_mbox = sector_alloc();
for (i=0; i < s_header->slots; i++) {
- if (slot_read(i, s_node) < 0) {
+ if (slot_read(devfd, i, s_node) < 0) {
rc = -1; goto out;
}
if (s_node->in_use > 0) {
- if (mbox_read(i, s_mbox) < 0) {
+ if (mbox_read(devfd, i, s_mbox) < 0) {
rc = -1; goto out;
}
printf("%d\t%s\t%s\t%s\n",
@@ -632,8 +650,8 @@ out: free(s_node);
return rc;
}
-static int
-slot_msg(const char *name, const char *cmd)
+int
+slot_msg(int devfd, const char *name, const char *cmd)
{
struct sector_header_s *s_header = NULL;
struct sector_mbox_s *s_mbox = NULL;
@@ -645,7 +663,7 @@ slot_msg(const char *name, const char *cmd)
rc = -1; goto out;
}
- s_header = header_get();
+ s_header = header_get(devfd);
if (!s_header) {
rc = -1; goto out;
}
@@ -654,14 +672,14 @@ slot_msg(const char *name, const char *cmd)
name = local_uname;
}
- mbox = slot_lookup(s_header, name);
+ mbox = slot_lookup(devfd, s_header, name);
if (mbox < 0) {
cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
rc = -1; goto out;
}
s_mbox = sector_alloc();
-
+
s_mbox->cmd = cmd2char(cmd);
if (s_mbox->cmd < 0) {
cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
@@ -672,7 +690,7 @@ slot_msg(const char *name, const char *cmd)
cl_log(LOG_INFO, "Writing %s to node slot %s",
cmd, name);
- if (mbox_write_verify(mbox, s_mbox) < -1) {
+ if (mbox_write_verify(devfd, mbox, s_mbox) < -1) {
rc = -1; goto out;
}
if (strcasecmp(cmd, "exit") != 0) {
@@ -686,8 +704,8 @@ out: free(s_mbox);
return rc;
}
-static int
-slot_ping(const char *name)
+int
+slot_ping(int devfd, const char *name)
{
struct sector_header_s *s_header = NULL;
struct sector_mbox_s *s_mbox = NULL;
@@ -700,7 +718,7 @@ slot_ping(const char *name)
rc = -1; goto out;
}
- s_header = header_get();
+ s_header = header_get(devfd);
if (!s_header) {
rc = -1; goto out;
}
@@ -709,7 +727,7 @@ slot_ping(const char *name)
name = local_uname;
}
- mbox = slot_lookup(s_header, name);
+ mbox = slot_lookup(devfd, s_header, name);
if (mbox < 0) {
cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
rc = -1; goto out;
@@ -721,13 +739,13 @@ slot_ping(const char *name)
strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
cl_log(LOG_DEBUG, "Pinging node %s", name);
- if (mbox_write(mbox, s_mbox) < -1) {
+ if (mbox_write(devfd, mbox, s_mbox) < -1) {
rc = -1; goto out;
}
rc = -1;
while (waited <= timeout_msgwait) {
- if (mbox_read(mbox, s_mbox) < 0)
+ if (mbox_read(devfd, mbox, s_mbox) < 0)
break;
if (s_mbox->cmd != SBD_MSG_TEST) {
rc = 0;
@@ -748,7 +766,34 @@ out: free(s_mbox);
return rc;
}
-static void
+/*
+ * Make sure the kernel accepts the sysrq functions sbd relies on.
+ *
+ * Reads the current setting from /proc/sys/kernel/sysrq. If it is 1
+ * nothing is changed; otherwise the bits 8 and 128 are OR-ed into the
+ * value and the result is written back. Failures to open the file are
+ * reported and leave the setting untouched.
+ */
+void
+sysrq_init(void)
+{
+    FILE* procf;
+    int c = 0;
+
+    procf = fopen("/proc/sys/kernel/sysrq", "r");
+    if (!procf) {
+        cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
+        return;
+    }
+    /* Without this check an unparsable file left c uninitialized and the
+     * value written back below was garbage. Fall back to an empty mask
+     * so the required bits still get enabled. */
+    if (fscanf(procf, "%d", &c) != 1) {
+        cl_log(LOG_WARNING, "cannot parse /proc/sys/kernel/sysrq, assuming 0.");
+        c = 0;
+    }
+    fclose(procf);
+    if (c == 1)
+        return;
+    /* 8 for debugging dumps of processes,
+       128 for reboot/poweroff */
+    c |= 136;
+    procf = fopen("/proc/sys/kernel/sysrq", "w");
+    if (!procf) {
+        printf("cannot open /proc/sys/kernel/sysrq for write\n");
+        return;
+    }
+    fprintf(procf, "%d", c);
+    fclose(procf);
+    return;
+}
+
+void
sysrq_trigger(char t)
{
FILE *procf;
@@ -764,7 +809,16 @@ sysrq_trigger(char t)
return;
}
-static void
+/*
+ * Request a crash dump by sending 'c' to sysrq_trigger(). If control
+ * comes back from the trigger, fall back to cl_reboot() and finally
+ * terminate the process with exit status 1.
+ */
+void
+do_crashdump(void)
+{
+ sysrq_trigger('c');
+ /* is it possible to reach the following line? */
+ cl_reboot(5, "sbd is triggering crashdumping");
+ exit(1);
+}
+
+void
do_reset(void)
{
sysrq_trigger('b');
@@ -773,7 +827,7 @@ do_reset(void)
exit(1);
}
-static void
+void
do_off(void)
{
sysrq_trigger('o');
@@ -782,22 +836,20 @@ do_off(void)
exit(1);
}
-static void
+pid_t
make_daemon(void)
{
- long pid;
+ pid_t pid;
const char * devnull = "/dev/null";
- if (go_daemon > 0) {
- pid = fork();
- if (pid < 0) {
- cl_log(LOG_ERR, "%s: could not start daemon\n",
- cmdname);
- cl_perror("fork");
- exit(1);
- }else if (pid > 0) {
- exit(0);
- }
+ pid = fork();
+ if (pid < 0) {
+ cl_log(LOG_ERR, "%s: could not start daemon\n",
+ cmdname);
+ cl_perror("fork");
+ exit(1);
+ }else if (pid > 0) {
+ return pid;
}
cl_log_enable_stderr(FALSE);
@@ -813,97 +865,14 @@ make_daemon(void)
close(2);
(void)open(devnull, O_WRONLY);
cl_cdtocoredir();
+ return 0;
}
-
-static int
-daemonize(void)
-{
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int rc = 0;
- time_t t0, t1, latency;
-
- mbox = slot_allocate(local_uname);
- if (mbox < 0) {
- cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed.");
- rc = -1; goto out;
- }
- cl_log(LOG_INFO, "Monitoring slot %d", mbox);
-
- /* Clear mbox once on start */
- s_mbox = sector_alloc();
- if (mbox_write(mbox, s_mbox) < 0) {
- rc = -1; goto out;
- }
-
- make_daemon();
-
- if (watchdog_use != 0)
- watchdog_init();
-
- while (1) {
- t0 = time(NULL);
- sleep(timeout_loop);
-
- if (mbox_read(mbox, s_mbox) < 0) {
- cl_log(LOG_ERR, "mbox read failed.");
- do_reset();
- }
-
- if (s_mbox->cmd > 0) {
- cl_log(LOG_INFO, "Received command %s from %s",
- char2cmd(s_mbox->cmd), s_mbox->from);
-
- switch (s_mbox->cmd) {
- case SBD_MSG_TEST:
- memset(s_mbox, 0, sizeof(*s_mbox));
- mbox_write(mbox, s_mbox);
- break;
- case SBD_MSG_RESET:
- do_reset();
- break;
- case SBD_MSG_OFF:
- do_off();
- break;
- case SBD_MSG_EXIT:
- watchdog_close();
- goto out;
- break;
- default:
- /* TODO: Should we do something on
- * unknown messages? */
- cl_log(LOG_ERR, "Unknown message; suicide!");
- do_reset();
- break;
- }
- }
- watchdog_tickle();
-
- t1 = time(NULL);
- latency = t1 - t0;
-
- if (timeout_watchdog_warn
- && (latency > timeout_watchdog_warn)) {
- cl_log(LOG_WARNING, "Latency: %d exceeded threshold %d",
- (int)latency, (int)timeout_watchdog_warn);
- } else if (debug) {
- cl_log(LOG_INFO, "Latency: %d",
- (int)latency);
- }
-
- }
-
-out:
- free(s_mbox);
- return rc;
-}
-
-static int
-header_dump(void)
+int
+header_dump(int devfd)
{
struct sector_header_s *s_header;
- s_header = header_get();
+ s_header = header_get(devfd);
if (s_header == NULL)
return -1;
@@ -922,7 +891,7 @@ header_dump(void)
return 0;
}
-static void
+void
get_uname(void)
{
struct utsname uname_buf;
@@ -932,114 +901,10 @@ get_uname(void)
cl_perror("uname() failed?");
exit(1);
}
-
+
local_uname = strdup(uname_buf.nodename);
for (i = 0; i < strlen(local_uname); i++)
local_uname[i] = tolower(local_uname[i]);
}
-int
-main(int argc, char** argv)
-{
- int exit_status = 0;
- int c;
-
- if ((cmdname = strrchr(argv[0], '/')) == NULL) {
- cmdname = argv[0];
- }else{
- ++cmdname;
- }
-
- cl_log_set_entity(cmdname);
- cl_log_enable_stderr(0);
- cl_log_set_facility(LOG_DAEMON);
-
- get_uname();
-
- while ((c = getopt (argc, argv, "DRWhvw:d:n:1:2:3:4:5:")) != -1) {
- switch (c) {
- case 'D':
- go_daemon = 1;
- break;
- case 'R':
- skip_rt = 1;
- break;
- case 'v':
- debug = 1;
- break;
- case 'W':
- watchdog_use = 1;
- break;
- case 'w':
- watchdogdev = optarg;
- break;
- case 'd':
- devname = optarg;
- break;
- case 'n':
- local_uname = optarg;
- break;
- case '1':
- timeout_watchdog = atoi(optarg);
- break;
- case '2':
- timeout_allocate = atoi(optarg);
- break;
- case '3':
- timeout_loop = atoi(optarg);
- break;
- case '4':
- timeout_msgwait = atoi(optarg);
- break;
- case '5':
- timeout_watchdog_warn = atoi(optarg);
- break;
- case 'h':
- usage();
- return(0);
- default:
- exit_status = -1;
- goto out;
- break;
- }
- }
-
- /* There must at least be one command following the options: */
- if ( (argc - optind) < 1) {
- fprintf(stderr, "Not enough arguments.\n");
- exit_status = -1;
- goto out;
- }
-
- maximize_priority();
- if (open_device(devname) < 0) {
- exit_status = -1;
- goto out;
- }
-
- if (strcmp(argv[optind],"create") == 0) {
- exit_status = init_device();
- } else if (strcmp(argv[optind],"dump") == 0) {
- exit_status = header_dump();
- } else if (strcmp(argv[optind],"allocate") == 0) {
- exit_status = slot_allocate(argv[optind+1]);
- } else if (strcmp(argv[optind],"list") == 0) {
- exit_status = slot_list();
- } else if (strcmp(argv[optind],"message") == 0) {
- exit_status = slot_msg(argv[optind+1], argv[optind+2]);
- } else if (strcmp(argv[optind],"ping") == 0) {
- exit_status = slot_ping(argv[optind+1]);
- } else if (strcmp(argv[optind],"watch") == 0) {
- exit_status = daemonize();
- } else {
- exit_status = -1;
- }
-
-out:
- if (exit_status < 0) {
- usage();
- return(1);
- }
- return(0);
-}
diff --git a/lib/stonith/sbd-md.c b/lib/stonith/sbd-md.c
new file mode 100644
index 0000000..7e856e6
--- /dev/null
+++ b/lib/stonith/sbd-md.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ptrace.h>
+#include <fcntl.h>
+#include <time.h>
+#include <clplumbing/cl_log.h>
+#include <clplumbing/coredumps.h>
+#include <clplumbing/realtime.h>
+#include <clplumbing/cl_reboot.h>
+#include <clplumbing/setproctitle.h>
+#include <malloc.h>
+#include <time.h>
+#include <sys/utsname.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/fs.h>
+
+#include "sbd.h"
+
+/* Head of the singly linked list of per-device servants; entries are
+ * appended by recruit_servant(). */
+struct servants_list_item *servants_leader = NULL;
+
+/* Number of entries in the servants list (incremented by recruit_servant()). */
+static int servant_count = 0;
+/* presumably the restart interval in seconds behind the -t option
+ * (usage text: default 3600) -- TODO confirm against the option parser */
+static int servant_restart_interval = 3600;
+
+/* signals reserved for multi-disk sbd */
+#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
+#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
+#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
+#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */
+/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
+
+/* Debug Helper */
+/* Flip the condition below to 1 to get DBGPRINT output on stderr;
+ * as shipped, DBGPRINT expands to an empty statement. */
+#if 0
+#define DBGPRINT(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define DBGPRINT(...) do {} while (0)
+#endif
+
+/*
+ * Write quorum: non-zero only when good_servants is strictly greater than
+ * half of servant_count (integer division).
+ */
+int quorum_write(int good_servants)
+{
+    const int half = servant_count / 2;
+
+    return good_servants > half;
+}
+
+/*
+ * Read quorum: with fewer than three servants a single good one is enough;
+ * with three or more, strictly more than half of servant_count is required.
+ */
+int quorum_read(int good_servants)
+{
+    if (servant_count < 3)
+        return (good_servants >= 1);
+
+    return (good_servants > servant_count/2);
+}
+
+/*
+ * Fork a child that runs functionp(devname, argp).
+ *
+ * In the child the priority is raised via maximize_priority() and the
+ * process exits with status 1 when the function returns -1, 0 otherwise.
+ * In the parent the child's pid is returned. A failed fork() is logged
+ * and terminates the calling process with exit status 1.
+ */
+int assign_servant(const char* devname, functionp_t functionp, const void* argp)
+{
+    pid_t child;
+
+    DBGPRINT("fork servant for %s\n", devname);
+    child = fork();
+    switch (child) {
+    case -1:
+        cl_log(LOG_ERR,"Failed to fork servant");
+        exit(1);
+    case 0:
+        /* child */
+        maximize_priority();
+        exit((*functionp)(devname, argp) == -1 ? 1 : 0);
+    default:
+        /* parent */
+        return child;
+    }
+}
+
+/*
+ * Run init_device() on every device in the servants list, in list order.
+ *
+ * Progress is printed to stdout. Returns 0 when all devices were
+ * initialized and -1 as soon as one device cannot be opened or
+ * initialized; remaining devices are then left untouched.
+ */
+int init_devices()
+{
+    struct servants_list_item *dev = servants_leader;
+
+    while (dev != NULL) {
+        int fd;
+        int status;
+
+        fprintf(stdout, "Initializing device %s\n", dev->devname);
+        fd = open_device(dev->devname);
+        if (fd == -1)
+            return -1;
+
+        status = init_device(fd);
+        close(fd);
+        if (status == -1) {
+            fprintf(stderr, "Failed to init device %s\n", dev->devname);
+            return status;
+        }
+        fprintf(stdout, "Device %s is initialized.\n", dev->devname);
+        dev = dev->next;
+    }
+    return 0;
+}
+
+/*
+ * Servant entry point for delivering a message through one device.
+ *
+ * argp points to a struct slot_msg_arg_t whose name and msg members are
+ * handed to slot_msg(). The device is opened for the duration of the call.
+ * Returns -1 when the device cannot be opened, otherwise slot_msg()'s result.
+ */
+int slot_msg_wrapper(const char* devname, const void* argp)
+{
+    const struct slot_msg_arg_t *request = (const struct slot_msg_arg_t*)argp;
+    int status;
+    int fd = open_device(devname);
+
+    if (fd == -1)
+        return -1;
+
+    status = slot_msg(fd, request->name, request->msg);
+    close(fd);
+    return status;
+}
+
+/*
+ * Servant entry point for pinging a node through one device.
+ *
+ * argp is the node name as a C string and is passed to slot_ping().
+ * Returns -1 when the device cannot be opened, otherwise slot_ping()'s result.
+ */
+int slot_ping_wrapper(const char* devname, const void* argp)
+{
+    const char *target = (const char*)argp;
+    int status;
+    int fd = open_device(devname);
+
+    if (fd == -1)
+        return -1;
+
+    status = slot_ping(fd, target);
+    close(fd);
+    return status;
+}
+
+/*
+ * Allocate a slot for node <name> on every device in the servants list.
+ *
+ * Progress is printed to stdout. Returns 0 when a slot was obtained on
+ * every device, -1 as soon as a device cannot be opened or slot_allocate()
+ * reports -1.
+ */
+int allocate_slots(const char *name)
+{
+    struct servants_list_item *dev;
+
+    for (dev = servants_leader; dev != NULL; dev = dev->next) {
+        int fd;
+        int slot;
+
+        fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
+                name, dev->devname);
+        fd = open_device(dev->devname);
+        if (fd == -1)
+            return -1;
+
+        slot = slot_allocate(fd, name);
+        close(fd);
+        if (slot == -1)
+            return slot;
+
+        fprintf(stdout, "Slot for %s has been allocated on %s.\n",
+                name, dev->devname);
+    }
+    return 0;
+}
+
+/*
+ * Call slot_list() for every device in the servants list, in list order.
+ *
+ * Returns 0 on success, -1 as soon as a device cannot be opened or
+ * slot_list() reports -1.
+ */
+int list_slots()
+{
+    struct servants_list_item *dev = servants_leader;
+
+    while (dev != NULL) {
+        int fd;
+        int status;
+
+        DBGPRINT("list slots on device %s\n", dev->devname);
+        fd = open_device(dev->devname);
+        if (fd == -1)
+            return -1;
+
+        status = slot_list(fd);
+        close(fd);
+        if (status == -1)
+            return status;
+
+        dev = dev->next;
+    }
+    return 0;
+}
+
+/*
+ * Ping node <name> through every device in the servants list.
+ *
+ * One child per list entry is forked with assign_servant() running
+ * slot_ping_wrapper(); the parent then waits for SIGCHLD and reaps
+ * children until servant_count children known to the list have been
+ * collected.
+ *
+ * Always returns 0; the per-device outcome is only reported through
+ * DBGPRINT.
+ */
+int ping_via_slots(const char *name)
+{
+ int sig = 0;
+ pid_t pid = 0;
+ int status = 0;
+ int servants_finished = 0;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ struct servants_list_item *s;
+
+ DBGPRINT("you shall know no fear\n");
+ /* Block SIGCHLD so it can be collected synchronously with
+ * sigwaitinfo() below instead of through a handler. */
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ for (s = servants_leader; s; s = s->next) {
+ s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name);
+ }
+
+ while (servants_finished < servant_count) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ DBGPRINT("get signal %d\n", sig);
+ if (sig == SIGCHLD) {
+ /* Keep reaping until wait() reports that no children
+ * are left (ECHILD). */
+ while ((pid = wait(&status))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else {
+ s = lookup_servant_by_pid(pid);
+ if (s) {
+ DBGPRINT
+ ("A ping is delivered to %s via %s. ",
+ name, s->devname);
+ /* status is the raw wait() status; zero is
+ * reported as a response. */
+ if (!status)
+ DBGPRINT
+ ("They responed to the emporer\n");
+ else
+ DBGPRINT
+ ("There's no response\n");
+ servants_finished++;
+ }
+ }
+ }
+ }
+ DBGPRINT("signal %d handled\n", sig);
+ }
+ return 0;
+}
+
+/*
+ * Per-disk watcher, meant to run in a child created by assign_servant().
+ *
+ * Opens diskname, obtains the slot of local_uname via slot_allocate(),
+ * writes a freshly allocated mailbox sector to it and then loops forever:
+ * sleep timeout_loop seconds, read the mailbox, act on the command found
+ * there and report liveness to the parent with SIG_LIVENESS.
+ *
+ * diskname: device to watch; must not be NULL.
+ * argp:     not used by this function.
+ *
+ * Returns -1 when set-up fails (NULL name, device cannot be opened, no
+ * slot, initial mailbox write fails). The main loop has no exit path of
+ * its own; the process leaves it only through exit() or the do_*()
+ * fencing helpers.
+ */
+int servant(const char *diskname, const void* argp)
+{
+ struct sector_mbox_s *s_mbox = NULL;
+ int mbox;
+ int rc = 0;
+ time_t t0, t1, latency;
+ union sigval signal_value;
+ sigset_t servant_masks;
+ int devfd;
+ pid_t ppid;
+
+ if (!diskname) {
+ /* NOTE(review): diskname is NULL here, yet it is passed to
+ * a %s conversion -- confirm cl_log() tolerates that. */
+ cl_log(LOG_ERR, "Empty disk name %s.", diskname);
+ return -1;
+ }
+
+ /* Block most of the signals */
+ sigfillset(&servant_masks);
+ sigdelset(&servant_masks, SIGKILL);
+ sigdelset(&servant_masks, SIGFPE);
+ sigdelset(&servant_masks, SIGILL);
+ sigdelset(&servant_masks, SIGSEGV);
+ sigdelset(&servant_masks, SIGBUS);
+ sigdelset(&servant_masks, SIGALRM);
+ /* FIXME: check error */
+ sigprocmask(SIG_SETMASK, &servant_masks, NULL);
+
+ devfd = open_device(diskname);
+ if (devfd == -1) {
+ return -1;
+ }
+
+ mbox = slot_allocate(devfd, local_uname);
+ if (mbox < 0) {
+ cl_log(LOG_ERR,
+ "No slot allocated, and automatic allocation failed for disk %s.",
+ diskname);
+ rc = -1;
+ goto out;
+ }
+ cl_log(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
+ set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
+
+ /* presumably sector_alloc() returns a zeroed sector, so this write
+ * clears the mailbox once on start -- TODO confirm */
+ s_mbox = sector_alloc();
+ if (mbox_write(devfd, mbox, s_mbox) < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ /* The value carried by the queued signals is always zero. */
+ memset(&signal_value, 0, sizeof(signal_value));
+
+ while (1) {
+ t0 = time(NULL);
+ sleep(timeout_loop);
+
+ ppid = getppid();
+
+ if (ppid == 1) {
+ /* Our parent died unexpectedly. Triggering
+ * self-fence. */
+ do_reset();
+ }
+
+ if (mbox_read(devfd, mbox, s_mbox) < 0) {
+ cl_log(LOG_ERR, "mbox read failed in servant.");
+ exit(1);
+ }
+
+ if (s_mbox->cmd > 0) {
+ cl_log(LOG_INFO,
+ "Received command %s from %s on disk %s",
+ char2cmd(s_mbox->cmd), s_mbox->from, diskname);
+
+ switch (s_mbox->cmd) {
+ case SBD_MSG_TEST:
+ /* Clear the slot, then tell the parent about the test. */
+ memset(s_mbox, 0, sizeof(*s_mbox));
+ mbox_write(devfd, mbox, s_mbox);
+ sigqueue(ppid, SIG_TEST, signal_value);
+ break;
+ case SBD_MSG_RESET:
+ do_reset();
+ break;
+ case SBD_MSG_OFF:
+ do_off();
+ break;
+ case SBD_MSG_EXIT:
+ /* Only forwarded to the parent; this loop keeps running. */
+ sigqueue(ppid, SIG_EXITREQ, signal_value);
+ break;
+ case SBD_MSG_CRASHDUMP:
+ do_crashdump();
+ break;
+ default:
+ /* FIXME:
+ An "unknown" message might result
+ from a partial write.
+ log it and clear the slot.
+ */
+ cl_log(LOG_ERR, "Unknown message on disk %s",
+ diskname);
+ memset(s_mbox, 0, sizeof(*s_mbox));
+ mbox_write(devfd, mbox, s_mbox);
+ break;
+ }
+ }
+ /* Sent on every iteration that got past the mailbox read. */
+ sigqueue(ppid, SIG_LIVENESS, signal_value);
+
+ /* Latency covers the sleep plus the mailbox handling above. */
+ t1 = time(NULL);
+ latency = t1 - t0;
+ if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
+ cl_log(LOG_WARNING,
+ "Latency: %d exceeded threshold %d on disk %s",
+ (int)latency, (int)timeout_watchdog_warn,
+ diskname);
+ } else if (debug) {
+ cl_log(LOG_INFO, "Latency: %d on disk %s", (int)latency,
+ diskname);
+ }
+ }
+ /* Reached only from the set-up failures above. */
+ out:
+ free(s_mbox);
+ close(devfd);
+ devfd = -1;
+ return rc;
+}
+
+/*
+ * Append a new entry for devname to the end of the servants list and
+ * increment servant_count.
+ *
+ * devname: device path; a private copy is stored in the entry.
+ * pid:     pid to record for the entry.
+ *
+ * Allocation failures are fatal: a message is written to stderr and the
+ * process exits with status 1.
+ */
+void recruit_servant(const char *devname, pid_t pid)
+{
+    struct servants_list_item *s = servants_leader;
+    struct servants_list_item *newbie;
+
+    newbie = malloc(sizeof(*newbie));
+    if (!newbie) {
+        fprintf(stderr, "malloc failed in recruit_servant.");
+        exit(1);
+    }
+    memset(newbie, 0, sizeof(*newbie));
+    newbie->devname = strdup(devname);
+    /* strdup() can fail just like malloc(); without this check a NULL
+     * devname would be dereferenced later by every list walker. */
+    if (!newbie->devname) {
+        fprintf(stderr, "strdup failed in recruit_servant.\n");
+        exit(1);
+    }
+    newbie->pid = pid;
+
+    if (!s) {
+        servants_leader = newbie;
+    } else {
+        /* Walk to the tail so the list keeps insertion order. */
+        while (s->next)
+            s = s->next;
+        s->next = newbie;
+    }
+
+    servant_count++;
+}
+
+/*
+ * Find the servants list entry whose device name equals devname
+ * (case-insensitive).
+ *
+ * Returns the matching entry, or NULL when no entry matches.
+ */
+struct servants_list_item *lookup_servant_by_dev(const char *devname)
+{
+    struct servants_list_item *s;
+
+    for (s = servants_leader; s; s = s->next) {
+        /* The comparison used to stop on a non-zero result, i.e. on the
+         * first entry that did NOT match, and compared only the first
+         * strlen(s->devname) characters, so a stored name also matched
+         * any longer name it is a prefix of. Compare whole names and
+         * stop on equality. */
+        if (strcasecmp(s->devname, devname) == 0)
+            break;
+    }
+    return s;
+}
+
+/*
+ * Find the first servants list entry whose recorded pid equals pid.
+ *
+ * Returns the matching entry, or NULL when no entry matches.
+ */
+struct servants_list_item *lookup_servant_by_pid(pid_t pid)
+{
+    struct servants_list_item *cur = servants_leader;
+
+    while (cur != NULL && cur->pid != pid)
+        cur = cur->next;
+
+    return cur;
+}
+
+/*
+ * Report whether no servant process is left.
+ *
+ * Every entry with a non-zero pid is probed with signal 0. Returns 1 when
+ * each probe fails with ESRCH (or no entry has a pid), and 0 as soon as
+ * one probe succeeds or fails for any other reason.
+ */
+int check_all_dead(void)
+{
+    struct servants_list_item *s;
+    int r = 0;
+    union sigval svalue;
+
+    /* sigqueue() takes the value by copy; it was passed uninitialized. */
+    memset(&svalue, 0, sizeof(svalue));
+
+    for (s = servants_leader; s; s = s->next) {
+        if (s->pid != 0) {
+            r = sigqueue(s->pid, 0, svalue);
+            if (r == -1 && errno == ESRCH)
+                continue;
+            return 0;
+        }
+    }
+    return 1;
+}
+
+
+/*
+ * Start a servant for every list entry that has none running.
+ *
+ * An entry is skipped when it has a non-zero pid and probing that pid with
+ * signal 0 does not fail with ESRCH. For all other entries the restart
+ * counter is reset to 0 and a new child running servant() is forked with
+ * assign_servant().
+ */
+void servants_start(void)
+{
+    struct servants_list_item *s;
+    int r = 0;
+    union sigval svalue;
+
+    /* sigqueue() takes the value by copy; it was passed uninitialized. */
+    memset(&svalue, 0, sizeof(svalue));
+
+    for (s = servants_leader; s; s = s->next) {
+        if (s->pid != 0) {
+            r = sigqueue(s->pid, 0, svalue);
+            if ((r != -1 || errno != ESRCH))
+                continue;
+        }
+        s->restarts = 0;
+        s->pid = assign_servant(s->devname, servant, NULL);
+    }
+}
+
+/*
+ * Send SIGKILL to every servant that has a recorded (non-zero) pid.
+ * Delivery failures are ignored.
+ */
+void servants_kill(void)
+{
+	struct servants_list_item *s;
+	union sigval svalue;
+
+	/* sigqueue() takes the value by copy; do not pass garbage. */
+	memset(&svalue, 0, sizeof(svalue));
+
+	for (s = servants_leader; s; s = s->next) {
+		if (s->pid != 0)
+			sigqueue(s->pid, SIGKILL, svalue);
+	}
+}
+
+/*
+ * Read the header of every configured device and compare the four
+ * timeout fields between consecutive readable headers.
+ *
+ * Devices that cannot be opened or whose header cannot be read are
+ * skipped. The timeouts of the last readable header are copied into
+ * the global timeout variables. If no header could be read at all,
+ * an error is logged and the process exits.
+ *
+ * Returns 1 if any two consecutive headers differ, otherwise 0.
+ */
+int check_timeout_inconsistent(void)
+{
+	struct sector_header_s *prev = 0;
+	struct sector_header_s *cur = 0;
+	struct servants_list_item *item = servants_leader;
+	int mismatch = 0;
+	int fd;
+
+	while (item) {
+		const char *dev = item->devname;
+
+		item = item->next;
+
+		fd = open_device(dev);
+		if (fd < 0)
+			continue;
+		cur = header_get(fd);
+		close(fd);
+		if (!cur)
+			continue;
+
+		if (prev) {
+			int same =
+			    prev->timeout_watchdog == cur->timeout_watchdog
+			    && prev->timeout_allocate == cur->timeout_allocate
+			    && prev->timeout_loop == cur->timeout_loop
+			    && prev->timeout_msgwait == cur->timeout_msgwait;
+
+			if (!same)
+				mismatch = 1;
+			free(prev);
+		}
+		prev = cur;
+	}
+
+	if (!prev) {
+		cl_log(LOG_ERR, "No devices were available at start-up.");
+		exit(1);
+	}
+
+	timeout_watchdog = prev->timeout_watchdog;
+	timeout_allocate = prev->timeout_allocate;
+	timeout_loop = prev->timeout_loop;
+	timeout_msgwait = prev->timeout_msgwait;
+
+	free(prev);
+	return mismatch;
+}
+
+/*
+ * Forget the pid of a servant that has terminated: the matching list
+ * entry gets pid 0. An unknown pid is only logged.
+ */
+inline void cleanup_servant_by_pid(pid_t pid)
+{
+	struct servants_list_item *entry = lookup_servant_by_pid(pid);
+
+	if (!entry) {
+		/* TODO: This points to an inconsistency in our internal
+		 * data - how to recover? */
+		cl_log(LOG_ERR, "Cannot cleanup after unknown pid %i",
+		       pid);
+		return;
+	}
+	entry->pid = 0;
+}
+
+/*
+ * Restart the servant that was running as `pid`.
+ *
+ * A new servant is assigned for the entry's device and its restart
+ * counter is incremented, unless the counter has already reached 10,
+ * in which case only a warning is logged. An unknown pid is logged
+ * as an error.
+ */
+void restart_servant_by_pid(pid_t pid)
+{
+	struct servants_list_item *entry = lookup_servant_by_pid(pid);
+
+	if (!entry) {
+		/* TODO: This points to an inconsistency in our internal
+		 * data - how to recover? */
+		cl_log(LOG_ERR, "Cannot restart unknown pid %i",
+		       pid);
+		return;
+	}
+
+	if (entry->restarts >= 10) {
+		cl_log(LOG_WARNING, "Max retry count reached: not restarting servant for %s",
+		       entry->devname);
+		return;
+	}
+
+	entry->pid = assign_servant(entry->devname, servant, NULL);
+	entry->restarts++;
+}
+
+/*
+ * Called once the inquisitor has seen enough liveness reports for the
+ * first time: arm the watchdog (if enabled) and notify the parent
+ * process with SIG_LIVENESS.
+ *
+ * Returns 0 on success, -1 if watchdog_init() failed.
+ */
+int inquisitor_decouple(void)
+{
+	pid_t ppid = getppid();
+	union sigval signal_value;
+
+	/* sigqueue() takes the value by copy; do not pass garbage. */
+	memset(&signal_value, 0, sizeof(signal_value));
+
+	/* During start-up, we only arm the watchdog once we've got
+	 * quorum at least once. */
+	if (watchdog_use) {
+		if (watchdog_init() < 0) {
+			return -1;
+		}
+	}
+
+	/* Only signal a parent pid greater than 1. */
+	if (ppid > 1) {
+		sigqueue(ppid, SIG_LIVENESS, signal_value);
+	}
+	return 0;
+}
+
+/*
+ * Main loop of the inquisitor process. Never returns.
+ *
+ * Starts the servants, then waits (with a timeout of timeout_loop
+ * seconds) for signals: exit requests, child terminations, liveness
+ * reports from servants and restart requests. Whenever quorum_read()
+ * accepts the number of distinct servants that reported, the watchdog
+ * is tickled. If no tickle happened for more than timeout_watchdog
+ * seconds, the process either aborts start-up (not yet decoupled
+ * from the parent) or calls do_reset().
+ */
+void inquisitor_child(void)
+{
+ int sig, pid, i;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ /* pids of the servants that reported since the last tickle */
+ int *reports;
+ int status;
+ struct timespec timeout;
+ int good_servants = 0;
+ int exiting = 0;
+ int decoupled = 0;
+ time_t latency;
+ struct timespec t_last_tickle, t_now, t_last_restarted;
+
+ set_proc_title("sbd: inquisitor");
+
+ reports = malloc(sizeof(int) * servant_count);
+ if (!reports) {
+ cl_log(LOG_ERR, "malloc failed");
+ exit(1);
+ }
+ memset(reports, 0, sizeof(int) * servant_count);
+
+ /* Block the signals we handle so they can be collected
+ * synchronously with sigtimedwait() below. */
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigaddset(&procmask, SIG_LIVENESS);
+ sigaddset(&procmask, SIG_EXITREQ);
+ sigaddset(&procmask, SIG_TEST);
+ sigaddset(&procmask, SIGUSR1);
+ sigaddset(&procmask, SIGUSR2);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ servants_start();
+
+ timeout.tv_sec = timeout_loop;
+ timeout.tv_nsec = 0;
+ good_servants = 0;
+ clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+ clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+
+ while (1) {
+ sig = sigtimedwait(&procmask, &sinfo, &timeout);
+ DBGPRINT("got signal %d\n", sig);
+
+ if (sig == SIG_EXITREQ) {
+ servants_kill();
+ watchdog_close();
+ exiting = 1;
+ } else if (sig == SIGCHLD) {
+ /* Reap every terminated child; restart it unless
+ * we are shutting down. */
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else if (exiting) {
+ cleanup_servant_by_pid(pid);
+ } else {
+ restart_servant_by_pid(pid);
+ }
+ }
+ } else if (sig == SIG_LIVENESS) {
+ /* Count each reporting pid once per round: stop at
+ * an existing entry, or record it in the first free
+ * slot. */
+ for (i = 0; i < servant_count; i++) {
+ if (reports[i] == sinfo.si_pid) {
+ break;
+ } else if (reports[i] == 0) {
+ reports[i] = sinfo.si_pid;
+ good_servants++;
+ break;
+ }
+ }
+ } else if (sig == SIG_TEST) {
+ } else if (sig == SIGUSR1) {
+ if (exiting)
+ continue;
+ clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+ servants_start();
+ }
+
+ /* While shutting down, only wait for the servants to go. */
+ if (exiting) {
+ if (check_all_dead())
+ exit(0);
+ else
+ continue;
+ }
+
+ if (quorum_read(good_servants)) {
+ DBGPRINT("Enough liveness messages\n");
+ if (!decoupled) {
+ if (inquisitor_decouple() < 0) {
+ servants_kill();
+ exiting = 1;
+ continue;
+ } else {
+ decoupled = 1;
+ }
+ }
+
+ /* Start a new reporting round after each tickle. */
+ watchdog_tickle();
+ clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+ memset(reports, 0, sizeof(int) * servant_count);
+ good_servants = 0;
+ }
+
+ /* Seconds since the watchdog was last tickled. */
+ clock_gettime(CLOCK_MONOTONIC, &t_now);
+ latency = t_now.tv_sec - t_last_tickle.tv_sec;
+ if (timeout_watchdog && (latency > timeout_watchdog)) {
+ if (!decoupled) {
+ /* We're still being watched by our
+ * parent. We don't fence, but exit. */
+ cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
+ servants_kill();
+ exiting = 1;
+ continue;
+ }
+ do_reset();
+ }
+ if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
+ cl_log(LOG_WARNING,
+ "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)",
+ (int)latency, (int)timeout_watchdog_warn, good_servants);
+ }
+
+ latency = t_now.tv_sec - t_last_restarted.tv_sec;
+ if (servant_restart_interval > 0
+ && latency > servant_restart_interval) {
+ /* Once servant_restart_interval seconds have passed,
+ * start servants that are no longer running. */
+ clock_gettime(CLOCK_MONOTONIC, &t_last_restarted);
+ servants_start();
+ }
+ }
+ /* not reached */
+ exit(0);
+}
+
+/*
+ * Entry point of the "watch" command.
+ *
+ * Verifies that the timeout settings agree across devices, forks the
+ * inquisitor via make_daemon() and then waits for its verdict.
+ *
+ * Returns 0 once the child reports SIG_LIVENESS, -1 if the timeouts
+ * are inconsistent or a child process terminates first.
+ */
+int inquisitor(void)
+{
+ int sig, pid, inquisitor_pid;
+ int status;
+ sigset_t procmask;
+ siginfo_t sinfo;
+
+ DBGPRINT("inquisitor starting\n");
+
+ /* Where's the best place for sysrq init ?*/
+ sysrq_init();
+
+ /* Block the two signals we wait for with sigwaitinfo() below. */
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigaddset(&procmask, SIG_LIVENESS);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ if (check_timeout_inconsistent() == 1) {
+ fprintf(stderr, "Timeout settings are different across SBD devices!\n");
+ fprintf(stderr, "You have to correct them and re-start SBD again.\n");
+ return -1;
+ }
+
+ inquisitor_pid = make_daemon();
+ if (inquisitor_pid == 0) {
+ inquisitor_child();
+ }
+
+ /* We're the parent. Wait for a happy signal from our child
+ * before we proceed - we either get "SIG_LIVENESS" when the
+ * inquisitor has completed the first successful round, or
+ * SIGCHLD when it exits with an error. */
+
+ while (1) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ DBGPRINT("get signal %d\n", sig);
+ if (sig == SIGCHLD) {
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ }
+ /* We got here because the inquisitor
+ * did not succeed. */
+ return -1;
+ }
+ } else if (sig == SIG_LIVENESS) {
+ /* Inquisitor started up properly. */
+ return 0;
+ } else {
+ fprintf(stderr, "Nobody expected the spanish inquisition!\n");
+ continue;
+ }
+ }
+ /* not reached */
+ return -1;
+}
+
+/*
+ * Deliver message `msg` to node `name` through all configured devices.
+ *
+ * One servant per device is started with slot_msg_wrapper(). The
+ * function then reaps children until either quorum_write() accepts
+ * the number of servants that exited with status 0, or every servant
+ * has finished.
+ *
+ * Returns 0 if quorum_write() is satisfied, -1 otherwise.
+ */
+int messenger(const char *name, const char *msg)
+{
+ int sig = 0;
+ pid_t pid = 0;
+ int status = 0;
+ int servants_finished = 0;
+ int successful_delivery = 0;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ struct servants_list_item *s;
+ struct slot_msg_arg_t slot_msg_arg = {name, msg};
+
+ /* Block SIGCHLD so it can be collected with sigwaitinfo(). */
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ for (s = servants_leader; s; s = s->next) {
+ s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg);
+ }
+
+ while (!(quorum_write(successful_delivery) ||
+ (servants_finished == servant_count))) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ DBGPRINT("get signal %d\n", sig);
+ if (sig == SIGCHLD) {
+ /* One SIGCHLD may stand for several children;
+ * reap all that are ready. */
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else {
+ DBGPRINT("process %d finished\n", pid);
+ servants_finished++;
+ if (WIFEXITED(status)
+ && WEXITSTATUS(status) == 0) {
+ DBGPRINT("exit with %d\n",
+ WEXITSTATUS(status));
+ successful_delivery++;
+ }
+ }
+ }
+ }
+ DBGPRINT("signal %d handled\n", sig);
+ }
+ if (quorum_write(successful_delivery)) {
+ return 0;
+ } else {
+ fprintf(stderr, "Message is not delivered via more then a half of devices\n");
+ return -1;
+ }
+}
+
+/*
+ * Print the header of every configured device to stdout.
+ *
+ * Stops at the first device that cannot be opened or dumped and
+ * returns -1; otherwise returns the result of the last header_dump()
+ * call (0 if the device list is empty).
+ */
+int dump_headers(void)
+{
+	struct servants_list_item *item = servants_leader;
+	int result = 0;
+	int fd;
+
+	while (item) {
+		fprintf(stdout, "==Dumping header on disk %s\n", item->devname);
+
+		fd = open_device(item->devname);
+		if (fd == -1)
+			return -1;
+
+		result = header_dump(fd);
+		close(fd);
+		if (result == -1)
+			return result;
+
+		fprintf(stdout, "==Header on disk %s is dumped\n", item->devname);
+		item = item->next;
+	}
+	return result;
+}
+
+/*
+ * Program entry point: parse options, then dispatch to the command
+ * named by the first non-option argument (create, dump, allocate,
+ * list, message, ping, watch).
+ *
+ * Returns 0 on success; on any failure prints the usage text and
+ * returns 1.
+ */
+int main(int argc, char **argv, char **envp)
+{
+	int exit_status = 0;
+	int c;
+	int nargs;
+	int needed;
+	const char *cmd;
+
+	if ((cmdname = strrchr(argv[0], '/')) == NULL) {
+		cmdname = argv[0];
+	} else {
+		++cmdname;
+	}
+
+	cl_log_set_entity(cmdname);
+	cl_log_enable_stderr(0);
+	cl_log_set_facility(LOG_DAEMON);
+
+	get_uname();
+
+	/* 'T' must be listed here, otherwise its case is unreachable. */
+	while ((c = getopt(argc, argv, "DRTWhvw:d:n:1:2:3:4:5:t:")) != -1) {
+		switch (c) {
+		case 'D':
+			/* Ignore for historical reasons */
+			break;
+		case 'R':
+			skip_rt = 1;
+			break;
+		case 'v':
+			debug = 1;
+			break;
+		case 'T':
+			watchdog_set_timeout = 0;
+			break;
+		case 'W':
+			watchdog_use = 1;
+			break;
+		case 'w':
+			watchdogdev = optarg;
+			break;
+		case 'd':
+			recruit_servant(optarg, 0);
+			break;
+		case 'n':
+			local_uname = optarg;
+			break;
+		case '1':
+			timeout_watchdog = atoi(optarg);
+			break;
+		case '2':
+			timeout_allocate = atoi(optarg);
+			break;
+		case '3':
+			timeout_loop = atoi(optarg);
+			break;
+		case '4':
+			timeout_msgwait = atoi(optarg);
+			break;
+		case '5':
+			timeout_watchdog_warn = atoi(optarg);
+			break;
+		case 't':
+			servant_restart_interval = atoi(optarg);
+			break;
+		case 'h':
+			usage();
+			return (0);
+		default:
+			exit_status = -1;
+			goto out;
+			break;
+		}
+	}
+
+	if (servant_count < 1 || servant_count > 3) {
+		fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n");
+		exit_status = -1;
+		goto out;
+	}
+
+	/* There must at least be one command following the options: */
+	nargs = argc - optind;
+	if (nargs < 1) {
+		fprintf(stderr, "Not enough arguments.\n");
+		exit_status = -1;
+		goto out;
+	}
+	cmd = argv[optind];
+
+	/* Commands that take operands must not read past argv[argc]. */
+	needed = 1;
+	if (strcmp(cmd, "allocate") == 0 || strcmp(cmd, "ping") == 0) {
+		needed = 2;
+	} else if (strcmp(cmd, "message") == 0) {
+		needed = 3;
+	}
+	if (nargs < needed) {
+		fprintf(stderr, "Not enough arguments.\n");
+		exit_status = -1;
+		goto out;
+	}
+
+	if (init_set_proc_title(argc, argv, envp) < 0) {
+		fprintf(stderr, "Allocation of proc title failed.");
+		exit(1);
+	}
+
+	maximize_priority();
+
+	if (strcmp(cmd, "create") == 0) {
+		exit_status = init_devices();
+	} else if (strcmp(cmd, "dump") == 0) {
+		exit_status = dump_headers();
+	} else if (strcmp(cmd, "allocate") == 0) {
+		exit_status = allocate_slots(argv[optind + 1]);
+	} else if (strcmp(cmd, "list") == 0) {
+		exit_status = list_slots();
+	} else if (strcmp(cmd, "message") == 0) {
+		exit_status = messenger(argv[optind + 1], argv[optind + 2]);
+	} else if (strcmp(cmd, "ping") == 0) {
+		exit_status = ping_via_slots(argv[optind + 1]);
+	} else if (strcmp(cmd, "watch") == 0) {
+		exit_status = inquisitor();
+	} else {
+		exit_status = -1;
+	}
+
+out:
+	if (exit_status < 0) {
+		usage();
+		return (1);
+	}
+	return (0);
+}
diff --git a/lib/stonith/sbd.h b/lib/stonith/sbd.h
index af2c124..001824b 100644
--- a/lib/stonith/sbd.h
+++ b/lib/stonith/sbd.h
@@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <arpa/inet.h>
-
+#include <sys/types.h>
/* Sector data types */
struct sector_header_s {
@@ -42,51 +42,110 @@ struct sector_node_s {
char name[64];
};
+/* One node of the singly linked list of configured SBD devices and
+ * the servant process handling each of them. */
+struct servants_list_item {
+ /* device path; recruit_servant() stores a strdup()ed copy */
+ const char* devname;
+ /* pid of the servant process; 0 when none is recorded */
+ pid_t pid;
+ /* restart counter; reset by servants_start(), incremented by
+ * restart_servant_by_pid() */
+ int restarts;
+ /* next list entry, NULL at the tail */
+ struct servants_list_item *next;
+};
+
#define SBD_MSG_EMPTY 0x00
#define SBD_MSG_TEST 0x01
#define SBD_MSG_RESET 0x02
#define SBD_MSG_OFF 0x03
#define SBD_MSG_EXIT 0x04
+#define SBD_MSG_CRASHDUMP 0x05
#define SLOT_TO_SECTOR(slot) (1+slot*2)
#define MBOX_TO_SECTOR(mbox) (2+mbox*2)
-static void usage(void);
-static void watchdog_init_interval(void);
-static void watchdog_tickle(void);
-static void watchdog_init(void);
-static void watchdog_close(void);
-static int open_device(const char* devname);
-static signed char cmd2char(const char *cmd);
-static void * sector_alloc(void);
-static const char* char2cmd(const char cmd);
-static int sector_write(int sector, const void *data);
-static int sector_read(int sector, void *data);
-static int slot_read(int slot, struct sector_node_s *s_node);
-static int slot_write(int slot, const struct sector_node_s *s_node);
-static int mbox_write(int mbox, const struct sector_mbox_s *s_mbox);
-static int mbox_read(int mbox, struct sector_mbox_s *s_mbox);
-static int mbox_write_verify(int mbox, const struct sector_mbox_s *s_mbox);
+void usage(void);
+int watchdog_init_interval(void);
+int watchdog_tickle(void);
+int watchdog_init(void);
+void sysrq_init(void);
+void watchdog_close(void);
+int open_device(const char* devname);
+signed char cmd2char(const char *cmd);
+void * sector_alloc(void);
+const char* char2cmd(const char cmd);
+int sector_write(int devfd, int sector, const void *data);
+int sector_read(int devfd, int sector, void *data);
+int slot_read(int devfd, int slot, struct sector_node_s *s_node);
+int slot_write(int devfd, int slot, const struct sector_node_s *s_node);
+int mbox_write(int devfd, int mbox, const struct sector_mbox_s *s_mbox);
+int mbox_read(int devfd, int mbox, struct sector_mbox_s *s_mbox);
+int mbox_write_verify(int devfd, int mbox, const struct sector_mbox_s *s_mbox);
/* After a call to header_write(), certain data fields will have been
* converted to on-disk byte-order; the header should not be accessed
* afterwards anymore! */
-static int header_write(struct sector_header_s *s_header);
-static int header_read(struct sector_header_s *s_header);
-static int valid_header(const struct sector_header_s *s_header);
-static struct sector_header_s * header_get(void);
-static int init_device(void);
-static int slot_lookup(const struct sector_header_s *s_header, const char *name);
-static int slot_unused(const struct sector_header_s *s_header);
-static int slot_allocate(const char *name);
-static int slot_list(void);
-static int slot_ping(const char *name);
-static int slot_msg(const char *name, const char *cmd);
-static int header_dump(void);
-static void sysrq_trigger(char t);
-static void do_reset(void);
-static void do_off(void);
-static void make_daemon(void);
-static int daemonize(void);
-static void maximize_priority(void);
-static void get_uname(void);
+int header_write(int devfd, struct sector_header_s *s_header);
+int header_read(int devfd, struct sector_header_s *s_header);
+int valid_header(const struct sector_header_s *s_header);
+struct sector_header_s * header_get(int devfd);
+int init_device(int devfd);
+int slot_lookup(int devfd, const struct sector_header_s *s_header, const char *name);
+int slot_unused(int devfd, const struct sector_header_s *s_header);
+int slot_allocate(int devfd, const char *name);
+int slot_list(int devfd);
+int slot_ping(int devfd, const char *name);
+int slot_msg(int devfd, const char *name, const char *cmd);
+int header_dump(int devfd);
+void sysrq_trigger(char t);
+void do_crashdump(void);
+void do_reset(void);
+void do_off(void);
+pid_t make_daemon(void);
+void maximize_priority(void);
+void get_uname(void);
+
+/* Tunable defaults: */
+extern unsigned long timeout_watchdog;
+extern unsigned long timeout_watchdog_warn;
+extern int timeout_allocate;
+extern int timeout_loop;
+extern int timeout_msgwait;
+extern int watchdog_use;
+extern int watchdog_set_timeout;
+extern int skip_rt;
+extern int debug;
+extern const char *watchdogdev;
+extern char* local_uname;
+
+/* Global, non-tunable variables: */
+extern int sector_size;
+extern int watchdogfd;
+extern const char* cmdname;
+
+typedef int (*functionp_t)(const char* devname, const void* argp);
+
+int assign_servant(const char* devname, functionp_t functionp, const void* argp);
+int init_devices(void);
+struct slot_msg_arg_t {
+ const char* name;
+ const char* msg;
+};
+int slot_msg_wrapper(const char* devname, const void* argp);
+int slot_ping_wrapper(const char* devname, const void* argp);
+int allocate_slots(const char *name);
+int list_slots(void);
+int ping_via_slots(const char *name);
+int dump_headers(void);
+
+int check_all_dead(void);
+int servant(const char *diskname, const void* argp);
+void recruit_servant(const char *devname, pid_t pid);
+struct servants_list_item *lookup_servant_by_dev(const char *devname);
+struct servants_list_item *lookup_servant_by_pid(pid_t pid);
+void servants_kill(void);
+void servants_start(void);
+void inquisitor_child(void);
+int inquisitor(void);
+int inquisitor_decouple(void);
+int messenger(const char *name, const char *msg);
+int check_timeout_inconsistent(void);
+void restart_servant_by_pid(pid_t pid);
+void cleanup_servant_by_pid(pid_t pid);
+int quorum_write(int good_servants);
+int quorum_read(int good_servants);
diff --git a/lrm/admin/Makefile.am b/lrm/admin/Makefile.am
index c503ccf..a92cd72 100644
--- a/lrm/admin/Makefile.am
+++ b/lrm/admin/Makefile.am
@@ -25,6 +25,7 @@ halibdir = $(libdir)/@HB_PKG@
COMMONLIBS = $(top_builddir)/lib/clplumbing/libplumb.la $(GLIBLIB)
LRM_DIR = lrm
sbin_PROGRAMS = lrmadmin
+sbin_SCRIPTS = cibsecret
lrmadmin_SOURCES = lrmadmin.c
lrmadmin_LDFLAGS = $(COMMONLIBS)
lrmadmin_LDADD = $(top_builddir)/lib/$(LRM_DIR)/liblrm.la
diff --git a/lrm/admin/cibsecret.in b/lrm/admin/cibsecret.in
new file mode 100755
index 0000000..8994667
--- /dev/null
+++ b/lrm/admin/cibsecret.in
@@ -0,0 +1,347 @@
+#!/bin/sh
+
+# Copyright (C) 2011 Dejan Muhamedagic <dmuhamedagic at suse.de>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# WARNING:
+#
+# The CIB secrets interface and implementation is still being
+# discussed, it may change
+
+#
+# cibsecret: manage the secrets directory /var/lib/heartbeat/lrm/secrets
+#
+# secrets are ascii files, holding just one value per file:
+# /var/lib/heartbeat/lrm/secrets/<rsc>/<param>
+#
+# NB: this program depends on utillib.sh
+#
+
+. @OCF_ROOT_DIR@/resource.d/heartbeat/.ocf-shellfuncs
+
+HA_NOARCHBIN=@datadir@/@PACKAGE_NAME@
+
+. $HA_NOARCHBIN/utillib.sh
+
+LRM_CIBSECRETS=$HA_VARLIB/lrm/secrets
+
+PROG=`basename $0`
+
+# usage <exit-code>
+# Print the help text to stdout and exit with the given code.
+usage() {
+ cat<<EOF
+usage: $PROG [-C] <command> <parameters>
+
+-C: don't read/write the CIB
+
+command: set | delete | stash | unstash | get | check | sync
+
+ set <rsc> <param> <value>
+ get <rsc> <param>
+ check <rsc> <param>
+ stash <rsc> <param> (if not -C)
+ unstash <rsc> <param> (if not -C)
+ delete <rsc> <param>
+ sync
+
+stash/unstash: move the parameter from/to the CIB (if you already
+ have the parameter set in the CIB).
+
+set/delete: add/remove a parameter from the local file.
+
+get: display the parameter from the local file.
+
+check: verify MD5 hash of the parameter from the local file and the CIB.
+
+sync: copy $LRM_CIBSECRETS to other nodes.
+
+Examples:
+
+ $PROG set ipmi_node1 passwd SecreT_PASS
+ $PROG stash ipmi_node1 passwd
+ $PROG get ipmi_node1 passwd
+ $PROG check ipmi_node1 passwd
+ $PROG sync
+EOF
+ exit $1
+}
+# fatal <message...>
+# Print an error message to stdout and exit with status 1.
+# NOTE(review): when called inside a command substitution (e.g. via
+# get_cib_param), the exit only ends the subshell and the message is
+# captured by the caller instead of being shown - confirm intended.
+fatal() {
+ echo "ERROR: $*"
+ exit 1
+}
+# warn <message...>: print a warning to stdout.
+warn() {
+ echo "WARNING: $*"
+}
+# info <message...>: print an informational message to stdout.
+info() {
+ echo "INFO: $*"
+}
+
+# Verify the runtime environment: md5sum must be installed, and the
+# first available of pssh, pdsh or ssh is selected as the remote
+# shell/copy pair ($rsh/$rcp). Exits via fatal if none is found or if
+# no crmd process shows up in the process list.
+check_env() {
+ which md5sum >/dev/null 2>&1 ||
+ fatal "please install md5sum to run $PROG"
+ if which pssh >/dev/null 2>&1; then
+ rsh=pssh_fun
+ rcp=pscp_fun
+ elif which pdsh >/dev/null 2>&1; then
+ rsh=pdsh_fun
+ rcp=pdcp_fun
+ elif which ssh >/dev/null 2>&1; then
+ rsh=ssh_fun
+ rcp=scp_fun
+ else
+ fatal "please install pssh, pdsh, or ssh to run $PROG"
+ fi
+ # '[c]rmd' keeps the grep process itself from matching
+ ps -ef | grep '[c]rmd' >/dev/null ||
+ fatal "pacemaker not running? $PROG needs pacemaker"
+}
+
+get_other_nodes() {
+ crm_node -l | awk '{print $2}' | grep -v `uname -n`
+}
+# check_down_nodes <node...>
+# Warn about every name that occurs exactly once in the argument
+# list. The callers pass the live nodes followed by all other nodes,
+# so a name appearing only once is a node that is not live.
+check_down_nodes() {
+ local n down_nodes
+ down_nodes=`(for n; do echo $n; done) | sort | uniq -u`
+ if [ -n "$down_nodes" ]; then
+ if [ `echo $down_nodes | wc -w` = 1 ]; then
+ warn "node $down_nodes is down"
+ warn "you'll need to update it using $PROG sync later"
+ else
+ warn "nodes `echo $down_nodes` are down"
+ warn "you'll need to update them using $PROG sync later"
+ fi
+ fi
+}
+
+# Remote execution/copy wrappers. check_env selects one pair and
+# stores the function names in $rsh and $rcp. All of them operate on
+# the hosts listed in $nodes (set by the caller) and rely on word
+# splitting of their unquoted arguments.
+
+# run a command on $nodes with pssh
+pssh_fun() {
+ pssh -q -H "$nodes" $*
+}
+# copy files to $nodes with pscp
+pscp_fun() {
+ pscp -q -H "$nodes" -x "-pr" $*
+}
+# run a command on $nodes with pdsh (wants a comma separated list)
+pdsh_fun() {
+ local pdsh_nodes=`echo $nodes | tr ' ' ','`
+ pdsh -w $pdsh_nodes $*
+}
+# copy files to $nodes with pdcp (wants a comma separated list)
+pdcp_fun() {
+ local pdsh_nodes=`echo $nodes | tr ' ' ','`
+ pdcp -pr -w $pdsh_nodes $*
+}
+# run a command on each of $nodes in turn with ssh
+ssh_fun() {
+ local h
+ for h in $nodes; do
+ ssh $h $*
+ done
+}
+# scp_fun <src> <dest>: copy src to dest on each of $nodes in turn
+scp_fun() {
+ local h src="$1" dest=$2
+ for h in $nodes; do
+ scp -pr -q $src $h:$dest
+ done
+}
+# TODO: this procedure should be replaced with csync2
+# provided that csync2 has already been configured
+#
+# Replace the whole secrets directory on all live peer nodes with the
+# local copy: the remote directory is removed first, then the local
+# one is copied into its parent directory.
+# NOTE(review): get_live_nodes is not defined in this file; presumably
+# it comes from utillib.sh - confirm.
+sync_files() {
+ local crm_nodes=`get_other_nodes`
+ local nodes=`get_live_nodes $crm_nodes`
+ check_down_nodes $nodes $crm_nodes
+ [ "$nodes" = "" ] && {
+ info "no other nodes live"
+ return
+ }
+ info "syncing $LRM_CIBSECRETS to `echo $nodes` ..."
+ $rsh rm -rf $LRM_CIBSECRETS
+ $rsh mkdir -p `dirname $LRM_CIBSECRETS`
+ $rcp $LRM_CIBSECRETS `dirname $LRM_CIBSECRETS`
+}
+# sync_one <file>
+# Propagate one secret file and its .sign companion to all live peer
+# nodes. If the file exists locally both files are copied; otherwise
+# both are removed on the peers.
+sync_one() {
+ # f_all holds both names in one string; it is split into words
+ # again inside the $rcp/$rsh wrappers
+ local f=$1 f_all="$1 $1.sign"
+ local crm_nodes=`get_other_nodes`
+ local nodes=`get_live_nodes $crm_nodes`
+ check_down_nodes $nodes $crm_nodes
+ [ "$nodes" = "" ] && {
+ info "no other nodes live"
+ return
+ }
+ info "syncing $f to `echo $nodes` ..."
+ $rsh mkdir -p `dirname $f`
+ if [ -f "$f" ]; then
+ $rcp "$f_all" `dirname $f`
+ else
+ $rsh rm -f $f_all
+ fi
+}
+
+# is_secret <value>
+# Succeed if <value> equals the magic marker $MAGIC, or
+# unconditionally when running without CIB access (-C).
+is_secret() {
+ # assume that the secret is in the CIB if we cannot talk to
+ # cib
+ [ "$NO_CRM" ] ||
+ test "$1" = "$MAGIC"
+}
+check_cib_rsc() {
+ local rsc=$1 output
+ output=`$NO_CRM crm_resource -r $rsc -W >/dev/null 2>&1` ||
+ fatal "resource $rsc doesn't exist: $output"
+}
+# CIB accessors. Each one first verifies that the resource exists and
+# then calls crm_resource; with -C ($NO_CRM set to ':') the
+# crm_resource calls turn into no-ops.
+
+# get_cib_param <rsc> <param>: print the parameter value from the CIB
+get_cib_param() {
+ local rsc=$1 param=$2
+ check_cib_rsc $rsc
+ $NO_CRM crm_resource -r $rsc -g $param 2>/dev/null
+}
+# set_cib_param <rsc> <param> <value>: set the parameter in the CIB
+set_cib_param() {
+ local rsc=$1 param=$2 value=$3
+ check_cib_rsc $rsc
+ $NO_CRM crm_resource -r $rsc -p $param -v "$value" 2>/dev/null
+}
+# remove_cib_param <rsc> <param>: delete the parameter from the CIB
+remove_cib_param() {
+ local rsc=$1 param=$2
+ check_cib_rsc $rsc
+ $NO_CRM crm_resource -r $rsc -d $param 2>/dev/null
+}
+
+localfiles() {
+ local cmd=$1
+ local rsc=$2 param=$3 value=$4
+ local local_file=$LRM_CIBSECRETS/$rsc/$param
+ case $cmd in
+ "get")
+ cat $local_file 2>/dev/null
+ true
+ ;;
+ "getsum")
+ cat $local_file.sign 2>/dev/null
+ true
+ ;;
+ "set")
+ local md5sum
+ md5sum=`printf $value | md5sum` ||
+ fatal "md5sum failed to produce hash for resource $rsc parameter $param"
+ md5sum=`echo $md5sum | awk '{print $1}'`
+ mkdir -p `dirname $local_file` &&
+ echo $value > $local_file &&
+ echo $md5sum > $local_file.sign &&
+ sync_one $local_file
+ ;;
+ "remove")
+ rm -f $local_file
+ sync_one $local_file
+ ;;
+ *)
+ # not reached, this is local interface
+ ;;
+ esac
+}
+get_local_param() {
+ local rsc=$1 param=$2
+ localfiles get $rsc $param
+}
+set_local_param() {
+ local rsc=$1 param=$2 value=$3
+ localfiles set $rsc $param $value
+}
+remove_local_param() {
+ local rsc=$1 param=$2
+ localfiles remove $rsc $param
+}
+
+cibsecret_set() {
+ local value=$1
+
+ if [ -z "$NO_CRM" ]; then
+ [ "$current" -a "$current" != "$value" ] &&
+ fatal "CIB value <$current> different for $rsc parameter $param; please delete it first"
+ fi
+ set_local_param $rsc $param $value &&
+ set_cib_param $rsc $param "$MAGIC"
+}
+
+# Verify the local secret for $rsc/$param: the CIB must hold the
+# magic marker, a stored MD5 hash must exist, and it must match the
+# hash of the local secret. Exits via fatal on any failure.
+cibsecret_check() {
+	local md5sum local_md5sum
+	is_secret "$current" ||
+		fatal "no magic in CIB for resource $rsc parameter $param"
+	local_md5sum=`localfiles getsum $rsc $param`
+	[ "$local_md5sum" ] ||
+		fatal "no MD5 hash for resource $rsc parameter $param"
+	# hash the secret verbatim; using it as a printf format string
+	# would mangle values containing '%' or '\'
+	md5sum=`printf '%s' "$current_local" | md5sum | awk '{print $1}'`
+	[ "$md5sum" = "$local_md5sum" ] ||
+		fatal "MD5 hash mismatch for resource $rsc parameter $param"
+}
+
+# Print the local secret for $rsc/$param after verifying it.
+cibsecret_get() {
+ cibsecret_check
+ echo "$current_local"
+}
+
+# Remove the secret for $rsc/$param locally and, if that succeeded,
+# remove the parameter from the CIB.
+cibsecret_delete() {
+ remove_local_param $rsc $param &&
+ remove_cib_param $rsc $param
+}
+
+# Move the value currently stored in the CIB into a local secret
+# file. Requires CIB access and a CIB value that is not already the
+# magic marker.
+cibsecret_stash() {
+ [ "$NO_CRM" ] &&
+ fatal "no access to Pacemaker, stash not supported"
+ [ "$current" = "" ] &&
+ fatal "nothing to stash for resource $rsc parameter $param"
+ is_secret "$current" &&
+ fatal "CIB value for resource $rsc parameter $param already MD5 hash"
+ cibsecret_set "$current"
+}
+
+# Move the local secret for $rsc/$param back into the CIB and delete
+# the local file. Requires CIB access and an existing local secret;
+# a CIB value other than the magic marker only triggers a warning.
+cibsecret_unstash() {
+	[ "$NO_CRM" ] &&
+		fatal "no access to Pacemaker, unstash not supported"
+	[ "$current_local" = "" ] &&
+		fatal "nothing to unstash for resource $rsc parameter $param"
+	is_secret "$current" ||
+		warn "no MD5 hash in CIB for resource $rsc parameter $param, proceeding anyway"
+	# quote the secret so that whitespace in it survives
+	remove_local_param $rsc $param &&
+		set_cib_param $rsc $param "$current_local"
+}
+
+# Copy the whole local secrets directory to the live peer nodes.
+cibsecret_sync() {
+ sync_files
+}
+
+check_env
+
+MAGIC="lrm://"
+umask 0077
+
+if [ "$1" = "-C" ]; then
+	NO_CRM=':'
+	shift 1
+fi
+
+cmd=$1
+rsc=$2
+param=$3
+value=$4
+
+# every command takes a fixed number of arguments
+case "$cmd" in
+	set) [ $# -ne 4 ] && usage 1;;
+	get) [ $# -ne 3 ] && usage 1;;
+	check) [ $# -ne 3 ] && usage 1;;
+	stash) [ $# -ne 3 ] && usage 1;;
+	unstash) [ $# -ne 3 ] && usage 1;;
+	delete) [ $# -ne 3 ] && usage 1;;
+	sync) [ $# -ne 1 ] && usage 1;;
+	*) usage 1;;
+esac
+
+# we'll need these two often; sync has no resource/parameter, so
+# there is nothing to look up for it
+if [ "$cmd" != "sync" ]; then
+	current=`get_cib_param $rsc $param`
+	current_local=`get_local_param $rsc $param`
+fi
+
+# quote the value so that whitespace in it survives
+cibsecret_$cmd "$value"
diff --git a/lrm/lrmd/Makefile.am b/lrm/lrmd/Makefile.am
index 4578f9a..3680928 100644
--- a/lrm/lrmd/Makefile.am
+++ b/lrm/lrmd/Makefile.am
@@ -31,7 +31,7 @@ COMMONLIBS = $(top_builddir)/lib/clplumbing/libplumb.la \
halib_PROGRAMS = lrmd
-lrmd_SOURCES = lrmd.c audit.c lrmd_fdecl.h lrmd.h
+lrmd_SOURCES = lrmd.c audit.c cib_secrets.c lrmd_fdecl.h lrmd.h
lrmd_LDFLAGS = $(top_builddir)/lib/lrm/liblrm.la \
$(COMMONLIBS) @LIBLTDL@ \
diff --git a/lrm/lrmd/cib_secrets.c b/lrm/lrmd/cib_secrets.c
new file mode 100644
index 0000000..612ffdb
--- /dev/null
+++ b/lrm/lrmd/cib_secrets.c
@@ -0,0 +1,205 @@
+/*
+ * cib_secrets.c
+ *
+ * Author: Dejan Muhamedagic <dejan at suse.de>
+ * Copyright (c) 2011 SUSE, Attachmate
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <lha_internal.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <glib.h>
+#include <pils/plugin.h>
+#include <pils/generic.h>
+#include <clplumbing/GSource.h>
+#include <clplumbing/lsb_exitcodes.h>
+#include <clplumbing/cl_signal.h>
+#include <clplumbing/proctrack.h>
+#include <clplumbing/coredumps.h>
+#include <clplumbing/uids.h>
+#include <clplumbing/Gmain_timeout.h>
+#include <clplumbing/cl_pidfile.h>
+#include <clplumbing/realtime.h>
+#include <clplumbing/md5.h>
+#include <ha_msg.h>
+
+#include <lrm/lrm_api.h>
+#include <lrm/lrm_msg.h>
+
+#include <lrmd.h>
+
+int replace_secret_params(char *rsc_id, GHashTable* params);
+static int is_magic_value(char *p);
+static int check_md5_hash(char *hash, char *value);
+static void add_secret_params(gpointer key, gpointer value, gpointer user_data);
+static char *read_local_file(char *local_file);
+
+#define MAGIC "lrm://"
+
+/* Return 1 if p is exactly the magic placeholder string, else 0. */
+static int
+is_magic_value(char *p)
+{
+	return strcmp(p, MAGIC) == 0;
+}
+
+#define MD5LEN 16
+/*
+ * Compare `hash` (lower-case hex MD5 string) with the MD5 digest of
+ * the string `value`.
+ *
+ * Returns 1 if they match, 0 if they differ or if either argument is
+ * NULL (e.g. the hash file could not be read).
+ */
+static int
+check_md5_hash(char *hash, char *value)
+{
+	int i;
+	char hash2[2*MD5LEN+1];
+	unsigned char binary[MD5LEN+1];
+
+	/* a missing hash or value can never match */
+	if (!hash || !value)
+		return 0;
+
+	MD5((unsigned char *)value, strlen(value), binary);
+	/* each digest byte becomes two hex digits; the terminating NUL
+	 * written by every call is overwritten by the next one */
+	for (i = 0; i < MD5LEN; i++)
+		snprintf(hash2+2*i, sizeof(hash2)-2*i, "%02x", binary[i]);
+	hash2[2*i] = '\0';
+	lrmd_debug2(LOG_DEBUG
+		, "%s:%d: hash: %s, calculated hash: %s"
+		, __FUNCTION__, __LINE__, hash, hash2);
+	return !strcmp(hash, hash2);
+}
+
+/*
+ * Read the first line of `local_file` and return it with trailing
+ * white space removed.
+ *
+ * Returns a newly allocated string (release with g_free()), or NULL
+ * if the file cannot be opened or read. A missing file (ENOENT) is
+ * not logged; other failures are.
+ */
+static char *
+read_local_file(char *local_file)
+{
+	FILE *fp = fopen(local_file, "r");
+	char buf[MAX_VALUE_LEN+1];
+	size_t len;
+
+	if (!fp) {
+		if (errno != ENOENT) {
+			cl_perror("%s:%d: cannot open %s"
+			, __FUNCTION__, __LINE__, local_file);
+		}
+		return NULL;
+	}
+	if (!fgets(buf, MAX_VALUE_LEN, fp)) {
+		cl_perror("%s:%d: cannot read %s"
+		, __FUNCTION__, __LINE__, local_file);
+		/* close only after logging, so errno is not clobbered */
+		fclose(fp);
+		return NULL;
+	}
+	fclose(fp);
+
+	/* strip white space */
+	len = strlen(buf);
+	while (len > 0 && isspace((unsigned char)buf[len-1]))
+		len--;
+	buf[len] = '\0';
+	return g_strdup(buf);
+}
+
+/*
+ * Replace every parameter in `params` whose value is the magic
+ * placeholder with the secret stored in
+ * LRM_CIBSECRETS/<rsc_id>/<parameter name>, after verifying the
+ * secret against the MD5 hash in the matching ".sign" file.
+ *
+ * returns 0 on success or no replacements necessary
+ * returns -1 if replacement failed for whatever reason; parameters
+ * which could be replaced are still replaced in that case
+ */
+
+int
+replace_secret_params(char *rsc_id, GHashTable* params)
+{
+	char local_file[FILENAME_MAX+1], *start_pname;
+	char hash_file[FILENAME_MAX+1], *hash;
+	GList *secret_params = NULL, *l;
+	char *key, *pvalue, *secret_value;
+	size_t dirlen;
+	int n, rc = 0;
+
+	/* secret_params could be cached with the resource;
+	 * there are also parameters sent with operations
+	 * which cannot be cached
+	 */
+	g_hash_table_foreach(params, add_secret_params, &secret_params);
+	if (!secret_params) /* none found? */
+		return 0;
+
+	lrmd_debug(LOG_DEBUG
+		, "%s:%d: replace secret parameters for resource %s"
+		, __FUNCTION__, __LINE__, rsc_id);
+	n = snprintf(local_file, FILENAME_MAX,
+		LRM_CIBSECRETS "/%s/", rsc_id);
+	/* n == FILENAME_MAX already means the name was truncated */
+	if (n < 0 || n >= FILENAME_MAX) {
+		lrmd_log(LOG_ERR
+			, "%s:%d: filename size exceeded for resource %s"
+			, __FUNCTION__, __LINE__, rsc_id);
+		g_list_free(secret_params);
+		return -1;
+	}
+	/* length of the directory prefix; parameter names are appended
+	 * at start_pname for every list entry */
+	dirlen = strlen(local_file);
+	start_pname = local_file + dirlen;
+
+	for (l = g_list_first(secret_params); l; l = g_list_next(l)) {
+		key = (char *)(l->data);
+		pvalue = g_hash_table_lookup(params, key);
+		if (!pvalue) { /* this cannot really happen */
+			lrmd_log(LOG_ERR
+				, "%s:%d: odd, no parameter %s for rsc %s found now"
+				, __FUNCTION__, __LINE__, key, rsc_id);
+			continue;
+		}
+		/* measure against the directory prefix only, not against
+		 * whatever name a previous iteration left in local_file */
+		if ((strlen(key) + dirlen) >= FILENAME_MAX-2) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: parameter name %s too big"
+				, __FUNCTION__, __LINE__, key);
+			rc = -1;
+			continue;
+		}
+		strcpy(start_pname, key);
+		secret_value = read_local_file(local_file);
+		if (!secret_value) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: secret for rsc %s parameter %s "
+				"not found in " LRM_CIBSECRETS
+				, __FUNCTION__, __LINE__, rsc_id, key);
+			rc = -1;
+			continue;
+		}
+		strcpy(hash_file, local_file);
+		if (strlen(hash_file) + 5 > FILENAME_MAX) {
+			/* without the sign file the secret cannot be
+			 * verified, so do not use it */
+			lrmd_log(LOG_ERR
+				, "%s:%d: cannot build such a long name "
+				"for the sign file: %s.sign"
+				, __FUNCTION__, __LINE__, hash_file);
+			g_free(secret_value);
+			rc = -1;
+			continue;
+		}
+		strncat(hash_file, ".sign", 5);
+		hash = read_local_file(hash_file);
+		/* an unreadable hash file counts as a mismatch */
+		if (!hash || !check_md5_hash(hash, secret_value)) {
+			lrmd_log(LOG_ERR
+				, "%s:%d: md5 sum for rsc %s parameter %s "
+				"does not match"
+				, __FUNCTION__, __LINE__, rsc_id, key);
+			g_free(secret_value);
+			g_free(hash);
+			rc = -1;
+			continue;
+		}
+		g_free(hash);
+		g_hash_table_replace(params, g_strdup(key), secret_value);
+	}
+	g_list_free(secret_params);
+	return rc;
+}
+
+/*
+ * g_hash_table_foreach() callback: append `key` to the GList that
+ * user_data points to when `value` is the magic placeholder.
+ */
+static void
+add_secret_params(gpointer key, gpointer value, gpointer user_data)
+{
+	GList **list = (GList **)user_data;
+
+	if (!is_magic_value((char *)value))
+		return;
+	*list = g_list_append(*list, (char *)key);
+}
diff --git a/lrm/lrmd/lrmd.c b/lrm/lrmd/lrmd.c
index 4c7295c..3df0cf4 100644
--- a/lrm/lrmd/lrmd.c
+++ b/lrm/lrmd/lrmd.c
@@ -130,6 +130,17 @@ static struct {
int rsccount;
}lrm_objectstats;
+/* Indexes into logmsg_ctrl_defs[]: each macro names the array slot that
+ * holds the rate-limit settings for one kind of log message.
+ * OP_STAYED_TOO_LONG selects the entry used when an operation waited
+ * too long in the queue (it is passed to cl_limit_log_new() as
+ * logmsg_ctrl_defs + OP_STAYED_TOO_LONG). */
+#define OP_STAYED_TOO_LONG 0
+static struct logspam logmsg_ctrl_defs[] = {
+ { "operation stayed too long in the queue",
+ 10, 60, 120, /* max 10 messages in 60s, then delay for 120s */
+ "configuration advice: reduce operation contention "
+ "either by increasing lrmd max_children or by increasing intervals "
+ "of monitor operations" /* NOTE(review): presumably the advice text of struct logspam - confirm in cl_log.h */
+ },
+};
+
#define set_fd_opts(fd,opts) do { \
int flag; \
if ((flag = fcntl(fd, F_GETFL)) >= 0) { \
@@ -3076,11 +3087,6 @@ perform_ra_op(lrmd_op_t* op)
}
op_type = ha_msg_value(op->msg, F_LRM_OP);
- if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
- lrmd_log(LOG_INFO,"rsc:%s:%d: %s",rsc->id,op->call_id,probe_str(op,op_type));
- } else {
- lrmd_debug(LOG_DEBUG,"rsc:%s:%d: %s",rsc->id,op->call_id,op_type);
- }
op_params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
params = merge_str_tables(rsc->params,op_params);
ha_msg_mod_str_table(op->msg, F_LRM_PARAM, params);
@@ -3125,8 +3131,12 @@ perform_ra_op(lrmd_op_t* op)
((op->interval && !is_logmsg_due(op)) ? PT_LOGNORMAL : PT_LOGVERBOSE) : PT_LOGNONE
, op, &ManagedChildTrackOps);
- if (op->interval && is_logmsg_due(op)) {
- op->t_lastlogmsg = time_longclock();
+ if (!op->interval || is_logmsg_due(op)) { /* log non-repeating ops */
+ lrmd_log(LOG_INFO,"rsc:%s %s[%d] (pid %d)",
+ rsc->id,probe_str(op,op_type),op->call_id,pid);
+ } else {
+ lrmd_debug(LOG_DEBUG,"rsc:%s %s[%d] (pid %d)",
+ rsc->id,op_type,op->call_id,pid);
}
close(stdout_fd[1]);
close(stderr_fd[1]);
@@ -3218,6 +3228,21 @@ perform_ra_op(lrmd_op_t* op)
, "perform_ra_op:calling RA plugin to perform %s, pid: [%d]"
, op_info(op), getpid());
params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
+ if (replace_secret_params(rsc->id, params) < 0) {
+ /* replacing secrets failed! */
+ if (!strcmp(op_type,"stop")) {
+ /* don't fail on stop! */
+ lrmd_log(LOG_INFO
+ , "%s:%d: proceeding with the stop operation for %s"
+ , __FUNCTION__, __LINE__, rsc->id);
+ } else {
+ lrmd_log(LOG_ERR
+ , "%s:%d: failed to get secrets for %s, "
+ "considering resource not configured"
+ , __FUNCTION__, __LINE__, rsc->id);
+ exit(EXECRA_NOT_CONFIGURED);
+ }
+ }
RAExec->execra (rsc->id,
rsc->type,
rsc->provider,
@@ -3315,8 +3340,8 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
if( signo ) {
if( proctrack_timedout(p) ) {
- lrmd_log(LOG_WARNING, "%s: pid [%d] timed out"
- , op_info(op), proctrack_pid(p));
+ lrmd_log(LOG_WARNING, "%s: pid %d timed out"
+ , small_op_info(op), proctrack_pid(p));
op_status = LRM_OP_TIMEOUT;
} else {
op_status = LRM_OP_ERROR;
@@ -3324,20 +3349,16 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
} else {
rc = RAExec->map_ra_retvalue(exitcode, op_type
, op->first_line_ra_stdout);
- if (rc != EXECRA_OK || debug_level > 0) {
+ if (!op->interval || is_logmsg_due(op) || debug_level > 0) { /* log non-repeating ops */
if (rc == exitcode) {
- lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
- , "%s: pid [%d] exited with"
- " return code %d", op_info(op), proctrack_pid(p), rc);
+ lrmd_log(LOG_INFO
+ , "%s: pid %d exited with"
+ " return code %d", small_op_info(op), proctrack_pid(p), rc);
}else{
- lrmd_debug2(rc == EXECRA_OK ? LOG_DEBUG : LOG_INFO
- , "%s: pid [%d] exited with"
+ lrmd_log(LOG_INFO
+ , "%s: pid %d exited with"
" return code %d (mapped from %d)"
- , op_info(op), proctrack_pid(p), rc, exitcode);
- }
- if (rc != EXECRA_OK || debug_level > 1) {
- lrmd_debug2(LOG_INFO, "Resource Agent output: [%s]"
- , op->first_line_ra_stdout);
+ , small_op_info(op), proctrack_pid(p), rc, exitcode);
}
}
if (EXECRA_EXEC_UNKNOWN_ERROR == rc || EXECRA_NO_RA == rc) {
@@ -3348,6 +3369,9 @@ on_ra_proc_finished(ProcTrack* p, int status, int signo, int exitcode
op_status = LRM_OP_DONE;
}
}
+ if (op->interval && is_logmsg_due(op)) {
+ op->t_lastlogmsg = time_longclock();
+ }
if (HA_OK !=
ha_msg_mod_int(op->msg, F_LRM_OPSTATUS, op_status)) {
LOG_FAILED_TO_ADD_FIELD("opstatus");
@@ -3921,11 +3945,17 @@ gen_op_info(const lrmd_op_t* op, gboolean add_params)
,op->call_id ,op->client_id);
}else{
- snprintf(info, sizeof(info)
- ,"operation %s[%d] on %s::%s::%s for client %d"
- ,lrm_str(op_type), op->call_id
- ,lrm_str(rsc->class), lrm_str(rsc->type), lrm_str(rsc->id)
- ,op->client_id);
+ if (op->exec_pid > 1) {
+ snprintf(info, sizeof(info)
+ ,"operation %s[%d] with pid %d on %s for client %d"
+ ,lrm_str(op_type), op->call_id, op->exec_pid, lrm_str(rsc->id)
+ ,op->client_id);
+ } else {
+ snprintf(info, sizeof(info)
+ ,"operation %s[%d] on %s for client %d"
+ ,lrm_str(op_type), op->call_id, lrm_str(rsc->id)
+ ,op->client_id);
+ }
if( add_params ) {
param_gstr = g_string_new("");
@@ -3968,14 +3998,18 @@ static void
check_queue_duration(lrmd_op_t* op)
{
unsigned long t_stay_in_list = 0;
+ static struct msg_ctrl *ml;
+
CHECK_ALLOCATED(op, "op", );
t_stay_in_list = longclockto_ms(op->t_perform - op->t_addtolist);
- if ( t_stay_in_list > WARNINGTIME_IN_LIST)
+ if ( t_stay_in_list > WARNINGTIME_IN_LIST)
{
- lrmd_log(LOG_WARNING
- , "perform_ra_op: the operation %s stayed in operation "
+ if (!ml)
+ ml = cl_limit_log_new(logmsg_ctrl_defs + OP_STAYED_TOO_LONG);
+ cl_limit_log(ml, LOG_WARNING
+ , "perform_ra_op: the %s stayed in operation "
"list for %lu ms (longer than %d ms)"
- , op_info(op), t_stay_in_list
+ , small_op_info(op), t_stay_in_list
, WARNINGTIME_IN_LIST
);
if (debug_level >= 2) {
diff --git a/lrm/lrmd/lrmd.h b/lrm/lrmd/lrmd.h
index 17cc6bf..8e11964 100644
--- a/lrm/lrmd/lrmd.h
+++ b/lrm/lrmd/lrmd.h
@@ -266,3 +266,8 @@ const char *gen_op_info(const lrmd_op_t* op, gboolean add_params);
# define LRMAUDIT() /*nothing*/
# define MEGALRMAUDIT() /*nothing*/
#endif
+
+/*
+ * Replace secret placeholders in a resource's parameter table
+ * (implemented in cib_secrets.c).
+ *
+ * Each secret is read from its own file, named after the parameter,
+ * in the directory LRM_CIBSECRETS/RSC_ID/ (not from an ini file).
+ * The value is checked against the md5 sum stored in the matching
+ * .sign file and then stored into params under the same key.
+ *
+ * Returns a negative value when a secret could not be read or its
+ * md5 sum did not match; the caller in lrmd.c only tests for a
+ * negative result.
+ */
+int replace_secret_params(char* rsc_id, GHashTable* params);
diff --git a/lrm/test/regression.sh.in b/lrm/test/regression.sh.in
index 8233cee..523bd40 100755
--- a/lrm/test/regression.sh.in
+++ b/lrm/test/regression.sh.in
@@ -93,7 +93,7 @@ HA_logfacility=""
export HA_logfile HA_debugfile HA_use_logd HA_logfacility
mkdir -p $OUTDIR
-. /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
args=`getopt hq $*`
[ $? -ne 0 ] && usage
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-ha/cluster-glue.git
More information about the Debian-HA-Commits
mailing list