[Debian-ha-commits] [cluster-glue] 39/73: Imported Upstream version 1.0.11+hg2754
Richard Winters
devrik-guest at moszumanska.debian.org
Mon Apr 20 01:41:55 UTC 2015
This is an automated email from the git hooks/post-receive script.
devrik-guest pushed a commit to branch master
in repository cluster-glue.
commit 97867f039fea36d0ac17263061a39b6b64868649
Author: Richard B Winters <rik at mmogp.com>
Date: Sun Apr 19 20:51:54 2015 -0400
Imported Upstream version 1.0.11+hg2754
---
.hg_archival.txt | 2 +-
.hgtags | 1 +
ChangeLog | 10 +
cluster-glue-fedora.spec | 4 +-
cluster-glue-suse.spec | 4 +-
configure.ac | 35 +-
doc/Makefile.am | 6 +-
doc/hb_report.8.txt | 18 +-
doc/hb_report.xml | 665 ---------------------
hb_report/ha_cf_support.sh | 7 +-
hb_report/hb_report.in | 295 +++++++---
hb_report/openais_conf_support.sh | 8 +-
hb_report/utillib.sh | 35 +-
lib/plugins/stonith/Makefile.am | 42 +-
lib/plugins/stonith/external/Makefile.am | 4 +-
lib/plugins/stonith/external/libvirt | 63 +-
lib/plugins/stonith/external/sbd | 116 ----
lib/plugins/stonith/external/vcenter | 4 +-
lib/stonith/Makefile.am | 11 +-
lib/stonith/ha_log.sh | 7 +-
lib/stonith/main.c | 10 +-
lib/stonith/sbd-common.c | 971 -------------------------------
lib/stonith/sbd-md.c | 962 ------------------------------
lib/stonith/sbd.h | 190 ------
lrm/lrmd/lrmd.c | 20 +-
25 files changed, 388 insertions(+), 3102 deletions(-)
diff --git a/.hg_archival.txt b/.hg_archival.txt
index 0a916f4..a683acf 100644
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,2 +1,2 @@
repo: e3ffdd7ae81c596b2be7e1e110d2c1255161340e
-node: 1f36e9cdcc13369e9cb117b46c0f23b9d1757dc4
+node: 3cff550e1084f1accc7782ff371739ec84e31330
diff --git a/.hgtags b/.hgtags
index 422828a..b6f0f68 100644
--- a/.hgtags
+++ b/.hgtags
@@ -63,3 +63,4 @@ f6c2cd2593f365f984ce051db61466738ac05dcd Beta-0.4.9f
c69dc6ace936f501776df92dab3d611c2405f69e glue-1.0.8
0a08a469fdc8a0db1875369497bc83c0523ceb21 glue-1.0.9
12055ca2b025ab250a544701edaa1f5aaf63aef1 glue-1.0.10
+02bdcf58f9a098b717784746308e199e12eeb005 glue-1.0.11
diff --git a/ChangeLog b/ChangeLog
index 579143d..5f3d71d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+* Mon Oct 15 2012 Dejan Muhamedagic <dejan at suse.de>, Lars Ellenberg <lars.ellenberg at linbit.com>, and many others
+- stable release 1.0.11
+- lrmd: set max-children depending on the number of processors
+- lrmd: don't send parameters from ops back to crmd
+- stonith: external/libvirt: support for reboot reset method
+- hb_report: node's type became optional
+- hb_report: make use of bash trace features
+- hb_report: compatibility code for pacemaker v1.1.8
+- build: link libstonith with stonith2 agents
+
* Mon Jul 16 2012 Dejan Muhamedagic <dejan at suse.de>, Lars Ellenberg <lars.ellenberg at linbit.com>, and many others
- stable release 1.0.10
- clplumbing: ipc: fix message size checks (bnc#752231)
diff --git a/cluster-glue-fedora.spec b/cluster-glue-fedora.spec
index a34316e..dca95b6 100644
--- a/cluster-glue-fedora.spec
+++ b/cluster-glue-fedora.spec
@@ -15,7 +15,7 @@
Name: cluster-glue
Summary: Reusable cluster components
-Version: 1.0.10
+Version: 1.0.11
Release: 1%{?dist}
License: GPLv2+ and LGPLv2+
Url: http://www.linux-ha.org/wiki/Cluster_Glue
@@ -41,6 +41,7 @@ BuildRequires: bzip2-devel glib2-devel python-devel libxml2-devel libaio-devel
BuildRequires: OpenIPMI-devel openssl-devel
BuildRequires: libxslt docbook-dtds docbook-style-xsl
BuildRequires: help2man
+BuildRequires: asciidoc
%if 0%{?fedora}
BuildRequires: libcurl-devel libnet-devel
@@ -124,7 +125,6 @@ standards, and an interface to common STONITH devices.
%{_sbindir}/cibsecret
%{_sbindir}/meatclient
%{_sbindir}/stonith
-%{_sbindir}/sbd
%dir %{_libdir}/heartbeat
%dir %{_libdir}/heartbeat/plugins
%dir %{_libdir}/heartbeat/plugins/RAExec
diff --git a/cluster-glue-suse.spec b/cluster-glue-suse.spec
index 4eba31a..96e7d4b 100644
--- a/cluster-glue-suse.spec
+++ b/cluster-glue-suse.spec
@@ -29,7 +29,7 @@
Name: cluster-glue
Summary: Reusable cluster components
-Version: 1.0.10
+Version: 1.0.11
Release: 1%{?dist}
License: GPL v2 or later; LGPL v2.1 or later
Url: http://www.linux-ha.org/wiki/Cluster_Glue
@@ -41,6 +41,7 @@ BuildRequires: automake autoconf libtool e2fsprogs-devel glib2-devel pkgconfig
BuildRequires: libnet net-snmp-devel OpenIPMI-devel openhpi-devel
BuildRequires: libxslt docbook_4 docbook-xsl-stylesheets
BuildRequires: help2man
+BuildRequires: asciidoc
Obsoletes: heartbeat-common
Provides: heartbeat-common
@@ -210,7 +211,6 @@ fi
%{_sbindir}/cibsecret
%{_sbindir}/meatclient
%{_sbindir}/stonith
-%{_sbindir}/sbd
%{_sysconfdir}/init.d/logd
diff --git a/configure.ac b/configure.ac
index 41cc1cc..5adefcc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,7 +19,7 @@ dnl checks for compiler characteristics
dnl checks for library functions
dnl checks for system services
-AC_INIT(cluster-glue, 1.0.10, linux-ha-dev at lists.linux-ha.org)
+AC_INIT(cluster-glue, 1.0.11, linux-ha-dev at lists.linux-ha.org)
FEATURES=""
HB_PKG=heartbeat
@@ -88,7 +88,7 @@ dnl ===============================================
cc_supports_flag() {
local CFLAGS="$@"
AC_MSG_CHECKING(whether $CC supports "$@")
- AC_COMPILE_IFELSE([int main(){return 0;}] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(int main(){return 0;})] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
return $RC
}
@@ -456,9 +456,10 @@ AC_SUBST(GLUE_BUILD_VERSION)
dnl check byte order
AC_MSG_CHECKING(for byteorder)
-AC_TRY_RUN(`cat $srcdir/config/byteorder_test.c`,
+AC_C_BIGENDIAN(
+[AC_MSG_RESULT(big-endian); AC_DEFINE(CONFIG_BIG_ENDIAN, 1, [big-endian])],
[AC_MSG_RESULT(little-endian); AC_DEFINE(CONFIG_LITTLE_ENDIAN, 1, [little-endian])],
-[AC_MSG_RESULT(big-endian); AC_DEFINE(CONFIG_BIG_ENDIAN, 1, [big-endian])],)
+)
dnl ===============================================
@@ -492,11 +493,20 @@ AC_PATH_PROGS(EGREP, egrep)
AC_PATH_PROGS(PKGCONFIG, pkg-config)
AC_PATH_PROGS(XML2CONFIG, xml2-config)
-AC_PATH_PROGS(XSLTPROC, xsltproc)
-AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" )
-if test "x$XSLTPROC" = "x"; then
- AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages])
+AC_ARG_ENABLE([doc],
+ AS_HELP_STRING([--enable-doc], [build documentation (default is yes)]),
+ [], [enable_doc=yes])
+if test "x$enable_doc" != "xno"; then
+ AC_PATH_PROGS(XSLTPROC, xsltproc)
+ if test "x$XSLTPROC" = "x"; then
+ AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages])
+ fi
+ AC_PATH_PROGS(ASCIIDOC, asciidoc)
+ if test "x$ASCIIDOC" = "x"; then
+ AC_MSG_WARN([asciidoc not installed, unable to (re-)build manual pages])
+ fi
fi
+AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" )
AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)
@@ -703,11 +713,6 @@ if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
AC_MSG_ERROR(The libxml developement headers were not found)
fi
-AC_CHECK_HEADERS(libaio.h)
-if test "$ac_cv_header_libaio_h" != "yes"; then
- AC_MSG_ERROR(The libaio developement headers were not found)
-fi
-
dnl Check syslog.h for 'facilitynames' table
dnl
AC_CACHE_CHECK([for facilitynames in syslog.h],ac_cv_HAVE_SYSLOG_FACILITYNAMES,[
@@ -933,7 +938,7 @@ dnl ========================================================================
AC_ARG_ENABLE([libnet],
[ --enable-libnet Use libnet for ARP based funcationality, [default=try]],
- [enable_libnet=$withval], [enable_libnet=try])
+ [], [enable_libnet=try])
libnet=""
libnet_version="none"
@@ -1042,7 +1047,7 @@ elif test "x${ac_cv_header_net_snmp_net_snmp_config_h}" = "xyes"; then
ENABLE_SNMP="no"
else
AC_MSG_CHECKING(for special snmp libraries)
- SNMPLIB=`net-snmp-config --libs`
+ SNMPLIB=`${SNMPCONFIG} --libs`
AC_MSG_RESULT($SNMPLIB)
fi
elif test "x${ac_cv_header_ucd_snmp_snmp_h}" = "xyes"; then
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 9181dc0..c8d67a8 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -29,7 +29,6 @@ man_MANS =
if BUILD_DOC
man_MANS += hb_report.8 ha_logd.8 ha_logger.1 stonith.8 meatclient.8
-endif
EXTRA_DIST = $(man_MANS)
@@ -47,3 +46,8 @@ XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS)
$(XSLTPROC) \
$(XSLTPROC_MANPAGES_OPTIONS) \
$(MANPAGES_STYLESHEET) $<
+
+hb_report.8: hb_report.8.txt
+ a2x -f manpage $<
+
+endif
diff --git a/doc/hb_report.8.txt b/doc/hb_report.8.txt
index eff037b..ebb1e0c 100644
--- a/doc/hb_report.8.txt
+++ b/doc/hb_report.8.txt
@@ -245,7 +245,19 @@ ssh::
semi-automated report generation. See below for instructions.
+
If you need to supply a password for your passphrase/login, then
- please use the `-u` option.
+ always use the `-u` option.
+
+sudo::
+ If the ssh user (as specified with the `-u` option) is other
+ than `root`, then `hb_report` uses `sudo` to collect the
+ information which is readable only by the `root` user. In that
+ case it is required to set up the `sudoers` file properly. The
+ user (or group to which the user belongs) should have the
+ following line:
+ +
+ <user> ALL = NOPASSWD: /usr/sbin/hb_report
+ +
+ See the `sudoers(5)` man page for more details.
Times::
In order to find files and messages in the given period and to
@@ -358,10 +370,6 @@ it is:
# hb_report -f 5:20pm -t 5:30pm -l /var/tmp/ha-log -S /tmp/report_node1
-If you reconsider and want the ssh setup, take a look at the CTS
-README file for instructions.
-
-
OPERATION
---------
hb_report collects files and other information in a fairly
diff --git a/doc/hb_report.xml b/doc/hb_report.xml
deleted file mode 100644
index 87baa9e..0000000
--- a/doc/hb_report.xml
+++ /dev/null
@@ -1,665 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
-<refentry lang="en">
-<refmeta>
-<refentrytitle>hb_report</refentrytitle>
-<manvolnum>8</manvolnum>
-<refmiscinfo class="source">hb_report</refmiscinfo>
-<refmiscinfo class="version">1.2</refmiscinfo>
-<refmiscinfo class="manual">Pacemaker documentation</refmiscinfo>
-</refmeta>
-<refnamediv>
- <refname>hb_report</refname>
- <refpurpose>create report for CRM based clusters (Pacemaker)</refpurpose>
-</refnamediv>
-<refsynopsisdiv>
-<simpara><emphasis role="strong">hb_report</emphasis> -f {time|"cts:"testnum} [-t time] [-u user] [-l file]
- [-n nodes] [-E files] [-p patt] [-L patt] [-e prog] [-MSDCZAVsvhd] [dest]</simpara>
-</refsynopsisdiv>
-<refsect1 id="_description">
-<title>DESCRIPTION</title>
-<simpara>The hb_report(1) is a utility to collect all information (logs,
-configuration files, system information, etc) relevant to
-Pacemaker (CRM) over the given period of time.</simpara>
-</refsect1>
-<refsect1 id="_options">
-<title>OPTIONS</title>
-<variablelist>
-<varlistentry>
-<term>
-dest
-</term>
-<listitem>
-<simpara>
- The destination directory. Must be an absolute path. The
- resulting tarball is placed in the parent directory and
- contains the last directory element of this path. Typically
- something like /tmp/standby-failed. If left out, the tarball
- is created in your home directory named "hb_report-current_date",
- for instance hb_report-Wed-03-Mar-2010.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-d</emphasis>
-</term>
-<listitem>
-<simpara>
- Don’t create the compressed tar, but leave the result in a
- directory.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-f</emphasis> { time | "cts:"testnum }
-</term>
-<listitem>
-<simpara>
- The start time from which to collect logs. The time is in the
- format as used by the Date::Parse perl module. For cts tests,
- specify the "cts:" string followed by the test number. This
- option is required.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-t</emphasis> time
-</term>
-<listitem>
-<simpara>
- The end time to which to collect logs. Defaults to now.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-n</emphasis> nodes
-</term>
-<listitem>
-<simpara>
- A list of space separated hostnames (cluster members).
- hb_report may try to find out the set of nodes by itself, but
- if it runs on the loghost which, as it is usually the case,
- does not belong to the cluster, that may be difficult. Also,
- OpenAIS doesn’t contain a list of nodes and if Pacemaker is
- not running, there is no way to find it out automatically.
- This option is cumulative (i.e. use -n "a b" or -n a -n b).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-l</emphasis> file
-</term>
-<listitem>
-<simpara>
- Log file location. If, for whatever reason, hb_report cannot
- find the log files, you can specify its absolute path.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-E</emphasis> files
-</term>
-<listitem>
-<simpara>
- Extra log files to collect. This option is cumulative. By
- default, /var/log/messages are collected along with the
- cluster logs.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-M</emphasis>
-</term>
-<listitem>
-<simpara>
- Don’t collect extra log files, but only the file containing
- messages from the cluster subsystems.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-L</emphasis> patt
-</term>
-<listitem>
-<simpara>
- A list of regular expressions to match in log files for
- analysis. This option is additive (default: "CRIT: ERROR:").
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-p</emphasis> patt
-</term>
-<listitem>
-<simpara>
- Additional patterns to match parameter name which contain
- sensitive information. This option is additive (default: "passw.*").
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-A</emphasis>
-</term>
-<listitem>
-<simpara>
- This is an OpenAIS cluster. hb_report has some heuristics to
- find the cluster stack, but that is not always reliable.
- By default, hb_report assumes that it is run on a Heartbeat
- cluster.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-u</emphasis> user
-</term>
-<listitem>
-<simpara>
- The ssh user. hb_report will try to login to other nodes
- without specifying a user, then as "root", and finally as
- "hacluster". If you have another user for administration over
- ssh, please use this option.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-S</emphasis>
-</term>
-<listitem>
-<simpara>
- Single node operation. Run hb_report only on this node and
- don’t try to start slave collectors on other members of the
- cluster. Under normal circumstances this option is not
- needed. Use if ssh(1) does not work to other nodes.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-Z</emphasis>
-</term>
-<listitem>
-<simpara>
- If destination directories exist, remove them instead of exiting
- (this is default for CTS).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-V</emphasis>
-</term>
-<listitem>
-<simpara>
- Print the version including the last repository changeset.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-v</emphasis>
-</term>
-<listitem>
-<simpara>
- Increase verbosity. Normally used to debug unexpected
- behaviour.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-h</emphasis>
-</term>
-<listitem>
-<simpara>
- Show usage and some examples.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-D</emphasis> (obsolete)
-</term>
-<listitem>
-<simpara>
- Don’t invoke editor to fill the description text file.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-e</emphasis> prog (obsolete)
-</term>
-<listitem>
-<simpara>
- Your favourite text editor. Defaults to $EDITOR, vim, vi,
- emacs, or nano, whichever is found first.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">-C</emphasis> (obsolete)
-</term>
-<listitem>
-<simpara>
- Remove the destination directory once the report has been put
- in a tarball.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_examples">
-<title>EXAMPLES</title>
-<simpara>Last night during the backup there were several warnings
-encountered (logserver is the log host):</simpara>
-<literallayout class="monospaced">logserver# hb_report -f 3:00 -t 4:00 -n "node1 node2" /tmp/report</literallayout>
-<simpara>collects everything from all nodes from 3am to 4am last night.
-The files are compressed to a tarball /tmp/report.tar.bz2.</simpara>
-<simpara>Just found a problem during testing:</simpara>
-<literallayout class="monospaced"># note the current time
-node1# date
-Fri Sep 11 18:51:40 CEST 2009
-node1# /etc/init.d/heartbeat start
-node1# nasty-command-that-breaks-things
-node1# sleep 120 #wait for the cluster to settle
-node1# hb_report -f 18:51 /tmp/hb1</literallayout>
-<literallayout class="monospaced"># if hb_report can't figure out that this is openais
-node1# hb_report -f 18:51 -A /tmp/hb1</literallayout>
-<literallayout class="monospaced"># if hb_report can't figure out the cluster members
-node1# hb_report -f 18:51 -n "node1 node2" /tmp/hb1</literallayout>
-<simpara>The files are compressed to a tarball /tmp/hb1.tar.bz2.</simpara>
-</refsect1>
-<refsect1 id="_interpreting_results">
-<title>INTERPRETING RESULTS</title>
-<simpara>The compressed tar archive is the final product of hb_report.
-This is one example of its content, for a CTS test case on a
-three node OpenAIS cluster:</simpara>
-<literallayout class="monospaced">$ ls -RF 001-Restart</literallayout>
-<literallayout class="monospaced">001-Restart:
-analysis.txt events.txt logd.cf s390vm13/ s390vm16/
-description.txt ha-log.txt openais.conf s390vm14/</literallayout>
-<literallayout class="monospaced">001-Restart/s390vm13:
-STOPPED crm_verify.txt hb_uuid.txt openais.conf@ sysinfo.txt
-cib.txt dlm_dump.txt logd.cf@ pengine/ sysstats.txt
-cib.xml events.txt messages permissions.txt</literallayout>
-<literallayout class="monospaced">001-Restart/s390vm13/pengine:
-pe-input-738.bz2 pe-input-740.bz2 pe-warn-450.bz2
-pe-input-739.bz2 pe-warn-449.bz2 pe-warn-451.bz2</literallayout>
-<literallayout class="monospaced">001-Restart/s390vm14:
-STOPPED crm_verify.txt hb_uuid.txt openais.conf@ sysstats.txt
-cib.txt dlm_dump.txt logd.cf@ permissions.txt
-cib.xml events.txt messages sysinfo.txt</literallayout>
-<literallayout class="monospaced">001-Restart/s390vm16:
-STOPPED crm_verify.txt hb_uuid.txt messages sysinfo.txt
-cib.txt dlm_dump.txt hostcache openais.conf@ sysstats.txt
-cib.xml events.txt logd.cf@ permissions.txt</literallayout>
-<simpara>The top directory contains information which pertains to the
-cluster or event as a whole. Files with exactly the same content
-on all nodes will also be at the top, with per-node links created
-(as it is in this example the case with openais.conf and logd.cf).</simpara>
-<simpara>The cluster log files are named ha-log.txt regardless of the
-actual log file name on the system. If it is found on the
-loghost, then it is placed in the top directory. Files named
-messages are excerpts of /var/log/messages from nodes.</simpara>
-<simpara>Most files are copied verbatim or they contain output of a
-command. For instance, cib.xml is a copy of the CIB found in
-/var/lib/heartbeat/crm/cib.xml. crm_verify.txt is output of the
-crm_verify(8) program.</simpara>
-<simpara>Some files are result of a more involved processing:</simpara>
-<variablelist>
-<varlistentry>
-<term>
-<emphasis role="strong">analysis.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- A set of log messages matching user defined patterns (may be
- provided with the -L option).
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">events.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- A set of log messages matching event patterns. It should
- provide information about major cluster motions without
- unnecessary details. These patterns are devised by the
- cluster experts. Currently, the patterns cover membership
- and quorum changes, resource starts and stops, fencing
- (stonith) actions, and cluster starts and stops. events.txt
- is always generated for each node. In case the central
- cluster log was found, also combined for all nodes.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">permissions.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- One of the more common problem causes are file and directory
- permissions. hb_report looks for a set of predefined
- directories and checks their permissions. Any issues are
- reported here.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">backtraces.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- gdb generated backtrace information for cores dumped
- within the specified period.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">sysinfo.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- Various release information about the platform, kernel,
- operating system, packages, and anything else deemed to be
- relevant. The static part of the system.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-<emphasis role="strong">sysstats.txt</emphasis>
-</term>
-<listitem>
-<simpara>
- Output of various system commands such as ps(1), uptime(1),
- netstat(8), and ifconfig(8). The dynamic part of the system.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-<simpara>description.txt should contain a user supplied description of the
-problem, but since it is very seldom used, it will be dropped
-from the future releases.</simpara>
-</refsect1>
-<refsect1 id="_prerequisites">
-<title>PREREQUISITES</title>
-<variablelist>
-<varlistentry>
-<term>
-ssh
-</term>
-<listitem>
-<simpara>
- It is not strictly required, but you won’t regret having a
- password-less ssh. It is not too difficult to setup and will save
- you a lot of time. If you can’t have it, for example because your
- security policy does not allow such a thing, or you just prefer
- menial work, then you will have to resort to the semi-manual
- semi-automated report generation. See below for instructions.
- <?asciidoc-br?>
- If you need to supply a password for your passphrase/login, then
- please use the <literal>-u</literal> option.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-Times
-</term>
-<listitem>
-<simpara>
- In order to find files and messages in the given period and to
- parse the <literal>-f</literal> and <literal>-t</literal> options, <literal>hb_report</literal> uses perl and one of the
- <literal>Date::Parse</literal> or <literal>Date::Manip</literal> perl modules. Note that you need
- only one of these. Furthermore, on nodes which have no logs and
- where you don’t run <literal>hb_report</literal> directly, no date parsing is
- necessary. In other words, if you run this on a loghost then you
- don’t need these perl modules on the cluster nodes.
- <?asciidoc-br?>
- On rpm based distributions, you can find <literal>Date::Parse</literal> in
- <literal>perl-TimeDate</literal> and on Debian and its derivatives in
- <literal>libtimedate-perl</literal>.
-</simpara>
-</listitem>
-</varlistentry>
-<varlistentry>
-<term>
-Core dumps
-</term>
-<listitem>
-<simpara>
- To backtrace core dumps gdb is needed and the packages with
- the debugging info. The debug info packages may be installed
- at the time the report is created. Let’s hope that you will
- need this really seldom.
-</simpara>
-</listitem>
-</varlistentry>
-</variablelist>
-</refsect1>
-<refsect1 id="_times">
-<title>TIMES</title>
-<simpara>Specifying times can at times be a nuisance. That is why we have
-chosen to use one of the perl modules—they do allow certain
-freedom when talking dates. You can either read the instructions
-at the
-<ulink url="http://search.cpan.org/dist/TimeDate/lib/Date/Parse.pm#EXAMPLE_DATES">Date::Parse
-examples page</ulink>.
-or just rely on common sense and try stuff like:</simpara>
-<literallayout class="monospaced">3:00 (today at 3am)
-15:00 (today at 3pm)
-2007/9/1 2pm (September 1st at 2pm)
-Tue Sep 15 20:46:27 CEST 2009 (September 15th etc)</literallayout>
-<simpara><literal>hb_report</literal> will (probably) complain if it can’t figure out what do
-you mean.</simpara>
-<simpara>Try to delimit the event as close as possible in order to reduce
-the size of the report, but still leaving a minute or two around
-for good measure.</simpara>
-<simpara><literal>-f</literal> is not optional. And don’t forget to quote dates when they
-contain spaces.</simpara>
-</refsect1>
-<refsect1 id="_should_i_send_all_this_to_the_rest_of_internet">
-<title>Should I send all this to the rest of Internet?</title>
-<simpara>By default, the sensitive data in CIB and PE files is not mangled
-by <literal>hb_report</literal> because that makes PE input files mostly useless.
-If you still have no other option but to send the report to a
-public mailing list and do not want the sensitive data to be
-included, use the <literal>-s</literal> option. Without this option, <literal>hb_report</literal>
-will issue a warning if it finds information which should not be
-exposed. By default, parameters matching <emphasis>passw.*</emphasis> are considered
-sensitive. Use the <literal>-p</literal> option to specify additional regular
-expressions to match variable names which may contain information
-you don’t want to leak. For example:</simpara>
-<literallayout class="monospaced"># hb_report -f 18:00 -p "user.*" -p "secret.*" /var/tmp/report</literallayout>
-<simpara>Heartbeat’s ha.cf is always sanitized. Logs and other files are
-not filtered.</simpara>
-</refsect1>
-<refsect1 id="_logs">
-<title>LOGS</title>
-<simpara>It may be tricky to find syslog logs. The scheme used is to log a
-unique message on all nodes and then look it up in the usual
-syslog locations. This procedure is not foolproof, in particular
-if the syslog files are in a non-standard directory. We look in
-/var/log /var/logs /var/syslog /var/adm /var/log/ha
-/var/log/cluster. In case we can’t find the logs, please supply
-their location:</simpara>
-<literallayout class="monospaced"># hb_report -f 5pm -l /var/log/cluster1/ha-log -S /tmp/report_node1</literallayout>
-<simpara>If you have different log locations on different nodes, well,
-perhaps you’d like to make them the same and make life easier for
-everybody.</simpara>
-<simpara>Files starting with "ha-" are preferred. In case syslog sends
-messages to more than one file, if one of them is named ha-log or
-ha-debug those will be favoured over syslog or messages.</simpara>
-<simpara>hb_report supports also archived logs in case the period
-specified extends that far in the past. The archives must reside
-in the same directory as the current log and their names must
-be prefixed with the name of the current log (syslog-1.gz or
-messages-20090105.bz2).</simpara>
-<simpara>If there is no separate log for the cluster, possibly unrelated
-messages from other programs are included. We don’t filter logs,
-but just pick a segment for the period you specified.</simpara>
-</refsect1>
-<refsect1 id="_manual_report_collection">
-<title>MANUAL REPORT COLLECTION</title>
-<simpara>So, your ssh doesn’t work. In that case, you will have to run
-this procedure on all nodes. Use <literal>-S</literal> so that <literal>hb_report</literal> doesn’t
-bother with ssh:</simpara>
-<literallayout class="monospaced"># hb_report -f 5:20pm -t 5:30pm -S /tmp/report_node1</literallayout>
-<simpara>If you also have a log host which is not in the cluster, then
-you’ll have to copy the log to one of the nodes and tell us where
-it is:</simpara>
-<literallayout class="monospaced"># hb_report -f 5:20pm -t 5:30pm -l /var/tmp/ha-log -S /tmp/report_node1</literallayout>
-<simpara>If you reconsider and want the ssh setup, take a look at the CTS
-README file for instructions.</simpara>
-</refsect1>
-<refsect1 id="_operation">
-<title>OPERATION</title>
-<simpara>hb_report collects files and other information in a fairly
-straightforward way. The most complex tasks are discovering the
-log file locations (if syslog is used which is the most common
-case) and coordinating the operation on multiple nodes.</simpara>
-<simpara>The instance of hb_report running on the host where it was
-invoked is the master instance. Instances running on other nodes
-are slave instances. The master instance communicates with slave
-instances by ssh. There are multiple ssh invocations per run, so
-it is essential that the ssh works without password, i.e. with
-the public key authentication and authorized_keys.</simpara>
-<simpara>The operation consists of three phases. Each phase must finish
-on all nodes before the next one can commence. The first phase
-consists of logging unique messages through syslog on all nodes.
-This is the shortest of all phases.</simpara>
-<simpara>The second phase is the most involved. During this phase all
-local information is collected, which includes:</simpara>
-<itemizedlist>
-<listitem>
-<simpara>
-logs (both current and archived if the start time is far in the past)
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-various configuration files (openais, heartbeat, logd)
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-the CIB (both as xml and as represented by the crm shell)
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-pengine inputs (if this node was the DC at any point in
- time over the given period)
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-system information and status
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-package information and status
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-dlm lock information
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-backtraces (if there were core dumps)
-</simpara>
-</listitem>
-</itemizedlist>
-<simpara>The third phase is collecting information from all nodes and
-analyzing it. The analyzis consists of the following tasks:</simpara>
-<itemizedlist>
-<listitem>
-<simpara>
-identify files equal on all nodes which may then be moved to
- the top directory
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-save log messages matching user defined patterns
- (defaults to ERRORs and CRITical conditions)
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-report if there were coredumps and by whom
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-report crm_verify(8) results
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-save log messages matching major events to events.txt
-</simpara>
-</listitem>
-<listitem>
-<simpara>
-in case logging is configured without loghost, node logs and
- events files are combined using a perl utility
-</simpara>
-</listitem>
-</itemizedlist>
-</refsect1>
-<refsect1 id="_bugs">
-<title>BUGS</title>
-<simpara>Finding logs may at times be extremely difficult, depending on
-how weird the syslog configuration. It would be nice to ask
-syslog-ng developers to provide a way to find out the log
-destination based on facility and priority.</simpara>
-<simpara>If you think you found a bug, please rerun with the -v option and
-attach the output to bugzilla.</simpara>
-<simpara>hb_report can function in a satisfactory way only if ssh works to
-all nodes using authorized_keys (without password).</simpara>
-<simpara>There are way too many options.</simpara>
-</refsect1>
-<refsect1 id="_author">
-<title>AUTHOR</title>
-<simpara>Written by Dejan Muhamedagic, <<ulink url="mailto:dejan at suse.de">dejan at suse.de</ulink>></simpara>
-</refsect1>
-<refsect1 id="_resources">
-<title>RESOURCES</title>
-<simpara>Pacemaker: <ulink url="http://clusterlabs.org/">http://clusterlabs.org/</ulink></simpara>
-<simpara>Heartbeat and other Linux HA resources: <ulink url="http://linux-ha.org/wiki">http://linux-ha.org/wiki</ulink></simpara>
-<simpara>OpenAIS: <ulink url="http://www.openais.org/">http://www.openais.org/</ulink></simpara>
-<simpara>Corosync: <ulink url="http://www.corosync.org/">http://www.corosync.org/</ulink></simpara>
-</refsect1>
-<refsect1 id="_see_also">
-<title>SEE ALSO</title>
-<simpara>Date::Parse(3)</simpara>
-</refsect1>
-<refsect1 id="_copying">
-<title>COPYING</title>
-<simpara>Copyright (C) 2007-2009 Dejan Muhamedagic. Free use of this
-software is granted under the terms of the GNU General Public License (GPL).</simpara>
-</refsect1>
-</refentry>
diff --git a/hb_report/ha_cf_support.sh b/hb_report/ha_cf_support.sh
index 9776dd3..0c674b8 100644
--- a/hb_report/ha_cf_support.sh
+++ b/hb_report/ha_cf_support.sh
@@ -59,7 +59,7 @@ getlogvars() {
HA_LOGLEVEL="debug"
if uselogd; then
[ -f "$LOGD_CF" ] || {
- info "logd used but logd.cf not found: using defaults"
+ debug "logd used but logd.cf not found: using defaults"
return # no configuration: use defaults
}
debug "reading log settings from $LOGD_CF"
@@ -76,7 +76,8 @@ essential_files() {
cat<<EOF
d $HA_VARLIB 0755 root root
d $HA_VARLIB/ccm 0750 hacluster haclient
-d `dirname $HA_VARLIB`/pengine 0750 hacluster haclient
-d $HA_VARLIB/crm 0750 hacluster haclient
+d $PCMK_LIB 0755 root root
+d $PE_STATE_DIR 0750 hacluster haclient
+d $CIB_DIR 0750 hacluster haclient
EOF
}
diff --git a/hb_report/hb_report.in b/hb_report/hb_report.in
index f5745cd..d7982f5 100755
--- a/hb_report/hb_report.in
+++ b/hb_report/hb_report.in
@@ -36,7 +36,7 @@ export DEFAULT_HA_LOGFACILITY
LOGD_CF=`findlogdcf @sysconfdir@ $HA_DIR`
export LOGD_CF
-: ${SSH_OPTS="-T -o StrictHostKeyChecking=no"}
+: ${SSH_OPTS="-o StrictHostKeyChecking=no -o EscapeChar=none"}
LOG_PATTERNS="CRIT: ERROR:"
# PEINPUTS_PATT="peng.*PEngine Input stored"
@@ -199,6 +199,73 @@ no_dir() {
time2str() {
perl -e "use POSIX; print strftime('%x %X',localtime($1));"
}
+# try to figure out where pacemaker ... etc
+get_pe_state_dir() {
+ PE_STATE_DIR=`strings $CRM_DAEMON_DIR/pengine |
+ awk 'NF==1&&/var\/lib\/.*pengine$/'`
+ test -d "$PE_STATE_DIR"
+}
+get_cib_dir() {
+ CIB_DIR=`strings $CRM_DAEMON_DIR/crmd |
+ awk 'NF==1&&/var\/lib\/.*(cib|crm)$/'`
+ test -d "$CIB_DIR"
+}
+get_pe_state_dir2() {
+ # PE_STATE_DIR
+ local localstatedir lastf
+ localstatedir=`dirname $HA_VARLIB`
+ lastf=$(2>/dev/null ls -rt `2>/dev/null find /var/lib -name pengine -type d |
+ sed 's,$,/*.last,'` | tail -1)
+ if [ -f "$lastf" ]; then
+ PE_STATE_DIR=`dirname $lastf`
+ else
+ for p in pacemaker/pengine pengine heartbeat/pengine; do
+ if [ -d $localstatedir/$p ]; then
+ debug "setting PE_STATE_DIR to $localstatedir/$p"
+ PE_STATE_DIR=$localstatedir/$p
+ break
+ fi
+ done
+ fi
+}
+get_cib_dir2() {
+ # CIB
+ # HA_VARLIB is normally set to {localstatedir}/heartbeat
+ local localstatedir
+ localstatedir=`dirname $HA_VARLIB`
+ for p in pacemaker/cib heartbeat/crm; do
+ if [ -f $localstatedir/$p/cib.xml ]; then
+ debug "setting CIB_DIR to $localstatedir/$p"
+ CIB_DIR=$localstatedir/$p
+ break
+ fi
+ done
+}
+get_crm_daemon_dir() {
+ # CRM_DAEMON_DIR
+ local libdir
+ libdir=`dirname $HA_BIN`
+ for p in pacemaker heartbeat; do
+ if [ -x $libdir/$p/crmd ]; then
+ debug "setting CRM_DAEMON_DIR to $libdir/$p"
+ CRM_DAEMON_DIR=$libdir/$p
+ break
+ fi
+ done
+ if [ ! -d "$CRM_DAEMON_DIR" ]; then
+ fatal "cannot find pacemaker daemon directory!"
+ fi
+}
+compatibility_pcmk() {
+ get_crm_daemon_dir
+ get_pe_state_dir || get_pe_state_dir2
+ get_cib_dir || get_cib_dir2
+ debug "setting PCMK_LIB to `dirname $CIB_DIR`"
+ PCMK_LIB=`dirname $CIB_DIR`
+ # PTEST
+ PTEST=`echo_ptest_tool`
+ export PE_STATE_DIR CIB_DIR CRM_DAEMON_DIR PCMK_LIB PTEST
+}
#
# find log files
@@ -286,11 +353,11 @@ arch_logs() {
case $? in
0) ;; # noop, continue
1) echo $next_log # include log and continue
- info "found log $next_log"
+ debug "found log $next_log"
;;
2) break;; # don't go through older logs!
3) echo $next_log # include log and continue
- info "found log $next_log"
+ debug "found log $next_log"
break
;; # don't go through older logs!
esac
@@ -317,9 +384,11 @@ print_logseg() {
# uncompress to a temp file (if necessary)
local cat=`find_decompressor $logf`
if [ "$cat" != "cat" ]; then
- tmp=`mktemp`
+ tmp=`mktemp` ||
+ fatal "disk full"
add_tmpfiles $tmp
- $cat $logf > $tmp
+ $cat $logf > $tmp ||
+ fatal "disk full"
sourcef=$tmp
else
sourcef=$logf
@@ -462,13 +531,13 @@ start_slave_collector() {
dumpenv |
if [ "$node" = "$WE" ]; then
- $SUDO hb_report __slave
+ debug "running: $LOCAL_SUDO hb_report __slave"
+ $LOCAL_SUDO hb_report __slave
else
- ssh $SSH_OPTS_AUTH $node \
+ debug "running: ssh $SSH_OPTS $node \"$SUDO hb_report __slave\""
+ ssh $SSH_OPTS $node \
"$SUDO hb_report __slave"
- fi | (cd $WORKDIR && tar xf -) &
-
- SLAVEPIDS="$SLAVEPIDS $!"
+ fi | (cd $WORKDIR && tar xf -)
}
#
@@ -476,12 +545,12 @@ start_slave_collector() {
# and how
#
testsshconn() {
- ssh $SSH_OPTS -o Batchmode=yes $1 true 2>/dev/null
+ ssh $SSH_OPTS -T -o Batchmode=yes $1 true 2>/dev/null
}
findsshuser() {
local n u rc
local ssh_s ssh_user="__undef" try_user_list failed_nodes=""
- try_user_list=${SSH_USER:-"\"\" $TRY_SSH"}
+ try_user_list="\"\" $TRY_SSH"
for n in $NODES; do
rc=1
[ "$n" = "$WE" ] && continue
@@ -505,18 +574,16 @@ findsshuser() {
failed_nodes="$failed_nodes $n"
done
[ -n "$failed_nodes" ] &&
- warning "ssh to node(s)$failed_nodes does not work"
+ warning "ssh to node(s) $failed_nodes does not work"
# drop nodes we cannot reach
NODES=`for n in $failed_nodes $NODES; do echo $n; done | sort | uniq -u`
- if [ "$ssh_user" != "__undef" ]; then
- if [ "$ssh_user" != '""' ]; then
- SSH_OPTS_AUTH="$SSH_OPTS -l $ssh_user"
- else
- SSH_OPTS_AUTH="$SSH_OPTS"
- fi
- else
+ if [ "$ssh_user" = "__undef" ]; then
return 1
fi
+ if [ "$ssh_user" != '""' ]; then
+ echo $ssh_user
+ fi
+ return 0
}
#
@@ -538,35 +605,49 @@ getbacktraces() {
pe2dot() {
local pef=`basename $1`
local dotf=`basename $pef .bz2`.dot
+ test -z "$PTEST" && return
(
cd `dirname $1`
- ptest -D $dotf -x $pef
+ $PTEST -D $dotf -x $pef >/dev/null 2>&1
)
}
getpeinputs() {
local pe_dir flist
local f
- debug "looking for PE files in $HA_VARLIB/pengine, `dirname $HA_VARLIB`/pengine"
- for pe_dir in $HA_VARLIB/pengine `dirname $HA_VARLIB`/pengine
- do
- test -d $pe_dir ||
- continue
- flist=$(
- find_files $pe_dir $1 $2 | sed "s,`dirname $pe_dir`/,,g" |
- grep -v '[.]last$'
+ pe_dir=$PE_STATE_DIR
+ debug "looking for PE files in $pe_dir"
+ flist=$(
+ find_files $pe_dir $1 $2 | grep -v '[.]last$'
+ )
+ [ "$flist" ] && {
+ mkdir $3/`basename $pe_dir`
+ (
+ cd $3/`basename $pe_dir`
+ for f in $flist; do
+ ln -s $f
+ done
)
- [ "$flist" ] && {
- (cd `dirname $pe_dir` && tar cf - $flist) | (cd $3 && tar xf -)
- debug "found `echo $flist | wc -w` pengine input files in $pe_dir"
- }
- if [ `echo $flist | wc -w` -le 20 ]; then
- for f in $flist; do
- pe2dot $3/$f
- done
- else
- info "too many PE inputs to create dot files"
- fi
- done
+ debug "found `echo $flist | wc -w` pengine input files in $pe_dir"
+ }
+ if [ `echo $flist | wc -w` -le 20 ]; then
+ for f in $flist; do
+ pe2dot $3/`basename $pe_dir`/`basename $f`
+ done
+ else
+ debug "too many PE inputs to create dot files"
+ fi
+}
+getratraces() {
+ local trace_dir flist
+ local f
+ trace_dir=$HA_VARLIB/trace_ra
+ test -d "$trace_dir" || return 0
+ debug "looking for RA trace files in $trace_dir"
+ flist=$(find_files $trace_dir $1 $2 | sed "s,`dirname $trace_dir`/,,g")
+ [ "$flist" ] && {
+ tar -cf - -C `dirname $trace_dir` $flist | tar -xf - -C $3
+ debug "found `echo $flist | wc -w` RA trace files in $trace_dir"
+ }
}
touch_DC_if_dc() {
local dc
@@ -935,6 +1016,7 @@ pickcompress() {
getlog() {
local getstampproc cnt
local outf
+ outf=$WORKDIR/$HALOG_F
if [ "$HA_LOG" ]; then # log provided by the user?
[ -f "$HA_LOG" ] || { # not present
@@ -948,33 +1030,34 @@ getlog() {
[ "$HA_LOG" ] &&
cnt=`fgrep -c $UNIQUE_MSG < $HA_LOG`
fi
- if [ "$cnt" ] && [ $cnt -eq $NODECNT ]; then
+ if [ "$HA_LOG" = "" -o ! -f "$HA_LOG" ]; then
+ if [ "$CTS" ]; then
+ cts_findlogseg $CTS > $outf
+ else
+ warning "no log at $WE"
+ fi
+ return
+ fi
+ if [ "$cnt" ] && [ $cnt -gt 1 -a $cnt -eq $NODECNT ]; then
MASTER_IS_HOSTLOG=1
info "found the central log!"
fi
- outf=$WORKDIR/$HALOG_F
- if [ -f "$HA_LOG" ]; then
- if [ "$NO_str2time" ]; then
- warning "a log found; but we cannot slice it"
- warning "please install the perl Date::Parse module"
- elif [ "$CTS" ]; then
- cts_findlogseg $CTS $HA_LOG > $outf
- else
- getstampproc=`find_getstampproc < $HA_LOG`
- if [ "$getstampproc" ]; then
- export getstampproc # used by linetime
- dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf
- loginfo $HA_LOG > $outf.info
- else
- warning "could not figure out the log format of $HA_LOG"
- fi
- fi
+ if [ "$NO_str2time" ]; then
+ warning "a log found; but we cannot slice it"
+ warning "please install the perl Date::Parse module"
elif [ "$CTS" ]; then
- cts_findlogseg $CTS > $outf
+ cts_findlogseg $CTS $HA_LOG > $outf
else
- [ "$MASTER_IS_HOSTLOG" ] ||
- warning "could not find $HA_LOG on $WE"
+ getstampproc=`find_getstampproc < $HA_LOG`
+ if [ "$getstampproc" ]; then
+ export getstampproc # used by linetime
+ dumplogset $HA_LOG $FROM_TIME $TO_TIME > $outf &&
+ loginfo $HA_LOG > $outf.info ||
+ fatal "disk full"
+ else
+ warning "could not figure out the log format of $HA_LOG"
+ fi
fi
}
#
@@ -994,6 +1077,7 @@ collect_info() {
dlm_dump > $WORKDIR/$DLM_DUMP_F 2>&1
time_status > $WORKDIR/$TIME_F 2>&1
corosync_blackbox $FROM_TIME $TO_TIME $WORKDIR/$COROSYNC_RECORDER_F
+ getratraces $FROM_TIME $TO_TIME $WORKDIR
wait
sanitize $WORKDIR
@@ -1007,8 +1091,9 @@ collect_info() {
getstampproc=`find_getstampproc < $l`
if [ "$getstampproc" ]; then
export getstampproc # used by linetime
- dumplogset $l $FROM_TIME $TO_TIME > $WORKDIR/`basename $l`
- loginfo $l > $WORKDIR/`basename $l`.info
+ dumplogset $l $FROM_TIME $TO_TIME > $WORKDIR/`basename $l` &&
+ loginfo $l > $WORKDIR/`basename $l`.info ||
+ fatal "disk full"
else
warning "could not figure out the log format of $l"
fi
@@ -1038,7 +1123,8 @@ if [ "$t" = "" ]; then
fi
WE=`uname -n` # who am i?
-tmpdir=`mktemp -t -d .hb_report.workdir.XXXXXX`
+tmpdir=`mktemp -t -d .hb_report.workdir.XXXXXX` ||
+ fatal "disk full"
add_tmpfiles $tmpdir
WORKDIR=$tmpdir
@@ -1109,10 +1195,19 @@ fi
is_collector || {
info "high debug level, please read debug.out"
}
- exec 2>>$WORKDIR/debug.out
+ PS4='+ ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: '
+ if echo "$SHELL" | grep bash > /dev/null &&
+ [ "${BASH_VERSINFO[0]}" -ge 4 ]; then
+ exec 3>>$WORKDIR/debug.out
+ BASH_XTRACEFD=3
+ else
+ exec 2>>$WORKDIR/debug.out
+ fi
set -x
}
+compatibility_pcmk
+
# allow user to enforce the cluster type
# if not, then it is found out on _all_ nodes
if [ -z "$USER_CLUSTER_TYPE" ]; then
@@ -1123,7 +1218,7 @@ fi
# the very first thing we must figure out is which cluster
# stack is used
-CORES_DIRS=$HA_VARLIB/cores
+CORES_DIRS="`2>/dev/null ls -d $HA_VARLIB/cores $PCMK_LIB/cores | uniq`"
PACKAGES="pacemaker libpacemaker3
pacemaker-pygui pacemaker-pymgmt pymgmt-client
openais libopenais2 libopenais3 corosync libcorosync4
@@ -1177,20 +1272,6 @@ elif ! is_collector; then
NODES_SOURCE=user
fi
-THIS_IS_NODE=""
-if ! is_collector; then
- MASTER_NODE=$WE
- NODES=`getnodes`
- debug "nodes: `echo $NODES`"
-fi
-NODECNT=`echo $NODES | wc -w`
-if [ "$NODECNT" = 0 ]; then
- fatal "could not figure out a list of nodes; is this a cluster node?"
-fi
-if echo $NODES | grep -wqs $WE; then # are we a node?
- THIS_IS_NODE=1
-fi
-
# the goods
ANALYSIS_F=analysis.txt
DESCRIPTION_F=description.txt
@@ -1215,6 +1296,20 @@ export COROSYNC_RECORDER_F
CONFIGURATIONS="/etc/drbd.conf /etc/drbd.d /etc/booth/booth.conf"
export CONFIGURATIONS
+THIS_IS_NODE=""
+if ! is_collector; then
+ MASTER_NODE=$WE
+ NODES=`getnodes`
+ debug "nodes: `echo $NODES`"
+fi
+NODECNT=`echo $NODES | wc -w`
+if [ "$NODECNT" = 0 ]; then
+ fatal "could not figure out a list of nodes; is this a cluster node?"
+fi
+if echo $NODES | grep -wqs $WE; then # are we a node?
+ THIS_IS_NODE=1
+fi
+
# this only on master
if ! is_collector; then
@@ -1228,7 +1323,25 @@ if ! is_collector; then
#
# find out if ssh works
if [ -z "$NO_SSH" ]; then
- findsshuser
+ # if the ssh user was supplied, consider that it
+ # works; helps reduce the number of ssh invocations
+ if [ -z "$SSH_USER" ]; then
+ SSH_USER=`findsshuser`
+ fi
+ if [ -n "$SSH_USER" ]; then
+ SSH_OPTS="$SSH_OPTS -o User=$SSH_USER"
+ fi
+ fi
+ # assume that only root can collect data
+ SUDO=""
+ if [ -z "$SSH_USER" -a `id -u` != 0 ] || [ -n "$SSH_USER" -a "$SSH_USER" != root ]; then
+ debug "ssh user other than root, use sudo"
+ SUDO="sudo -u root"
+ fi
+ LOCAL_SUDO=""
+ if [ `id -u` != 0 ]; then
+ debug "local user other than root, use sudo"
+ LOCAL_SUDO="sudo -u root"
fi
fi
@@ -1249,13 +1362,13 @@ fi
getlog
if ! is_collector; then
- # assume that only root can collect data
- SUDO=""
- if [ -z "$SSH_USER" -a `id -u` != 0 ] || [ -n "$SSH_USER" -a "$SSH_USER" != root ]; then
- SUDO="sudo -u root -E"
- fi
for node in $NODES; do
- start_slave_collector $node
+ if [ -z "$SSH_USER" ]; then
+ start_slave_collector $node &
+ SLAVEPIDS="$SLAVEPIDS $!"
+ else
+ start_slave_collector $node
+ fi
done
fi
@@ -1267,9 +1380,11 @@ fi
#
if is_collector; then
collect_info
- (cd $WORKDIR/.. && tar cf - $WE)
+ (cd $WORKDIR/.. && tar -h -cf - $WE)
else
- wait $SLAVEPIDS
+ if [ -n "$SLAVEPIDS" ]; then
+ wait $SLAVEPIDS
+ fi
analyze $WORKDIR > $WORKDIR/$ANALYSIS_F &
events $WORKDIR &
mktemplate > $WORKDIR/$DESCRIPTION_F
diff --git a/hb_report/openais_conf_support.sh b/hb_report/openais_conf_support.sh
index 7f6ccb8..ea2e02f 100644
--- a/hb_report/openais_conf_support.sh
+++ b/hb_report/openais_conf_support.sh
@@ -72,7 +72,7 @@ getlogvars() {
HA_LOGLEVEL="debug"
if uselogd; then
[ -f "$LOGD_CF" ] || {
- info "logd used but logd.cf not found: using defaults"
+ debug "logd used but logd.cf not found: using defaults"
return # no configuration: use defaults
}
debug "reading log settings from $LOGD_CF"
@@ -90,8 +90,8 @@ cluster_info() {
}
essential_files() {
cat<<EOF
-d $HA_VARLIB 0755 root root
-d `dirname $HA_VARLIB`/pengine 0750 hacluster haclient
-d $HA_VARLIB/crm 0750 hacluster haclient
+d $PCMK_LIB 0755 root root
+d $PE_STATE_DIR 0750 hacluster haclient
+d $CIB_DIR 0750 hacluster haclient
EOF
}
diff --git a/hb_report/utillib.sh b/hb_report/utillib.sh
index 1f0d745..bbbc6c7 100644
--- a/hb_report/utillib.sh
+++ b/hb_report/utillib.sh
@@ -41,6 +41,15 @@ echo_membership_tool() {
which $f 2>/dev/null && break
done
}
+# find out if ptest or crm_simulate
+#
+echo_ptest_tool() {
+ local f ptest_progs
+ ptest_progs="crm_simulate ptest"
+ for f in $ptest_progs; do
+ which $f 2>/dev/null && break
+ done
+}
#
# find nodes for this cluster
#
@@ -60,6 +69,10 @@ getnodes() {
elif [ "$CLUSTER_TYPE" = heartbeat ]; then
debug "reading nodes from ha.cf"
getcfvar node
+ # 5. if the cluster's stopped, try the CIB
+ elif [ -f $CIB_DIR/$CIB_F ]; then
+ debug "reading nodes from the archived $CIB_DIR/$CIB_F"
+ (CIB_file=$CIB_DIR/$CIB_F get_crm_nodes)
fi
}
@@ -344,6 +357,7 @@ listpkg_zypper() {
}
fetchpkg_zypper() {
debug "get debuginfo packages using zypper: $@"
+ zypper -qn ref > /dev/null
zypper -qn install -C $@ >/dev/null
}
find_pkgmgr() {
@@ -400,12 +414,14 @@ findbinary() {
fi
fullpath=`which $binary 2>/dev/null`
if [ x = x"$fullpath" ]; then
- if [ -x $HA_BIN/$binary ]; then
- echo $HA_BIN/$binary
- debug "found the program at $HA_BIN/$binary for core $1"
- else
- warning "could not find the program path for core $1"
- fi
+ for d in $HA_BIN $CRM_DAEMON_DIR; do
+ if [ -x $d/$binary ]; then
+ echo $d/$binary
+ debug "found the program at $d/$binary for core $1"
+ break # first match wins; never print the path twice
+ fi
+ done
+ [ -x "$d/$binary" ] || warning "could not find the program path for core $1"
else
echo $fullpath
debug "found the program at $fullpath for core $1"
@@ -463,7 +479,7 @@ getconfig() {
dumpstate $1
touch $1/RUNNING
else
- cp -p $HA_VARLIB/crm/$CIB_F $1/ 2>/dev/null
+ cp -p $CIB_DIR/$CIB_F $1/ 2>/dev/null
touch $1/STOPPED
fi
[ "$HOSTCACHE" ] &&
@@ -480,7 +496,7 @@ crmconfig() {
get_crm_nodes() {
cibadmin -Ql -o nodes |
awk '
- /type="normal"/ {
+ /<node / {
for( i=1; i<=NF; i++ )
if( $i~/^uname=/ ) {
sub("uname=.","",$i);
@@ -586,6 +602,7 @@ info() {
debug() {
[ "$VERBOSITY" ] && [ $VERBOSITY -gt 0 ] &&
echo "`uname -n`: DEBUG: $*" >&2
+ return 0
}
pickfirst() {
for x; do
@@ -677,5 +694,5 @@ pkg_ver() {
}
crm_info() {
- $HA_BIN/crmd version 2>&1
+ $CRM_DAEMON_DIR/crmd version 2>&1
}
diff --git a/lib/plugins/stonith/Makefile.am b/lib/plugins/stonith/Makefile.am
index 0a051c4..01f2f4a 100644
--- a/lib/plugins/stonith/Makefile.am
+++ b/lib/plugins/stonith/Makefile.am
@@ -123,91 +123,93 @@ noinst_LTLIBRARIES = $(libipmilan_LIB)
apcmaster_la_SOURCES = apcmaster.c $(INCFILES)
apcmaster_la_LDFLAGS = -export-dynamic -module -avoid-version
-apcmaster_la_LIBADD = $(GLIBLIB)
+apcmaster_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
apcmastersnmp_la_SOURCES= apcmastersnmp.c $(INCFILES)
apcmastersnmp_la_LDFLAGS= -export-dynamic -module -avoid-version @SNMPLIB@ \
@CRYPTOLIB@
-apcmastersnmp_la_LIBADD = $(GLIBLIB)
+apcmastersnmp_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
apcsmart_la_SOURCES = apcsmart.c $(INCFILES)
apcsmart_la_LDFLAGS = -export-dynamic -module -avoid-version
-apcsmart_la_LIBADD = $(GLIBLIB)
+apcsmart_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
baytech_la_SOURCES = baytech.c $(INCFILES)
baytech_la_LDFLAGS = -export-dynamic -module -avoid-version
-baytech_la_LIBADD = $(GLIBLIB)
+baytech_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
bladehpi_la_SOURCES = bladehpi.c $(INCFILES)
bladehpi_la_LDFLAGS = -export-dynamic -module -avoid-version
-bladehpi_la_LIBADD = $(GLIBLIB) -lopenhpi
+bladehpi_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB) -lopenhpi
cyclades_la_SOURCES = cyclades.c $(INCFILES)
cyclades_la_LDFLAGS = -export-dynamic -module -avoid-version
-cyclades_la_LIBADD = $(GLIBLIB)
+cyclades_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
drac3_la_SOURCES = drac3.c drac3_command.c drac3_command.h drac3_hash.c drac3_hash.h $(INCFILES)
drac3_la_LDFLAGS = -export-dynamic -module -avoid-version
-drac3_la_LIBADD = -lcurl -lxml2 $(GLIBLIB)
+drac3_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la -lcurl -lxml2 $(GLIBLIB)
external_la_SOURCES = external.c $(INCFILES)
external_la_LDFLAGS = -export-dynamic -module -avoid-version
-external_la_LIBADD = $(top_builddir)/replace/libreplace.la
+external_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la
rhcs_la_SOURCES = rhcs.c $(INCFILES)
rhcs_la_LDFLAGS = -export-dynamic -module -avoid-version
-rhcs_la_LIBADD = $(top_builddir)/replace/libreplace.la
+rhcs_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la
ibmhmc_la_SOURCES = ibmhmc.c $(INCFILES)
ibmhmc_la_LDFLAGS = -export-dynamic -module -avoid-version
-ibmhmc_la_LIBADD = $(top_builddir)/replace/libreplace.la $(GLIBLIB)
+ibmhmc_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la $(GLIBLIB)
ipmilan_la_SOURCES = ipmilan.c ipmilan.h ipmilan_command.c $(INCFILES)
ipmilan_la_LDFLAGS = -export-dynamic -module -avoid-version
-ipmilan_la_LIBADD = $(top_builddir)/replace/libreplace.la $(OPENIPMI_LIB) $(GLIBLIB)
+ipmilan_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la $(OPENIPMI_LIB) $(GLIBLIB)
libipmilan_la_SOURCES = ipmilan.c ipmilan.h ipmilan_command.c $(INCFILES)
libipmilan_la_LDFLAGS = -version-info 1:0:0
-libipmilan_la_LIBADD = $(top_builddir)/replace/libreplace.la $(OPENIPMI_LIB) $(GLIBLIB)
+libipmilan_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la $(OPENIPMI_LIB) $(GLIBLIB)
meatware_la_SOURCES = meatware.c $(INCFILES)
meatware_la_LDFLAGS = -export-dynamic -module -avoid-version
-meatware_la_LIBADD = $(GLIBLIB)
+meatware_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
null_la_SOURCES = null.c $(INCFILES)
null_la_LDFLAGS = -export-dynamic -module -avoid-version
-null_la_LIBADD = $(GLIBLIB)
+null_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
nw_rpc100s_la_SOURCES = nw_rpc100s.c $(INCFILES)
nw_rpc100s_la_LDFLAGS = -export-dynamic -module -avoid-version
-nw_rpc100s_la_LIBADD = $(top_builddir)/replace/libreplace.la $(GLIBLIB)
+nw_rpc100s_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la $(GLIBLIB)
rcd_serial_la_SOURCES = rcd_serial.c $(INCFILES)
rcd_serial_la_LDFLAGS = -export-dynamic -module -avoid-version
-rcd_serial_la_LIBADD = $(GLIBLIB)
+rcd_serial_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
rps10_la_SOURCES = rps10.c $(INCFILES)
rps10_la_LDFLAGS = -export-dynamic -module -avoid-version
-rps10_la_LIBADD = $(GLIBLIB)
+rps10_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
ssh_la_SOURCES = ssh.c $(INCFILES)
ssh_la_LDFLAGS = -export-dynamic -module -avoid-version
+ssh_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la
vacm_la_SOURCES = vacm.c $(INCFILES)
vacm_la_LDFLAGS = -export-dynamic -module -avoid-version
-vacm_la_LIBADD = $(top_builddir)/replace/libreplace.la
+vacm_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la
wti_nps_la_SOURCES = wti_nps.c $(INCFILES)
wti_nps_la_LDFLAGS = -export-dynamic -module -avoid-version
-wti_nps_la_LIBADD = $(top_builddir)/replace/libreplace.la $(GLIBLIB)
+wti_nps_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(top_builddir)/replace/libreplace.la $(GLIBLIB)
wti_mpc_la_SOURCES= wti_mpc.c $(INCFILES)
wti_mpc_la_LDFLAGS= -export-dynamic -module -avoid-version @SNMPLIB@ \
@CRYPTOLIB@
-wti_mpc_la_LIBADD = $(GLIBLIB)
+wti_mpc_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la $(GLIBLIB)
suicide_la_SOURCES = suicide.c $(INCFILES)
suicide_la_LDFLAGS = -export-dynamic -module -avoid-version
+suicide_la_LIBADD = $(top_builddir)/lib/stonith/libstonith.la
stonithscriptdir = $(stonith_plugindir)/stonith2
diff --git a/lib/plugins/stonith/external/Makefile.am b/lib/plugins/stonith/external/Makefile.am
index 5006513..42e0046 100644
--- a/lib/plugins/stonith/external/Makefile.am
+++ b/lib/plugins/stonith/external/Makefile.am
@@ -20,14 +20,14 @@
MAINTAINERCLEANFILES = Makefile.in
EXTRA_DIST = drac5 dracmc-telnet ibmrsa-telnet ipmi rackpdu vmware vcenter xen0 \
- xen0-ha-dom0-stonith-helper sbd kdumpcheck nut
+ xen0-ha-dom0-stonith-helper kdumpcheck nut
extdir = $(stonith_ext_plugindir)
helperdir = $(stonith_plugindir)
ext_SCRIPTS = drac5 dracmc-telnet ibmrsa ibmrsa-telnet ipmi riloe ssh vmware vcenter rackpdu xen0 hmchttp \
- xen0-ha sbd kdumpcheck ippower9258 nut libvirt \
+ xen0-ha kdumpcheck ippower9258 nut libvirt \
hetzner
helper_SCRIPTS = xen0-ha-dom0-stonith-helper
diff --git a/lib/plugins/stonith/external/libvirt b/lib/plugins/stonith/external/libvirt
index 23338b2..5fd4eec 100644
--- a/lib/plugins/stonith/external/libvirt
+++ b/lib/plugins/stonith/external/libvirt
@@ -46,6 +46,23 @@ libvirt_start() {
ha_log.sh err "$out"
return 1
}
+# reboot a domain
+# return
+# 0: success
+# 1: error
+libvirt_reboot() {
+ local rc out
+ out=$($VIRSH -c $hypervisor_uri reboot $domain_id 2>&1)
+ rc=$?
+ if [ $rc -eq 0 ]
+ then
+ ha_log.sh notice "Domain $domain_id was rebooted"
+ return 0
+ fi
+ ha_log.sh err "Failed to reboot domain $domain_id (exit code: $rc)"
+ ha_log.sh err "$out"
+ return 1
+}
# stop a domain
# return
@@ -80,8 +97,6 @@ libvirt_status() {
out=$($VIRSH -c $hypervisor_uri version 2>&1)
if [ $? -eq 0 ]
then
- out=`echo "$out" | tail -1`
- ha_log.sh notice "$hypervisor_uri: $out"
return 0
fi
@@ -106,6 +121,14 @@ libvirt_check_config() {
ha_log.sh err "hostlist or hypervisor_uri missing; check configuration"
exit 1
fi
+
+ case "$reset_method" in
+ power_cycle|reboot) : ;;
+ *)
+ ha_log.sh err "unrecognized reset_method: $reset_method"
+ exit 1
+ ;;
+ esac
}
# set variable domain_id for the host specified as arg
@@ -159,6 +182,18 @@ virsh must be installed (e.g. libvir-client package) and access control must
be configured for your selected URI.
</longdesc>
</parameter>
+
+<parameter name="reset_method" required="0">
+<content type="string" default="power_cycle"/>
+<shortdesc lang="en">
+How to reset a guest.
+</shortdesc>
+<longdesc lang="en">
+A guest reset may be done by a sequence of off and on commands
+(power_cycle) or by the reboot command. Which method works
+depends on the hypervisor and guest configuration management.
+</longdesc>
+</parameter>
</parameters>
LVIRTXML
exit 0
@@ -175,6 +210,8 @@ unset SSH_AUTH_SOCK
# support , as a separator as well
hostlist=`echo $hostlist| sed -e 's/,/ /g'`
+reset_method=${reset_method:-"power_cycle"}
+
case $1 in
gethosts)
hostnames=`echo $hostlist|sed -e 's/:[^ ]*//g'`
@@ -203,20 +240,19 @@ case $1 in
;;
reset)
- # libvirt has no reset so we do a power cycle
libvirt_check_config
libvirt_set_domain_id $2
- libvirt_stop
+ if [ "$reset_method" = "power_cycle" ]; then
+ libvirt_stop
+ rc=$?
+ [ $rc = 1 ] && exit 1
+ sleep 2
+ libvirt_start
+ else
+ libvirt_reboot
+ fi
rc=$?
- [ $rc = 1 ] && exit 1
-
- # stonith reset seems to require a power on even if it was off
- # before so the next line is commented out
- # [ $rc = 2 ] && exit 0
-
- sleep 2
- libvirt_start
- exit $?
+ exit $rc
;;
@@ -227,7 +263,7 @@ case $1 in
;;
getconfignames)
- echo "hostlist hypervisor_uri"
+ echo "hostlist hypervisor_uri reset_method"
exit 0
;;
@@ -261,3 +297,4 @@ case $1 in
;;
esac
+# vi:et:ts=4:sw=4
diff --git a/lib/plugins/stonith/external/sbd b/lib/plugins/stonith/external/sbd
deleted file mode 100644
index baa7b3b..0000000
--- a/lib/plugins/stonith/external/sbd
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/bin/bash
-#
-# This STONITH script drives the shared-storage stonith plugin.
-#
-# Author: Lars Marowsky-Bree
-# Copyright: 2008 Lars Marowsky-Bree
-# License: GNU General Public License (GPL)
-#
-
-# Main code
-
-if [ x$sbd_device = x ]; then
- if [ -f /etc/sysconfig/sbd ]; then
- source /etc/sysconfig/sbd
- sbd_device=$SBD_DEVICE
- fi
-fi
-
-SBD_DEVS=${sbd_device%;}
-
-sbd_device=${SBD_DEVS//;/ -d }
-
-case $1 in
-gethosts)
- echo `sbd -d $sbd_device list | cut -f2 | sort | uniq`
- exit 0
- ;;
-off|reset)
- message=$1
- case "$crashdump" in
- yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
- esac
- sbd -d $sbd_device message $2 $message
- exit $?
- ;;
-status)
- if ! sbd -d $sbd_device list >/dev/null 2>&1 ; then
- ha_log.sh err "sbd could not list nodes from $sbd_device"
- exit 1
- fi
- exit 0
- ;;
-on)
- exit 1
- ;;
-getconfignames)
- echo "sbd_device crashdump"
- exit 0
- ;;
-getinfo-devid)
- echo "Shared storage STONITH device"
- exit 0
- ;;
-getinfo-devname)
- echo "Shared storage STONITH device"
- exit 0
- ;;
-getinfo-devdescr)
- cat << DESC
-sbd uses a shared storage device as a medium to communicate
-fencing requests. This allows clusters without network power
-switches; the downside is that access to the shared storage
-device becomes a Single Point of Failure.
-
-It requires sbd to be configured on all nodes.
-
-Please read http://linux-ha.org/wiki/SBD_Fencing!
-
-DESC
- exit 0
- ;;
-getinfo-devurl)
- echo "http://linux-ha.org/wiki/SBD_Fencing"
- exit 0
- ;;
-getinfo-xml)
- cat << SSHXML
-<parameters>
-
-<parameter name="crashdump">
-<content type="string" />
-<shortdesc lang="en">
-Crashdump instead of regular fence
-</shortdesc>
-<longdesc lang="en">
-If SBD is given a fence command, this option will instead perform a
-kernel crash of a reboot or power-off, which on a properly configured
-system can lead to a crashdump for analysis.
-
-This is less safe for production environments. Please use with caution
-and for debugging purposes only.
-</longdesc>
-</parameter>
-
-<parameter name="sbd_device" unique="1">
-<content type="string" />
-<shortdesc lang="en">
-SBD device(s)
-</shortdesc>
-<longdesc lang="en">
-The block device used for the SBD partition. Up to three
-can be specified if separated by a semicolon. (Please check
-the documentation if specifying two.)
-
-If not specified, will default to the value from /etc/sysconfig/sbd.
-
-</longdesc>
-</parameter>
-</parameters>
-SSHXML
- exit 0
- ;;
-*)
- exit 1
- ;;
-esac
diff --git a/lib/plugins/stonith/external/vcenter b/lib/plugins/stonith/external/vcenter
index 8fc015e..add38f8 100755
--- a/lib/plugins/stonith/external/vcenter
+++ b/lib/plugins/stonith/external/vcenter
@@ -161,7 +161,7 @@ elsif ($command ~~ @netCommands) {
eval {
# VI API: searches the inventory tree for a VirtualMachine managed entity whose name matches
# the name of the virtual machine assigned to the target host in HOSTLIST
- $vm = Vim::find_entity_view(view_type => "VirtualMachine", filter => { name => qr/\Q$host_to_vm{$targetHost}\E/i });
+ $vm = Vim::find_entity_view(view_type => "VirtualMachine", filter => { name => qr/^\Q$host_to_vm{$targetHost}\E/i });
if (!defined $vm) {
dielog("Machine $targetHost was not found");
}
@@ -243,7 +243,7 @@ elsif ($command ~~ @netCommands) {
}
elsif ($command eq "gethosts") {
foreach my $key (keys(%host_to_vm)) {
- print "$host_to_vm{$key} \n";
+ print "$key \n";
}
}
elsif ($command eq "listvms") {
diff --git a/lib/stonith/Makefile.am b/lib/stonith/Makefile.am
index b91804f..a8fca04 100644
--- a/lib/stonith/Makefile.am
+++ b/lib/stonith/Makefile.am
@@ -27,9 +27,6 @@ INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \
## binaries
sbin_PROGRAMS = stonith meatclient
-if ON_LINUX
-sbin_PROGRAMS += sbd
-endif
stonith_SOURCES = main.c
@@ -41,12 +38,6 @@ stonith_LDFLAGS = @LIBADD_DL@ @LIBLTDL@ -export-dynamic @DLOPEN_FORCE_FLAGS@ @
meatclient_SOURCES = meatclient.c
meatclient_LDADD = $(GLIBLIB)
-sbd_SOURCES = sbd-md.c sbd-common.c
-sbd_CFLAGS = -D_GNU_SOURCE
-sbd_LDADD = $(GLIBLIB) -laio \
- $(top_builddir)/lib/clplumbing/libplumb.la \
- $(top_builddir)/lib/clplumbing/libplumbgpl.la
-
## libraries
lib_LTLIBRARIES = libstonith.la
@@ -60,4 +51,4 @@ libstonith_la_LIBADD = $(top_builddir)/lib/pils/libpils.la \
helperdir = $(datadir)/$(PACKAGE_NAME)
helper_SCRIPTS = ha_log.sh
-EXTRA_DIST = $(helper_SCRIPTS) sbd.h
+EXTRA_DIST = $(helper_SCRIPTS)
diff --git a/lib/stonith/ha_log.sh b/lib/stonith/ha_log.sh
index c685d43..73093f0 100755
--- a/lib/stonith/ha_log.sh
+++ b/lib/stonith/ha_log.sh
@@ -71,13 +71,14 @@ ha_log() {
prn_level=`level_pres $loglevel`
msg="$prn_level: $@"
+ if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then
+ return 0
+ fi
+
set_logtag
# if we're connected to a tty, then output to stderr
if tty >/dev/null; then
- if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then
- return 0
- fi
if [ "$HA_LOGTAG" ]; then
echo "$HA_LOGTAG: $msg"
else
diff --git a/lib/stonith/main.c b/lib/stonith/main.c
index ad4ddb9..765d839 100644
--- a/lib/stonith/main.c
+++ b/lib/stonith/main.c
@@ -334,10 +334,10 @@ print_confignames(Stonith *s)
void
log_buf(int severity, char *buf)
{
+ if (severity == LOG_DEBUG && !debug)
+ return;
if (log_destination == LOG_TERMINAL) {
- if (severity != LOG_DEBUG || debug) {
- fprintf(stderr, "%s: %s\n", prio2str(severity),buf);
- }
+ fprintf(stderr, "%s: %s\n", prio2str(severity),buf);
} else {
cl_log(severity, "%s", buf);
}
@@ -685,7 +685,9 @@ main(int argc, char** argv)
if (!silent) {
if (rc == S_OK) {
- log_msg(LOG_INFO, "%s device OK.", SwitchType);
+ log_msg((log_destination == LOG_TERMINAL) ?
+ LOG_INFO : LOG_DEBUG,
+ "%s device OK.", SwitchType);
}else{
/* Uh-Oh */
log_msg(LOG_ERR, "%s device not accessible."
diff --git a/lib/stonith/sbd-common.c b/lib/stonith/sbd-common.c
deleted file mode 100644
index 74651b8..0000000
--- a/lib/stonith/sbd-common.c
+++ /dev/null
@@ -1,971 +0,0 @@
-
-#include "sbd.h"
-
-/* These have to match the values in the header of the partition */
-static char sbd_magic[8] = "SBD_SBD_";
-static char sbd_version = 0x02;
-
-/* Tunable defaults: */
-unsigned long timeout_watchdog = 5;
-unsigned long timeout_watchdog_warn = 3;
-int timeout_allocate = 2;
-int timeout_loop = 1;
-int timeout_msgwait = 10;
-int timeout_io = 3;
-
-int watchdog_use = 0;
-int watchdog_set_timeout = 1;
-int skip_rt = 0;
-int debug = 0;
-int debug_mode = 0;
-const char *watchdogdev = "/dev/watchdog";
-char * local_uname;
-
-/* Global, non-tunable variables: */
-int sector_size = 0;
-int watchdogfd = -1;
-
-/*const char *devname;*/
-const char *cmdname;
-
-void
-usage(void)
-{
- fprintf(stderr,
-"Shared storage fencing tool.\n"
-"Syntax:\n"
-" %s <options> <command> <cmdarguments>\n"
-"Options:\n"
-"-d <devname> Block device to use (mandatory; can be specified up to 3 times)\n"
-"-h Display this help.\n"
-"-n <node> Set local node name; defaults to uname -n (optional)\n"
-"\n"
-"-R Do NOT enable realtime priority (debugging only)\n"
-"-W Use watchdog (recommended) (watch only)\n"
-"-w <dev> Specify watchdog device (optional) (watch only)\n"
-"-T Do NOT initialize the watchdog timeout (watch only)\n"
-"-v Enable some verbose debug logging (optional)\n"
-"\n"
-"-1 <N> Set watchdog timeout to N seconds (optional, create only)\n"
-"-2 <N> Set slot allocation timeout to N seconds (optional, create only)\n"
-"-3 <N> Set daemon loop timeout to N seconds (optional, create only)\n"
-"-4 <N> Set msgwait timeout to N seconds (optional, create only)\n"
-"-5 <N> Warn if loop latency exceeds threshold (optional, watch only)\n"
-" (default is 3, set to 0 to disable)\n"
-"-I <N> Async IO read timeout (defaults to 3 * loop timeout, optional)\n"
-"-t <N> Dampening delay before faulty servants are restarted (optional)\n"
-" (default is 60, set to 0 to disable)\n"
-"-F <N> # of failures before a servant is considered faulty (optional)\n"
-" (default is 10, set to 0 to disable)\n"
-"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
-"Commands:\n"
-"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
-"list List all allocated slots on device, and messages.\n"
-"dump Dump meta-data header from device.\n"
-"watch Loop forever, monitoring own slot\n"
-"allocate <node>\n"
-" Allocate a slot for node (optional)\n"
-"message <node> (test|reset|off|clear|exit)\n"
-" Writes the specified message to node's slot.\n"
-, cmdname);
-}
-
-int
-watchdog_init_interval(void)
-{
- int timeout = timeout_watchdog;
-
- if (watchdogfd < 0) {
- return 0;
- }
-
-
- if (watchdog_set_timeout == 0) {
- cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
- return 0;
- }
-
- if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
- cl_perror( "WDIOC_SETTIMEOUT"
- ": Failed to set watchdog timer to %u seconds.",
- timeout);
- cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
- cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to silence this check if you are sure.");
- /* return -1; */
- } else {
- cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.",
- timeout);
- }
- return 0;
-}
-
-int
-watchdog_tickle(void)
-{
- if (watchdogfd >= 0) {
- if (write(watchdogfd, "", 1) != 1) {
- cl_perror("Watchdog write failure: %s!",
- watchdogdev);
- return -1;
- }
- }
- return 0;
-}
-
-int
-watchdog_init(void)
-{
- if (watchdogfd < 0 && watchdogdev != NULL) {
- watchdogfd = open(watchdogdev, O_WRONLY);
- if (watchdogfd >= 0) {
- cl_log(LOG_NOTICE, "Using watchdog device: %s",
- watchdogdev);
- if ((watchdog_init_interval() < 0)
- || (watchdog_tickle() < 0)) {
- return -1;
- }
- }else{
- cl_perror("Cannot open watchdog device: %s",
- watchdogdev);
- return -1;
- }
- }
- return 0;
-}
-
-void
-watchdog_close(void)
-{
- if (watchdogfd >= 0) {
- if (write(watchdogfd, "V", 1) != 1) {
- cl_perror(
- "Watchdog write magic character failure: closing %s!",
- watchdogdev);
- }
- if (close(watchdogfd) < 0) {
- cl_perror("Watchdog close(2) failed.");
- }
- watchdogfd = -1;
- }
-}
-
-/* This duplicates some code from linux/ioprio.h since these are not included
- * even in linux-kernel-headers. Sucks. See also
- * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
-extern int sys_ioprio_set(int, int, int);
-int ioprio_set(int which, int who, int ioprio);
-inline int ioprio_set(int which, int who, int ioprio)
-{
- return syscall(__NR_ioprio_set, which, who, ioprio);
-}
-
-enum {
- IOPRIO_CLASS_NONE,
- IOPRIO_CLASS_RT,
- IOPRIO_CLASS_BE,
- IOPRIO_CLASS_IDLE,
-};
-
-enum {
- IOPRIO_WHO_PROCESS = 1,
- IOPRIO_WHO_PGRP,
- IOPRIO_WHO_USER,
-};
-
-#define IOPRIO_BITS (16)
-#define IOPRIO_CLASS_SHIFT (13)
-#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
-
-#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
-#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
-#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-
-void
-maximize_priority(void)
-{
- if (skip_rt) {
- cl_log(LOG_INFO, "Not elevating to realtime (-R specified).");
- return;
- }
-
- cl_make_realtime(-1, -1, 256, 256);
-
- if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
- cl_perror("ioprio_set() call failed.");
- }
-}
-
-void
-close_device(struct sbd_context *st)
-{
- close(st->devfd);
- free(st);
-}
-
-struct sbd_context *
-open_device(const char* devname)
-{
- struct sbd_context *st;
-
- if (!devname)
- return NULL;
-
- st = malloc(sizeof(struct sbd_context));
- if (!st)
- return NULL;
- memset(st, 0, sizeof(struct sbd_context));
-
- if (io_setup(1, &st->ioctx) != 0) {
- cl_perror("io_setup failed");
- free(st);
- return NULL;
- }
-
- st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
-
- if (st->devfd == -1) {
- cl_perror("Opening device %s failed.", devname);
- free(st);
- return NULL;
- }
-
- ioctl(st->devfd, BLKSSZGET, &sector_size);
-
- if (sector_size == 0) {
- cl_perror("Get sector size failed.\n");
- close_device(st);
- return NULL;
- }
-
- return st;
-}
-
-signed char
-cmd2char(const char *cmd)
-{
- if (strcmp("clear", cmd) == 0) {
- return SBD_MSG_EMPTY;
- } else if (strcmp("test", cmd) == 0) {
- return SBD_MSG_TEST;
- } else if (strcmp("reset", cmd) == 0) {
- return SBD_MSG_RESET;
- } else if (strcmp("off", cmd) == 0) {
- return SBD_MSG_OFF;
- } else if (strcmp("exit", cmd) == 0) {
- return SBD_MSG_EXIT;
- } else if (strcmp("crashdump", cmd) == 0) {
- return SBD_MSG_CRASHDUMP;
- }
- return -1;
-}
-
-void *
-sector_alloc(void)
-{
- void *x;
-
- x = valloc(sector_size);
- if (!x) {
- exit(1);
- }
- memset(x, 0, sector_size);
-
- return x;
-}
-
-const char*
-char2cmd(const char cmd)
-{
- switch (cmd) {
- case SBD_MSG_EMPTY:
- return "clear";
- break;
- case SBD_MSG_TEST:
- return "test";
- break;
- case SBD_MSG_RESET:
- return "reset";
- break;
- case SBD_MSG_OFF:
- return "off";
- break;
- case SBD_MSG_EXIT:
- return "exit";
- break;
- case SBD_MSG_CRASHDUMP:
- return "crashdump";
- break;
- default:
- return "undefined";
- break;
- }
-}
-
-int
-sector_write(struct sbd_context *st, int sector, const void *data)
-{
- if (lseek(st->devfd, sector_size*sector, 0) < 0) {
- cl_perror("sector_write: lseek() failed");
- return -1;
- }
-
- if (write(st->devfd, data, sector_size) <= 0) {
- cl_perror("sector_write: write_sector() failed");
- return -1;
- }
- return(0);
-}
-
-int
-sector_read(struct sbd_context *st, int sector, void *data)
-{
- struct timespec timeout;
- struct io_event event;
- struct iocb *ios[1] = { &st->io };
- long r;
-
- timeout.tv_sec = timeout_io;
- timeout.tv_nsec = 0;
-
- memset(&st->io, 0, sizeof(struct iocb));
- io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector);
- if (io_submit(st->ioctx, 1, ios) != 1) {
- cl_log(LOG_ERR, "Failed to submit IO request!");
- return -1;
- }
-
- errno = 0;
- r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
-
- if (r < 0 ) {
- cl_log(LOG_ERR, "Failed to retrieve IO events");
- return -1;
- } else if (r < 1L) {
- cl_log(LOG_WARNING, "Cancelling IO request due to timeout");
- r = io_cancel(st->ioctx, ios[0], &event);
- if (r) {
- cl_log(LOG_ERR, "Could not cancel IO request!");
- /* TODO: Couldn't cancel the IO */
- }
- return -1;
- }
-
- /* IO is happy */
- if (event.res == sector_size) {
- return 0;
- } else {
- cl_log(LOG_ERR, "Short read");
- return -1;
- }
-}
-
-int
-slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
-{
- return sector_read(st, SLOT_TO_SECTOR(slot), s_node);
-}
-
-int
-slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node)
-{
- return sector_write(st, SLOT_TO_SECTOR(slot), s_node);
-}
-
-int
-mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
-{
- return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox);
-}
-
-int
-mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
-{
- return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox);
-}
-
-int
-mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
-{
- void *data;
- int rc = 0;
-
- if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
- return -1;
-
- data = sector_alloc();
- if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) {
- rc = -1;
- goto out;
- }
-
-
- if (memcmp(s_mbox, data, sector_size) != 0) {
- cl_log(LOG_ERR, "Write verification failed!");
- rc = -1;
- goto out;
- }
- rc = 0;
-out:
- free(data);
- return rc;
-}
-
-int header_write(struct sbd_context *st, struct sector_header_s *s_header)
-{
- s_header->sector_size = htonl(s_header->sector_size);
- s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
- s_header->timeout_allocate = htonl(s_header->timeout_allocate);
- s_header->timeout_loop = htonl(s_header->timeout_loop);
- s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
- return sector_write(st, 0, s_header);
-}
-
-int
-header_read(struct sbd_context *st, struct sector_header_s *s_header)
-{
- if (sector_read(st, 0, s_header) < 0)
- return -1;
-
- s_header->sector_size = ntohl(s_header->sector_size);
- s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
- s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
- s_header->timeout_loop = ntohl(s_header->timeout_loop);
- s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait);
- /* This sets the global defaults: */
- timeout_watchdog = s_header->timeout_watchdog;
- timeout_allocate = s_header->timeout_allocate;
- timeout_loop = s_header->timeout_loop;
- timeout_msgwait = s_header->timeout_msgwait;
-
- return 0;
-}
-
-int
-valid_header(const struct sector_header_s *s_header)
-{
- if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
- cl_log(LOG_ERR, "Header magic does not match.");
- return -1;
- }
- if (s_header->version != sbd_version) {
- cl_log(LOG_ERR, "Header version does not match.");
- return -1;
- }
- if (s_header->sector_size != sector_size) {
- cl_log(LOG_ERR, "Header sector size does not match.");
- return -1;
- }
- return 0;
-}
-
-struct sector_header_s *
-header_get(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- s_header = sector_alloc();
-
- if (header_read(st, s_header) < 0) {
- cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
- return NULL;
- }
-
- if (valid_header(s_header) < 0) {
- cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
- return NULL;
- }
-
- /* cl_log(LOG_INFO, "Found version %d header with %d slots",
- s_header->version, s_header->slots); */
-
- return s_header;
-}
-
-int
-init_device(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- struct sector_node_s *s_node;
- struct sector_mbox_s *s_mbox;
- struct stat s;
- int i;
- int rc = 0;
-
- s_header = sector_alloc();
- s_node = sector_alloc();
- s_mbox = sector_alloc();
- memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic));
- s_header->version = sbd_version;
- s_header->slots = 255;
- s_header->sector_size = sector_size;
- s_header->timeout_watchdog = timeout_watchdog;
- s_header->timeout_allocate = timeout_allocate;
- s_header->timeout_loop = timeout_loop;
- s_header->timeout_msgwait = timeout_msgwait;
-
- fstat(st->devfd, &s);
- /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
- s.st_size, s.st_blksize, s.st_blocks); */
-
- cl_log(LOG_INFO, "Creating version %d header on device %d",
- s_header->version,
- st->devfd);
- fprintf(stdout, "Creating version %d header on device %d\n",
- s_header->version,
- st->devfd);
- if (header_write(st, s_header) < 0) {
- rc = -1; goto out;
- }
- cl_log(LOG_INFO, "Initializing %d slots on device %d",
- s_header->slots,
- st->devfd);
- fprintf(stdout, "Initializing %d slots on device %d\n",
- s_header->slots,
- st->devfd);
- for (i=0;i < s_header->slots;i++) {
- if (slot_write(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (mbox_write(st, i, s_mbox) < 0) {
- rc = -1; goto out;
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return(rc);
-}
-
-/* Check if there already is a slot allocated to said name; returns the
- * slot number. If not found, returns -1.
- * This is necessary because slots might not be continuous. */
-int
-slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
-{
- struct sector_node_s *s_node = NULL;
- int i;
- int rc = -1;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
- goto out;
- }
-
- s_node = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -2; goto out;
- }
- if (s_node->in_use != 0) {
- if (strncasecmp(s_node->name, name,
- sizeof(s_node->name)) == 0) {
- cl_log(LOG_INFO, "%s owns slot %d", name, i);
- rc = i; goto out;
- }
- }
- }
-
-out: free(s_node);
- return rc;
-}
-
-int
-slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
-{
- struct sector_node_s *s_node;
- int i;
- int rc = -1;
-
- s_node = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (s_node->in_use == 0) {
- rc = i; goto out;
- }
- }
-
-out: free(s_node);
- return rc;
-}
-
-
-int
-slot_allocate(struct sbd_context *st, const char *name)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_node_s *s_node = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int i;
- int rc = 0;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
- fprintf(stderr, "slot_allocate(): No name specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- s_node = sector_alloc();
- s_mbox = sector_alloc();
-
- while (1) {
- i = slot_lookup(st, s_header, name);
- if ((i >= 0) || (i == -2)) {
- /* -1 is "no slot found", in which case we
- * proceed to allocate a new one.
- * -2 is "read error during lookup", in which
- * case we error out too
- * >= 0 is "slot already allocated" */
- rc = i; goto out;
- }
-
- i = slot_unused(st, s_header);
- if (i >= 0) {
- cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
- fprintf(stdout, "slot %d is unused - trying to own\n", i);
- memset(s_node, 0, sizeof(*s_node));
- s_node->in_use = 1;
- strncpy(s_node->name, name, sizeof(s_node->name));
- if (slot_write(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- sleep(timeout_allocate);
- } else {
- cl_log(LOG_ERR, "No more free slots.");
- fprintf(stderr, "No more free slots.\n");
- rc = -1; goto out;
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return(rc);
-}
-
-int
-slot_list(struct sbd_context *st)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_node_s *s_node = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int i;
- int rc = 0;
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- s_node = sector_alloc();
- s_mbox = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (s_node->in_use > 0) {
- if (mbox_read(st, i, s_mbox) < 0) {
- rc = -1; goto out;
- }
- printf("%d\t%s\t%s\t%s\n",
- i, s_node->name, char2cmd(s_mbox->cmd),
- s_mbox->from);
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return rc;
-}
-
-int
-slot_msg(struct sbd_context *st, const char *name, const char *cmd)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int rc = 0;
-
- if (!name || !cmd) {
- cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- if (strcmp(name, "LOCAL") == 0) {
- name = local_uname;
- }
-
- mbox = slot_lookup(st, s_header, name);
- if (mbox < 0) {
- cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
- rc = -1; goto out;
- }
-
- s_mbox = sector_alloc();
-
- s_mbox->cmd = cmd2char(cmd);
- if (s_mbox->cmd < 0) {
- cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
- rc = -1; goto out;
- }
-
- strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
-
- cl_log(LOG_INFO, "Writing %s to node slot %s",
- cmd, name);
- if (mbox_write_verify(st, mbox, s_mbox) < -1) {
- rc = -1; goto out;
- }
- if (strcasecmp(cmd, "exit") != 0) {
- sleep(timeout_msgwait);
- }
- cl_log(LOG_INFO, "%s successfully delivered to %s",
- cmd, name);
-
-out: free(s_mbox);
- free(s_header);
- return rc;
-}
-
-int
-slot_ping(struct sbd_context *st, const char *name)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int waited = 0;
- int rc = 0;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- if (strcmp(name, "LOCAL") == 0) {
- name = local_uname;
- }
-
- mbox = slot_lookup(st, s_header, name);
- if (mbox < 0) {
- cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
- rc = -1; goto out;
- }
-
- s_mbox = sector_alloc();
- s_mbox->cmd = SBD_MSG_TEST;
-
- strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
-
- cl_log(LOG_DEBUG, "Pinging node %s", name);
- if (mbox_write(st, mbox, s_mbox) < -1) {
- rc = -1; goto out;
- }
-
- rc = -1;
- while (waited <= timeout_msgwait) {
- if (mbox_read(st, mbox, s_mbox) < 0)
- break;
- if (s_mbox->cmd != SBD_MSG_TEST) {
- rc = 0;
- break;
- }
- sleep(1);
- waited++;
- }
-
- if (rc == 0) {
- cl_log(LOG_DEBUG, "%s successfully pinged.", name);
- } else {
- cl_log(LOG_ERR, "%s failed to ping.", name);
- }
-
-out: free(s_mbox);
- free(s_header);
- return rc;
-}
-
-void
-sysrq_init(void)
-{
- FILE* procf;
- int c;
- procf = fopen("/proc/sys/kernel/sysrq", "r");
- if (!procf) {
- cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
- return;
- }
- fscanf(procf, "%d", &c);
- fclose(procf);
- if (c == 1)
- return;
- /* 8 for debugging dumps of processes,
- 128 for reboot/poweroff */
- c |= 136;
- procf = fopen("/proc/sys/kernel/sysrq", "w");
- if (!procf) {
- printf("cannot open /proc/sys/kernel/sysrq for write\n");
- return;
- }
- fprintf(procf, "%d", c);
- fclose(procf);
- return;
-}
-
-void
-sysrq_trigger(char t)
-{
- FILE *procf;
-
- procf = fopen("/proc/sysrq-trigger", "a");
- if (!procf) {
- cl_perror("Opening sysrq-trigger failed.");
- return;
- }
- cl_log(LOG_INFO, "sysrq-trigger: %c\n", t);
- fprintf(procf, "%c\n", t);
- fclose(procf);
- return;
-}
-
-void
-do_crashdump(void)
-{
- sysrq_trigger('c');
- /* is it possible to reach the following line? */
- cl_reboot(5, "sbd is triggering crashdumping");
- exit(1);
-}
-
-void
-do_reset(void)
-{
- if (debug_mode == 2) {
- cl_log(LOG_ERR, "Skipping request to suicide due to DEBUG MODE!");
- watchdog_close();
- exit(0);
- }
- if (debug_mode == 1) {
- cl_log(LOG_ERR, "Request to suicide changed to kdump due to DEBUG MODE!");
- watchdog_close();
- sysrq_trigger('c');
- exit(0);
- }
- sysrq_trigger('b');
- cl_reboot(5, "sbd is self-fencing (reset)");
- sleep(timeout_watchdog * 2);
- exit(1);
-}
-
-void
-do_off(void)
-{
- if (debug_mode == 2) {
- cl_log(LOG_ERR, "Skipping request to power-off due to DEBUG MODE!");
- watchdog_close();
- exit(0);
- }
- if (debug_mode == 1) {
- cl_log(LOG_ERR, "Request to power-off changed to kdump due to DEBUG MODE!");
- watchdog_close();
- sysrq_trigger('c');
- exit(0);
- }
- sysrq_trigger('o');
- cl_reboot(5, "sbd is self-fencing (power-off)");
- sleep(timeout_watchdog * 2);
- exit(1);
-}
-
-pid_t
-make_daemon(void)
-{
- pid_t pid;
- const char * devnull = "/dev/null";
-
- pid = fork();
- if (pid < 0) {
- cl_log(LOG_ERR, "%s: could not start daemon\n",
- cmdname);
- cl_perror("fork");
- exit(1);
- }else if (pid > 0) {
- return pid;
- }
-
- cl_log_enable_stderr(FALSE);
-
- /* This is the child; ensure privileges have not been lost. */
- maximize_priority();
-
- umask(022);
- close(0);
- (void)open(devnull, O_RDONLY);
- close(1);
- (void)open(devnull, O_WRONLY);
- close(2);
- (void)open(devnull, O_WRONLY);
- cl_cdtocoredir();
- return 0;
-}
-
-int
-header_dump(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- s_header = header_get(st);
- if (s_header == NULL)
- return -1;
-
- printf("Header version : %u\n", s_header->version);
- printf("Number of slots : %u\n", s_header->slots);
- printf("Sector size : %lu\n",
- (unsigned long)s_header->sector_size);
- printf("Timeout (watchdog) : %lu\n",
- (unsigned long)s_header->timeout_watchdog);
- printf("Timeout (allocate) : %lu\n",
- (unsigned long)s_header->timeout_allocate);
- printf("Timeout (loop) : %lu\n",
- (unsigned long)s_header->timeout_loop);
- printf("Timeout (msgwait) : %lu\n",
- (unsigned long)s_header->timeout_msgwait);
- return 0;
-}
-
-void
-get_uname(void)
-{
- struct utsname uname_buf;
- int i;
-
- if (uname(&uname_buf) < 0) {
- cl_perror("uname() failed?");
- exit(1);
- }
-
- local_uname = strdup(uname_buf.nodename);
-
- for (i = 0; i < strlen(local_uname); i++)
- local_uname[i] = tolower(local_uname[i]);
-}
-
diff --git a/lib/stonith/sbd-md.c b/lib/stonith/sbd-md.c
deleted file mode 100644
index 4fbfb5d..0000000
--- a/lib/stonith/sbd-md.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "sbd.h"
-
-struct servants_list_item *servants_leader = NULL;
-
-static int servant_count = 0;
-static int servant_restart_interval = 60;
-static int servant_restart_count = 10;
-static int servant_inform_parent = 0;
-
-/* signals reserved for multi-disk sbd */
-#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
-#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
-#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
-#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */
-#define SIG_IO_FAIL (SIGRTMIN + 5) /* the IO child requests to be considered failed */
-/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
-
-/* Debug Helper */
-#if 0
-#define DBGPRINT(...) fprintf(stderr, __VA_ARGS__)
-#else
-#define DBGPRINT(...) do {} while (0)
-#endif
-
-int quorum_write(int good_servants)
-{
- return (good_servants > servant_count/2);
-}
-
-int quorum_read(int good_servants)
-{
- if (servant_count >= 3)
- return (good_servants > servant_count/2);
- else
- return (good_servants >= 1);
-}
-
-int assign_servant(const char* devname, functionp_t functionp, const void* argp)
-{
- pid_t pid = 0;
- int rc = 0;
-
- DBGPRINT("fork servant for %s\n", devname);
- pid = fork();
- if (pid == 0) { /* child */
- maximize_priority();
- rc = (*functionp)(devname, argp);
- if (rc == -1)
- exit(1);
- else
- exit(0);
- } else if (pid != -1) { /* parent */
- return pid;
- } else {
- cl_log(LOG_ERR,"Failed to fork servant");
- exit(1);
- }
-}
-
-int init_devices()
-{
- int rc = 0;
- struct sbd_context *st;
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "Initializing device %s\n",
- s->devname);
- st = open_device(s->devname);
- if (!st) {
- return -1;
- }
- rc = init_device(st);
- close_device(st);
- if (rc == -1) {
- fprintf(stderr, "Failed to init device %s\n", s->devname);
- return rc;
- }
- fprintf(stdout, "Device %s is initialized.\n", s->devname);
- }
- return 0;
-}
-
-int slot_msg_wrapper(const char* devname, const void* argp)
-{
- int rc = 0;
- struct sbd_context *st;
- const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
-
- st = open_device(devname);
- if (!st)
- return -1;
- rc = slot_msg(st, arg->name, arg->msg);
- close_device(st);
- return rc;
-}
-
-int slot_ping_wrapper(const char* devname, const void* argp)
-{
- int rc = 0;
- const char* name = (const char*)argp;
- struct sbd_context *st;
-
- st = open_device(devname);
- if (!st)
- return -1;
- rc = slot_ping(st, name);
- close_device(st);
- return rc;
-}
-
-int allocate_slots(const char *name)
-{
- int rc = 0;
- struct sbd_context *st;
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
- name,
- s->devname);
- st = open_device(s->devname);
- if (!st) {
- return -1;
- }
- rc = slot_allocate(st, name);
- close_device(st);
- if (rc < 0)
- return rc;
- fprintf(stdout, "Slot for %s has been allocated on %s.\n",
- name,
- s->devname);
- }
- return 0;
-}
-
-int list_slots()
-{
- int rc = 0;
- struct servants_list_item *s;
- struct sbd_context *st;
-
- for (s = servants_leader; s; s = s->next) {
- DBGPRINT("list slots on device %s\n", s->devname);
- st = open_device(s->devname);
- if (!st)
- return -1;
- rc = slot_list(st);
- close_device(st);
- if (rc == -1)
- return rc;
- }
- return 0;
-}
-
-int ping_via_slots(const char *name)
-{
- int sig = 0;
- pid_t pid = 0;
- int status = 0;
- int servants_finished = 0;
- sigset_t procmask;
- siginfo_t sinfo;
- struct servants_list_item *s;
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- for (s = servants_leader; s; s = s->next) {
- s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name);
- }
-
- while (servants_finished < servant_count) {
- sig = sigwaitinfo(&procmask, &sinfo);
- DBGPRINT("get signal %d\n", sig);
- if (sig == SIGCHLD) {
- while ((pid = wait(&status))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- s = lookup_servant_by_pid(pid);
- if (s) {
- DBGPRINT
- ("A ping is delivered to %s via %s. ",
- name, s->devname);
- if (!status)
- DBGPRINT
- ("They responed to the emporer\n");
- else
- DBGPRINT
- ("There's no response\n");
- servants_finished++;
- }
- }
- }
- }
- DBGPRINT("signal %d handled\n", sig);
- }
- return 0;
-}
-
-/* This is a bit hackish, but the easiest way to rewire all process
- * exits to send the desired signal to the parent. */
-void servant_exit(void)
-{
- pid_t ppid;
- union sigval signal_value;
-
- ppid = getppid();
- if (servant_inform_parent) {
- memset(&signal_value, 0, sizeof(signal_value));
- sigqueue(ppid, SIG_IO_FAIL, signal_value);
- }
-}
-
-int servant(const char *diskname, const void* argp)
-{
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int rc = 0;
- time_t t0, t1, latency;
- union sigval signal_value;
- sigset_t servant_masks;
- struct sbd_context *st;
- pid_t ppid;
-
- if (!diskname) {
- cl_log(LOG_ERR, "Empty disk name %s.", diskname);
- return -1;
- }
-
- cl_log(LOG_INFO, "Servant starting for device %s", diskname);
-
- /* Block most of the signals */
- sigfillset(&servant_masks);
- sigdelset(&servant_masks, SIGKILL);
- sigdelset(&servant_masks, SIGFPE);
- sigdelset(&servant_masks, SIGILL);
- sigdelset(&servant_masks, SIGSEGV);
- sigdelset(&servant_masks, SIGBUS);
- sigdelset(&servant_masks, SIGALRM);
- /* FIXME: check error */
- sigprocmask(SIG_SETMASK, &servant_masks, NULL);
-
- atexit(servant_exit);
- servant_inform_parent = 1;
-
- st = open_device(diskname);
- if (!st) {
- return -1;
- }
-
- mbox = slot_allocate(st, local_uname);
- if (mbox < 0) {
- cl_log(LOG_ERR,
- "No slot allocated, and automatic allocation failed for disk %s.",
- diskname);
- rc = -1;
- goto out;
- }
- cl_log(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
- set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
-
- s_mbox = sector_alloc();
- if (mbox_write(st, mbox, s_mbox) < 0) {
- rc = -1;
- goto out;
- }
-
- memset(&signal_value, 0, sizeof(signal_value));
-
- while (1) {
- t0 = time(NULL);
- sleep(timeout_loop);
-
- ppid = getppid();
-
- if (ppid == 1) {
- /* Our parent died unexpectedly. Triggering
- * self-fence. */
- do_reset();
- }
-
- if (mbox_read(st, mbox, s_mbox) < 0) {
- cl_log(LOG_ERR, "mbox read failed in servant.");
- exit(1);
- }
-
- if (s_mbox->cmd > 0) {
- cl_log(LOG_INFO,
- "Received command %s from %s on disk %s",
- char2cmd(s_mbox->cmd), s_mbox->from, diskname);
-
- switch (s_mbox->cmd) {
- case SBD_MSG_TEST:
- memset(s_mbox, 0, sizeof(*s_mbox));
- mbox_write(st, mbox, s_mbox);
- sigqueue(ppid, SIG_TEST, signal_value);
- break;
- case SBD_MSG_RESET:
- do_reset();
- break;
- case SBD_MSG_OFF:
- do_off();
- break;
- case SBD_MSG_EXIT:
- sigqueue(ppid, SIG_EXITREQ, signal_value);
- break;
- case SBD_MSG_CRASHDUMP:
- do_crashdump();
- break;
- default:
- /* FIXME:
- An "unknown" message might result
- from a partial write.
- log it and clear the slot.
- */
- cl_log(LOG_ERR, "Unknown message on disk %s",
- diskname);
- memset(s_mbox, 0, sizeof(*s_mbox));
- mbox_write(st, mbox, s_mbox);
- break;
- }
- }
- sigqueue(ppid, SIG_LIVENESS, signal_value);
-
- t1 = time(NULL);
- latency = t1 - t0;
- if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
- cl_log(LOG_WARNING,
- "Latency: %d exceeded threshold %d on disk %s",
- (int)latency, (int)timeout_watchdog_warn,
- diskname);
- } else if (debug) {
- cl_log(LOG_INFO, "Latency: %d on disk %s", (int)latency,
- diskname);
- }
- }
- out:
- free(s_mbox);
- close_device(st);
- if (rc == 0) {
- servant_inform_parent = 0;
- }
- return rc;
-}
-
-void recruit_servant(const char *devname, pid_t pid)
-{
- struct servants_list_item *s = servants_leader;
- struct servants_list_item *newbie;
-
- newbie = malloc(sizeof(*newbie));
- if (!newbie) {
- fprintf(stderr, "malloc failed in recruit_servant.");
- exit(1);
- }
- memset(newbie, 0, sizeof(*newbie));
- newbie->devname = strdup(devname);
- newbie->pid = pid;
-
- if (!s) {
- servants_leader = newbie;
- } else {
- while (s->next)
- s = s->next;
- s->next = newbie;
- }
-
- servant_count++;
-}
-
-struct servants_list_item *lookup_servant_by_dev(const char *devname)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- if (strncasecmp(s->devname, devname, strlen(s->devname)))
- break;
- }
- return s;
-}
-
-struct servants_list_item *lookup_servant_by_pid(pid_t pid)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid == pid)
- break;
- }
- return s;
-}
-
-int check_all_dead(void)
-{
- struct servants_list_item *s;
- int r = 0;
- union sigval svalue;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid != 0) {
- r = sigqueue(s->pid, 0, svalue);
- if (r == -1 && errno == ESRCH)
- continue;
- return 0;
- }
- }
- return 1;
-}
-
-
-void servant_start(struct servants_list_item *s)
-{
- int r = 0;
- union sigval svalue;
-
- if (s->pid != 0) {
- r = sigqueue(s->pid, 0, svalue);
- if ((r != -1 || errno != ESRCH))
- return;
- }
- cl_log(LOG_INFO, "Starting servant for device %s",
- s->devname);
- s->restarts++;
- s->pid = assign_servant(s->devname, servant, NULL);
- clock_gettime(CLOCK_MONOTONIC, &s->t_started);
- return;
-}
-
-void servants_start(void)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- s->restarts = 0;
- servant_start(s);
- }
-}
-
-void servants_kill(void)
-{
- struct servants_list_item *s;
- union sigval svalue;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid != 0)
- sigqueue(s->pid, SIGKILL, svalue);
- }
-}
-
-int check_timeout_inconsistent(void)
-{
- struct sbd_context *st;
- struct sector_header_s *hdr_cur = 0, *hdr_last = 0;
- struct servants_list_item* s;
- int inconsistent = 0;
-
- for (s = servants_leader; s; s = s->next) {
- st = open_device(s->devname);
- if (!st)
- continue;
- hdr_cur = header_get(st);
- close_device(st);
- if (!hdr_cur)
- continue;
- if (hdr_last) {
- if (hdr_last->timeout_watchdog != hdr_cur->timeout_watchdog
- || hdr_last->timeout_allocate != hdr_cur->timeout_allocate
- || hdr_last->timeout_loop != hdr_cur->timeout_loop
- || hdr_last->timeout_msgwait != hdr_cur->timeout_msgwait)
- inconsistent = 1;
- free(hdr_last);
- }
- hdr_last = hdr_cur;
- }
-
- if (hdr_last) {
- timeout_watchdog = hdr_last->timeout_watchdog;
- timeout_allocate = hdr_last->timeout_allocate;
- timeout_loop = hdr_last->timeout_loop;
- timeout_msgwait = hdr_last->timeout_msgwait;
- } else {
- cl_log(LOG_ERR, "No devices were available at start-up.");
- exit(1);
- }
-
- free(hdr_last);
- return inconsistent;
-}
-
-inline void cleanup_servant_by_pid(pid_t pid)
-{
- struct servants_list_item* s;
-
- s = lookup_servant_by_pid(pid);
- if (s) {
- cl_log(LOG_WARNING, "Servant for %s (pid: %i) has terminated",
- s->devname, s->pid);
- s->pid = 0;
- } else {
- /* This most likely is a stray signal from somewhere, or
- * a SIGCHLD for a process that has previously
- * explicitly disconnected. */
- cl_log(LOG_INFO, "cleanup_servant: Nothing known about pid %i",
- pid);
- }
-}
-
-int inquisitor_decouple(void)
-{
- pid_t ppid = getppid();
- union sigval signal_value;
-
- /* During start-up, we only arm the watchdog once we've got
- * quorum at least once. */
- if (watchdog_use) {
- if (watchdog_init() < 0) {
- return -1;
- }
- }
-
- if (ppid > 1) {
- sigqueue(ppid, SIG_LIVENESS, signal_value);
- }
- return 0;
-}
-
-void inquisitor_child(void)
-{
- int sig, pid;
- sigset_t procmask;
- siginfo_t sinfo;
- int status;
- struct timespec timeout;
- int good_servants = 0;
- int exiting = 0;
- int decoupled = 0;
- time_t latency;
- struct timespec t_last_tickle, t_now;
- struct servants_list_item* s;
-
- if (debug_mode) {
- cl_log(LOG_ERR, "DEBUG MODE IS ACTIVE - DO NOT RUN IN PRODUCTION!");
- }
-
- set_proc_title("sbd: inquisitor");
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigaddset(&procmask, SIG_LIVENESS);
- sigaddset(&procmask, SIG_EXITREQ);
- sigaddset(&procmask, SIG_TEST);
- sigaddset(&procmask, SIG_IO_FAIL);
- sigaddset(&procmask, SIGUSR1);
- sigaddset(&procmask, SIGUSR2);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- servants_start();
-
- timeout.tv_sec = timeout_loop;
- timeout.tv_nsec = 0;
- good_servants = 0;
- clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
-
- while (1) {
- sig = sigtimedwait(&procmask, &sinfo, &timeout);
- DBGPRINT("got signal %d\n", sig);
-
- clock_gettime(CLOCK_MONOTONIC, &t_now);
-
- if (sig == SIG_EXITREQ) {
- servants_kill();
- watchdog_close();
- exiting = 1;
- } else if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- cleanup_servant_by_pid(pid);
- }
- }
- } else if (sig == SIG_IO_FAIL) {
- s = lookup_servant_by_pid(sinfo.si_pid);
- if (s) {
- cl_log(LOG_WARNING, "Servant for %s requests to be disowned",
- s->devname);
- cleanup_servant_by_pid(sinfo.si_pid);
- }
- } else if (sig == SIG_LIVENESS) {
- s = lookup_servant_by_pid(sinfo.si_pid);
- if (s) {
- clock_gettime(CLOCK_MONOTONIC, &s->t_last);
- }
- } else if (sig == SIG_TEST) {
- } else if (sig == SIGUSR1) {
- if (exiting)
- continue;
- servants_start();
- }
-
- if (exiting) {
- if (check_all_dead())
- exit(0);
- else
- continue;
- }
-
- good_servants = 0;
- for (s = servants_leader; s; s = s->next) {
- int age = t_now.tv_sec - s->t_last.tv_sec;
-
- if (!s->t_last.tv_sec)
- continue;
-
- if (age < timeout_watchdog) {
- good_servants++;
- } else {
- if (!s->restart_blocked)
- cl_log(LOG_WARNING, "Servant for %s outdated (age: %d)",
- s->devname, age);
- }
- }
-
- if (quorum_read(good_servants)) {
- if (!decoupled) {
- if (inquisitor_decouple() < 0) {
- servants_kill();
- exiting = 1;
- continue;
- } else {
- decoupled = 1;
- }
- }
-
- watchdog_tickle();
- clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
- }
-
- latency = t_now.tv_sec - t_last_tickle.tv_sec;
- if (timeout_watchdog && (latency > timeout_watchdog)) {
- if (!decoupled) {
- /* We're still being watched by our
- * parent. We don't fence, but exit. */
- cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
- servants_kill();
- exiting = 1;
- continue;
- }
- if (debug_mode < 2) {
- /* At level 2, we do nothing, but expect
- * things to eventually return to
- * normal. */
- do_reset();
- } else {
- cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
- }
- }
- if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
- cl_log(LOG_WARNING,
- "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)",
- (int)latency, (int)timeout_watchdog_warn, good_servants);
- }
-
- for (s = servants_leader; s; s = s->next) {
- int age = t_now.tv_sec - s->t_started.tv_sec;
-
- if (age > servant_restart_interval) {
- s->restarts = 0;
- s->restart_blocked = 0;
- }
-
- if (servant_restart_count
- && (s->restarts >= servant_restart_count)
- && !s->restart_blocked) {
- if (servant_restart_count > 1) {
- cl_log(LOG_WARNING, "Max retry count reached: not restarting servant for %s",
- s->devname);
- }
- s->restart_blocked = 1;
- }
-
- if (!s->restart_blocked) {
- servant_start(s);
- }
- }
- }
- /* not reached */
- exit(0);
-}
-
-int inquisitor(void)
-{
- int sig, pid, inquisitor_pid;
- int status;
- sigset_t procmask;
- siginfo_t sinfo;
-
- DBGPRINT("inquisitor starting\n");
-
- /* Where's the best place for sysrq init ?*/
- sysrq_init();
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigaddset(&procmask, SIG_LIVENESS);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- if (check_timeout_inconsistent() == 1) {
- fprintf(stderr, "Timeout settings are different across SBD devices!\n");
- fprintf(stderr, "You have to correct them and re-start SBD again.\n");
- return -1;
- }
-
- inquisitor_pid = make_daemon();
- if (inquisitor_pid == 0) {
- inquisitor_child();
- }
-
- /* We're the parent. Wait for a happy signal from our child
- * before we proceed - we either get "SIG_LIVENESS" when the
- * inquisitor has completed the first successful round, or
- * ECHLD when it exits with an error. */
-
- while (1) {
- sig = sigwaitinfo(&procmask, &sinfo);
- DBGPRINT("get signal %d\n", sig);
- if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- }
- /* We got here because the inquisitor
- * did not succeed. */
- return -1;
- }
- } else if (sig == SIG_LIVENESS) {
- /* Inquisitor started up properly. */
- return 0;
- } else {
- fprintf(stderr, "Nobody expected the spanish inquisition!\n");
- continue;
- }
- }
- /* not reached */
- return -1;
-}
-
-int messenger(const char *name, const char *msg)
-{
- int sig = 0;
- pid_t pid = 0;
- int status = 0;
- int servants_finished = 0;
- int successful_delivery = 0;
- sigset_t procmask;
- siginfo_t sinfo;
- struct servants_list_item *s;
- struct slot_msg_arg_t slot_msg_arg = {name, msg};
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- for (s = servants_leader; s; s = s->next) {
- s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg);
- }
-
- while (!(quorum_write(successful_delivery) ||
- (servants_finished == servant_count))) {
- sig = sigwaitinfo(&procmask, &sinfo);
- DBGPRINT("get signal %d\n", sig);
- if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- DBGPRINT("process %d finished\n", pid);
- servants_finished++;
- if (WIFEXITED(status)
- && WEXITSTATUS(status) == 0) {
- DBGPRINT("exit with %d\n",
- WEXITSTATUS(status));
- successful_delivery++;
- }
- }
- }
- }
- DBGPRINT("signal %d handled\n", sig);
- }
- if (quorum_write(successful_delivery)) {
- return 0;
- } else {
- fprintf(stderr, "Message is not delivered via more then a half of devices\n");
- return -1;
- }
-}
-
-int dump_headers(void)
-{
- int rc = 0;
- struct servants_list_item *s = servants_leader;
- struct sbd_context *st;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "==Dumping header on disk %s\n", s->devname);
- st = open_device(s->devname);
- if (!st)
- return -1;
- rc = header_dump(st);
- close_device(st);
- if (rc == -1)
- return rc;
- fprintf(stdout, "==Header on disk %s is dumped\n", s->devname);
- }
- return rc;
-}
-
-int main(int argc, char **argv, char **envp)
-{
- int exit_status = 0;
- int c;
-
- if ((cmdname = strrchr(argv[0], '/')) == NULL) {
- cmdname = argv[0];
- } else {
- ++cmdname;
- }
-
- cl_log_set_entity(cmdname);
- cl_log_enable_stderr(0);
- cl_log_set_facility(LOG_DAEMON);
-
- get_uname();
-
- while ((c = getopt(argc, argv, "DRTWZhvw:d:n:1:2:3:4:5:t:I:F:")) != -1) {
- switch (c) {
- case 'D':
- break;
- case 'Z':
- debug_mode++;
- break;
- case 'R':
- skip_rt = 1;
- break;
- case 'v':
- debug = 1;
- break;
- case 'T':
- watchdog_set_timeout = 0;
- break;
- case 'W':
- watchdog_use = 1;
- break;
- case 'w':
- watchdogdev = optarg;
- break;
- case 'd':
- recruit_servant(optarg, 0);
- break;
- case 'n':
- local_uname = optarg;
- break;
- case '1':
- timeout_watchdog = atoi(optarg);
- break;
- case '2':
- timeout_allocate = atoi(optarg);
- break;
- case '3':
- timeout_loop = atoi(optarg);
- break;
- case '4':
- timeout_msgwait = atoi(optarg);
- break;
- case '5':
- timeout_watchdog_warn = atoi(optarg);
- break;
- case 't':
- servant_restart_interval = atoi(optarg);
- break;
- case 'I':
- timeout_io = atoi(optarg);
- break;
- case 'F':
- servant_restart_count = atoi(optarg);
- break;
- case 'h':
- usage();
- return (0);
- default:
- exit_status = -1;
- goto out;
- break;
- }
- }
-
- if (servant_count < 1 || servant_count > 3) {
- fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n");
- exit_status = -1;
- goto out;
- }
-
- /* There must at least be one command following the options: */
- if ((argc - optind) < 1) {
- fprintf(stderr, "Not enough arguments.\n");
- exit_status = -1;
- goto out;
- }
-
- if (init_set_proc_title(argc, argv, envp) < 0) {
- fprintf(stderr, "Allocation of proc title failed.");
- exit(1);
- }
-
- maximize_priority();
-
- if (strcmp(argv[optind], "create") == 0) {
- exit_status = init_devices();
- } else if (strcmp(argv[optind], "dump") == 0) {
- exit_status = dump_headers();
- } else if (strcmp(argv[optind], "allocate") == 0) {
- exit_status = allocate_slots(argv[optind + 1]);
- } else if (strcmp(argv[optind], "list") == 0) {
- exit_status = list_slots();
- } else if (strcmp(argv[optind], "message") == 0) {
- exit_status = messenger(argv[optind + 1], argv[optind + 2]);
- } else if (strcmp(argv[optind], "ping") == 0) {
- exit_status = ping_via_slots(argv[optind + 1]);
- } else if (strcmp(argv[optind], "watch") == 0) {
- exit_status = inquisitor();
- } else {
- exit_status = -1;
- }
-
-out:
- if (exit_status < 0) {
- usage();
- return (1);
- }
- return (0);
-}
diff --git a/lib/stonith/sbd.h b/lib/stonith/sbd.h
deleted file mode 100644
index 1517760..0000000
--- a/lib/stonith/sbd.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb at suse.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <arpa/inet.h>
-#include <asm/unistd.h>
-#include <clplumbing/cl_log.h>
-#include <clplumbing/cl_reboot.h>
-#include <clplumbing/coredumps.h>
-#include <clplumbing/realtime.h>
-#include <clplumbing/setproctitle.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libaio.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <malloc.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/ptrace.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <syslog.h>
-#include <time.h>
-#include <unistd.h>
-
-/* Sector data types */
-struct sector_header_s {
- char magic[8];
- unsigned char version;
- unsigned char slots;
- /* Caveat: stored in network byte-order */
- uint32_t sector_size;
- uint32_t timeout_watchdog;
- uint32_t timeout_allocate;
- uint32_t timeout_loop;
- uint32_t timeout_msgwait;
-};
-
-struct sector_mbox_s {
- signed char cmd;
- char from[64];
-};
-
-struct sector_node_s {
- /* slots will be created with in_use == 0 */
- char in_use;
- char name[64];
-};
-
-struct servants_list_item {
- const char* devname;
- pid_t pid;
- int restarts;
- int restart_blocked;
- struct timespec t_last, t_started;
- struct servants_list_item *next;
-};
-
-struct sbd_context {
- int devfd;
- io_context_t ioctx;
- struct iocb io;
-};
-
-#define SBD_MSG_EMPTY 0x00
-#define SBD_MSG_TEST 0x01
-#define SBD_MSG_RESET 0x02
-#define SBD_MSG_OFF 0x03
-#define SBD_MSG_EXIT 0x04
-#define SBD_MSG_CRASHDUMP 0x05
-
-#define SLOT_TO_SECTOR(slot) (1+slot*2)
-#define MBOX_TO_SECTOR(mbox) (2+mbox*2)
-
-void usage(void);
-int watchdog_init_interval(void);
-int watchdog_tickle(void);
-int watchdog_init(void);
-void sysrq_init(void);
-void watchdog_close(void);
-struct sbd_context *open_device(const char* devname);
-void close_device(struct sbd_context *st);
-signed char cmd2char(const char *cmd);
-void * sector_alloc(void);
-const char* char2cmd(const char cmd);
-int sector_write(struct sbd_context *st, int sector, const void *data);
-int sector_read(struct sbd_context *st, int sector, void *data);
-int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node);
-int slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node);
-int mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
-int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox);
-int mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
-/* After a call to header_write(), certain data fields will have been
- * converted to on-disk byte-order; the header should not be accessed
- * afterwards anymore! */
-int header_write(struct sbd_context *st, struct sector_header_s *s_header);
-int header_read(struct sbd_context *st, struct sector_header_s *s_header);
-int valid_header(const struct sector_header_s *s_header);
-struct sector_header_s * header_get(struct sbd_context *st);
-int init_device(struct sbd_context *st);
-int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name);
-int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header);
-int slot_allocate(struct sbd_context *st, const char *name);
-int slot_list(struct sbd_context *st);
-int slot_ping(struct sbd_context *st, const char *name);
-int slot_msg(struct sbd_context *st, const char *name, const char *cmd);
-int header_dump(struct sbd_context *st);
-void sysrq_trigger(char t);
-void do_crashdump(void);
-void do_reset(void);
-void do_off(void);
-pid_t make_daemon(void);
-void maximize_priority(void);
-void get_uname(void);
-
-/* Tunable defaults: */
-extern unsigned long timeout_watchdog;
-extern unsigned long timeout_watchdog_warn;
-extern int timeout_allocate;
-extern int timeout_loop;
-extern int timeout_msgwait;
-extern int timeout_io;
-extern int watchdog_use;
-extern int watchdog_set_timeout;
-extern int skip_rt;
-extern int debug;
-extern int debug_mode;
-extern const char *watchdogdev;
-extern char* local_uname;
-
-/* Global, non-tunable variables: */
-extern int sector_size;
-extern int watchdogfd;
-extern const char* cmdname;
-
-typedef int (*functionp_t)(const char* devname, const void* argp);
-
-int assign_servant(const char* devname, functionp_t functionp, const void* argp);
-int init_devices(void);
-struct slot_msg_arg_t {
- const char* name;
- const char* msg;
-};
-int slot_msg_wrapper(const char* devname, const void* argp);
-int slot_ping_wrapper(const char* devname, const void* argp);
-int allocate_slots(const char *name);
-int list_slots(void);
-int ping_via_slots(const char *name);
-int dump_headers(void);
-
-int check_all_dead(void);
-void servant_exit(void);
-int servant(const char *diskname, const void* argp);
-void recruit_servant(const char *devname, pid_t pid);
-struct servants_list_item *lookup_servant_by_dev(const char *devname);
-struct servants_list_item *lookup_servant_by_pid(pid_t pid);
-void servants_kill(void);
-void servants_start(void);
-void servant_start(struct servants_list_item *s);
-void inquisitor_child(void);
-int inquisitor(void);
-int inquisitor_decouple(void);
-int messenger(const char *name, const char *msg);
-int check_timeout_inconsistent(void);
-void cleanup_servant_by_pid(pid_t pid);
-int quorum_write(int good_servants);
-int quorum_read(int good_servants);
-
diff --git a/lrm/lrmd/lrmd.c b/lrm/lrmd/lrmd.c
index 93b753f..2ac75d6 100644
--- a/lrm/lrmd/lrmd.c
+++ b/lrm/lrmd/lrmd.c
@@ -3121,17 +3121,6 @@ perform_ra_op(lrmd_op_t* op)
}
op_type = ha_msg_value(op->msg, F_LRM_OP);
- op_params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
- params = merge_str_tables(rsc->params,op_params);
- ha_msg_mod_str_table(op->msg, F_LRM_PARAM, params);
- if (op_params) {
- free_str_table(op_params);
- op_params = NULL;
- }
- if (params) {
- free_str_table(params);
- params = NULL;
- }
op->t_perform = time_longclock();
check_queue_duration(op);
@@ -3261,7 +3250,14 @@ perform_ra_op(lrmd_op_t* op)
lrmd_debug2(LOG_DEBUG
, "perform_ra_op:calling RA plugin to perform %s, pid: [%d]"
, op_info(op), getpid());
- params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
+
+ op_params = ha_msg_value_str_table(op->msg, F_LRM_PARAM);
+ params = merge_str_tables(rsc->params,op_params);
+ if (op_params) {
+ free_str_table(op_params);
+ op_params = NULL;
+ }
+
if (replace_secret_params(rsc->id, params) < 0) {
/* replacing secrets failed! */
if (!strcmp(op_type,"stop")) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-ha/cluster-glue.git
More information about the Debian-HA-Commits
mailing list