[Debian-ha-commits] [resource-agents] 01/03: Imported Upstream version 3.9.7
Christoph Berg
myon at debian.org
Sat Feb 13 11:01:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
myon pushed a commit to branch master
in repository resource-agents.
commit ee74f98aaeb06e539404232b0beafb80bd7b1a52
Author: Christoph Berg <myon at debian.org>
Date: Sat Feb 13 11:49:34 2016 +0100
Imported Upstream version 3.9.7
---
.travis.yml | 8 +
ChangeLog | 98 +++
ci/build.sh | 46 ++
ci/install.sh | 10 +
configure.ac | 2 +-
doc/man/Makefile.am | 3 +
heartbeat/CTDB | 19 +-
heartbeat/Filesystem | 14 +-
heartbeat/IPaddr2 | 48 +-
heartbeat/IPsrcaddr | 45 +-
heartbeat/LVM | 32 +-
heartbeat/Makefile.am | 5 +
heartbeat/README.galera | 149 ++++
heartbeat/Raid1 | 30 +-
heartbeat/SAPDatabase | 2 +-
heartbeat/VirtualDomain | 206 +++--
heartbeat/apache | 1 +
heartbeat/apache-conf.sh | 4 +-
heartbeat/asterisk | 9 +-
heartbeat/clvm | 22 +-
heartbeat/dhcpd | 12 +-
heartbeat/dnsupdate | 6 +-
heartbeat/docker | 69 +-
heartbeat/ethmonitor | 15 +-
heartbeat/exportfs | 5 +-
heartbeat/findif.sh | 6 +-
heartbeat/galera | 366 +++++---
heartbeat/iSCSILogicalUnit | 18 +-
heartbeat/iSCSITarget | 5 +-
heartbeat/iscsi | 26 +-
heartbeat/lxc | 7 +-
heartbeat/mysql | 3 +-
heartbeat/mysql-common.sh | 35 +-
heartbeat/nagios | 246 ++++++
heartbeat/named | 5 +
heartbeat/nfsnotify | 2 +-
heartbeat/nfsserver | 215 ++---
heartbeat/nfsserver-redhat.sh | 169 ++++
heartbeat/ocf-distro | 47 ++
heartbeat/ocf-rarun | 8 +-
heartbeat/ocf-shellfuncs.in | 88 +-
heartbeat/ora-common.sh | 6 +-
heartbeat/oracle | 22 +-
heartbeat/oralsnr | 8 +-
heartbeat/pgsql | 234 +++++-
heartbeat/pingd | 8 +-
heartbeat/portblock | 37 +-
heartbeat/rabbitmq-cluster | 370 +++++++++
heartbeat/redis | 567 +++++++++++++
heartbeat/sapdb-nosha.sh | 2 +-
heartbeat/sapdb.sh | 2 +-
heartbeat/sg_persist | 22 +-
heartbeat/slapd | 18 +
heartbeat/syslog-ng | 7 +-
heartbeat/tomcat | 13 +-
heartbeat/zabbixserver | 23 +-
ldirectord/ldirectord.in | 18 +-
resource-agents.spec.in | 3 +
rgmanager/src/resources/Makefile.am | 5 +-
rgmanager/src/resources/SAPDatabase | 2 +-
rgmanager/src/resources/db2.sh | 2 +-
rgmanager/src/resources/fs.sh.in | 9 +-
rgmanager/src/resources/ip.sh | 39 +-
rgmanager/src/resources/mysql.sh | 43 +-
rgmanager/src/resources/netfs.sh | 9 +-
rgmanager/src/resources/nfsserver.sh | 71 +-
rgmanager/src/resources/oradg.metadata | 107 +++
.../src/resources/{orainstance.sh => oradg.sh} | 101 ++-
rgmanager/src/resources/orainstance.sh | 25 +-
rgmanager/src/resources/postgres-8.metadata | 12 -
rgmanager/src/resources/utils/fs-lib.sh | 15 +-
rgmanager/src/resources/utils/member_util.sh | 18 +-
rgmanager/src/resources/vm.sh | 18 +-
tools/ocft/Filesystem | 20 +-
tools/ocft/LVM | 11 +-
tools/ocft/Makefile.am | 5 +-
tools/ocft/Raid1 | 42 +-
tools/ocft/Xinetd | 8 +
tools/ocft/apache | 9 +-
tools/ocft/caselib.in | 47 +-
tools/ocft/exportfs | 16 +-
tools/ocft/exportfs-multidir | 13 +-
tools/ocft/helpers.sh | 43 +
tools/ocft/iscsi | 50 +-
tools/ocft/mysql | 9 +-
tools/ocft/named | 4 +-
tools/ocft/nfsserver | 22 +-
tools/ocft/ocft.in | 10 +-
tools/ocft/pgsql | 4 +-
tools/ocft/runocft | 37 +
tools/ocft/runocft.prereq | 30 +
tools/send_arp.linux.c | 920 ++++++++++++++++++---
92 files changed, 4348 insertions(+), 894 deletions(-)
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..79f7753
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,8 @@
+language: bash
+install:
+ - ./ci/install.sh
+script:
+ - ./ci/build.sh
+notifications:
+ email: false
+sudo: required
diff --git a/ChangeLog b/ChangeLog
index 3805327..3711574 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,101 @@
+* Wed Feb 3 2016 resource-agents contributors
+- stable release 3.9.7
+- ldirectord: fix unset failcount error
+- iscsi: add portal check to open_iscsi_get_session_id()
+- galera: use mysql's --tc-heuristic-recover if crash recovery is needed
+- nfsserver: fix monitor for systemd
+
+* Wed Jan 20 2016 resource-agents contributors
+- release candidate 3.9.7 rc1
+- nfsserver.sh: add hostname attribute for NFS export (required for NFSv4+Kerberos support)
+- oradg.sh: new RA for Oracle Data Guard
+- ocf_shellfuncs: suppress bash specific trace_ra log on dash
+- sg_persist: remove uncalled for ocf_run calls
+- multiple RA: replace error log messages with calls to ocf_exit_reason
+- nfsserver: only do redhat specific stuff on redhat
+- exportfs: don't increment fsid for single directory
+- Filesystem: add tmpfs support
+- netfs.sh: move defaults to metadata
+- nfsserver: /var/lock/subsys is non-standard, check for it first
+- nagios: new RA
+- docker: check for errors in the container name
+- mysql: fix grep failure on MySQL 5.6 or higher when checking read_only variable
+- VirtualDomain: new attributes migration_speed and migration_downtime
+- fs: remove not-working tmpfs support
+- vm.sh: add migrate_options parameter
+- nfsserver: Use rpc-statd.service for NFS locking in EXEC_MODE=3 (bsc#955114)
+- nfsserver: Add EXEC_MODE for systemd without nfs-lock.service (bsc#955114)
+- IPaddr2: Add IPv6 DAD collision detection
+- Filesystem: add overlay as supported filesystem
+- ldirectord: dns_check and fallbackcommand enhancements
+- IPaddr2: fix potential syntax error on if-then-else
+- SAPDatabase: add Oracle 12 to list of supported databases (bsc#953991)
+- mysql-common.sh: fix issue where "removing old PID file" wasn't logged
+- mysql-common.sh: when mysql has been stopped, mysql stop returns success
+- mysql.sh: wait up to startup_wait seconds before failing if mysqld startup is slow
+- orainstance.sh: fix 90s wait/killing of databases containing the name of the database being killed, and added cleanup code to kill remaining listener process
+- ip.sh: Use DAD to check for IPv6 address collision
+- iSCSITarget: fix to only create one IQN and add portals to it
+- galera: document the bootstrap flow
+- galera: start joining nodes during 'monitor' to allow long-running SST
+- galera: add support for MYSQL_HOST and MYSQL_PORT from /etc/sysconfig/clustercheck
+- redis: fix password parser
+- pgsql: fix exec_sql errors like "unknown variable select pg_ " in dash
+- pgsql: fix get_my_location() sql regression
+- docker: fix image variable name
+- pgsql: Fix return code override in pgsql_real_start()
+- slapd: add "maxfiles" parameter to set max number of open files (for ulimit -n)
+- redis: use required client password when set
+- send_arp: fix for infiniband, re-merge from upstream iputils arping
+- CTDB: Preserve smb.conf permissions (bsc#935253)
+- lxc: fix emergency stop functionality on 1.0
+- tomcat: use runuser instead of su for SELinux enforcing mode
+- pgsql: use runuser instead of su command for SELinux enforcing mode
+- docker: image name check fixes
+- iSCSITarget: properly create portals for lio-t implementation
+- iSCSILogicalUnit: when deleting a LUN or initiator fails with lio-t, proceed with warning
+- iSCSILogicalUnit: return OCF_NOT_RUNNING on monitor if backing path does not exist
+- iSCSILogicalUnit: add check for leftover target/core entries for lio-t
+- pgsql: delete old replication slot when creating a new slot.
+- Filesystem: support RozoFS
+- orainstance.sh: interpret listener stop results correctly
+- dhcpd: use correct default chroot for RHEL based systems
+- LVM: allow vgck failures if partial_activation is true
+- redis: avoid 0 byte dump.rdb start failures
+- docker: fix container_exist test
+- redis: fixed start operation if replication sync takes > 20 seconds
+- ethmonitor: add link_status_only option for skipping RX counter and arping tests
+- clvm: fix issue with only first option of daemon_options being used
+- IPsrcaddr: return correct error code during stop when misconfigured
+- clvm: activate_vgs option for enable/disable of automatic vg activation
+- galera: properly redetect bootstrap after demote
+- galera: clear last known sequence number any time promote is even attempted
+- asterisk: fix return code
+- galera: retrieve last sequence number without using read-only mode
+- redis: add wait_last_known_master option
+- redis: only connect to active master instances
+- redis: do not attempt to demote if redis is dead
+- redis: reliable shutdown.
+- pgsql: add support for replication slots
+- redis: set executable bit to be able to create docs (make rpm)
+- rabbitmq-cluster: fix rmq_join_list() to only return online nodes
+- rabbitmq-cluster: new RA
+- Filesystem: support overlayfs
+- sg_persist: use default binary setting in meta-data
+- dnsupdate: use nsupdate_opts parameter
+- nfsserver: merge options into existing /etc/sysconfig/nfs
+- portblock: portno param can be a string like 137,138
+- portblock: replace ancient heartbeat config with crm configure
+- portblock: clarify TCP RST vs ICMP port unreachable
+- VirtualDomain: enforce C locale in force_stop
+- redis: retry on unknown error when starting
+- redis: remove stop timeout and add placeholder master during election period
+- CTDB: Change default socket location to CTDB's expected default.
+- multiple RA: make sure that the pidfile directory exist
+- multiple RA: create state-directory writable by the application
+- orainstance.sh: Handle ORA-* error messages
+- redis: new RA
+
* Thu Jan 29 2015 resource-agents contributors
- stable release 3.9.6
- VirtualDomain: add migrate_options parameter
diff --git a/ci/build.sh b/ci/build.sh
new file mode 100755
index 0000000..798bd39
--- /dev/null
+++ b/ci/build.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+set -eo pipefail # abort on the first failing command or failing pipeline stage
+[[ "${DEBUG:-}" ]] && set -x # trace every command when DEBUG is non-empty
+
+declare -i failed # integer flag: set to 1 by fail(), used as the exit status
+failed=0
+
+success() { # print a green "OK" status line for the script named in $1
+ printf "\r\033[2K [ \033[00;32mOK\033[0m ] Checking %s...\n" "$1"
+}
+
+fail() { # print a red "FAIL" status line for the script named in $1 and record the failure
+ printf "\r\033[2K [\033[0;31mFAIL\033[0m] Checking %s...\n" "$1"
+ failed=1 # global flag; check_all_executables exits with this value
+}
+
+check() { # lint one script ($1) with shellcheck and print exactly one status line
+ local script="$1"
+ shellcheck "$script" || { fail "$script"; return 0; } # stop here: no OK line after a FAIL
+ success "$script"
+}
+
+find_prunes() { # emit find(1) "! -path" filters excluding .git and every git submodule path
+ local filters="! -path './.git/*'"
+ if [[ -f .gitmodules ]]; then
+ while read module; do
+ filters+=" ! -path './$module/*'"
+ done < <(awk '/path/ {print $3}' .gitmodules)
+ fi
+ echo "$filters"
+}
+
+find_cmd() { # print the find(1) command selecting executables and *.sh files; "-perm /111" means any execute bit set
+ echo "find . -type f -and \( -perm /111 -or -name '*.sh' \) $(find_prunes)"
+}
+
+check_all_executables() { # lint every selected file, then exit non-zero if any check failed
+ echo "Checking executables and .sh files..."
+ # No pipe into the loop: a piped loop runs in a subshell and would lose failed=1.
+ while IFS= read -r script; do
+ check "$script"
+ done < <(eval "$(find_cmd)")
+ exit $failed
+}
+
+check_all_executables # entry point: runs all checks and exits with the recorded status
diff --git a/ci/install.sh b/ci/install.sh
new file mode 100755
index 0000000..c66b56c
--- /dev/null
+++ b/ci/install.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -eo pipefail # stop on the first failing command or failing pipeline stage
+
+main() { # download the pinned shellcheck .deb and install it with dpkg
+ local pkg="shellcheck_0.3.7-1_amd64.deb"
+ wget "http://ftp.debian.org/debian/pool/main/s/shellcheck/$pkg"
+ sudo dpkg -i "$pkg"
+}
+
+main
diff --git a/configure.ac b/configure.ac
index ec3ec6e..0ecb65e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -901,7 +901,7 @@ dnl *****************
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
-AC_MSG_RESULT([ Build Version = 02beac55c1da0ad99a5a19bd3b2333bcff7e916c])
+AC_MSG_RESULT([ Build Version = e697f43c4e59a47bd0dc7c093b7d46174035c2dd])
AC_MSG_RESULT([ Features =${PKG_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ Prefix = ${prefix}])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index e082bac..5e28895 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -99,6 +99,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_db2.7 \
ocf_heartbeat_dhcpd.7 \
ocf_heartbeat_docker.7 \
+ ocf_heartbeat_dnsupdate.7 \
ocf_heartbeat_eDir88.7 \
ocf_heartbeat_ethmonitor.7 \
ocf_heartbeat_exportfs.7 \
@@ -127,6 +128,8 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_postfix.7 \
ocf_heartbeat_pound.7 \
ocf_heartbeat_proftpd.7 \
+ ocf_heartbeat_rabbitmq-cluster.7 \
+ ocf_heartbeat_redis.7 \
ocf_heartbeat_rsyncd.7 \
ocf_heartbeat_rsyslog.7 \
ocf_heartbeat_scsi2reservation.7 \
diff --git a/heartbeat/CTDB b/heartbeat/CTDB
index 3419b54..6cfff63 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB
@@ -97,10 +97,10 @@ fi
: ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}
: ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}
: ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}
-: ${OCF_RESKEY_ctdb_socket:=${var_prefix}/ctdb.socket}
: ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}}
: ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}
: ${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb}
+: ${OCF_RESKEY_ctdb_socket:=${OCF_RESKEY_ctdb_rundir}/ctdbd.socket}
: ${OCF_RESKEY_ctdb_debuglevel:=2}
: ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}
@@ -452,6 +452,8 @@ init_smb_conf() {
vfs_fileid="$vfs_fileid\tvfs objects = fileid\n"
fi
fi
+ # Preserve permissions of smb.conf
+ cp -a $OCF_RESKEY_smb_conf $OCF_RESKEY_smb_conf.$$
awk '
/^[[:space:]]*\[/ { global = 0 }
/^[[:space:]]*\[global\]/ { global = 1 }
@@ -472,7 +474,8 @@ init_smb_conf() {
\tidmap backend = $OCF_RESKEY_smb_idmap_backend\n\
\tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\
\t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$
- mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf
+ dd conv=notrunc,fsync of=$OCF_RESKEY_smb_conf.$$ if=/dev/null >/dev/null 2>&1
+ mv $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf
}
@@ -480,8 +483,10 @@ init_smb_conf() {
cleanup_smb_conf() {
ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0
+ # preserve permissions of smb.conf
+ cp -a $OCF_RESKEY_smb_conf $OCF_RESKEY_smb_conf.$$
sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $OCF_RESKEY_smb_conf.$$
- mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf
+ mv $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf
}
append_ctdb_sysconfig() {
@@ -595,7 +600,7 @@ ctdb_start() {
if [ $? -ne 0 ]; then
# cleanup smb.conf
cleanup_smb_conf
-
+
ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary."
return $OCF_ERR_GENERIC
else
@@ -622,10 +627,10 @@ ctdb_start() {
fi
done
fi
-
+
# ctdbd will (or can) actually still be running at this point, so kill it
ctdb_stop
-
+
ocf_exit_reason "Timeout waiting for CTDB to stabilize"
return $OCF_ERR_GENERIC
}
@@ -634,7 +639,7 @@ ctdb_start() {
ctdb_stop() {
# Do nothing if already stopped
pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS
-
+
# Tell it to die nicely
invoke_ctdb shutdown >/dev/null 2>&1
rv=$?
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index cefe52b..e594148 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -275,7 +275,7 @@ determine_blockdevice() {
# Get the current real device name, if possible.
# (specified devname could be -L or -U...)
case "$FSTYPE" in
- nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none)
+ nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|none)
: ;;
*)
DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1`
@@ -326,7 +326,7 @@ is_fsck_needed() {
no) false;;
""|auto)
case $FSTYPE in
- ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs)
+ ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs)
false;;
*)
true;;
@@ -355,7 +355,7 @@ fstype_supported()
# support fuse-filesystems (e.g. GlusterFS)
case $FSTYPE in
- glusterfs) support="fuse";;
+ fuse.*|glusterfs|rozofs) support="fuse";;
esac
grep -w "$support"'$' /proc/filesystems >/dev/null
@@ -368,7 +368,7 @@ fstype_supported()
# check the if the filesystem support exists again.
$MODPROBE $support >/dev/null
if [ $? -ne 0 ]; then
- ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernal module"
+ ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
return $OCF_ERR_INSTALLED
fi
@@ -424,7 +424,7 @@ Filesystem_start()
# NOTE: if any errors at all are detected, it returns non-zero
# if the error is >= 4 then there is a big problem
if [ $? -ge 4 ]; then
- ocf_exit_reason "Couldn't sucessfully fsck filesystem for $DEVICE"
+ ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE"
return $OCF_ERR_GENERIC
fi
fi
@@ -724,7 +724,7 @@ set_blockdevice_var() {
# these are definitely not block devices
case $FSTYPE in
- nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;;
+ nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs) return;;
esac
if `is_option "loop"`; then
@@ -836,7 +836,7 @@ is_option "ro" &&
CLUSTERSAFE=2
case $FSTYPE in
-nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2)
+nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs)
CLUSTERSAFE=1 # this is kind of safe too
;;
# add here CLUSTERSAFE=0 for all filesystems which are not
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index a580683..b224ca5 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -169,8 +169,10 @@ routing table.
<parameter name="broadcast">
<longdesc lang="en">
-Broadcast address associated with the IP. If left empty, the script will
-determine this from the netmask.
+Broadcast address associated with the IP. It is possible to use the
+special symbols '+' and '-' instead of the broadcast address. In this
+case, the broadcast address is derived by setting/resetting the host
+bits of the interface prefix.
</longdesc>
<shortdesc lang="en">Broadcast address</shortdesc>
<content type="string" default=""/>
@@ -680,19 +682,35 @@ run_send_arp() {
#
run_send_ua() {
local i
- # Wait until the allocated IPv6 address gets ready by checking
- # "tentative" flag is disappeared, otherwise send_ua can not
- # send the unsolicited advertisement requests.
- for i in 1 2 3 4 5; do
- $IP2UTIL -o -f $FAMILY addr show dev $NIC \
- | grep -q -e "$OCF_RESKEY_ip/$NETMASK .* tentative"
- [ $? -ne 0 ] && break
- if [ $i -eq 5 ]; then
- ocf_log warn "$OCF_RESKEY_ip still has 'tentative' status. (ignored)"
+
+ # Duplicate Address Detection [DAD]
+ # Kernel will flag the IP as 'tentative' until it ensured that
+ # there is no duplicates.
+ # If there is, it will flag it as 'dadfailed'
+ for i in $(seq 1 10); do
+ ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK)
+ case "$ipstatus" in
+ *dadfailed*)
+ ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]"
+ $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK
+ if [ $? -ne 0 ]; then
+ ocf_log err "Could not delete IPv6 address"
+ fi
+ return $OCF_ERR_GENERIC
+ ;;
+ *tentative*)
+ if [ $i -eq 10 ]; then
+ ofc_log warn "IPv6 address : DAD is still in tentative"
+ fi
+ ;;
+ *)
break
- fi
+ ;;
+ esac
sleep 1
done
+ # Now the address should be usable
+
ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
ocf_log info "$SENDUA $ARGS"
$SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
@@ -798,7 +816,7 @@ ip_start() {
exit $OCF_SUCCESS
fi
- if [ -n "$IP_CIP" ] && [ $ip_status = "no" ] || [ $ip_status = "partial2" ]; then
+ if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then
$MODPROBE ip_conntrack
$IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
--new \
@@ -845,6 +863,10 @@ ip_start() {
else
if [ -x $SENDUA ]; then
run_send_ua
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "run_send_ua failed."
+ exit $OCF_ERR_GENERIC
+ fi
fi
fi
;;
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 8163c0c..33c5be6 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -387,15 +387,27 @@ ip_status() {
srca_validate_all() {
- check_binary $AWK
- check_binary $IFCONFIG
+ if [ -z "$OCF_RESKEY_ipaddress" ]; then
+ # usage
+ ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+
+ if ! [ "x$SYSTYPE" = "xLinux" ]; then
+ # checks after this point are only relevant for linux.
+ return $OCF_SUCCESS
+ fi
+
+ check_binary $AWK
+ check_binary $IFCONFIG
# The IP address should be in good shape
if CheckIP "$ipaddress"; then
:
else
ocf_exit_reason "Invalid IP address [$ipaddress]"
- exit $OCF_ERR_CONFIGURED
+ return $OCF_ERR_CONFIGURED
fi
if ocf_is_probe; then
@@ -407,8 +419,9 @@ srca_validate_all() {
:
else
ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
- exit $OCF_ERR_INSTALLED
+ return $OCF_ERR_INSTALLED
fi
+ return $OCF_SUCCESS
}
if
@@ -430,18 +443,22 @@ case $1 in
;;
esac
-if
- [ -z "$OCF_RESKEY_ipaddress" ]
-then
-# usage
- ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
- exit $OCF_ERR_CONFIGURED
-fi
-
ipaddress="$OCF_RESKEY_ipaddress"
-if [ "x$SYSTYPE" = "xLinux" ]; then
- srca_validate_all
+srca_validate_all
+rc=$?
+if [ $rc -ne $OCF_SUCCESS ]; then
+ case $1 in
+ # if we can't validate the configuration during a stop, that
+ # means the resources isn't configured correctly. There's no way
+ # to actually stop the resource in this situation because there's
+ # no way it could have even started. Return success here
+ # to indicate that the resource is not running, otherwise the
+ # stop action will fail causing the node to be fenced just because
+ # of a mis configuration.
+ stop) exit $OCF_SUCCESS;;
+ *) exit $rc;;
+ esac
fi
findif_out=`$FINDIF -C`
diff --git a/heartbeat/LVM b/heartbeat/LVM
index a32435b..5d202c6 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -98,11 +98,11 @@ If "exclusive" is set on a non clustered volume group, this overrides the tag to
<parameter name="partial_activation" unique="0" required="0">
<longdesc lang="en">
-If set, the volume group will be activated even only partial of the physical
-volumes available. It helps to set to true, when you are using mirroring
+If set, the volume group will be activated partially even with some
+physical volumes missing. It helps to set to true when using mirrored
logical volumes.
</longdesc>
-<shortdesc lang="en">Activate VG even with partial PV only</shortdesc>
+<shortdesc lang="en">Activate VG partially when missing PVs</shortdesc>
<content type="string" default="false" />
</parameter>
@@ -542,8 +542,30 @@ LVM_validate_all() {
##
VGOUT=`vgck ${VOLUME} 2>&1`
if [ $? -ne 0 ]; then
- ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
- exit $OCF_ERR_GENERIC
+ # Inconsistency might be due to missing physical volumes, which doesn't
+ # automatically mean we should fail. If partial_activation=true then
+ # we should let start try to handle it, or if no PVs are listed as
+ # "unknown device" then another node may have marked a device missing
+ # where we have access to all of them and can start without issue.
+ if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then
+ if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'unknown device' > /dev/null 2>&1; then
+ if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # We are missing devices and cannot activate partially
+ ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially"
+ exit $OCF_ERR_GENERIC
+ else
+ # We are missing devices but are allowed to activate partially.
+ # Assume that caused the vgck failure and carry on
+ ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ fi
+ fi
+ # else the vg is partial but all devices are accounted for, so another
+ # node must have marked the device missing. Proceed.
+ else
+ # vgck failure was for something other than missing devices
+ ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
+ exit $OCF_ERR_GENERIC
+ fi
fi
##
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index d9ce755..b70c104 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -94,6 +94,7 @@ ocf_SCRIPTS = ClusterMon \
ManageVE \
mysql \
mysql-proxy \
+ nagios \
named \
nfsnotify \
nfsserver \
@@ -106,7 +107,9 @@ ocf_SCRIPTS = ClusterMon \
pgsql \
proftpd \
Pure-FTPd \
+ rabbitmq-cluster \
Raid1 \
+ redis \
Route \
rsyncd \
rsyslog \
@@ -142,12 +145,14 @@ ocfcommon_DATA = ocf-shellfuncs \
ocf-directories \
ocf-returncodes \
ocf-rarun \
+ ocf-distro \
apache-conf.sh \
http-mon.sh \
sapdb-nosha.sh \
sapdb.sh \
ora-common.sh \
mysql-common.sh \
+ nfsserver-redhat.sh \
findif.sh
# Legacy locations
diff --git a/heartbeat/README.galera b/heartbeat/README.galera
new file mode 100644
index 0000000..1b320cf
--- /dev/null
+++ b/heartbeat/README.galera
@@ -0,0 +1,149 @@
+Notes regarding the Galera resource agent
+---
+
+In the resource agent, the action of bootstrapping a Galera cluster is
+implemented as a series of small steps, by using:
+
+ * Two CIB attributes `last-committed` and `bootstrap` to elect a
+ bootstrap node that will restart the cluster.
+
+ * One CIB attribute `sync-needed` that will identify that joining
+ nodes are in the process of synchronizing their local database
+ via SST.
+
+ * A Master/Slave pacemaker resource which helps splitting the boot
+ into steps, up to a point where a galera node is available.
+
+ * the recurring monitor action to coordinate switch from one
+ state to another.
+
+How boot works
+====
+
+There are two things to know to understand how the resource agent
+restarts a Galera cluster.
+
+### Bootstrap the cluster with the right node
+
+When synced, the nodes of a galera cluster have in common a last seqno,
+which identifies the last transaction considered successful by a
+majority of nodes in the cluster (think quorum).
+
+To restart a cluster, the resource agent must ensure that it will
+bootstrap the cluster from a node which is up-to-date, i.e. which has
+the highest seqno of all nodes.
+
+As a result, if the resource agent cannot retrieve the seqno on all
+nodes, it won't be able to safely identify a bootstrap node, and
+will simply refuse to start the galera cluster.
+
+### synchronizing nodes can be a long operation
+
+Starting a bootstrap node is relatively fast, so it's performed
+during the "promote" operation, which is a one-off, time-bounded
+operation.
+
+Subsequent nodes will need to synchronize via SST, which consists
+in "pushing" an entire Galera DB from one node to another.
+
+There is no perfect time-out, as time spent during synchronization
+depends on the size of the DB. Thus, joiner nodes are started during
+the "monitor" operation, which is a recurring operation that can
+better track the progress of the SST.
+
+
+State flow
+====
+
+General idea for starting Galera:
+
+ * Before starting the Galera cluster each node needs to go in Slave
+ state so that the agent records its last seqno into the CIB.
+ __ This uses attribute last-committed __
+
+ * Once all nodes are in Slave state, the agent can safely determine the
+ last seqno and elect a bootstrap node (`detect_first_master()`).
+ __ This uses attribute bootstrap __
+
+ * The agent then sets the score of the elected bootstrap node to
+ Master so that pacemaker promotes it and starts the first Galera
+ server.
+
+ * Once the first Master is running, the agent can start joiner
+ nodes during the "monitor" operation, and starts monitoring
+ their SST sync.
+ __ This uses attribute sync-needed __
+
+ * Only when SST is over on joiner nodes, the agent promotes them
+ to Master. At this point, the entire Galera cluster is up.
+
+
+Attribute usage and liveness
+====
+
+Here is how attributes are created on a per-node basis. If you
+modify the resource agent make sure those properties still hold.
+
+### last-committed
+
+It is just a temporary hint for the resource agent to help
+elect a bootstrap node. Once the bootstrap attribute is set on one
+of the nodes, we can get rid of last-committed.
+
+ - Used : during Slave state to compare seqno
+ - Created: before entering Slave state:
+ . at startup in `galera_start()`
+ . or when a Galera node is stopped in `galera_demote()`
+ - Deleted: just before node starts in `galera_start_local_node()`;
+ cleaned-up during `galera_demote()` and `galera_stop()`
+
+We delete last-committed before starting Galera, to avoid race
+conditions that could arise due to discrepancies between the CIB and
+Galera.
+
+### bootstrap
+
+Attribute set on the node that is elected to bootstrap Galera.
+
+- Used : during promotion in `galera_start_local_node()`
+- Created: at startup once all nodes have `last-committed`;
+ or during monitor if all nodes have failed
+- Deleted: in `galera_start_local_node()`, just after the bootstrap
+ node started and is ready;
+ cleaned-up during `galera_demote()` and `galera_stop()`
+
+There cannot be more than one bootstrap node at any time, otherwise
+the Galera cluster would stop replicating properly.
+
+### sync-needed
+
+While this attribute is set on a node, the Galera node is in JOIN
+state, i.e. SST is in progress and the node cannot serve queries.
+
+The resource agent relies on the underlying SST method to monitor
+the progress of the SST. For instance, with `wsrep_sst_rsync`,
+timeout would be reported by rsync, the Galera node would go in
+Non-primary state, which would make `galera_monitor()` fail.
+
+- Used : during recurring slave monitor in `check_sync_status()`
+- Created: in `galera_start_local_node()`, just after the joiner
+ node started and entered the Galera cluster
+- Deleted: during recurring slave monitor in `check_sync_status()`
+ as soon as the Galera code reports to be SYNC-ed.
+
+### heuristic-recovered
+
+If a galera node was unexpectedly killed in the middle of a replication,
+InnoDB can retain the equivalent of an XA transaction in prepared state
+in its redo log. If so, mysqld cannot recover state (nor last seqno)
+automatically, and a special recovery heuristic has to be used to
+unblock the node.
+
+This attribute is used to keep track of forced recoveries to prevent
+bootstrapping a cluster from a recovered node when possible.
+
+- Used : during `detect_first_master()` to elect the bootstrap node
+- Created: in `detect_last_commit()` if the node has a pending XA
+ transaction to recover in the redo log
+- Deleted: when a node is promoted to Master. This attribute is
+ kept in the CIB if a node is stopped.
diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
index 4460ca1..7cf658b 100755
--- a/heartbeat/Raid1
+++ b/heartbeat/Raid1
@@ -153,7 +153,7 @@ udev_settle() {
}
list_conf_arrays() {
test -f $RAIDCONF || {
- ocf_log err "$RAIDCONF gone missing!"
+ ocf_exit_reason "$RAIDCONF gone missing!"
exit $OCF_ERR_GENERIC
}
grep ^ARRAY $RAIDCONF | awk '{print $2}'
@@ -199,7 +199,7 @@ raid1_start() {
if [ $rc -ne $OCF_NOT_RUNNING ]; then
# If the array is in a broken state, this agent doesn't
# know how to repair that.
- ocf_log err "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
+ ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$rc)"
return $OCF_ERR_GENERIC
fi
@@ -214,7 +214,7 @@ raid1_start() {
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
else
- ocf_log err "Couldn't start RAID for $RAIDDEVS"
+ ocf_exit_reason "Couldn't start RAID for $RAIDDEVS"
return $OCF_ERR_GENERIC
fi
}
@@ -229,7 +229,7 @@ mark_readonly() {
$MDADM --readonly $mddev --config=$RAIDCONF
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "Failed to set $mddev readonly (rc=$rc)"
+ ocf_exit_reason "Failed to set $mddev readonly (rc=$rc)"
fi
return $rc
}
@@ -344,7 +344,7 @@ raid1_stop() {
return $OCF_SUCCESS
fi
- ocf_log err "RAID $RAIDDEVS still active after stop command!"
+ ocf_exit_reason "RAID $RAIDDEVS still active after stop command!"
return $OCF_ERR_GENERIC
}
@@ -375,13 +375,13 @@ raid1_monitor_one() {
1) ocf_log warn "$mddev has at least one failed device."
TRY_READD=1
;;
- 2) ocf_log err "$mddev has failed."
+ 2) ocf_exit_reason "$mddev has failed."
return $OCF_ERR_GENERIC
;;
- 4) ocf_log err "mdadm failed on $mddev."
+ 4) ocf_exit_reason "mdadm failed on $mddev."
return $OCF_ERR_GENERIC
;;
- *) ocf_log err "mdadm returned an unknown result ($rc)."
+ *) ocf_exit_reason "mdadm returned an unknown result ($rc)."
return $OCF_ERR_GENERIC
;;
esac
@@ -403,7 +403,7 @@ raid1_monitor_one() {
fi
if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \
iflag=direct >/dev/null 2>&1 ; then
- ocf_log err "$mddev: I/O error on read"
+ ocf_exit_reason "$mddev: I/O error on read"
return $OCF_ERR_GENERIC
fi
@@ -462,22 +462,22 @@ FORCESTOP="${OCF_RESKEY_force_stop:-1}"
WAIT_FOR_UDEV="${OCF_RESKEY_udev:-1}"
if [ -z "$RAIDCONF" ] ; then
- ocf_log err "Please set OCF_RESKEY_raidconf!"
+ ocf_exit_reason "Please set OCF_RESKEY_raidconf!"
exit $OCF_ERR_CONFIGURED
fi
if [ ! -r "$RAIDCONF" ] ; then
- ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!"
+ ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opend!"
exit $OCF_ERR_INSTALLED
fi
if [ -z "$MDDEV" ] ; then
- ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
+ ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then
- ocf_log err "md RAID arrays are NOT safe to run as a clone!"
+ ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!"
ocf_log err "Please read the comment on the force_clones parameter."
exit $OCF_ERR_CONFIGURED
fi
@@ -512,10 +512,10 @@ fi
if [ $HAVE_RAIDTOOLS = true ]; then
if [ "$MDDEV" = "auto" ]; then
- ocf_log err "autoconf supported only with mdadm!"
+ ocf_exit_reason "autoconf supported only with mdadm!"
exit $OCF_ERR_INSTALLED
elif [ `echo $MDDEV|wc -w` -gt 1 ]; then
- ocf_log err "multiple devices supported only with mdadm!"
+ ocf_exit_reason "multiple devices supported only with mdadm!"
exit $OCF_ERR_INSTALLED
fi
fi
diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
index 3b77206..de7959f 100755
--- a/heartbeat/SAPDatabase
+++ b/heartbeat/SAPDatabase
@@ -78,7 +78,7 @@ Resource script for SAP databases. It manages a SAP database of any type as an H
The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB.
The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure.
The resource agent supports the following databases:
-- Oracle 10.2 and 11.2
+- Oracle 10.2, 11.2 and 12
- DB/2 UDB for Windows and Unix 9.x
- SAP-DB / MaxDB 7.x
- Sybase ASE 15.7
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 0f6b0bc..a9ad237 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -17,6 +17,8 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
+OCF_RESKEY_migration_downtime_default=0
+OCF_RESKEY_migration_speed_default=0
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
@@ -25,6 +27,8 @@ OCF_RESKEY_CRM_meta_timeout_default=90000
OCF_RESKEY_save_config_on_stop_default=false
OCF_RESKEY_sync_config_on_stop_default=false
+: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}
+: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
@@ -47,7 +51,7 @@ usage() {
echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}"
}
-meta_data() {
+VirtualDomain_meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
@@ -104,6 +108,22 @@ use libvirt's default transport to connect to the remote hypervisor.
<content type="string" default="" />
</parameter>
+<parameter name="migration_downtime" unique="0" required="0">
+<longdesc lang="en">
+Define max downtime during live migration in milliseconds
+</longdesc>
+<shortdesc lang="en">Live migration downtime</shortdesc>
+<content type="integer" default="${OCF_RESKEY_migration_downtime_default}" />
+</parameter>
+
+<parameter name="migration_speed" unique="0" required="0">
+<longdesc lang="en">
+Define live migration speed per resource in MiB/s
+</longdesc>
+<shortdesc lang="en">Live migration speed</shortdesc>
+<content type="integer" default="${OCF_RESKEY_migration_speed_default}" />
+</parameter>
+
<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
@@ -342,7 +362,7 @@ pid_status()
return $rc
}
-VirtualDomain_Status() {
+VirtualDomain_status() {
local try=0
rc=$OCF_ERR_GENERIC
status="no state"
@@ -435,10 +455,10 @@ verify_undefined() {
fi
}
-VirtualDomain_Start() {
+VirtualDomain_start() {
local snapshotimage
- if VirtualDomain_Status; then
+ if VirtualDomain_status; then
ocf_log info "Virtual domain $DOMAIN_NAME already running."
return $OCF_SUCCESS
fi
@@ -469,7 +489,7 @@ VirtualDomain_Start() {
return $OCF_ERR_GENERIC
fi
- while ! VirtualDomain_Monitor; do
+ while ! VirtualDomain_monitor; do
sleep 1
done
@@ -478,22 +498,24 @@ VirtualDomain_Start() {
force_stop()
{
- local out ex
+ local out ex translate
local status=0
ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
- out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z')
+ out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
ex=$?
- echo >&2 "$out"
- case $ex$out in
- *"error:"*"domain is not running"*|*"error:"*"domain not found"*)
+ translate=$(echo $out|tr 'A-Z' 'a-z')
+ echo >&2 "$translate"
+ case $ex$translate in
+ *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\
+ *"error:"*"failed to get domain"*)
: ;; # unexpected path to the intended outcome, all is well
[!0]*)
ocf_exit_reason "forced stop failed"
return $OCF_ERR_GENERIC ;;
0*)
while [ $status != $OCF_NOT_RUNNING ]; do
- VirtualDomain_Status
+ VirtualDomain_status
status=$?
done ;;
esac
@@ -532,13 +554,13 @@ save_config(){
rm -f ${CFGTMP}
}
-VirtualDomain_Stop() {
+VirtualDomain_stop() {
local i
local status
local shutdown_timeout
local needshutdown=1
- VirtualDomain_Status
+ VirtualDomain_status
status=$?
case $status in
@@ -576,7 +598,7 @@ VirtualDomain_Stop() {
shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
# Loop on status until we reach $shutdown_timeout
while [ $NOW -lt $shutdown_timeout ]; do
- VirtualDomain_Status
+ VirtualDomain_status
status=$?
case $status in
$OCF_NOT_RUNNING)
@@ -640,17 +662,18 @@ mk_migrateuri() {
fi
}
-VirtualDomain_Migrate_To() {
+VirtualDomain_migrate_to() {
local rc
local target_node
local remoteuri
local transport_suffix
local migrateuri
local migrate_opts
+ local migrate_pid
target_node="$OCF_RESKEY_CRM_meta_migrate_target"
- if VirtualDomain_Status; then
+ if VirtualDomain_status; then
# Find out the remote hypervisor to connect to. That is, turn
# something like "qemu://foo:9999/system" into
# "qemu+tcp://bar:9999/system"
@@ -676,9 +699,28 @@ VirtualDomain_Migrate_To() {
save_config
fi
+ # Live migration speed limit
+ if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then
+ ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})."
+ virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed}
+ fi
+
# OK, we know where to connect to. Now do the actual migration.
- ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
- virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri
+ ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
+ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri &
+
+ migrate_pid=${!}
+
+ # Live migration downtime interval
+ # Note: You can set downtime only while live migration is in progress
+ if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then
+ sleep 2
+ ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})."
+ virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime}
+ fi
+
+ wait ${migrate_pid}
+
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
@@ -693,8 +735,8 @@ VirtualDomain_Migrate_To() {
fi
}
-VirtualDomain_Migrate_From() {
- while ! VirtualDomain_Monitor; do
+VirtualDomain_migrate_from() {
+ while ! VirtualDomain_monitor; do
sleep 1
done
ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
@@ -705,10 +747,10 @@ VirtualDomain_Migrate_From() {
return $OCF_SUCCESS
}
-VirtualDomain_Monitor() {
+VirtualDomain_monitor() {
# First, check the domain status. If that returns anything other
# than $OCF_SUCCESS, something is definitely wrong.
- VirtualDomain_Status
+ VirtualDomain_status
rc=$?
if [ ${rc} -eq ${OCF_SUCCESS} ]; then
# OK, the generic status check turned out fine. Now, if we
@@ -740,24 +782,12 @@ VirtualDomain_Monitor() {
return ${rc}
}
-VirtualDomain_Validate_All() {
- # Required binaries:
- for binary in virsh sed; do
- check_binary $binary
- done
-
- if [ -z $OCF_RESKEY_config ]; then
- ocf_exit_reason "Missing configuration parameter \"config\"."
+VirtualDomain_validate_all() {
+ if ocf_is_true $OCF_RESKEY_force_stop && [ -n "$OCF_RESKEY_snapshot" ]; then
+ ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
return $OCF_ERR_CONFIGURED
fi
- if ocf_is_true $OCF_RESKEY_force_stop; then
- if [ -n "$OCF_RESKEY_snapshot" ]; then
- ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
- return $OCF_ERR_CONFIGURED
- fi
- fi
-
# check if we can read the config file (otherwise we're unable to
# deduce $DOMAIN_NAME from it, see below)
if [ ! -r $OCF_RESKEY_config ]; then
@@ -766,85 +796,47 @@ VirtualDomain_Validate_All() {
elif [ "$__OCF_ACTION" = "stop" ]; then
ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
else
- ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or is not readable."
- return $OCF_ERR_INSTALLED
+ ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable."
fi
+ return $OCF_ERR_INSTALLED
+ fi
+
+ if [ -z $DOMAIN_NAME ]; then
+ ocf_exit_reason "Unable to determine domain name."
+ return $OCF_ERR_INSTALLED
fi
# Check if csync2 is available when config tells us we might need it.
if ocf_is_true $OCF_RESKEY_sync_config_on_stop; then
- check_binary csync2;
+ check_binary csync2
+ fi
+
+ # Check if migration_speed is a decimal value
+ if ! ocf_is_decimal ${OCF_RESKEY_migration_speed}; then
+ ocf_exit_reason "migration_speed has to be a decimal value"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Check if migration_downtime is a decimal value
+ if ! ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then
+ ocf_exit_reason "migration_downtime has to be a decimal value"
+ return $OCF_ERR_CONFIGURED
fi
}
-if [ $# -ne 1 ]; then
- usage
- exit $OCF_ERR_ARGS
-fi
+VirtualDomain_getconfig() {
+ # Grab the virsh uri default, but only if hypervisor isn't set
+ : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}
-case $1 in
- meta-data) meta_data
- exit $OCF_SUCCESS
- ;;
- usage) usage
- exit $OCF_SUCCESS
- ;;
-esac
-
-# Grab the virsh uri default, but only if hypervisor isn't set
-: ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}
-
-# Set options to be passed to virsh:
-VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"
-
-# Everything except usage and meta-data must pass the validate test
-VirtualDomain_Validate_All || exit $?
-
-# During a probe, it is permissible for the config file to not be
-# readable (it might be on shared storage not available during the
-# probe). In that case, we're
-# unable to get the domain name. Thus, we also can't check whether the
-# domain is running. The only thing we can do here is to assume that
-# it is not running.
-if [ ! -r $OCF_RESKEY_config ]; then
- ocf_is_probe && exit $OCF_NOT_RUNNING
- [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
-fi
+ # Set options to be passed to virsh:
+ VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"
-# Retrieve the domain name from the xml file.
-DOMAIN_NAME=`egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/' 2>/dev/null`
-if [ -z $DOMAIN_NAME ]; then
- ocf_exit_reason "Unable to determine domain name."
- exit $OCF_ERR_GENERIC
-fi
+ # Retrieve the domain name from the xml file.
+ DOMAIN_NAME=`egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/'`
-EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
-
-case $1 in
- start)
- VirtualDomain_Start
- ;;
- stop)
- VirtualDomain_Stop
- ;;
- migrate_to)
- VirtualDomain_Migrate_To
- ;;
- migrate_from)
- VirtualDomain_Migrate_From
- ;;
- status)
- VirtualDomain_Status
- ;;
- monitor)
- VirtualDomain_Monitor
- ;;
- validate-all)
- ;;
- *)
- usage
- exit $OCF_ERR_UNIMPLEMENTED
- ;;
-esac
-exit $?
+ EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
+}
+OCF_REQUIRED_PARAMS="config"
+OCF_REQUIRED_BINARIES="virsh sed"
+ocf_rarun $*
diff --git a/heartbeat/apache b/heartbeat/apache
index ab7c43f..09d5ded 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -593,6 +593,7 @@ apache_validate_all() {
ocf_exit_reason "Configuration file $CONFIGFILE not found!"
return $OCF_ERR_INSTALLED
fi
+ ocf_mkstatedir root 755 `dirname $PidFile` || return $OCF_ERR_INSTALLED
return $OCF_SUCCESS
}
diff --git a/heartbeat/apache-conf.sh b/heartbeat/apache-conf.sh
index dc3426f..a3c8930 100644
--- a/heartbeat/apache-conf.sh
+++ b/heartbeat/apache-conf.sh
@@ -24,7 +24,9 @@ apachecat() {
function procline() {
split($0,a);
if( a[1]~/^[Ii]nclude$/ ) {
- procinclude(a[2]);
+ includedir=a[2];
+ gsub("\"","",includedir);
+ procinclude(includedir);
} else {
if( a[1]=="ServerRoot" ) {
rootdir=a[2];
diff --git a/heartbeat/asterisk b/heartbeat/asterisk
index 2d9f076..e6318fc 100755
--- a/heartbeat/asterisk
+++ b/heartbeat/asterisk
@@ -286,8 +286,13 @@ asterisk_monitor() {
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "Failed to connect to the Asterisk PBX"
- return $OCF_ERR_GENERIC
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_log info "Asterisk PBX not running yet"
+ return $OCF_NOT_RUNNING;
+ else
+ ocf_log err "Failed to connect to the Asterisk PBX"
+ return $OCF_ERR_GENERIC;
+ fi
fi
# Optionally check the monitor URI with sipsak
diff --git a/heartbeat/clvm b/heartbeat/clvm
index 9d312cc..9d63903 100755
--- a/heartbeat/clvm
+++ b/heartbeat/clvm
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (c) 2014 David Vossel <dvossel at redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
@@ -60,6 +60,18 @@ Options to clvmd. Refer to clvmd.8 for detailed descriptions.
<shortdesc lang="en">Daemon Options</shortdesc>
<content type="string" default="-d0"/>
</parameter>
+
+<parameter name="activate_vgs" unique="0">
+<longdesc lang="en">
+Whether or not to activate all cluster volume groups after starting
+the clvmd or not. Note that clustered volume groups will always be
+deactivated before the clvmd stops regardless of what this option
+is set to.
+</longdesc>
+<shortdesc lang="en">Activate volume groups</shortdesc>
+<content type="boolean" default="true"/>
+</parameter>
+
</parameters>
<actions>
@@ -77,6 +89,7 @@ END
#######################################################################
: ${OCF_RESKEY_daemon_options:="-d0"}
+: ${OCF_RESKEY_activate_vgs:="true"}
sbindir=$HA_SBIN_DIR
if [ -z $sbindir ]; then
@@ -322,6 +335,11 @@ start_process()
clvmd_activate_all()
{
+
+ if ! ocf_is_true "$OCF_RESKEY_activate_vgs"; then
+ ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs"
+ return $OCF_SUCCESS
+ fi
# Activate all volume groups by leaving the
# "volume group name" parameter empty
ocf_run ${LVM_VGCHANGE} -aay
@@ -360,7 +378,7 @@ clvmd_start()
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
start_process $CMIRROR_PATH
fi
- start_process $DAEMON_PATH $CLVMDOPTS
+ start_process $DAEMON_PATH "$CLVMDOPTS"
# Refresh local cache.
#
diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd
index 861386d..996a316 100755
--- a/heartbeat/dhcpd
+++ b/heartbeat/dhcpd
@@ -38,6 +38,14 @@ OCF_RESKEY_leases_default="/db/dhcpd.leases"
OCF_RESKEY_interface_default=""
OCF_RESKEY_includes_default=""
+# On some systems, the chrooted default is slightly different.
+# Let's do our best to support both by default.
+if [ ! -d "$OCF_RESKEY_chrooted_path_default" ]; then
+ if [ -d "/var/lib/dhcpd" ]; then
+ OCF_RESKEY_chrooted_path_default="/var/lib/dhcpd"
+ fi
+fi
+
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
@@ -304,7 +312,7 @@ dhcpd_initialize_chroot() {
{ ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
done
- libdir=$(basename $(echo /var/lib/dhcp/lib*))
+ libdir=$(basename $(echo ${OCF_RESKEY_chrooted_path}/lib*))
if test -x /usr/bin/ldd ; then
get_ldd_deps()
{
@@ -329,7 +337,7 @@ dhcpd_initialize_chroot() {
done | sort -u`
for i in $cplibs ; do
if [ -s "$i" ]; then
- cp -pL "$i" "/var/lib/dhcp/$libdir/" ||
+ cp -pL "$i" "${OCF_RESKEY_chrooted_path}/$libdir/" ||
{ ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
fi
done
diff --git a/heartbeat/dnsupdate b/heartbeat/dnsupdate
index 50ed8ec..2db2aa4 100755
--- a/heartbeat/dnsupdate
+++ b/heartbeat/dnsupdate
@@ -198,7 +198,11 @@ dnsupdate_validate() {
: ${OCF_RESKEY_serverport:="53"}
dns_serverport=${OCF_RESKEY_serverport}
: ${OCF_RESKEY_ttl:="300"}
- nsupdate_opts=${OCF_RESKEY_opts}
+ nsupdate_opts=${OCF_RESKEY_nsupdate_opts}
+ if [ -z "$nsupdate_opts" -a -n "$OCF_RESKEY_opts" ]; then
+ nsupdate_opts=${OCF_RESKEY_opts}
+ ocf_log warn "opts was never an advertised parameter, please use nsupdate_opts"
+ fi
if [ -z "$hostname" ]; then
ocf_log err "No hostname specified."
diff --git a/heartbeat/docker b/heartbeat/docker
index a0dcee4..5af1782 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -4,7 +4,7 @@
# based off a supplied docker image. Containers managed by this agent
# are both created and removed upon the agent's start and stop actions.
#
-# Copyright (c) 2014 David Vossel <dvossel at redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
@@ -61,7 +61,7 @@ The docker image to base this container off of.
<parameter name="name" required="0" unique="0">
<longdesc lang="en">
-The name to give the created container. By default this will
+The name to give the created container. By default this will
be that resource's instance name.
</longdesc>
<shortdesc lang="en">docker container name</shortdesc>
@@ -72,7 +72,7 @@ be that resource's instance name.
<longdesc lang="en">
Allow the image to be pulled from the configured docker registry when
the image does not exist locally. NOTE, this can drastically increase
-the time required to start the container if the image repository is
+the time required to start the container if the image repository is
pulled over the network.
</longdesc>
<shortdesc lang="en">Allow pulling non-local images</shortdesc>
@@ -84,8 +84,8 @@ pulled over the network.
Add options to be appended to the 'docker run' command which is used
when creating the container during the start action. This option allows
users to do things such as setting a custom entry point and injecting
-environment variables into the newly created container. Note the '-d'
-option is supplied regardless of this value to force containers to run
+environment variables into the newly created container. Note the '-d'
+option is supplied regardless of this value to force containers to run
in the background.
NOTE: Do not explicitly specify the --name argument in the run_opts. This
@@ -99,7 +99,7 @@ provided in the 'name' argument of this agent.
<parameter name="run_cmd" required="0" unique="0">
<longdesc lang="en">
-Specifiy a command to launch within the container once
+Specifiy a command to launch within the container once
it has initialized.
</longdesc>
<shortdesc lang="en">run command</shortdesc>
@@ -186,7 +186,7 @@ monitor_cmd_exec()
exit $OCF_ERR_ARGS
fi
rc=$OCF_ERR_GENERIC
- else
+ else
ocf_log info "monitor cmd passed: exit code = $rc"
fi
@@ -195,7 +195,7 @@ monitor_cmd_exec()
container_exists()
{
- docker inspect $CONTAINER > /dev/null 2>&1
+ docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
}
remove_container()
@@ -279,7 +279,7 @@ docker_start()
ocf_run docker start $CONTAINER
else
# make sure any previous container matching our container name is cleaned up first.
- # we already know at this point it wouldn't be running
+ # we already know at this point it wouldn't be running
remove_container
ocf_log info "running container $CONTAINER for the first time"
ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
@@ -328,7 +328,7 @@ docker_stop()
if ocf_is_true "$OCF_RESKEY_force_kill"; then
ocf_run docker kill $CONTAINER
- else
+ else
ocf_log debug "waiting $timeout second[s] before killing container"
ocf_run docker stop -t=$timeout $CONTAINER
fi
@@ -349,23 +349,34 @@ docker_stop()
image_exists()
{
- local res=1
+ # assume that OCF_RESKEY_name have been validated
+ local IMAGE_NAME="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $1}')"
+ # if no tag was specified, use default "latest"
+ local COLON_FOUND=0
+ local IMAGE_TAG="latest"
- echo "${OCF_RESKEY_image}" | grep -q ":"
- if [ $? -eq 0 ]; then
- docker images | awk '{print $1 ":" $2}' | grep "^${OCF_RESKEY_image}\$" > /dev/null 2>&1
- else
- docker images | awk '{print $1}' | grep "^${OCF_RESKEY_image}\$" > /dev/null 2>&1
+ COLON_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o ':' | grep -c .)"
+
+ if [ ${COLON_FOUND} -ne 0 ]; then
+ IMAGE_TAG="$(echo ${OCF_RESKEY_image} | awk -F':' '{print $NF}')"
fi
+
+ # IMAGE_NAME might be following formats:
+ # - image
+ # - repository/image
+ # - docker.io/image (some distro will display "docker.io/" as prefix)
+ docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
if [ $? -eq 0 ]; then
+ # image found
return 0
fi
+
if ocf_is_true "$OCF_RESKEY_allow_pull"; then
REQUIRE_IMAGE_PULL=1
ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
return 0
- fi
+ fi
# image not found.
return 1
}
@@ -376,7 +387,7 @@ docker_validate()
if [ -z "$OCF_RESKEY_image" ]; then
ocf_exit_reason "'image' option is required"
exit $OCF_ERR_CONFIGURED
- fi
+ fi
if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
@@ -392,7 +403,27 @@ docker_validate()
return $OCF_SUCCESS
}
-: ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+# TODO :
+# When multiple globally-unique clone instances are started on one node, the user cannot specify a separate name parameter for each of them.
+# When the user specifies reuse, the resource agent cannot associate multiple clone instances with a container.
+
+if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
+ if [ -n "$OCF_RESKEY_name" ]; then
+ if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural clones from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural master from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+ : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`}
+else
+ : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+fi
if [ -n "$OCF_RESKEY_container" ]; then
# we'll keep the container attribute around for a bit in order not to break
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index a447391..6563af7 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -176,6 +176,14 @@ For infiniband devices, this is the port to monitor.
<content type="integer" />
</parameter>
+<parameter name="link_status_only">
+<longdesc lang="en">
+Only report success based on link status. Do not perform RX counter or arping related connectivity tests.
+</longdesc>
+<shortdesc lang="en">link status check only</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+
</parameters>
<actions>
<action name="start" timeout="60s" />
@@ -300,7 +308,7 @@ get_rx_packets () {
ocf_log debug "$IP2UTIL -o -s link show dev $NIC"
$IP2UTIL -o -s link show dev "$NIC" \
| sed 's/.* RX: [^0-9]*[0-9]* *\([0-9]*\) .*/\1/'
- # the first number after RX: ist the # of bytes ,
+ # the first number after RX: is the # of bytes ,
# the second is the # of packets received
}
@@ -378,6 +386,11 @@ if_check () {
return $OCF_NOT_RUNNING
fi
+ # if using link_status_only, skip RX count and arping related tests
+ if ocf_is_true "$OCF_RESKEY_link_status_only"; then
+ return $OCF_SUCCESS
+ fi
+
# watch for packet counter changes
ocf_log debug "watch for packet counter changes"
watch_pkt_counter
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index c4ec653..4b88fa1 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -201,7 +201,7 @@ forall() {
for dir in $OCF_RESKEY_directory; do
$func $dir "$@"
rc=$(($rc | $?))
- bump_fsid
+ [ $NUMDIRS -gt 1 ] && bump_fsid
[ "$fast_exit" ] && continue
[ $rc -ne 0 ] && return $rc
done
@@ -409,7 +409,7 @@ testdir() {
}
exportfs_validate_all ()
{
- if [ `echo "$OCF_RESKEY_directory" | wc -w` -gt 1 ] &&
+ if [ $NUMDIRS -gt 1 ] &&
! ocf_is_decimal "$OCF_RESKEY_fsid"; then
ocf_exit_reason "use integer fsid when exporting multiple directories"
return $OCF_ERR_CONFIGURED
@@ -428,6 +428,7 @@ if [ -n "$newdir" ]; then
OCF_RESKEY_directory=$newdir
fi
+NUMDIRS=`echo "$OCF_RESKEY_directory" | wc -w`
OCF_REQUIRED_PARAMS="directory fsid clientspec"
OCF_REQUIRED_BINARIES="exportfs"
ocf_rarun $*
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 2092d7a..ecd7843 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -182,8 +182,10 @@ findif_check_params()
if [ -n "$brdcast" ] ; then
ipcheck_ipv4 $brdcast
if [ $? = 1 ] ; then
- ocf_log err "Invalid broadcast address [$brdcast]."
- return $OCF_ERR_CONFIGURED
+ if [ "$brdcast" != "+" -a "$brdcast" != "-" ]; then
+ ocf_log err "Invalid broadcast address [$brdcast]."
+ return $OCF_ERR_CONFIGURED
+ fi
fi
fi
fi
diff --git a/heartbeat/galera b/heartbeat/galera
index 994aad0..b65b7c2 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Copyright (c) 2014 David Vossel <dvossel at redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
# Slave vs Master role:
#
# During the 'Slave' role, galera instances are in read-only mode and
-# will not attempt to connect to the cluster. This role exists only as
+# will not attempt to connect to the cluster. This role exists as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
@@ -40,9 +40,12 @@
# The galera instances will only begin to be promoted to the Master role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
-# database that is most current will be promoted to Master. Once the first
-# Master instance bootstraps the galera cluster, the other nodes will be
-# promoted to Master as well.
+# database that is most current will be promoted to Master.
+#
+# Once the first Master instance bootstraps the galera cluster, the
+# other nodes will join the cluster and start synchronizing via SST.
+# They will stay in Slave role as long as the SST is running. Their
+# promotion to Master will happen once synchronization is finished.
#
# Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3
#
@@ -276,6 +279,22 @@ is_bootstrap()
}
+set_heuristic_recovered()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true"
+}
+
+clear_heuristic_recovered()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D
+}
+
+is_heuristic_recovered()
+{
+ local node=$1
+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null
+}
+
clear_last_commit()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D
@@ -309,6 +328,43 @@ wait_for_sync()
ocf_log info "Database synced."
}
+set_sync_needed()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -v "true"
+}
+
+clear_sync_needed()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -D
+}
+
+check_sync_needed()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-sync-needed" -Q 2>/dev/null
+}
+
+check_sync_status()
+{
+ local state=$(get_status_variable "wsrep_local_state")
+ local ready=$(get_status_variable "wsrep_ready")
+
+ if [ -z "$state" -o -z "$ready" ]; then
+ ocf_exit_reason "Unable to retrieve state transfer status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if [ "$state" = "4" -a "$ready" = "ON" ]; then
+ ocf_log info "local node synced with the cluster"
+ # when sync is finished, we are ready to switch to Master
+ clear_sync_needed
+ set_master_score
+ return $OCF_SUCCESS
+ else
+ ocf_log info "local node syncing"
+ return $OCF_SUCCESS
+ fi
+}
+
is_primary()
{
cluster_status=$(get_status_variable "wsrep_cluster_status")
@@ -342,6 +398,14 @@ is_readonly()
master_exists()
{
+ if [ "$__OCF_ACTION" = "demote" ]; then
+ # We don't want to detect master instances during demote.
+ # 1. we could be detecting ourselves as being master, which is no longer the case.
+ # 2. we could be detecting other master instances that are in the process of shutting down.
+ # by not detecting other master instances in "demote" we are deferring this check
+ # to the next recurring monitor operation which will be much more accurate
+ return 1
+ fi
# determine if a master instance is already up and is healthy
crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
return $?
@@ -368,15 +432,6 @@ set_master_score()
fi
}
-promote_everyone()
-{
-
- for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
-
- set_master_score $node
- done
-}
-
greater_than_equal_long()
{
# there are values we need to compare in this script
@@ -390,8 +445,19 @@ detect_first_master()
local best_node="$NODENAME"
local last_commit=0
local missing_nodes=0
+ local nodes=""
+ local nodes_recovered=""
+ # avoid selecting a recovered node as bootstrap if possible
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
+ if is_heuristic_recovered $node; then
+ nodes_recovered="$nodes_recovered $node"
+ else
+ nodes="$nodes $node"
+ fi
+ done
+
+ for node in $nodes_recovered $nodes; do
last_commit=$(get_last_commit $node)
if [ -z "$last_commit" ]; then
@@ -422,39 +488,32 @@ detect_first_master()
set_bootstrap_node $best_node
}
-# For galera, promote is really start
-galera_promote()
+galera_start_local_node()
{
local rc
local extra_opts
local bootstrap
+
+ bootstrap=$(is_bootstrap)
master_exists
if [ $? -eq 0 ]; then
# join without bootstrapping
+ ocf_log info "Node <${NODENAME}> is joining the cluster"
extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}"
+ elif ocf_is_true $bootstrap; then
+ ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
+ extra_opts="--wsrep-cluster-address=gcomm://"
else
- bootstrap=$(is_bootstrap)
-
- if ocf_is_true $bootstrap; then
- ocf_log info "Node <${NODENAME}> is bootstrapping the cluster"
- extra_opts="--wsrep-cluster-address=gcomm://"
- else
- ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
- return $OCF_ERR_GENERIC
- fi
-
- fi
-
- # make sure the read only instance is stopped
- mysql_common_stop
- rc=$?
- if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
- ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master"
- return $rc
+ ocf_exit_reason "Failure, Attempted to join cluster of $OCF_RESOURCE_INSTANCE before master node has been detected."
+ clear_last_commit
+ return $OCF_ERR_GENERIC
fi
- sleep 4
+ # clear last_commit before we start galera to make sure there
+ # won't be a discrepancy between the cib and galera if this node
+ # processes a few transactions and fails before we detect it
+ clear_last_commit
mysql_common_prepare_dirs
mysql_common_start "$extra_opts"
@@ -463,9 +522,10 @@ galera_promote()
return $rc
fi
- galera_monitor
+ mysql_common_status info
rc=$?
- if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
+
+ if [ $rc != $OCF_SUCCESS ]; then
ocf_exit_reason "Failed initial monitor action"
return $rc
fi
@@ -483,22 +543,110 @@ galera_promote()
fi
if ocf_is_true $bootstrap; then
- promote_everyone
clear_bootstrap_node
- ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
+ # clear attribute heuristic-recovered. if last shutdown was
+ # not clean, we cannot be extra-cautious by requesting a SST
+ # since this is the bootstrap node
+ clear_heuristic_recovered
else
- # if this is not the bootstrap node, make sure this instance
- # syncs with the rest of the cluster before promotion returns.
- wait_for_sync
+ set_sync_needed
+ # attribute heuristic-recovered will be cleared once the joiner
+ # has finished syncing and is promoted to Master
fi
- # last commit is no longer relevant once promoted
- clear_last_commit
-
ocf_log info "Galera started"
return $OCF_SUCCESS
}
+detect_last_commit()
+{
+ local last_commit
+ local recover_args="--defaults-file=$OCF_RESKEY_config \
+ --pid-file=$OCF_RESKEY_pid \
+ --socket=$OCF_RESKEY_socket \
+ --datadir=$OCF_RESKEY_datadir \
+ --user=$OCF_RESKEY_user"
+ local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
+
+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
+ local tmp=$(mktemp)
+ local tmperr=$(mktemp)
+
+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
+
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr
+
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
+ if [ -z "$last_commit" ]; then
+ # Galera uses InnoDB's 2pc transactions internally. If
+ # server was stopped in the middle of a replication, the
+ # recovery may find a "prepared" XA transaction in the
+ # redo log, and mysql won't recover automatically
+
+ cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
+ if [ $? -eq 0 ]; then
+ # we can only rollback the transaction, but that's OK
+ # since the DB will get resynchronized anyway
+ ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+ --tc-heuristic-recover=rollback > $tmp 2>/dev/null
+
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
+ if [ ! -z "$last_commit" ]; then
+ ocf_log warn "State recovered. force SST at next restart for full resynchronization"
+ rm -f ${OCF_RESKEY_datadir}/grastate.dat
+ # try not to use this node if bootstrap is needed
+ set_heuristic_recovered
+ fi
+ fi
+ fi
+ rm -f $tmp $tmperr
+ fi
+
+ if [ ! -z "$last_commit" ]; then
+ ocf_log info "Last commit version found: $last_commit"
+ set_last_commit $last_commit
+ return $OCF_SUCCESS
+ else
+ ocf_exit_reason "Unable to detect last known write sequence number"
+ clear_last_commit
+ return $OCF_ERR_GENERIC
+ fi
+}
+
+galera_promote()
+{
+ local rc
+ local extra_opts
+ local bootstrap
+
+ master_exists
+ if [ $? -ne 0 ]; then
+ # promoting the first master will bootstrap the cluster
+ if is_bootstrap; then
+ galera_start_local_node
+ rc=$?
+ return $rc
+ else
+ ocf_exit_reason "Attempted to start the cluster without being a bootstrap node."
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ # promoting other masters only performs sanity checks
+ # as the joining nodes were started during the "monitor" op
+ if ! check_sync_needed; then
+ # sync is done, clear info about last recovery
+ clear_heuristic_recovered
+ return $OCF_SUCCESS
+ else
+ ocf_exit_reason "Attempted to promote local node while sync was still needed."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+}
+
galera_demote()
{
mysql_common_stop
@@ -510,15 +658,18 @@ galera_demote()
# if this node was previously a bootstrap node, that is no longer the case.
clear_bootstrap_node
+ clear_last_commit
+ clear_sync_needed
- # start again in slave mode so the new last commit is recorded
- galera_start
+ # record last commit for next promotion
+ detect_last_commit
+ rc=$?
+ return $rc
}
galera_start()
{
- local extra_opts='--read-only=true'
- local last_commit
+ local rc
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
@@ -526,30 +677,23 @@ galera_start()
return $OCF_ERR_CONFIGURED
fi
- mysql_common_prepare_dirs
- mysql_common_start "$extra_opts"
-
- is_readonly
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
+ mysql_common_status info
+ if [ $? -ne $OCF_NOT_RUNNING ]; then
+ ocf_exit_reason "master galera instance started outside of the cluster's control"
return $OCF_ERR_GENERIC
fi
- ocf_log info "attempting to detect last commit version"
- while [ -z "$last_commit" ]; do
- last_commit=$(get_status_variable "wsrep_last_committed")
- if [ -z "$last_commit" ]; then
- sleep 1
- fi
- done
- ocf_log info "Last commit version found: $last_commit"
+ mysql_common_prepare_dirs
- set_last_commit $last_commit
+ detect_last_commit
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ return $rc
+ fi
master_exists
if [ $? -eq 0 ]; then
- ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster."
- set_master_score $NODENAME
+ ocf_log info "Master instances are already up, local node will join in when started"
else
clear_master_score
detect_first_master
@@ -567,47 +711,70 @@ galera_monitor()
if ocf_is_probe; then
status_loglevel="info"
fi
-
+
mysql_common_status $status_loglevel
rc=$?
- # If status returned an error, return that immediately
- if [ $rc -ne $OCF_SUCCESS ]; then
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
+ last_commit=$(get_last_commit $NODENAME)
+ if [ -n "$last_commit" ];then
+ rc=$OCF_SUCCESS
+
+ if ocf_is_probe; then
+ # prevent state change during probe
+ return $rc
+ fi
+
+ master_exists
+ if [ $? -ne 0 ]; then
+ detect_first_master
+ else
+ # a master instance exists and is healthy.
+ # start this node and mark it as "pending sync"
+ ocf_log info "cluster is running. start local node to join in"
+ galera_start_local_node
+ rc=$?
+ fi
+ fi
+ return $rc
+ elif [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
+ # if we make it here, mysql is running. Check cluster status now.
+
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
return $OCF_ERR_GENERIC
fi
- is_readonly
- if [ $? -ne 0 ]; then
- is_primary
- if [ $? -ne 0 ]; then
- ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
- return $OCF_ERR_GENERIC
- fi
-
- if ocf_is_probe; then
- # restore master score during probe
- # if we detect this is a master instance
- set_master_score
- fi
- rc=$OCF_RUNNING_MASTER
- else
- master_exists
- if [ $? -ne 0 ]; then
- detect_first_master
+ is_primary
+ if [ $? -eq 0 ]; then
+ check_sync_needed
+ if [ $? -eq 0 ]; then
+ # galera running and sync is needed: slave state
+ if ocf_is_probe; then
+ # prevent state change during probe
+ rc=$OCF_SUCCESS
+ else
+ check_sync_status
+ rc=$?
+ fi
else
- # a master instance exists and is healthy, promote this
- # local read only instance
- # so it can join the master galera cluster.
- set_master_score
+ # galera running, no need to sync: master state and everything's clear
+ rc=$OCF_RUNNING_MASTER
+
+ if ocf_is_probe; then
+ # restore master score during probe
+ # if we detect this is a master instance
+ set_master_score
+ fi
fi
+ else
+ ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
+ rc=$OCF_ERR_GENERIC
fi
- # TODO look at what is done in the wait script
return $rc
}
@@ -617,11 +784,12 @@ galera_stop()
local rc
# make sure the process is stopped
mysql_common_stop
- rc=$1
+ rc=$?
clear_last_commit
clear_master_score
clear_bootstrap_node
+ clear_sync_needed
return $rc
}
@@ -674,6 +842,18 @@ if [ -n "${OCF_RESKEY_check_passwd}" ]; then
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
+# This value is automatically sourced from /etc/sysconfig/checkcluster if available
+if [ -n "${MYSQL_HOST}" ]; then
+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
+fi
+
+# This value is automatically sourced from /etc/sysconfig/checkcluster if available
+if [ -n "${MYSQL_PORT}" ]; then
+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
+fi
+
+
+
# What kind of method was invoked?
case "$1" in
start) galera_start;;
diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit
index ffd66ff..0b1670f 100755
--- a/heartbeat/iSCSILogicalUnit
+++ b/heartbeat/iSCSILogicalUnit
@@ -429,15 +429,21 @@ iSCSILogicalUnit_stop() {
fi
;;
lio-t)
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns delete ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
+ # "targetcli delete" will fail if the LUN is already
+ # gone. Log a warning and still push ahead.
+ ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns delete ${OCF_RESKEY_lun}
if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
for initiator in ${OCF_RESKEY_allowed_initiators}; do
if targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} status | grep "Mapped LUNs: 0" >/dev/null ; then
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/ delete ${initiator}
+ ocf_run -warn targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/ delete ${initiator}
fi
done
fi
+ # If we've proceeded down to here and we're unable to
+ # delete the backstore, then something is seriously
+ # wrong and we need to fail the stop operation
+ # (potentially causing fencing)
ocf_run targetcli /backstores/block delete ${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
;;
esac
@@ -446,6 +452,9 @@ iSCSILogicalUnit_stop() {
}
iSCSILogicalUnit_monitor() {
+ # If our backing device (or file) doesn't even exist, we're not running
+ [ -e ${OCF_RESKEY_path} ] || return $OCF_NOT_RUNNING
+
case $OCF_RESKEY_implementation in
iet)
# Figure out and set the target ID
@@ -486,6 +495,11 @@ iSCSILogicalUnit_monitor() {
lio-t)
configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/*/udev_path"
[ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS
+
+ # if we aren't activated, is a block device still left over?
+ block_configfs_path="/sys/kernel/config/target/core/iblock_*/${OCF_RESOURCE_INSTANCE}/udev_path"
+ [ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}"
+ [ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC
;;
esac
diff --git a/heartbeat/iSCSITarget b/heartbeat/iSCSITarget
index 72ec64a..b71a21f 100755
--- a/heartbeat/iSCSITarget
+++ b/heartbeat/iSCSITarget
@@ -325,11 +325,14 @@ iSCSITarget_start() {
# number 1. In lio, creating a network portal
# automatically creates the corresponding target if it
# doesn't already exist.
+ ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC
+ ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
for portal in ${OCF_RESKEY_portals}; do
- ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
if [ $portal != ${OCF_RESKEY_portals_default} ] ; then
IFS=':' read -a sep_portal <<< "$portal"
ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC
+ else
+ ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
fi
done
# in lio, we can set target parameters by manipulating
diff --git a/heartbeat/iscsi b/heartbeat/iscsi
index 36ed908..81cd78e 100755
--- a/heartbeat/iscsi
+++ b/heartbeat/iscsi
@@ -194,7 +194,7 @@ open_iscsi_setup() {
# the discussion at the linux-ha-dev ML)
return 1
else
- ocf_log err "iscsid not running; please start open-iscsi utilities"
+ ocf_exit_reason "iscsid not running; please start open-iscsi utilities"
return 2
fi
}
@@ -227,7 +227,7 @@ open_iscsi_discovery() {
output=`$cmd`
if [ $? -ne 0 -o x = "x$output" ]; then
[ x != "x$output" ] && {
- ocf_log err "$cmd FAILED"
+ ocf_exit_reason "$cmd FAILED"
echo "$output"
}
return 3
@@ -244,7 +244,7 @@ open_iscsi_discovery() {
case `echo "$PORTAL" | wc -w` in
0) #target not found
echo "$output"
- ocf_log err "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal"
+ ocf_exit_reason "target $OCF_RESKEY_target not found at portal $OCF_RESKEY_portal"
return 1
;;
1) #we're ok
@@ -258,7 +258,7 @@ open_iscsi_discovery() {
fi
done
echo "$output"
- ocf_log err "sorry, can't handle multihomed hosts unless you specify the portal exactly"
+ ocf_exit_reason "sorry, can't handle multihomed hosts unless you specify the portal exactly"
return 2
;;
esac
@@ -268,18 +268,20 @@ open_iscsi_add() {
}
open_iscsi_get_session_id() {
local target="$1"
+ local portal="$2"
$iscsiadm -m session 2>/dev/null |
grep -E "$target($|[[:space:]])" |
+ grep -E "] $portal" |
awk '{print $2}' | tr -d '[]'
}
open_iscsi_remove() {
local target="$1"
local session_id
- session_id=`open_iscsi_get_session_id "$target"`
+ session_id=`open_iscsi_get_session_id "$target" "$OCF_RESKEY_portal"`
if [ "$session_id" ]; then
$iscsiadm -m session -r $session_id -u
else
- ocf_log err "cannot find session id for target $target"
+ ocf_exit_reason "cannot find session id for target $target"
return 1
fi
}
@@ -296,7 +298,7 @@ open_iscsi_monitor() {
local recov
recov=${2:-$OCF_RESKEY_try_recovery}
- session_id=`open_iscsi_get_session_id "$target"`
+ session_id=`open_iscsi_get_session_id "$target" "$OCF_RESKEY_portal"`
prev_state=""
if [ -z "$session_id" ]; then
if $iscsiadm -m node -p $OCF_RESKEY_portal -T $target >/dev/null 2>&1; then
@@ -328,7 +330,7 @@ open_iscsi_monitor() {
fi
sleep 1
else
- ocf_log err "iscsiadm output: $outp"
+ ocf_exit_reason "iscsiadm output: $outp"
return 2
fi
;;
@@ -443,12 +445,12 @@ case "$1" in
esac
if [ x = "x$OCF_RESKEY_target" ]; then
- ocf_log err "target parameter not set"
+ ocf_exit_reason "target parameter not set"
exit $OCF_ERR_CONFIGURED
fi
if [ x = "x$OCF_RESKEY_portal" ]; then
- ocf_log err "portal parameter not set"
+ ocf_exit_reason "portal parameter not set"
exit $OCF_ERR_CONFIGURED
fi
@@ -465,7 +467,7 @@ LSB_STATUS_STOPPED=3
$setup
setup_rc=$?
if [ $setup_rc -gt 1 ]; then
- ocf_log info "iscsi initiator utilities not installed or not setup"
+ ocf_exit_reason "iscsi initiator utilities not installed or not setup"
case "$1" in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
@@ -475,7 +477,7 @@ if [ $setup_rc -gt 1 ]; then
fi
if [ `id -u` != 0 ]; then
- ocf_log err "$0 must be run as root"
+ ocf_exit_reason "$0 must be run as root"
exit $OCF_ERR_PERM
fi
diff --git a/heartbeat/lxc b/heartbeat/lxc
index 811ae2b..dca5fe4 100755
--- a/heartbeat/lxc
+++ b/heartbeat/lxc
@@ -274,7 +274,12 @@ LXC_stop() {
done
# If the container is still running, it will be stopped now. regardless of state!
- ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC
+ # LXC prior to 1.0.0
+ if ocf_version_cmp "`lxc_version`" 1.0.0 ; then
+ ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC
+ else
+ ocf_run lxc-stop -n ${OCF_RESKEY_container} -k || exit $OCF_ERR_GENERIC
+ fi
ocf_log info "Container" ${OCF_RESKEY_container} "stopped"
ocf_run rm -f $TRANS_RES_STATE
diff --git a/heartbeat/mysql b/heartbeat/mysql
index c3282fd..e2d54dd 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -324,7 +324,7 @@ get_read_only() {
local read_only_state
read_only_state=`$MYSQL $MYSQL_OPTIONS_REPL \
- -e "SHOW VARIABLES" | grep read_only | awk '{print $2}'`
+ -e "SHOW VARIABLES" | grep -w read_only | awk '{print $2}'`
if [ "$read_only_state" = "ON" ]; then
return 0
@@ -347,6 +347,7 @@ is_slave() {
get_slave_info
rc=$?
+ rm -f $tmpfile
if [ $rc -eq 0 ]; then
# show slave status is not empty
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index 6564c66..d5ac972 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -39,6 +39,8 @@ OCF_RESKEY_test_table_default="mysql.user"
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_enable_creation_default=0
OCF_RESKEY_additional_parameters_default=""
+OCF_RESKEY_replication_user_default="root"
+OCF_RESKEY_replication_passwd_default=""
OCF_RESKEY_replication_port_default="3306"
OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
@@ -130,6 +132,17 @@ mysql_common_validate()
return $OCF_SUCCESS
}
+mysql_common_check_pid() {
+ local pid=$1
+
+ if [ -d /proc -a -d /proc/1 ]; then
+ [ "u$pid" != "u" -a -d /proc/$pid ]
+ else
+ kill -s 0 $pid >/dev/null 2>&1
+ fi
+ return $?
+}
+
mysql_common_status() {
local loglevel=$1
local pid=$2
@@ -141,17 +154,17 @@ mysql_common_status() {
pid=`cat $OCF_RESKEY_pid`;
fi
- if [ -d /proc -a -d /proc/1 ]; then
- [ "u$pid" != "u" -a -d /proc/$pid ]
- else
- kill -s 0 $pid >/dev/null 2>&1
- fi
+
+ mysql_common_check_pid $pid
+
if [ $? -eq 0 ]; then
return $OCF_SUCCESS;
else
- ocf_log $loglevel "MySQL not running: removing old PID file"
- rm -f $OCF_RESKEY_pid
+ if [ -e $OCF_RESKEY_pid ]; then
+ ocf_log $loglevel "MySQL not running: removing old PID file"
+ rm -f $OCF_RESKEY_pid
+ fi
return $OCF_NOT_RUNNING;
fi
}
@@ -249,6 +262,14 @@ mysql_common_stop()
fi
pid=`cat $OCF_RESKEY_pid 2> /dev/null `
+
+ mysql_common_check_pid $pid
+ if [ $? -ne 0 ]; then
+ rm -f $OCF_RESKEY_pid
+ ocf_log info "MySQL is already stopped"
+ return $OCF_SUCCESS;
+ fi
+
/bin/kill $pid > /dev/null
rc=$?
if [ $rc != 0 ]; then
diff --git a/heartbeat/nagios b/heartbeat/nagios
new file mode 100755
index 0000000..e61306c
--- /dev/null
+++ b/heartbeat/nagios
@@ -0,0 +1,246 @@
+#!/bin/sh
+#
+# License: GNU General Public License (GPL)
+# (c) 2015 T.J. Yang, O. Albrigtsen
+# and Linux-HA contributors
+#
+# -----------------------------------------------------------------------------
+# O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N
+# -----------------------------------------------------------------------------
+#
+# NAME
+# nagios : OCF resource agent script for Nagios Server
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_user_default="nagios"
+OCF_RESKEY_group_default="nagios"
+OCF_RESKEY_binary_default="/usr/sbin/nagios"
+OCF_RESKEY_config_default="/etc/nagios/nagios.cfg"
+OCF_RESKEY_log_default="/var/log/nagios/nagios.log"
+OCF_RESKEY_retention_default="/var/log/nagios/retention.dat"
+OCF_RESKEY_command_default="/var/log/nagios/rw/nagios.cmd"
+OCF_RESKEY_pid_default="/var/run/nagios.pid"
+
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
+: ${OCF_RESKEY_retention=${OCF_RESKEY_retention_default}}
+: ${OCF_RESKEY_command=${OCF_RESKEY_command_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+
+nagios_usage() {
+ cat <<END
+ usage: $0 (start|stop|validate-all|meta-data|help|usage|monitor)
+ $0 manages a Nagios instance as an OCF HA resource.
+ The 'start' operation starts the instance.
+ The 'stop' operation stops the instance.
+ The 'status' operation reports whether the instance is running
+ The 'monitor' operation reports whether the instance seems to be working
+ The 'validate-all' operation reports whether the parameters are valid
+END
+}
+
+nagios_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="nagios">
+<version>0.75</version>
+
+<longdesc lang="en">OCF Resource script for Nagios 3.x or 4.x. It manages a Nagios instance as a HA resource.</longdesc>
+<shortdesc lang="en">Nagios resource agent</shortdesc>
+
+<parameters>
+
+<parameter name="user">
+ <longdesc lang="en">User running Nagios daemon (for file permissions)</longdesc>
+ <shortdesc lang="en">Nagios user</shortdesc>
+ <content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="group">
+ <longdesc lang="en">Group running Nagios daemon (for file permissions)</longdesc>
+ <shortdesc lang="en">Nagios group</shortdesc>
+ <content type="string" default="${OCF_RESKEY_group_default}" />
+</parameter>
+
+<parameter name="binary">
+ <longdesc lang="en">Location of the Nagios binary</longdesc>
+ <shortdesc lang="en">Nagios binary</shortdesc>
+ <content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config">
+ <longdesc lang="en">Configuration file</longdesc>
+ <shortdesc lang="en">Nagios config</shortdesc>
+ <content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="log">
+ <longdesc lang="en">Location of the Nagios log</longdesc>
+ <shortdesc lang="en">Nagios log</shortdesc>
+ <content type="string" default="${OCF_RESKEY_log_default}" />
+</parameter>
+
+<parameter name="retention">
+ <longdesc lang="en">Location of the Nagios retention file</longdesc>
+ <shortdesc lang="en">Nagios retention file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_retention_default}" />
+</parameter>
+
+<parameter name="command">
+ <longdesc lang="en">Location of the Nagios external command file</longdesc>
+ <shortdesc lang="en">Nagios command file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_command_default}" />
+</parameter>
+
+<parameter name="pid">
+ <longdesc lang="en">Location of the Nagios pid/lock</longdesc>
+ <shortdesc lang="en">Nagios pid file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" depth="0" timeout="20" interval="10" start-delay="10" />
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="20" />
+</actions>
+</resource-agent>
+END
+}
+
+
+nagios_start() {
+ nagios_validate_all
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+
+ # If the resource is already running, there is no need to continue past this point.
+ if nagios_monitor; then
+ ocf_log info "Nagios is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # Remove ${OCF_RESKEY_pid} if it exists
+ rm -f ${OCF_RESKEY_pid}
+
+ ocf_run -q touch ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
+ chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
+ rm -f ${OCF_RESKEY_command}
+ [ -x /sbin/restorecon ] && /sbin/restorecon ${OCF_RESKEY_pid}
+ ocf_run -q ${OCF_RESKEY_binary} -d ${OCF_RESKEY_config}
+
+ while ! nagios_monitor; do
+ sleep 1
+ done
+
+ if [ $? -eq "0" ]; then
+ ocf_log info "Nagios started"
+ return ${OCF_SUCCESS}
+ fi
+
+ return $OCF_SUCCESS
+}
+
+nagios_stop() {
+ nagios_monitor
+ if [ "$?" -ne "$OCF_SUCCESS" ]; then
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ rm -f ${OCF_RESKEY_pid}
+
+ return $OCF_SUCCESS
+ fi
+
+ kill `cat ${OCF_RESKEY_pid}`
+
+ # Wait for process to stop
+ while nagios_monitor; do
+ sleep 1
+ done
+
+ return $OCF_SUCCESS
+}
+
+nagios_monitor(){
+ ocf_pidfile_status ${OCF_RESKEY_pid} > /dev/null 2>&1
+ case "$?" in
+ 0)
+ rc=$OCF_SUCCESS
+ ;;
+ 1|2)
+ rc=$OCF_NOT_RUNNING
+ ;;
+ *)
+ rc=$OCF_ERR_GENERIC
+ ;;
+ esac
+ return $rc
+}
+
+nagios_validate_all(){
+ check_binary ${OCF_RESKEY_binary}
+
+ if [ ! -f ${OCF_RESKEY_config} ]; then
+ ocf_exit_reason "Configuration file ${OCF_RESKEY_config} not found"
+ return ${OCF_ERR_INSTALLED}
+ fi
+
+ ${OCF_RESKEY_binary} -v ${OCF_RESKEY_config} > /dev/null 2>&1;
+ if [ $? -ne "0" ]; then
+ ocf_exit_reason "Configuration check failed"
+ return ${OCF_ERR_INSTALLED}
+ fi
+}
+
+
+# **************************** MAIN SCRIPT ************************************
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) nagios_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) nagios_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# This OCF agent script needs to be run as the root user.
+if ! ocf_is_root; then
+ echo "$0 agent script need to be run as root user."
+ ocf_log debug "$0 agent script need to be run as root user."
+ exit $OCF_ERR_GENERIC
+fi
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) nagios_start;;
+stop) nagios_stop;;
+status|monitor) nagios_monitor;;
+validate-all) nagios_validate_all;;
+*) nagios_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+exit $rc
+
+# End of this script
diff --git a/heartbeat/named b/heartbeat/named
index 2c34a15..2118e0c 100755
--- a/heartbeat/named
+++ b/heartbeat/named
@@ -229,6 +229,9 @@ named_validate_all() {
return $OCF_ERR_CONFIGURED
fi
+ # make sure that the pidfile directory exists
+ ocf_mkstatedir $OCF_RESKEY_named_user 755 `dirname $OCF_RESKEY_named_pidfile` || return $OCF_ERR_INSTALLED
+
return $OCF_SUCCESS
}
@@ -487,3 +490,5 @@ case "$1" in
*)
exit $OCF_ERR_UNIMPLEMENTED;;
esac
+
+# vim:ts=4:sw=4:et:
diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify
index 5f72d58..b8dc1e4 100755
--- a/heartbeat/nfsnotify
+++ b/heartbeat/nfsnotify
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (c) 2014 David Vossel <dvossel at redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index cb8635f..4b6767b 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -12,6 +12,10 @@ else
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
fi
+if is_redhat_based; then
+ . ${OCF_FUNCTIONS_DIR}/nfsserver-redhat.sh
+fi
+
DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver"
if ! [ -f $DEFAULT_INIT_SCRIPT ]; then
# On some systems, the script is just called nfs
@@ -26,9 +30,6 @@ EXEC_MODE=0
SELINUX_ENABLED=-1
STATD_PATH="/var/lib/nfs"
STATD_DIR=""
-NFS_SYSCONFIG="/etc/sysconfig/nfs"
-NFS_SYSCONFIG_LOCAL_BACKUP="/etc/sysconfig/nfs.ha.bu"
-NFS_SYSCONFIG_AUTOGEN_TAG="AUTOGENERATED by $0 high availability resource-agent"
nfsserver_meta_data() {
cat <<END
@@ -104,83 +105,6 @@ IP addresses.
<content type="string"/>
</parameter>
-<parameter name="nfsd_args" unique="0" required="0">
-<longdesc lang="en">
-Specifies what arguments to pass to the nfs daemon on startup. View the rpc.nfsd man page for information on what arguments are available.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-rpc.nfsd options
-</shortdesc>
-<content type="string" />
-</parameter>
-
-<parameter name="lockd_udp_port" unique="0" required="0">
-<longdesc lang="en">
-The udp port lockd should listen on.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-lockd udp port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
-<parameter name="lockd_tcp_port" unique="0" required="0">
-<longdesc lang="en">
-The tcp port lockd should listen on.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-lockd tcp port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
-<parameter name="statd_outgoing_port" unique="0" required="0">
-<longdesc lang="en">
-The source port number sm-notify uses when sending reboot notifications.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-sm-notify source port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
-<parameter name="statd_port" unique="0" required="0">
-<longdesc lang="en">
-The port number used for RPC listener sockets.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-rpc.statd listener port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
-<parameter name="mountd_port" unique="0" required="0">
-<longdesc lang="en">
-The port number used for rpc.mountd listener sockets.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-rpc.mountd listener port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
-<parameter name="rquotad_port" unique="0" required="0">
-<longdesc lang="en">
-The port number used for rpc.rquotad.
-Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
-</longdesc>
-<shortdesc lang="en">
-rpc.rquotad port
-</shortdesc>
-<content type="integer" />
-</parameter>
-
<parameter name="nfs_shared_infodir" unique="0" required="0">
<longdesc lang="en">
The nfsserver resource agent will save nfs related information in this specific directory.
@@ -205,6 +129,10 @@ The mount point for the sunrpc file system.
<content type="string" default="$DEFAULT_RPCPIPEFS_DIR" />
</parameter>
+$(
+is_redhat_based && nfsserver_redhat_meta_data
+)
+
</parameters>
<actions>
@@ -272,7 +200,8 @@ fi
##
# EXEC_MODE values
# 1 user init script or default init script
-# 2 systemd
+# 2 systemd (with nfs-lock.service)
+# 3 systemd (with rpc-statd.service)
#
# On error, this function will terminate the process
# with error code $OCF_ERR_INSTALLED
@@ -307,7 +236,7 @@ set_exec_mode()
fi
##
- # Last of all, attempt systemd.
+ # Attempt systemd (with nfs-lock.service).
##
if which systemctl > /dev/null 2>&1; then
if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then
@@ -317,6 +246,16 @@ set_exec_mode()
fi
fi
+ ##
+ # Attempt systemd (with rpc-statd.service).
+ ##
+ if which systemctl > /dev/null 2>&1; then
+ if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep rpc-statd > /dev/null; then
+ EXEC_MODE=3
+ return 0
+ fi
+ fi
+
ocf_exit_reason "No init script or systemd unit file detected for nfs server"
exit $OCF_ERR_INSTALLED
}
@@ -332,6 +271,7 @@ nfs_exec()
case $EXEC_MODE in
1) ${OCF_RESKEY_nfs_init_script} $cmd;;
2) systemctl $cmd nfs-server.service ;;
+ 3) systemctl $cmd nfs-server.service ;;
esac
}
@@ -342,6 +282,8 @@ v3locking_exec()
if [ $EXEC_MODE -eq 2 ]; then
systemctl $cmd nfs-lock.service
+ elif [ $EXEC_MODE -eq 3 ]; then
+ systemctl $cmd rpc-statd.service
else
case $cmd in
start) locking_start;;
@@ -351,10 +293,42 @@ v3locking_exec()
fi
}
+nfsserver_systemd_monitor()
+{
+ local threads_num
+ local rc
+
+ nfs_exec is-active
+ rc=$?
+
+ # Currently systemctl is-active can't detect the failure of a kernel process like nfsd.
+ # So, if the return value of systemctl is-active is 0, check the number of threads
+ # to make sure the process is really running.
+ # /proc/fs/nfsd/threads has the numbers of the nfsd threads.
+ if [ $rc -eq 0 ]; then
+ threads_num=`cat /proc/fs/nfsd/threads 2>/dev/null`
+ if [ $? -eq 0 ]; then
+ if [ $threads_num -gt 0 ]; then
+ return $OCF_SUCCESS
+ else
+ return 3
+ fi
+ else
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ return $rc
+}
+
nfsserver_monitor ()
{
+ set_exec_mode
fn=`mktemp`
- nfs_exec status > $fn 2>&1
+ case $EXEC_MODE in
+ 1) nfs_exec status > $fn 2>&1;;
+ [23]) nfsserver_systemd_monitor > $fn 2>&1;;
+ esac
rc=$?
ocf_log debug "$(cat $fn)"
rm -f $fn
@@ -377,75 +351,6 @@ nfsserver_monitor ()
fi
}
-set_arg()
-{
- local key="$1"
- local value="$2"
- local file="$3"
- local requires_sysconfig="$4"
-
- if [ -z "$value" ]; then
- return
- fi
-
- # only write to the tmp /etc/sysconfig/nfs if sysconfig exists.
- # otherwise this distro does not support setting these options.
- if [ -d "/etc/sysconfig" ]; then
- echo "${key}=\"${value}\"" >> $file
- elif [ "$requires_sysconfig" = "true" ]; then
- ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args."
- fi
-
- export ${key}="${value}"
-}
-
-set_env_args()
-{
- local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX)
- local statd_args
-
- # nfsd args
- set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true"
-
- # mountd args
- if [ -n "$OCF_RESKEY_mountd_port" ]; then
- set_arg "RPCMOUNTDOPTS" "-p $OCF_RESKEY_mountd_port" "$tmpconfig" "true"
- fi
-
- # statd args. we always want to perform the notify using sm-notify after
- # both rpc.statd and the nfsd daemons are initialized
- statd_args="--no-notify"
- if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then
- statd_args="$statd_args -o $OCF_RESKEY_statd_outgoing_port"
- fi
- if [ -n "$OCF_RESKEY_statd_port" ]; then
- statd_args="$statd_args -p $OCF_RESKEY_statd_port"
- fi
- set_arg "STATDARG" "$statd_args" "$tmpconfig" "false"
-
- # lockd ports
- set_arg "LOCKD_UDPPORT" "$OCF_RESKEY_lockd_udp_port" "$tmpconfig" "true"
- set_arg "LOCKD_TCPPORT" "$OCF_RESKEY_lockd_tcp_port" "$tmpconfig" "true"
-
- # rquotad_port
- if [ -n "$OCF_RESKEY_rquotad_port" ]; then
- set_arg "RPCRQUOTADOPTS" "-p $OCF_RESKEY_rquotad_port" "$tmpconfig" "true"
- fi
-
- # override local nfs config. preserve previous local config though.
- if [ -s $tmpconfig ]; then
- cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG"
- if [ $? -ne 0 ]; then
- # backup local nfs config if it doesn't have our HA autogen tag in it.
- mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP
- fi
- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG
- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG
- cat $tmpconfig >> $NFS_SYSCONFIG
- fi
- rm -f $tmpconfig
-}
-
prepare_directory ()
{
if [ -z "$fp" ]; then
@@ -468,7 +373,7 @@ prepare_directory ()
[ -f "$fp/rmtab" ] || touch "$fp/rmtab"
dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1
- [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state"
+ [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state"
[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp"
}
@@ -540,7 +445,7 @@ locking_start()
ocf_log err "Failed to start rpc.statd"
return $ret
fi
- touch /var/lock/subsys/nfslock
+ [ -d /var/lock/subsys ] && touch /var/lock/subsys/nfslock
return $ret
}
@@ -656,7 +561,7 @@ nfsserver_start ()
return $OCF_SUCCESS
fi
- set_env_args
+ is_redhat_based && set_env_args
prepare_directory
bind_tree
diff --git a/heartbeat/nfsserver-redhat.sh b/heartbeat/nfsserver-redhat.sh
new file mode 100644
index 0000000..cef0862
--- /dev/null
+++ b/heartbeat/nfsserver-redhat.sh
@@ -0,0 +1,169 @@
+NFS_SYSCONFIG="/etc/sysconfig/nfs"
+NFS_SYSCONFIG_LOCAL_BACKUP="/etc/sysconfig/nfs.ha.bu"
+NFS_SYSCONFIG_AUTOGEN_TAG="AUTOGENERATED by $0 high availability resource-agent"
+
+nfsserver_redhat_meta_data() {
+cat<<EOF
+<parameter name="nfsd_args" unique="0" required="0">
+<longdesc lang="en">
+Specifies what arguments to pass to the nfs daemon on startup. View the rpc.nfsd man page for information on what arguments are available.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+rpc.nfsd options
+</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="lockd_udp_port" unique="0" required="0">
+<longdesc lang="en">
+The udp port lockd should listen on.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+lockd udp port
+</shortdesc>
+<content type="integer" />
+</parameter>
+
+<parameter name="lockd_tcp_port" unique="0" required="0">
+<longdesc lang="en">
+The tcp port lockd should listen on.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+lockd tcp port
+</shortdesc>
+<content type="integer" />
+</parameter>
+
+<parameter name="statd_outgoing_port" unique="0" required="0">
+<longdesc lang="en">
+The source port number sm-notify uses when sending reboot notifications.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+sm-notify source port
+</shortdesc>
+<content type="integer" />
+</parameter>
+
+<parameter name="statd_port" unique="0" required="0">
+<longdesc lang="en">
+The port number used for RPC listener sockets.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+rpc.statd listener port
+</shortdesc>
+<content type="integer" />
+</parameter>
+
+<parameter name="mountd_port" unique="0" required="0">
+<longdesc lang="en">
+The port number used for rpc.mountd listener sockets.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+rpc.mountd listener port
+</shortdesc>
+<content type="integer" />
+</parameter>
+
+<parameter name="rquotad_port" unique="0" required="0">
+<longdesc lang="en">
+The port number used for rpc.rquotad.
+Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file.
+</longdesc>
+<shortdesc lang="en">
+rpc.rquotad port
+</shortdesc>
+<content type="integer" />
+</parameter>
+EOF
+}
+
+set_arg()
+{
+ local key="$1"
+ local value="$2"
+ local file="$3"
+ local requires_sysconfig="$4"
+
+ if [ -z "$value" ]; then
+ return
+ fi
+
+ # only write to the tmp /etc/sysconfig/nfs if sysconfig exists.
+ # otherwise this distro does not support setting these options.
+ if [ -d "/etc/sysconfig" ]; then
+ # replace if the value exists, append otherwise
+ if grep "^\s*${key}=" $file ; then
+ sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file
+ else
+ echo "${key}=\"${value}\"" >> $file
+ fi
+ elif [ "$requires_sysconfig" = "true" ]; then
+ ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args."
+ fi
+
+ export ${key}="${value}"
+}
+
+set_env_args()
+{
+ local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX)
+ local statd_args
+
+ if [ -f "$NFS_SYSCONFIG" ]; then
+ ## Take the $NFS_SYSCONFIG file as our skeleton
+ cp $NFS_SYSCONFIG $tmpconfig
+ fi
+
+ # nfsd args
+ set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true"
+
+ # mountd args
+ if [ -n "$OCF_RESKEY_mountd_port" ]; then
+ set_arg "RPCMOUNTDOPTS" "-p $OCF_RESKEY_mountd_port" "$tmpconfig" "true"
+ fi
+
+ # statd args. we always want to perform the notify using sm-notify after
+ # both rpc.statd and the nfsd daemons are initialized
+ statd_args="--no-notify"
+ if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then
+ statd_args="$statd_args -o $OCF_RESKEY_statd_outgoing_port"
+ fi
+ if [ -n "$OCF_RESKEY_statd_port" ]; then
+ statd_args="$statd_args -p $OCF_RESKEY_statd_port"
+ fi
+ set_arg "STATDARG" "$statd_args" "$tmpconfig" "false"
+
+ # lockd ports
+ set_arg "LOCKD_UDPPORT" "$OCF_RESKEY_lockd_udp_port" "$tmpconfig" "true"
+ set_arg "LOCKD_TCPPORT" "$OCF_RESKEY_lockd_tcp_port" "$tmpconfig" "true"
+
+ # rquotad_port
+ if [ -n "$OCF_RESKEY_rquotad_port" ]; then
+ set_arg "RPCRQUOTADOPTS" "-p $OCF_RESKEY_rquotad_port" "$tmpconfig" "true"
+ fi
+
+ # override local nfs config. preserve previous local config though.
+ if [ -s $tmpconfig ]; then
+ cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ # backup local nfs config if it doesn't have our HA autogen tag in it.
+ mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP
+ fi
+
+ cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG
+ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG
+ cat $tmpconfig >> $NFS_SYSCONFIG
+ else
+ cat $tmpconfig > $NFS_SYSCONFIG
+ fi
+ fi
+ rm -f $tmpconfig
+}
diff --git a/heartbeat/ocf-distro b/heartbeat/ocf-distro
new file mode 100644
index 0000000..530ee57
--- /dev/null
+++ b/heartbeat/ocf-distro
@@ -0,0 +1,47 @@
+#
+# This is OCF Linux distribution query support
+#
+# Currently needed for the nfsserver RA which has some already
+# released RH specific stuff (/etc/sysconfig/nfs editing)
+#
+
+_DEBIAN_VERSION_FILE="/etc/debian_version"
+_REDHAT_RELEASE_FILE="/etc/redhat-release"
+_SUSE_RELEASE_FILE="/etc/SuSE-release"
+_RELEASE_FILES="/etc/*-release"
+_REDHAT_BASED_DISTROS_RE='red *hat|fedora|centos|scientific linux'
+
+get_release_id() {
+ if which lsb_release >/dev/null 2>&1; then
+ lsb_release -si
+ elif [ -e $_DEBIAN_VERSION_FILE ]; then
+ echo Debian
+ elif [ -e $_SUSE_RELEASE_FILE ]; then
+ echo SUSE
+ elif [ -e $_REDHAT_RELEASE_FILE ]; then
+ echo Redhat
+ else # FIXME not exactly the id here, but will do for our purpose
+ cat $_RELEASE_FILES 2>/dev/null
+ fi
+}
+
+is_redhat_based() {
+ get_release_id | egrep -qsi "$_REDHAT_BASED_DISTROS_RE"
+}
+
+# get_os_ver() is currently unused
+get_os_ver() {
+ if which lsb_release >/dev/null 2>&1; then
+ OS=`lsb_release -si`
+ VER=`lsb_release -sr`
+ elif [ -f $_DEBIAN_VERSION_FILE ]; then
+ OS=Debian
+ VER=$(cat $_DEBIAN_VERSION_FILE)
+ elif [ -f $_REDHAT_RELEASE_FILE ]; then
+ OS=RedHat # redhat or similar
+ VER= # here some complex sed script
+ else
+ OS=$(uname -s)
+ VER=$(uname -r)
+ fi
+}
diff --git a/heartbeat/ocf-rarun b/heartbeat/ocf-rarun
index ec4bdba..32bbab2 100644
--- a/heartbeat/ocf-rarun
+++ b/heartbeat/ocf-rarun
@@ -51,7 +51,7 @@ mk_action_func() {
}
validate_args() {
is_function $ACTION_FUNC || {
- ocf_log err "$__OCF_ACTION: action not supported"
+ ocf_exit_reason "$__OCF_ACTION: action not supported"
run_function ${OCF_RESOURCE_TYPE}_methods
exit $OCF_ERR_UNIMPLEMENTED
}
@@ -74,7 +74,7 @@ check_required_params() {
local v
for v in $OCF_REQUIRED_PARAMS; do
is_var_defined OCF_RESKEY_$v || {
- ocf_log err "$v: required parameter not set"
+ ocf_exit_reason "$v: required parameter not set"
exit $OCF_ERR_CONFIGURED
}
done
@@ -95,7 +95,7 @@ handle_invalid_env() {
exit $OCF_NOT_RUNNING
else
# in recurring monitor, this amounts to error
- ocf_log err $msg
+ ocf_exit_reason "$msg"
exit $OCF_ERR_GENERIC
fi
;;
@@ -104,7 +104,7 @@ handle_invalid_env() {
exit $LSB_STATUS_STOPPED
;;
*)
- ocf_log err $msg
+ ocf_exit_reason "$msg"
exit $rc
;;
esac
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index a0da395..8ff19af 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -22,7 +22,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
-# Build version: 02beac55c1da0ad99a5a19bd3b2333bcff7e916c
+# Build version: e697f43c4e59a47bd0dc7c093b7d46174035c2dd
# TODO: Some of this should probably split out into a generic OCF
# library for shell scripts, but for the time being, we'll just use it
@@ -57,6 +57,7 @@ fi
. ${OCF_FUNCTIONS_DIR}/ocf-returncodes
. ${OCF_FUNCTIONS_DIR}/ocf-directories
. ${OCF_FUNCTIONS_DIR}/ocf-rarun
+. ${OCF_FUNCTIONS_DIR}/ocf-distro
# Define OCF_RESKEY_CRM_meta_interval in case it isn't already set,
# to make sure that ocf_is_probe() always works
@@ -748,6 +749,87 @@ ocf_stop_processes() {
}
#
+# create a given status directory
+# if the directory path doesn't start with $HA_VARRUN, then
+# we return with error (most of the calls would be with the user
+# supplied configuration, hence we need to do necessary
+# protection)
+# used mostly for PID files
+#
+# usage: ocf_mkstatedir owner permissions path
+#
+# owner: user.group
+# permissions: permissions
+# path: directory path
+#
+# example:
+# ocf_mkstatedir named 755 `dirname $pidfile`
+#
+ocf_mkstatedir()
+{
+ local owner
+ local perms
+ local path
+
+ owner=$1
+ perms=$2
+ path=$3
+
+ test -d $path && return 0
+ [ $(id -u) = 0 ] || return 1
+
+ case $path in
+ $HA_VARRUN/*) : this path is ok ;;
+ *) ocf_log err "cannot create $path (does not start with $HA_VARRUN)"
+ return 1
+ ;;
+ esac
+
+ mkdir -p $path &&
+ chown $owner $path &&
+ chmod $perms $path
+}
+
+#
+# create a unique status directory in $HA_VARRUN
+# used mostly for PID files
+# the directory is by default set to
+# $HA_VARRUN/$OCF_RESOURCE_INSTANCE
+# the directory name is printed to stdout
+#
+# usage: ocf_unique_rundir owner permissions name
+#
+# owner: user.group (default: "root")
+# permissions: permissions (default: "755")
+# name: some unique string (default: "$OCF_RESOURCE_INSTANCE")
+#
+# to use the default either don't set the parameter or set it to
+# empty string ("")
+# example:
+#
+# STATEDIR=`ocf_unique_rundir named "" myownstatedir`
+#
+ocf_unique_rundir()
+{
+ local path
+ local owner
+ local perms
+ local name
+
+ owner=${1:-"root"}
+ perms=${2:-"755"}
+ name=${3:-"$OCF_RESOURCE_INSTANCE"}
+ path=$HA_VARRUN/$name
+ if [ ! -d $path ]; then
+ [ $(id -u) = 0 ] || return 1
+ mkdir -p $path &&
+ chown $owner $path &&
+ chmod $perms $path || return 1
+ fi
+ echo $path
+}
+
+#
# RA tracing may be turned on by setting OCF_TRACE_RA
# the trace output will be saved to OCF_TRACE_FILE, if set, or
# by default to
@@ -820,7 +902,9 @@ ocf_start_trace() {
return
ocf_trace_redirect_to_file "$__OCF_TRC_DEST"
fi
- PS4='+ `date +"%T"`: ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: '
+ if [ -n "$BASH_VERSION" ]; then
+ PS4='+ `date +"%T"`: ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: '
+ fi
set -x
env=$( echo; printenv | sort )
}
diff --git a/heartbeat/ora-common.sh b/heartbeat/ora-common.sh
index 5bbb163..728caaf 100644
--- a/heartbeat/ora-common.sh
+++ b/heartbeat/ora-common.sh
@@ -17,8 +17,12 @@ rmtmpfiles() {
ora_common_getconfig() {
ORACLE_SID=$1
+ # optional, defaults to whatever is in oratab
ORACLE_HOME=$2
+ # optional, defaults to the owner of ORACLE_HOME
ORACLE_OWNER=$3
+ # optional, defaults to $ORACLE_HOME/network/admin
+ # (only the oralsnr may provide and use this one)
TNS_ADMIN=$4
# get ORACLE_HOME from /etc/oratab if not set
@@ -61,7 +65,7 @@ ora_common_validate_all() {
US=`id -u -n`
if [ $US != root -a $US != $ORACLE_OWNER ]
then
- ocf_log err "$0 must be run as root or $ORACLE_OWNER"
+ ocf_exit_reason "$0 must be run as root or $ORACLE_OWNER"
return $OCF_ERR_PERM
fi
return 0
diff --git a/heartbeat/oracle b/heartbeat/oracle
index 785be46..951221c 100755
--- a/heartbeat/oracle
+++ b/heartbeat/oracle
@@ -403,7 +403,7 @@ check_mon_profile() {
if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
return 0
else
- ocf_log err "could not create $MONPROFILE oracle profile"
+ ocf_exit_reason "could not create $MONPROFILE oracle profile"
ocf_log err "sqlplus output: $output"
return 1
fi
@@ -426,7 +426,7 @@ check_mon_user() {
if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then
return 0
fi
- ocf_log err "could not set profile for $MONUSR oracle user"
+ ocf_exit_reason "could not set profile for $MONUSR oracle user"
ocf_log err "sqlplus output: $output( $output2 )"
return 1
fi
@@ -435,7 +435,7 @@ check_mon_user() {
if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
return 0
else
- ocf_log err "could not create $MONUSR oracle user"
+ ocf_exit_reason "could not create $MONUSR oracle user"
ocf_log err "sqlplus output: $output"
return 1
fi
@@ -567,7 +567,7 @@ ora_cleanup() {
}
oracle_getconfig() {
- ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin"
+ ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user"
clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"}
shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"}
@@ -622,7 +622,7 @@ oracle_start() {
;;
*)
: error!!
- ocf_log err "oracle $ORACLE_SID can not be mounted (status: $status)"
+ ocf_exit_reason "oracle $ORACLE_SID can not be mounted (status: $status)"
return $OCF_ERR_GENERIC
;;
esac
@@ -646,10 +646,10 @@ oracle_start() {
fi
if ! is_proc_running; then
- ocf_log err "oracle process not running: $output"
+ ocf_exit_reason "oracle process not running: $output"
return $OCF_ERR_GENERIC
elif ! instance_live; then
- ocf_log err "oracle instance $ORACLE_SID not started: $output"
+ ocf_exit_reason "oracle instance $ORACLE_SID not started: $output"
return $OCF_ERR_GENERIC
else
: cool, we are up and running
@@ -672,7 +672,7 @@ oracle_stop() {
fi
ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids` # kill the procs if they hanged
if is_proc_running; then
- ocf_log err "Oracle instance $ORACLE_SID not stopped: $output"
+ ocf_exit_reason "Oracle instance $ORACLE_SID not stopped: $output"
return $OCF_ERR_GENERIC
else
ocf_log info "Oracle instance $ORACLE_SID stopped: $output"
@@ -692,7 +692,7 @@ oracle_monitor() {
return $OCF_NOT_RUNNING
fi
if ! instance_live; then
- ocf_log err "oracle instance $ORACLE_SID is down"
+ ocf_exit_reason "oracle instance $ORACLE_SID is down"
return $OCF_ERR_GENERIC
fi
#ocf_log info "Oracle instance $ORACLE_SID is alive"
@@ -727,14 +727,14 @@ oracle_validate_all() {
case "${shutdown_method}" in
"immediate") ;;
"checkpoint/abort") ;;
- *) ocf_log err "unsupported shutdown_method, please read meta-data"
+ *) ocf_exit_reason "unsupported shutdown_method, please read meta-data"
return $OCF_ERR_CONFIGURED
;;
esac
case "${IPCRM}" in
"none"|"instance"|"orauser") ;;
- *) ocf_log err "unsupported ipcrm setting, please read meta-data"
+ *) ocf_exit_reason "unsupported ipcrm setting, please read meta-data"
return $OCF_ERR_CONFIGURED
;;
esac
diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr
index a91eeab..c47f121 100755
--- a/heartbeat/oralsnr
+++ b/heartbeat/oralsnr
@@ -178,7 +178,7 @@ oralsnr_start() {
ocf_log info "Listener $listener running: $output"
return $OCF_SUCCESS
else
- ocf_log err "Listener $listener appears to have started, but is not running properly: $output"
+ ocf_exit_reason "Listener $listener appears to have started, but is not running properly: $output"
ocf_log err "Probable Oracle configuration error"
return $OCF_ERR_GENERIC
fi
@@ -196,7 +196,7 @@ oralsnr_stop() {
fi
ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids` # kill the procs if they hanged
if is_proc_running; then
- ocf_log err "Listener $listener not stopped: $output"
+ ocf_exit_reason "Listener $listener not stopped: $output"
return $OCF_ERR_GENERIC
else
ocf_log info "Listener $listener stopped: $output"
@@ -218,7 +218,7 @@ test_listener() {
then
return $OCF_SUCCESS
else
- ocf_log err "$listener status failed: $output"
+ ocf_exit_reason "$listener status failed: $output"
return $OCF_ERR_GENERIC
fi
}
@@ -229,7 +229,7 @@ test_tnsping() {
if echo "$output" | tail -1 | grep -qs '^OK'; then
return $OCF_SUCCESS
else
- ocf_log err "tnsping $ORACLE_SID failed: $output"
+ ocf_exit_reason "tnsping $ORACLE_SID failed: $output"
return $OCF_ERR_GENERIC
fi
}
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 26bb251..320db69 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -18,6 +18,13 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
#
# Get PostgreSQL Configuration parameter
#
@@ -43,6 +50,7 @@ OCF_RESKEY_pghost_default=""
OCF_RESKEY_pgport_default=5432
OCF_RESKEY_pglibs_default=/usr/lib
OCF_RESKEY_start_opt_default=""
+OCF_RESKEY_ctl_opt_default=""
OCF_RESKEY_pgdb_default=template1
OCF_RESKEY_logfile_default=/dev/null
OCF_RESKEY_stop_escalate_default=30
@@ -64,6 +72,7 @@ OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp"
OCF_RESKEY_xlog_check_count_default="3"
OCF_RESKEY_crm_attr_timeout_default="5"
OCF_RESKEY_stop_escalate_in_slave_default=30
+OCF_RESKEY_replication_slot_name_default=""
: ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}
: ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}
@@ -74,6 +83,7 @@ OCF_RESKEY_stop_escalate_in_slave_default=30
: ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf}
: ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}
+: ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}}
: ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}
: ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}
: ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}}
@@ -96,6 +106,7 @@ OCF_RESKEY_stop_escalate_in_slave_default=30
: ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}}
: ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}}
: ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}}
+: ${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}}
usage() {
cat <<EOF
@@ -361,6 +372,29 @@ This is optional for replication.
<content type="boolean" default="${OCF_RESKEY_restart_on_promote_default}" />
</parameter>
+<parameter name="replication_slot_name" unique="0" required="0">
+<longdesc lang="en">
+Set this option when using replication slots.
+Can only use lower case letters, numbers and underscore for replication_slot_name.
+
+When the master node has 1 slave node,one replication slot would be created with the name "replication_slot_name".
+When the master node has 2 or more slave nodes,the replication slots would be created for each node, with the name adding the node name as postfix.
+For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to
+their slots, the slots names are "sample_node1" and "sample_node2".
+If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore.
+For example, Node-1.example.com to node_1_example_com.
+
+pgsql RA doesn't monitor and delete the replication slot.
+When the slave node has been disconnected in failure or the like, execute one of the following manually.
+Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL.
+1. recover and reconnect the slave node to the master node as soon as possible.
+2. delete the slot on the master node by following psql command.
+$ select pg_drop_replication_slot('replication_slot_name');
+</longdesc>
+<shortdesc lang="en">replication_slot_name</shortdesc>
+<content type="string" default="${OCF_RESKEY_replication_slot_name_default}" />
+</parameter>
+
<parameter name="tmpdir" unique="0" required="0">
<longdesc lang="en">
Path to temporary directory.
@@ -376,7 +410,7 @@ Number of checks of xlog on monitor before promote.
This is optional for replication.
</longdesc>
<shortdesc lang="en">xlog check count</shortdesc>
-<content type="integer" default="${OCF_RESKEY_check_count_default}" />
+<content type="integer" default="${OCF_RESKEY_xlog_check_count_default}" />
</parameter>
<parameter name="crm_attr_timeout" unique="0" required="0">
@@ -404,7 +438,10 @@ This is optional for replication.
If this is true, RA checks wal_receiver process on monitor
and notifies its status using "(resource name)-receiver-status" attribute.
It's useful for checking whether PostgreSQL (hot standby) connects to primary.
-The attribute shows status as "normal" or "ERROR".
+The attribute shows status as "normal" or "normal (master)" or "ERROR".
+Note that if you configure PostgreSQL as master/slave resource, then
+wal receiver is not running in the master and the attribute shows status as
+"normal (master)" consistently because it is normal status.
</longdesc>
<shortdesc lang="en">check_wal_receiver</shortdesc>
<content type="boolean" default="${OCF_RESKEY_check_wal_receiver_default}" />
@@ -451,7 +488,7 @@ runasowner() {
esac
done
- ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
+ ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
}
#
@@ -482,6 +519,22 @@ EOF
}
+# Execute SQL and return the result.
+exec_sql() {
+ local sql="$1"
+ local output
+ local rc
+
+ output=`$SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
+ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
+ -Atc \"$sql\""`
+ rc=$?
+
+ echo $output
+ return $rc
+}
+
+
#pgsql_real_start: Starts PostgreSQL
pgsql_real_start() {
local pgctl_options
@@ -562,6 +615,16 @@ pgsql_real_start() {
ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
done
+ # create replication slot on the master and slave nodes.
+ # creating slot on the slave node is in preparation for failover.
+ if use_replication_slot; then
+ create_replication_slot
+ if [ $? -eq $OCF_ERR_GENERIC ]; then
+ ocf_exit_reason "PostgreSQL can't create replication_slot."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
ocf_log info "PostgreSQL is started."
return $rc
}
@@ -738,7 +801,7 @@ pgsql_real_stop() {
if pgsql_status
then
#PostgreSQL is still up. Use another shutdown mode.
- ocf_log info "PostgreSQL failed to stop after ${OCF_RESKEY_stop_escalate}s using -m fast. Trying -m immediate..."
+ ocf_log info "PostgreSQL failed to stop after ${stop_escalate}s using -m fast. Trying -m immediate..."
runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m immediate"
fi
@@ -820,13 +883,21 @@ pgsql_status() {
pgsql_wal_receiver_status() {
local PID
local receiver_parent_pids
+ local pgsql_real_monitor_status=$1
PID=`head -n 1 $PIDFILE`
receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al receiver process" | cut -d " " -f 3`
+
if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q
return 0
fi
+
+ if [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)" -q
+ return 0
+ fi
+
attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q
ocf_log warn "wal receiver process is not running"
return 1
@@ -850,16 +921,11 @@ pgsql_real_monitor() {
return $OCF_NOT_RUNNING
fi
- if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
- pgsql_wal_receiver_status
- fi
-
if is_replication; then
#Check replication state
- output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
- $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
- -Atc \"${CHECK_MS_SQL}\""`
+ output=`exec_sql "${CHECK_MS_SQL}"`
rc=$?
+
if [ $rc -ne 0 ]; then
report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status."
return $OCF_ERR_GENERIC
@@ -918,7 +984,7 @@ pgsql_replication_monitor() {
# I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor,
# so I will get master node name using crm_mon -n
- print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):]Master"
+ print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):].*Master"
if [ $? -ne 0 ] ; then
# If I am Slave and Master is not exist
ocf_log info "Master does not exist."
@@ -943,6 +1009,11 @@ pgsql_monitor() {
pgsql_real_monitor
rc=$?
+
+ if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
+ pgsql_wal_receiver_status $rc
+ fi
+
if ! is_replication; then
return $rc
else
@@ -1051,10 +1122,9 @@ control_slave_status() {
local tmp_data_status
local number_of_nodes
- all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
- $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
- -Atc \"${CHECK_REPLICATION_STATE_SQL}\""`
+ all_data_status=`exec_sql "${CHECK_REPLICATION_STATE_SQL}"`
rc=$?
+
if [ $rc -eq 0 ]; then
if [ -n "$all_data_status" ]; then
all_data_status=`echo $all_data_status | sed "s/\n/ /g"`
@@ -1226,6 +1296,104 @@ is_replication() {
return 1
}
+use_replication_slot() {
+ if [ -n "$OCF_RESKEY_replication_slot_name" ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+create_replication_slot_name() {
+ local number_of_nodes=0
+ local target
+ local replication_slot_name
+ local replication_slot_name_list_tmp
+ local replication_slot_name_list
+
+ if [ -n "$NODE_LIST" ]; then
+ number_of_nodes=`echo $NODE_LIST | wc -w`
+ fi
+
+ # If the number of nodes is 2 or less, the Master node has at most 1 Slave node.
+ # The Master node should have 1 slot for the Slave, which is named "$OCF_RESKEY_replication_slot_name".
+ if [ $number_of_nodes -le 2 ]; then
+ replication_slot_name_list="$OCF_RESKEY_replication_slot_name"
+
+ # If the number of nodes is 3 or more, the Master has several Slave nodes.
+ # The Master node should have as many slots as there are Slaves, and
+ # each Slave node connects to its dedicated slot on the Master.
+ # To ensure that the slot names are each unique, add a postfix to $OCF_RESKEY_replication_slot_name.
+ # The postfix is "_$target".
+ else
+ for target in $NODE_LIST
+ do
+ if [ "$target" != "$NODENAME" ]; then
+ # Upper case letters, "-" and "." are not allowed in slot_name.
+ # If the NODENAME contains them, convert upper case to lower case and "-" and "." to "_".
+ target=`echo "$target" | tr '[A-Z.-]' '[a-z__]'`
+ replication_slot_name="$OCF_RESKEY_replication_slot_name"_"$target"
+ replication_slot_name_list_tmp="$replication_slot_name_list"
+ replication_slot_name_list="$replication_slot_name_list_tmp $replication_slot_name"
+ fi
+ done
+ fi
+
+ echo $replication_slot_name_list
+}
+
+create_replication_slot() {
+ local replication_slot_name
+ local replication_slot_name_list
+ local output
+ local rc
+ local CREATE_REPLICATION_SLOT_sql
+ local DELETE_REPLICATION_SLOT_sql
+
+ replication_slot_name_list=`create_replication_slot_name`
+ ocf_log debug "replication slot names are $replication_slot_name_list."
+
+ for replication_slot_name in $replication_slot_name_list
+ do
+ # If a slot with the same name already exists, initialize (delete and create) the slot.
+ if [ `check_replication_slot $replication_slot_name` = "1" ]; then
+ DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$replication_slot_name');"
+ output=`exec_sql "$DELETE_REPLICATION_SLOT_sql"`
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
+ ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)."
+ else
+ ocf_exit_reason "$output"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');"
+ output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"`
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
+ ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)."
+ else
+ ocf_exit_reason "$output"
+ return $OCF_ERR_GENERIC
+ fi
+ done
+
+ return 0
+}
+
+# This function checks whether the replication slot exists.
+check_replication_slot(){
+ local replication_slot_name=$1
+ local output
+ local CHECK_REPLICATION_SLOT_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$replication_slot_name'"
+
+ output=`exec_sql "$CHECK_REPLICATION_SLOT_sql"`
+ echo "$output"
+}
+
get_my_location() {
local rc
local output
@@ -1237,10 +1405,9 @@ get_my_location() {
local log2
local newer_location
- output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \
- $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \
- -Atc \"${CHECK_XLOG_LOC_SQL}\""`
+ output=`exec_sql "$CHECK_XLOG_LOC_SQL"`
rc=$?
+
if [ $rc -ne 0 ]; then
report_psql_error $rc err "Can't get my xlog location."
return 1
@@ -1340,6 +1507,9 @@ reload_conf() {
}
user_recovery_conf() {
+ local number_of_nodes
+ local nodename_tmp
+
# put archive_cleanup_command and recovery_end_command only when defined by user
if [ -n "$OCF_RESKEY_archive_cleanup_command" ]; then
echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'"
@@ -1347,6 +1517,16 @@ user_recovery_conf() {
if [ -n "$OCF_RESKEY_recovery_end_command" ]; then
echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'"
fi
+
+ if use_replication_slot; then
+ number_of_nodes=`echo $NODE_LIST | wc -w`
+ if [ $number_of_nodes -le 2 ]; then
+ echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}'"
+ else
+ nodename_tmp=`echo "$NODENAME" | tr '[A-Z.-]' '[a-z__]'`
+ echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'"
+ fi
+ fi
}
make_recovery_conf() {
@@ -1424,7 +1604,7 @@ set_master_score() {
current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null`
if [ -n "$current_score" -a "$current_score" != "$2" ]; then
ocf_log info "Changing $3 master score on $1 : $current_score->$2."
- exec_with_retry 0 $CRM_ATTR_REBOOT -N "$target" -n "master-$3" -v "$2"
+ exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2"
fi
return 0
}
@@ -1603,7 +1783,7 @@ pgsql_validate_all() {
# unix_socket_directories may have multiple socket directories and the pgsql RA can not know which directory is used for psql command.
# Therefore, the user must set OCF_RESKEY_socketdir explicitly.
if [ -z "$OCF_RESKEY_socketdir" ]; then
- ocf_log err "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
+ ocf_exit_reason "In PostgreSQL 9.3 or higher, socketdir can't be empty if you define unix_socket_directories in the postgresql.conf."
return $OCF_ERR_CONFIGURED
fi
fi
@@ -1689,6 +1869,20 @@ pgsql_validate_all() {
fi
fi
+ if use_replication_slot; then
+ ocf_version_cmp "$version" "9.4"
+ if [ $? -eq 0 -o $? -eq 3 ]; then
+ ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher."
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]' # quoted: an unquoted bracket expression is a glob the shell may expand against files in the cwd
+ if [ $? -eq 0 ]; then
+ ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character."
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+
return $OCF_SUCCESS
}
diff --git a/heartbeat/pingd b/heartbeat/pingd
index 34ed704..205bda6 100755
--- a/heartbeat/pingd
+++ b/heartbeat/pingd
@@ -175,7 +175,7 @@ pingd_start() {
exit $OCF_SUCCESS
fi
- ocf_log err "Could not run $pingd_cmd : rc=$rc"
+ ocf_exit_reason "Could not run $pingd_cmd : rc=$rc"
exit $OCF_ERR_GENERIC
}
@@ -192,7 +192,7 @@ pingd_stop() {
exit $OCF_SUCCESS
fi
- ocf_log err "Unexpected result from kill -9 $pid: $rc"
+ ocf_exit_reason "Unexpected result from kill -9 $pid: $rc"
exit $OCF_ERR_GENERIC
fi
exit $OCF_SUCCESS
@@ -218,7 +218,7 @@ pingd_validate() {
if [ $? -eq 0 ]; then
: Yes, user exists. We can further check his permission on crm_mon if necessary
else
- ocf_log err "The user $OCF_RESKEY_user does not exist!"
+ ocf_exit_reason "The user $OCF_RESKEY_user does not exist!"
exit $OCF_ERR_ARGS
fi
fi
@@ -233,7 +233,7 @@ pingd_validate() {
if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then
:
else
- ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!"
+ ocf_exit_reason "Invalid update interval $OCF_RESKEY_update. It should be positive integer!"
exit $OCF_ERR_ARGS
fi
diff --git a/heartbeat/portblock b/heartbeat/portblock
index 6085566..c480954 100755
--- a/heartbeat/portblock
+++ b/heartbeat/portblock
@@ -39,16 +39,22 @@ usage()
$CMD is used to temporarily block ports using iptables.
- It can be used to turn off a port before bringing
+ It can be used to blackhole a port before bringing
up an IP address, and enable it after a service is started.
- To do that for samba, the following resource line can be used:
+ To do that for samba, the following can be used:
- $CMD::tcp::137,138::block \\
- 10.10.10.20 \\
- nmbd smbd \\
- $CMD::tcp::137,138::unblock
+ crm configure <<EOF
+ primitive portblock-samba ocf:heartbeat:portblock \\
+ params protocol=tcp portno=137,138 action=block
+ primitive portunblock-samba ocf:heartbeat:portblock \\
+ params protocol=tcp portno=137,138 action=unblock
+ primitive samba-vip ocf:heartbeat:IPaddr2 \\
+ params ip=10.10.10.20
+ group g-samba \\
+ portblock-samba samba-vip nmbd smbd portunblock-samba
+ EOF
- This will do the follwing things:
+ This will do the following things:
- DROP all incoming packets for TCP ports 137 and 138
- Bring up the IP alias 10.10.10.20
@@ -56,13 +62,16 @@ usage()
- Re-enable TCP ports 137 and 138
(enable normal firewall rules on those ports)
- This prevents clients from getting ICMP port unreachable
- if they try to reconnect to the service after the alias is
- enabled but before nmbd and smbd are running. These packets
- will cause some clients to give up attempting to reconnect to
- the server.
+ This prevents clients from getting TCP RST if they try to reconnect
+ to the service after the alias is enabled but before nmbd and smbd
+ are running. These packets will cause some clients to give up
+ attempting to reconnect to the server.
- NOTE: iptables is linux-specific...
+ Attempts to connect to UDP and other non-TCP ports which have nothing
+ listening can result in ICMP port unreachable responses, which can
+ have the same undesirable affect on some clients.
+
+ NOTE: iptables is Linux-specific.
An additional feature in the portblock RA is the tickle ACK function
enabled by specifying the tickle_dir parameter. The tickle ACK
@@ -140,7 +149,7 @@ The protocol used to be blocked/unblocked.
The port number used to be blocked/unblocked.
</longdesc>
<shortdesc lang="en">portno</shortdesc>
-<content type="integer" default="" />
+<content type="string" default="" />
</parameter>
<parameter name="action" unique="0" required="1">
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
new file mode 100755
index 0000000..cc45f09
--- /dev/null
+++ b/heartbeat/rabbitmq-cluster
@@ -0,0 +1,370 @@
+#!/bin/sh
+#
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+RMQ_SERVER=/usr/sbin/rabbitmq-server
+RMQ_CTL=/usr/sbin/rabbitmqctl
+RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
+RMQ_PID_DIR="/var/run/rabbitmq"
+RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
+RMQ_LOG_DIR="/var/log/rabbitmq"
+NODENAME=$(ocf_local_nodename)
+
+RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="rabbitmq-cluster" version="0.9">
+<version>1.0</version>
+
+<longdesc lang="en">
+Starts cloned rabbitmq cluster instance
+</longdesc>
+<shortdesc lang="en">rabbitmq clustered</shortdesc>
+
+<parameters>
+<parameter name="set_policy" unique="1">
+<longdesc lang="en">
+Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
+</longdesc>
+<shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="100" />
+<action name="stop" timeout="90" />
+<action name="monitor" timeout="40" interval="10" depth="0" />
+<action name="meta-data" timeout="10" />
+<action name="validate-all" timeout="20" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+rmq_usage() { # Print the command-line synopsis; lists only the actions the dispatcher at the end of this script implements.
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+rmq_wipe_data()
+{
+ rm -rf $RMQ_DATA_DIR > /dev/null 2>&1
+}
+
+rmq_local_node()
+{
+ # Print the rabbitmq node name: taken from '$RMQ_CTL status', else from RABBITMQ_NODENAME in rabbitmq-env.conf (empty if neither yields one).
+ local node_name=$($RMQ_CTL status 2>&1 | sed -n -e "s/^.*[Ss]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'")
+
+ if [ -z "$node_name" ]; then
+ node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}')
+ fi
+
+ echo "$node_name"
+}
+
+rmq_join_list()
+{
+ cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
+}
+
+rmq_write_nodename()
+{
+ local node_name=$(rmq_local_node)
+
+ if [ -z "$node_name" ]; then
+ ocf_log err "Failed to determine rabbitmq node name, exiting"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ # store the pcmknode to rmq node mapping as an attribute
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
+}
+
+rmq_delete_nodename()
+{
+ # remove node-name
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D
+}
+
+prepare_dir () {
+ if [ ! -d ${1} ] ; then
+ mkdir -p ${1}
+ chown -R rabbitmq:rabbitmq ${1}
+ chmod 755 ${1}
+ fi
+}
+
+remove_pid () {
+ rm -f ${RMQ_PID_FILE} > /dev/null 2>&1
+}
+
+rmq_monitor() {
+ local rc
+
+ $RMQ_CTL cluster_status > /dev/null 2>&1
+ rc=$?
+ case "$rc" in
+ 0)
+ ocf_log debug "RabbitMQ server is running normally"
+ rmq_write_nodename
+
+ return $OCF_SUCCESS
+ ;;
+ 2)
+ ocf_log info "RabbitMQ server is not running"
+ rmq_delete_nodename
+ return $OCF_NOT_RUNNING
+ ;;
+ *)
+ ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc"
+ rmq_delete_nodename
+ return $OCF_ERR_GENERIC
+ ;;
+ esac
+}
+
+rmq_init_and_wait()
+{
+ local rc
+
+ prepare_dir $RMQ_PID_DIR
+ prepare_dir $RMQ_LOG_DIR
+ remove_pid
+
+ # the server startup script uses this environment variable
+ export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
+
+ setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &
+
+ ocf_log info "Waiting for server to start"
+ $RMQ_CTL wait $RMQ_PID_FILE
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ remove_pid
+ ocf_log info "rabbitmq-server start failed: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+
+ rmq_monitor
+ return $?
+}
+
+rmq_set_policy()
+{
+ $RMQ_CTL set_policy $@ > /dev/null 2>&1
+}
+
+rmq_start_first()
+{
+ local rc
+
+ ocf_log info "Bootstrapping rabbitmq cluster"
+ rmq_wipe_data
+ rmq_init_and_wait
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
+ rc=$OCF_SUCCESS
+ ocf_log info "cluster bootstrapped"
+
+ if [ -n "$OCF_RESKEY_set_policy" ]; then
+ # do not quote set_policy, we are passing in arguments
+ rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
+ rc=$OCF_ERR_GENERIC
+ else
+ ocf_log info "Policy set: $OCF_RESKEY_set_policy"
+ fi
+ fi
+
+ else
+ ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
+ rc=$OCF_ERR_GENERIC
+ fi
+
+ return $rc
+}
+
+rmq_join_existing()
+{
+ local join_list="$1"
+ local rc=$OCF_ERR_GENERIC
+
+ ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."
+ rmq_init_and_wait
+ if [ $? -ne 0 ]; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ # unconditionally join the cluster
+ $RMQ_CTL stop_app > /dev/null 2>&1
+ for node in $(echo "$join_list"); do
+ ocf_log info "Attempting to join cluster with target node $node"
+ $RMQ_CTL join_cluster $node
+ if [ $? -eq 0 ]; then
+ ocf_log info "Joined cluster by connecting to node $node, starting app"
+ $RMQ_CTL start_app
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "'$RMQ_CTL start_app' failed"
+ fi
+ break;
+ fi
+ done
+
+ if [ "$rc" -ne 0 ]; then
+ ocf_log info "Join process incomplete, shutting down."
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log info "Successfully joined existing rabbitmq cluster"
+ return $OCF_SUCCESS
+}
+
+rmq_start() {
+ local join_list=""
+ local rc
+
+ rmq_monitor
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ join_list=$(rmq_join_list)
+
+ # No join list means no active instances are up. This instance
+ # is the first, so it needs to bootstrap the rest
+ if [ -z "$join_list" ]; then
+ rmq_start_first
+ rc=$?
+ return $rc
+ fi
+
+ # first try to join without wiping mnesia data
+ rmq_join_existing "$join_list"
+ if [ $? -ne 0 ]; then
+ ocf_log info "node failed to join, wiping data directory and trying again"
+ # if the graceful join fails, use the hammer and reset all the data.
+ rmq_stop
+ rmq_wipe_data
+ rmq_join_existing "$join_list"
+ if [ $? -ne 0 ]; then
+ ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
+rmq_stop() {
+ rmq_monitor
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ return $OCF_SUCCESS
+ fi
+
+ $RMQ_CTL stop
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
+ return $rc
+ fi
+
+ #TODO add kill logic
+ stop_wait=1
+ while [ $stop_wait = 1 ]; do
+ rmq_monitor
+ rc=$?
+ if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
+ stop_wait=0
+ break
+ elif [ "$rc" -ne $OCF_SUCCESS ]; then
+ ocf_log info "rabbitmq-server stop failed: $rc"
+ exit $OCF_ERR_GENERIC
+ fi
+ sleep 1
+ done
+
+ remove_pid
+ return $OCF_SUCCESS
+}
+
+rmq_validate() {
+ check_binary $RMQ_SERVER
+ check_binary $RMQ_CTL
+
+ # This resource only makes sense as a clone right now. at some point
+ # we may want to verify the following.
+ #TODO verify cloned
+ #TODO verify ordered=true
+
+ # Given that this resource does the cluster join explicitly,
+ # having a cluster_nodes list in the static config file will
+ # likely conflict with this agent.
+ #TODO verify no cluster list in rabbitmq conf
+ #cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"
+
+ return $OCF_SUCCESS
+}
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+start) rmq_start;;
+stop) rmq_stop;;
+monitor) rmq_monitor;;
+validate-all) rmq_validate;;
+usage|help) rmq_usage
+ exit $OCF_SUCCESS
+ ;;
+*) rmq_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
+
diff --git a/heartbeat/redis b/heartbeat/redis
new file mode 100755
index 0000000..f85b2f7
--- /dev/null
+++ b/heartbeat/redis
@@ -0,0 +1,567 @@
+#!/bin/bash
+
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+
+: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
+: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
+: ${OCF_RESKEY_user:=redis}
+: ${OCF_RESKEY_rundir:=/var/run/redis}
+: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
+: ${OCF_RESKEY_socket_name:=redis.sock}
+: ${OCF_RESKEY_port:=6379}
+
+if [ -z "$OCF_RESKEY_config" ]; then
+ if [ -f "/etc/redis.conf" ]; then
+ OCF_RESKEY_config="/etc/redis.conf"
+ else
+ OCF_RESKEY_config="/etc/redis/redis.conf"
+ fi
+fi
+
+CHECK_SLAVE_STATE=0
+
+REDIS_CHECK_DUMP="/usr/bin/redis-check-dump"
+REDIS_SERVER="$OCF_RESKEY_bin"
+REDIS_CLIENT="$OCF_RESKEY_client_bin"
+REDIS_CONFIG="$OCF_RESKEY_config"
+REDIS_USER="$OCF_RESKEY_user"
+REDIS_RUNDIR="$OCF_RESKEY_rundir"
+REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
+REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
+REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+
+if ! [ -f $REDIS_CHECK_DUMP ]; then
+ REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)"
+fi
+
+if [ -f "$REDIS_CONFIG" ]; then
+ REDIS_DUMP_DIR="$(cat $REDIS_CONFIG | grep "^\s*dir\s" | awk '{ print $2 }' 2>/dev/null)"
+ REDIS_DUMP_FILE="$(cat $REDIS_CONFIG | grep "^\s*dbfilename\s" | awk '{ print $2 }' 2>/dev/null)"
+fi
+: ${REDIS_DUMP_DIR:=/var/lib/redis/}
+: ${REDIS_DUMP_FILE:=dump.rdb}
+
+function meta_data() {
+ cat <<EOI
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="redis">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent script for redis server.
+
+This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
+When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
+</longdesc>
+
+<shortdesc lang="en">Redis server</shortdesc>
+
+<parameters>
+<parameter name="bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-server\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_bin}" />
+</parameter>
+
+<parameter name="client_bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-cli\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_client_bin}" />
+</parameter>
+
+<parameter name="config" unique="1" required="0">
+<longdesc lang="en">
+Path to 'redis.conf'
+</longdesc>
+<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
+<content type="string" default="${OCF_RESKEY_config}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User to run redis as
+</longdesc>
+<shortdesc lang="en">Redis user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user}" />
+</parameter>
+
+<parameter name="rundir" unique="1" required="0">
+<longdesc lang="en">
+Directory to store socket and pid file in
+</longdesc>
+<shortdesc lang="en">Redis var/run dir</shortdesc>
+<content type="string" default="${OCF_RESKEY_rundir}"/>
+</parameter>
+
+<parameter name="pidfile_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the pidfile. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis pidfile name</shortdesc>
+<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
+</parameter>
+
+<parameter name="socket_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the socket. Will be crated in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis socket name</shortdesc>
+<content type="string" default="${OCF_RESKEY_socket_name}"/>
+</parameter>
+
+<parameter name="port" unique="0" required="0">
+<longdesc lang="en">
+Port for replication client to connect to on remote server
+</longdesc>
+<shortdesc lang="en">Replication port</shortdesc>
+<content type="string" default="${OCF_RESKEY_port}"/>
+</parameter>
+
+<parameter name="wait_last_known_master" unique="0" required="0">
+<longdesc lang="en">
+During redis cluster bootstrap, wait for the last known master to be
+promoted before allowing any other instances in the cluster to be
+promoted. This lessens the risk of data loss when persistent data
+is in use.
+</longdesc>
+<shortdesc lang="en">Wait for last known master</shortdesc>
+<content type="boolean" default="false"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="120" />
+<action name="status" timeout="60" />
+<action name="monitor" depth="0" timeout="60" interval="45" />
+<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
+<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
+<action name="promote" timeout="120" />
+<action name="demote" timeout="120" />
+<action name="notify" timeout="90" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+EOI
+}
+
+INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
+MASTER_HOST=""
+MASTER_ACTIVE_CACHED=""
+MASTER_ACTIVE=""
+
+master_is_active()
+{
+ if [ -z "$MASTER_ACTIVE_CACHED" ]; then
+ # determine if a master instance is already up and is healthy
+ crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+ MASTER_ACTIVE=$?
+ MASTER_ACTIVE_CACHED="true"
+ fi
+ return $MASTER_ACTIVE
+}
+
+function set_master()
+{
+ MASTER_HOST="$1"
+ ${CRM_ATTR_REPL_INFO} -v "$1" -q
+}
+
+function last_known_master()
+{
+ if [ -z "$MASTER_HOST" ]; then
+ MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
+ fi
+ echo "$MASTER_HOST"
+}
+
+function crm_master_reboot() {
+ "${HA_SBIN_DIR}/crm_master" -l reboot "$@"
+}
+
+function calculate_score()
+{
+ perf_score="$1"
+ connected_clients="$2"
+
+ if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
+ # only set preferred score by slave_priority if
+ # we are not waiting for the last known master. Otherwise
+ # we want the agent to have complete control over the scoring.
+ perf_score=""
+ connected_clients="0"
+ fi
+
+ if [[ -z "$perf_score" ]]; then
+ if [[ "$(last_known_master)" == "$NODENAME" ]]; then
+ perf_score=1000
+ else
+ perf_score=1
+ fi
+ fi
+ perf_score=$(( perf_score + connected_clients ))
+ echo "$perf_score"
+}
+
+function set_score()
+{
+ local score="$1"
+
+ if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
+ local last_master="$(last_known_master)"
+ if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
+ ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
+ return
+ fi
+ fi
+
+ ocf_log debug "monitor: Setting master score to '$score'"
+ crm_master_reboot -v "$score"
+}
+
+function redis_client() {
+ ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $@"
+ if [ -n "$clientpasswd" ]; then
+ "$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//'
+ else
+ "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+ fi
+}
+
+function simple_status() {
+ local pid
+
+ if ! [ -f "$REDIS_PIDFILE" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ pid="$(<"$REDIS_PIDFILE")"
+ pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING
+
+ ocf_log debug "monitor: redis-server running under pid $pid"
+
+ return $OCF_SUCCESS
+}
+
+function monitor() {
+ local res
+
+ simple_status
+ res=$?
+ if (( res != OCF_SUCCESS )); then
+ return $res
+ fi
+
+ typeset -A info
+ while read line; do
+ [[ "$line" == "#"* ]] && continue
+ [[ "$line" != *":"* ]] && continue
+ IFS=':' read -r key value <<< "$line"
+ info[$key]="$value"
+ done < <(redis_client info)
+ if [[ -z "${info[role]}" ]]; then
+ ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if ocf_is_ms; then
+ # Here we see if a score has already been set.
+ # If score isn't set we use the redis setting 'slave_priority'.
+ # If that isn't set, we default to 1000 for a master, and 1 for slave.
+ # We then add 1 for each connected client
+ score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
+ if [[ -z "$score" ]]; then
+ score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
+ set_score "$score"
+ fi
+
+ if [[ "${info[role]}" == "master" ]]; then
+ if ocf_is_probe; then
+ set_master "$NODENAME"
+ fi
+ return $OCF_RUNNING_MASTER
+ fi
+
+ if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
+ if [[ "${info[master_link_status]}" != "up" ]]; then
+ ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
+ return $OCF_ERR_GENERIC
+ fi
+ if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
+ ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ fi
+ return $OCF_SUCCESS
+}
+
+function check_dump_file()
+{
+ if ! have_binary "$REDIS_CHECK_DUMP"; then
+ return 0
+ fi
+ $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1
+}
+
+function start() { # Start redis-server as $REDIS_USER, wait until it has finished loading, then (for master/slave resources) demote it to slave.
+ monitor
+ status=$?
+
+ if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+ ocf_log info "start: redis is already running"
+ return $OCF_SUCCESS
+ fi
+
+ [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
+ chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+
+ # check for 0 byte database dump file. This is an unrecoverable start
+ # condition that we can avoid by deleting the 0 byte database file.
+ if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then
+ local size; size="$(stat --format "%s" "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}")" # assignment kept separate from 'local' so $? below is stat's status, not local's
+ if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then
+ ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure."
+ rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}"
+ fi
+ fi
+
+ ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
+ output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
+
+ while true; do
+ # wait for redis to start; an empty 'loading' field means redis-cli got no answer yet, which must not count as "loaded"
+ typeset -A info
+ while read line; do
+ [[ "$line" == "#"* ]] && continue
+ [[ "$line" != *":"* ]] && continue
+ IFS=':' read -r key value <<< "$line"
+ info[$key]="$value"
+ done < <(redis_client info)
+
+ if [[ -n "${info[loading]}" ]] && (( info[loading] == 0 )); then
+ break
+ elif (( info[loading] == 1 )); then
+ sleep "${info[loading_eta_seconds]}"
+ elif pidof "$REDIS_SERVER" >/dev/null; then
+ # unknown error, but the process still exists.
+ # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
+ # See https://github.com/antirez/redis/issues/2368
+ # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
+ sleep 1
+ else
+ check_output="$(check_dump_file)"
+ ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }"
+ return $OCF_ERR_GENERIC
+ fi
+ done
+
+ ocf_is_ms && demote # pacemaker expects resources to start in slave mode
+
+ monitor
+ status=$?
+ if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+ return $OCF_SUCCESS
+ fi
+
+ check_output="$(check_dump_file)"
+ ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }"
+ return $status
+}
+
+function stop() {
+ monitor
+ status=$?
+
+ if (( status == OCF_NOT_RUNNING )); then
+ ocf_log info "stop: redis is already stopped"
+ crm_master_reboot -D
+ return $OCF_SUCCESS
+ fi
+
+ pid="$(<"$REDIS_PIDFILE")"
+ kill -TERM "$pid"
+
+ while true; do
+ simple_status
+ status=$?
+ if (( status == OCF_NOT_RUNNING )); then
+ crm_master_reboot -D
+ return $OCF_SUCCESS
+ fi
+ sleep 1
+ done
+}
+
+function promote() {
+ monitor
+ status=$?
+
+ if (( status == OCF_RUNNING_MASTER )); then
+ ocf_log info "promote: Already running as master"
+ set_master "$NODENAME"
+ return $OCF_SUCCESS
+ elif (( status != OCF_SUCCESS )); then
+ ocf_log err "promote: Node is not running as a slave"
+ return $OCF_ERR_GENERIC
+ fi
+
+ redis_client slaveof no one
+
+ monitor
+ status=$?
+ if (( status == OCF_RUNNING_MASTER )); then
+ set_master "$NODENAME"
+ return $OCF_SUCCESS
+ fi
+
+ ocf_log err "promote: Unknown error while promoting to master (status=$status)"
+ return $OCF_ERR_GENERIC
+}
+
+function demote() {
+ local master_host
+ local master_port
+
+ CHECK_SLAVE_STATE=1
+ monitor
+ status=$?
+
+ if (( status == OCF_SUCCESS )); then
+ ocf_log info "demote: Already running as slave"
+ return $OCF_SUCCESS
+ elif (( status == OCF_NOT_RUNNING )); then
+ ocf_log err "demote: Failed to demote, redis not running."
+ return $OCF_NOT_RUNNING
+ fi
+
+ master_host="$(last_known_master)"
+ master_port="${REDIS_REPLICATION_PORT}"
+
+ # The elected master has to remain a slave during startup.
+ # During this period a placeholder master host is assigned.
+ if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
+ CHECK_SLAVE_STATE=0
+ master_host="no-such-master"
+ elif ! master_is_active; then
+ # no master has been promoted yet. we'll be notified when the
+ # master starts.
+ CHECK_SLAVE_STATE=0
+ master_host="no-such-master"
+ fi
+
+ ocf_log info "demote: Setting master to '$master_host'"
+
+ redis_client slaveof "$master_host" "$master_port"
+
+ # Wait forever for the slave to connect to the master and finish the
+ # sync. Timeout is controlled by Pacemaker "op start timeout=XX".
+ #
+ # hint: redis master_link_status will only come "up" when
+ # the SYNC with the master has completed.
+ # This can take an arbitrary time (data) and should
+ # only be parametrized by the start operation timeout
+ # by the administrator, not by this resource agent code
+ while true; do
+ # Wait infinite if replication is syncing
+ # Then start/demote operation timeout determines timeout
+ monitor
+ status=$?
+ if (( status == OCF_SUCCESS )); then
+ return $OCF_SUCCESS
+ fi
+
+ sleep 1
+ done
+
+ ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
+ return $OCF_ERR_GENERIC
+}
+
+function notify() {
+ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+ case "$mode" in
+ post-demote|post-promote) # change the master
+ monitor
+ status=$?
+ if (( status == OCF_SUCCESS )); then # were a slave
+ # calling demote updates the slave's connection
+ # to the newly appointed Master instance.
+ demote
+ fi
+ ;;
+ esac
+ return $OCF_SUCCESS
+}
+
+function validate() { # Check the binaries, config file and run-as user; returns OCF_ERR_INSTALLED / OCF_ERR_CONFIGURED on the first problem found.
+ if [[ ! -x "$REDIS_SERVER" ]]; then
+ ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [[ ! -x "$REDIS_CLIENT" ]]; then
+ ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [[ ! -f "$REDIS_CONFIG" ]]; then
+ ocf_log err "validate: $REDIS_CONFIG does not exist"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if ! getent passwd "$REDIS_USER" &>/dev/null; then
+ ocf_log err "validate: $REDIS_USER is not a valid user"
+ return $OCF_ERR_CONFIGURED
+ fi
+}
+
+NODENAME=$(ocf_local_nodename)
+if [ -f "$REDIS_CONFIG" ]; then
+ clientpasswd="$(cat $REDIS_CONFIG | sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' | tail -n 1)"
+fi
+
+ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir= [...]
+
+case "${1:-$__OCF_ACTION}" in
+ status|monitor)
+ monitor
+ ;;
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart)
+ stop && start
+ ;;
+ promote)
+ promote
+ ;;
+ demote)
+ demote
+ ;;
+ notify)
+ notify
+ ;;
+ meta-data)
+ meta_data
+ ;;
+ validate-all)
+ validate
+ ;;
+ *)
+ echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+status=$?
+ocf_log debug "exit_status=$status"
+exit $status
diff --git a/heartbeat/sapdb-nosha.sh b/heartbeat/sapdb-nosha.sh
index c659909..06aa65c 100644
--- a/heartbeat/sapdb-nosha.sh
+++ b/heartbeat/sapdb-nosha.sh
@@ -523,7 +523,7 @@ sapdatabase_monitor() {
if [ $rc -ne $OCF_SUCCESS ]
then
- ocf_log err "The SAP database $SID ist not running: $output"
+ ocf_log err "The SAP database $SID is not running: $output"
fi
return $rc
}
diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh
index ee30ec1..7edb4b8 100755
--- a/heartbeat/sapdb.sh
+++ b/heartbeat/sapdb.sh
@@ -243,7 +243,7 @@ sapdatabase_monitor() {
if [ $rc -ne $OCF_SUCCESS ]
then
- ocf_log err "The SAP database $SID ist not running: $output"
+ ocf_log err "The SAP database $SID is not running: $output"
fi
fi
fi
diff --git a/heartbeat/sg_persist b/heartbeat/sg_persist
index c4af069..1ce0a64 100755
--- a/heartbeat/sg_persist
+++ b/heartbeat/sg_persist
@@ -30,7 +30,7 @@
#
#
# OCF instance parameters
-# OCF_RESKEY_sg_persist_binary
+# OCF_RESKEY_binary
# OCF_RESKEY_devs
# OCF_RESKEY_required_devs_nof
# OCF_RESKEY_reservation_type
@@ -81,10 +81,10 @@ Master reservs all devices from "devs" list with reservation "--prout-type" valu
<parameters>
<parameter name="binary" unique="0">
<longdesc lang="en">
-The name of the binary that manages the resource
+The name of the binary that manages the resource.
</longdesc>
-<shortdesc>the binay name of the resource</shortdesc>
-<content type="string" default="sg_persist"/>
+<shortdesc>the binary name of the resource</shortdesc>
+<content type="string" default="$sg_persist_binary"/>
</parameter>
<parameter name="devs" unique="0" required="1">
@@ -177,7 +177,7 @@ sg_persist_init() {
exit $OCF_ERR_PERM
fi
- : ${SG_PERSIST:="$sg_persist_binary"}
+ SG_PERSIST=${OCF_RESKEY_binary:-"$sg_persist_binary"}
check_binary $SG_PERSIST
ROLE=$OCF_RESKEY_CRM_meta_role
@@ -248,15 +248,15 @@ sg_persist_get_status() {
for dev in ${EXISTING_DEVS[*]}
do
- READ_KEYS=`ocf_run $SG_PERSIST --in --read-keys $dev 2>&1`
- if [ $? -eq $OCF_SUCCESS ]; then
+ READ_KEYS=`$SG_PERSIST --in --read-keys $dev 2>&1`
+ if [ $? -eq 0 ]; then
WORKING_DEVS+=($dev)
echo $READ_KEYS | $GREP $NODE_ID_HEX >/dev/null
if [ $? -eq 0 ]; then
REGISTERED_DEVS+=($dev)
- READ_RESERVATION=`ocf_run $SG_PERSIST --in --read-reservation $dev 2>&1`
- if [ $? -eq $OCF_SUCCESS ]; then
+ READ_RESERVATION=`$SG_PERSIST --in --read-reservation $dev 2>&1`
+ if [ $? -eq 0 ]; then
echo $READ_RESERVATION | $GREP $NODE_ID_HEX >/dev/null
if [ $? -eq 0 ]; then
RESERVED_DEVS+=($dev)
@@ -414,10 +414,10 @@ sg_persist_action_stop() {
sg_persist_action_monitor() {
- ACT_MASTER_SCORE=`ocf_run -q $MASTER_SCORE_ATTRIBUTE --query --quiet 2>&1`
+ ACT_MASTER_SCORE=`$MASTER_SCORE_ATTRIBUTE --query --quiet 2>&1`
ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE"
- ACT_PENDING=`ocf_run $PENDING_ATTRIBUTE --query --quiet 2>&1`
+ ACT_PENDING=`$PENDING_ATTRIBUTE --query --quiet 2>&1`
ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING"
sg_persist_parse_act_pending
diff --git a/heartbeat/slapd b/heartbeat/slapd
index c26b16f..0baafd6 100755
--- a/heartbeat/slapd
+++ b/heartbeat/slapd
@@ -32,6 +32,7 @@
# OCF_RESKEY_password
# OCF_RESKEY_parameters
# OCF_RESKEY_stop_escalate
+# OCF_RESKEY_maxfiles
#
################################################################################
@@ -53,6 +54,7 @@
: ${OCF_RESKEY_password=""}
: ${OCF_RESKEY_parameters=""}
: ${OCF_RESKEY_stop_escalate=15}
+: ${OCF_RESKEY_maxfiles=""}
USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
ORIG_IFS=$IFS
@@ -196,6 +198,14 @@ SIGKILL
<shortdesc lang="en">Seconds before stop escalation to KILL</shortdesc>
<content type="integer" default="15" />
</parameter>
+
+<parameter name="maxfiles">
+<longdesc lang="en">
+Maximum number of open files (for ulimit -n)
+</longdesc>
+<shortdesc lang="en">Max open files</shortdesc>
+<content type="string" default="" />
+</parameter>
</parameters>
<actions>
@@ -324,6 +334,14 @@ slapd_start()
options="$options $parameters"
fi
+ if [ -n "$OCF_RESKEY_maxfiles" ]; then
+ ulimit -n $OCF_RESKEY_maxfiles
+ u_rc=$?
+ if [ "$u_rc" -ne 0 ]; then
+ ocf_log warn "Could not set ulimit for open files for slapd to '$OCF_RESKEY_maxfiles'"
+ fi
+ fi
+
if [ -n "$services" ]; then
$slapd -h "$services" $options 2>&1; rc=$?
else
diff --git a/heartbeat/syslog-ng b/heartbeat/syslog-ng
index dfca65a..66ad9a5 100755
--- a/heartbeat/syslog-ng
+++ b/heartbeat/syslog-ng
@@ -234,12 +234,7 @@ fi
SYSLOG_NG_NAME=${CONFIGFILE##*/}
SYSLOG_NG_NAME=${SYSLOG_NG_NAME%.*}
-SYSLOG_NG_EXE="${OCF_RESKEY_syslog_ng_binary-/sbin/syslog-ng}"
-# why not default to /sbin/syslog-ng?
-#if [[ -z "$SYSLOG_NG_EXE" ]]; then
-# ocf_log err "Undefined parameter:syslog_ng_binary"
-# exit $OCF_ERR_CONFIGURED
-#fi
+SYSLOG_NG_EXE="${OCF_RESKEY_syslog_ng_binary:-/sbin/syslog-ng}"
if [[ ! -x "$SYSLOG_NG_EXE" ]]; then
ocf_log err "Invalid value:syslog_ng_binary:$SYSLOG_NG_EXE"
exit $OCF_ERR_CONFIGURED
diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index 8b7fe31..07a7ce4 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -49,6 +49,13 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
############################################################################
# Usage
usage()
@@ -143,7 +150,7 @@ monitor_tomcat()
start_rotatelogs()
{
# -s is required because tomcat5.5's login shell is /bin/false
- su - -s /bin/sh $RESOURCE_TOMCAT_USER \
+ $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \
-c "$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \
< "$CATALINA_OUT" > /dev/null 2>&1 &
}
@@ -154,7 +161,7 @@ rotate_catalina_out()
{
# Check catalina_%F.log is writable or not.
CURRENT_ROTATELOG_SUFFIX=`date +"%F"`
- su - -s /bin/sh $RESOURCE_TOMCAT_USER \
+ $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \
-c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1
if [ $? -ne 0 ]; then
ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable."
@@ -205,7 +212,7 @@ attemptTomcatCommand()
if [ "$RESOURCE_TOMCAT_USER" = root ]; then
"$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1
else
- tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
+ tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
fi
if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then
diff --git a/heartbeat/zabbixserver b/heartbeat/zabbixserver
index fe7266e..4513141 100755
--- a/heartbeat/zabbixserver
+++ b/heartbeat/zabbixserver
@@ -130,26 +130,6 @@ getpid() {
}
#
-# Check if PID directory exists
-#
-check_piddir() {
- local piddir
- local severity
-
- # lower severity to info during probe
- severity=err
- ocf_is_probe && severity=info
-
- piddir=`dirname ${OCF_RESKEY_pid}`
- if [ ! -d $piddir ]; then
- ocf_log $severity "PID directory ${piddir} doesn't exist"
- return 1
- fi
-
- return 0
-}
-
-#
# Check for the server configuration file
#
check_config() {
@@ -322,9 +302,8 @@ zabbixserver_monitor() {
# validate configuration
#
zabbixserver_validate_all() {
- check_piddir || return $OCF_ERR_INSTALLED
check_config $OCF_RESKEY_config || return $OCF_ERR_INSTALLED
-
+ ocf_mkstatedir root 755 `dirname $OCF_RESKEY_pid` || return $OCF_ERR_INSTALLED
return $OCF_SUCCESS
}
diff --git a/ldirectord/ldirectord.in b/ldirectord/ldirectord.in
index 5fca81f..ded8563 100644
--- a/ldirectord/ldirectord.in
+++ b/ldirectord/ldirectord.in
@@ -214,6 +214,9 @@ If this directive is defined, the supplied script is executed whenever all
real servers for a virtual service are down or when the first real server
comes up again. In the first case, it is called with "start" as its first
argument, in the latter with "stop".
+Additional parameters are vserver with vport (vserver:vport) as second param
+and protocol (tcp/udp) as third param to identify the virtual service
+within the fallback script.
If defined in a virtual server section then the global value is overridden.
@@ -573,6 +576,8 @@ overridden by an optional per real-server based receive regexp.
For a DNS check this should be any one the A record's addresses or
any one of the PTR record's names.
+In case of dynamic DNS answers (different answers to the same question)
+a regex to match multiple addresses or PTR record names can also be defined.
For a MySQL check, the receive setting is not used.
@@ -2103,7 +2108,7 @@ sub add_real_server
my $new_rsrv;
my $rsrv;
- $new_rsrv = {"server"=>$ip, "port"=>$port};
+ $new_rsrv = {"server"=>$ip, "port"=>$port, "failcount"=>0};
$flags =~ /(\w+)(.*)/ && ($1 eq "gate" || $1 eq "masq" || $1 eq "ipip")
or &config_error($line, "forward method must be gate, masq or ipip");
@@ -3632,6 +3637,7 @@ sub check_dns
my $request;
my $server;
my ($v,$r) = @_;
+ my $port = ld_checkport($v, $r);
{
# Net::DNS makes unguarded calls to eval
# which throw a fatal exception if they fail
@@ -3659,6 +3665,7 @@ sub check_dns
local $SIG{'ALRM'} = sub { die "timeout\n"; };
alarm($$v{negotiatetimeout});
$res->nameservers($server);
+ $res->port($port);
if ($$v{"protocol"} eq "tcp") {
$res->usevc(1);
}
@@ -3671,9 +3678,10 @@ sub check_dns
return $SERVICE_DOWN;
}
+ my $recstr = $$r{receive};
foreach $rr ($query->answer) {
- if (($rr->type eq "A" and $rr->address eq $$r{"receive"}) or
- ($rr->type eq "PTR" and $rr->ptrdname eq $$r{"receive"})) {
+ if (($rr->type eq "A" and length($recstr) and $rr->address =~ /$recstr/) or
+ ($rr->type eq "PTR" and length($recstr) and $rr->ptrdname =~ /$recstr/)) {
service_set($v, $r, "up", {do_log => 1}, "Success");
return $SERVICE_UP;
}
@@ -4238,9 +4246,9 @@ sub do_fallback_command
$v->{fallbackcommand_status} = $status;
if (defined($v->{fallbackcommand})) {
- &system_wrapper($v->{fallbackcommand} . " " . $status);
+ &system_wrapper($v->{fallbackcommand} . " " . $status . " " . $v->{server} . ":" . $v->{port} . " " . $v->{protocol});
} elsif (defined($FALLBACKCOMMAND)) {
- &system_wrapper($FALLBACKCOMMAND . " " . $status);
+ &system_wrapper($FALLBACKCOMMAND . " " . $status . " " . $v->{server} . ":" . $v->{port} . " " . $v->{protocol});
}
}
diff --git a/resource-agents.spec.in b/resource-agents.spec.in
index 0211cae..fbdb7ef 100644
--- a/resource-agents.spec.in
+++ b/resource-agents.spec.in
@@ -252,6 +252,9 @@ rm -rf %{buildroot}
%dir %{_datadir}/%{name}/ocft
%{_datadir}/%{name}/ocft/configs
%{_datadir}/%{name}/ocft/caselib
+%{_datadir}/%{name}/ocft/helpers.sh
+%{_datadir}/%{name}/ocft/runocft
+%{_datadir}/%{name}/ocft/runocft.prereq
%{_datadir}/%{name}/ocft/README
%{_datadir}/%{name}/ocft/README.zh_CN
diff --git a/rgmanager/src/resources/Makefile.am b/rgmanager/src/resources/Makefile.am
index 862bdf5..de88c69 100644
--- a/rgmanager/src/resources/Makefile.am
+++ b/rgmanager/src/resources/Makefile.am
@@ -29,13 +29,14 @@ RESOURCES = service.sh ip.sh nfsclient.sh nfsexport.sh \
vm.sh SAPInstance SAPDatabase named.sh db2.sh \
ASEHAagent.sh drbd.sh nfsserver.sh \
tomcat-6.sh orainstance.sh oralistener.sh oracledb.sh \
- bind-mount.sh
+ bind-mount.sh oradg.sh
METADATA = apache.metadata openldap.metadata samba.metadata \
mysql.metadata postgres-8.metadata \
tomcat-5.metadata named.metadata lvm.metadata \
drbd.metadata tomcat-6.metadata \
- orainstance.metadata oralistener.metadata
+ orainstance.metadata oralistener.metadata \
+ oradg.metadata
HELPERS = ocf-shellfuncs svclib_nfslock \
lvm_by_lv.sh lvm_by_vg.sh
diff --git a/rgmanager/src/resources/SAPDatabase b/rgmanager/src/resources/SAPDatabase
index b34d67f..2e398c4 100644
--- a/rgmanager/src/resources/SAPDatabase
+++ b/rgmanager/src/resources/SAPDatabase
@@ -736,7 +736,7 @@ sapdatabase_monitor() {
if [ $rc -ne $OCF_SUCCESS ]
then
- ocf_log err "The SAP database $SID ist not running: $output"
+ ocf_log err "The SAP database $SID is not running: $output"
fi
return $rc
}
diff --git a/rgmanager/src/resources/db2.sh b/rgmanager/src/resources/db2.sh
index f396ff6..57991f9 100755
--- a/rgmanager/src/resources/db2.sh
+++ b/rgmanager/src/resources/db2.sh
@@ -1,7 +1,7 @@
#!/bin/bash
#
# Copyright (c) 2011 Holger Teutsch <holger.teutsch at web.de>
-# Copyright (c) 2014 David Vossel <dvossel at redhat.com>
+# Copyright (c) 2014 David Vossel <davidvossel at gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
diff --git a/rgmanager/src/resources/fs.sh.in b/rgmanager/src/resources/fs.sh.in
index c041fb7..2924fa7 100644
--- a/rgmanager/src/resources/fs.sh.in
+++ b/rgmanager/src/resources/fs.sh.in
@@ -240,7 +240,7 @@ verify_fstype()
[ -z "$OCF_RESKEY_fstype" ] && return 0
case $OCF_RESKEY_fstype in
- ext2|ext3|ext4|btrfs|jfs|xfs|reiserfs|vfat|tmpfs|vxfs)
+ ext2|ext3|ext4|btrfs|jfs|xfs|reiserfs|vfat|vxfs)
return 0
;;
*)
@@ -357,13 +357,6 @@ verify_options()
esac
;;
- tmpfs)
- case $o in
- size=*|nr_blocks=*|mode=*)
- continue
- ;;
- esac
- ;;
btrfs)
# tbd
continue
diff --git a/rgmanager/src/resources/ip.sh b/rgmanager/src/resources/ip.sh
index 7e19205..6391fab 100755
--- a/rgmanager/src/resources/ip.sh
+++ b/rgmanager/src/resources/ip.sh
@@ -27,6 +27,8 @@ LANG=C
PATH=/bin:/sbin:/usr/bin:/usr/sbin
export LC_ALL LANG PATH
+SENDUA=/usr/libexec/heartbeat/send_ua
+
# Grab nfs lock tricks if available
export NFS_TRICKS=1
if [ -f "$(dirname $0)/svclib_nfslock" ]; then
@@ -627,15 +629,38 @@ ipv6()
ocf_log info "Removing IPv6 address $addr from $dev"
fi
- if [ "$1" = "add" ]; then
- ocf_log debug "Pinging addr ${addr%%/*} from dev $dev"
- if ping_check inet6 ${addr%%/*} $dev; then
- ocf_log err "IPv6 address collision ${addr%%/*}"
- return 1
- fi
- fi
/sbin/ip -f inet6 addr $1 dev $dev $addr
[ $? -ne 0 ] && return 1
+
+ # Duplicate Address Detection [DAD]
+ # The kernel flags the address as 'tentative' until it has ensured that
+ # there are no duplicates.
+ # If there is one, it flags the address as 'dadfailed'.
+ if [ "$1" = "add" ]; then
+ for i in {1..10}; do # poll the address state up to 10 times, 0.5s apart
+ ipstatus=$(/sbin/ip -o -f inet6 addr show dev $dev to $addr)
+ if [[ $ipstatus == *dadfailed* ]]; then
+ ocf_log err "IPv6 address collision ${addr%%/*} [DAD]"
+ ip -f inet6 addr del dev $dev $addr
+ if [[ $? -ne 0 ]]; then
+ ocf_log err "Could not delete IPv6 address"
+ fi
+ return 1
+ elif [[ $ipstatus != *tentative* ]]; then
+ break # neither dadfailed nor tentative: stop polling
+ elif [[ $i -eq 10 ]]; then
+ ocf_log warn "IPv6 address : DAD is still in tentative"
+ fi
+ sleep 0.5
+ done
+ # Now the address should be useable
+ # Try to send Unsolicited Neighbor Advertisements if send_ua is available
+ if [ -x $SENDUA ]; then
+ ARGS="-i 200 -c 5 ${addr%%/*} $maskbits $dev"
+ ocf_log info "$SENDUA $ARGS"
+ $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
+ fi
+ fi
#
# NDP should take of figuring out our new address. Plus,
diff --git a/rgmanager/src/resources/mysql.sh b/rgmanager/src/resources/mysql.sh
index 736a91a..c4ec8ba 100755
--- a/rgmanager/src/resources/mysql.sh
+++ b/rgmanager/src/resources/mysql.sh
@@ -122,23 +122,14 @@ start()
return $OCF_ERR_GENERIC
fi
- declare i=$OCF_RESKEY_startup_wait
- while [ "$i" -gt 0 ]; do
- if [ -f "$MYSQL_pid_file" ]; then
- break;
- fi
- sleep 1
- let i=$i-1
- done
-
- if [ "$i" -eq 0 ]; then
- clog_service_start $CLOG_FAILED_TIMEOUT
- return $OCF_ERR_GENERIC
- fi
-
clog_service_start $CLOG_SUCCEED
- return 0;
+ # Sleep 1 sec before checking status so mysqld can start
+ sleep 1
+
+ status
+
+ return $?;
}
stop()
@@ -162,7 +153,27 @@ status()
status_check_pid "$MYSQL_pid_file"
case $? in
- $OCF_NOT_RUNNING)
+ $OCF_NOT_RUNNING)
+ ps auxww | grep -Pv "grep|$MYSQL_MYSQLD" | grep "$MYSQL_pid_file" &> /dev/null
+ if [ "$?" -eq "0" ];then
+ declare i=$OCF_RESKEY_startup_wait
+ while [ "$i" -gt 0 ]; do
+ if [ -f "$MYSQL_pid_file" ]; then
+ break;
+ fi
+ sleep 1
+ let i=$i-1
+ done
+
+ if [ "$i" -eq 0 ]; then
+ clog_service_start $CLOG_FAILED_TIMEOUT
+ return $OCF_ERR_GENERIC
+ else
+ clog_service_status $CLOG_SUCCEED
+ exit 0
+ fi
+ fi
+
clog_service_status $CLOG_FAILED "$MYSQL_pid_file"
return $OCF_NOT_RUNNING
;;
diff --git a/rgmanager/src/resources/netfs.sh b/rgmanager/src/resources/netfs.sh
index 5dad993..dc64cd7 100755
--- a/rgmanager/src/resources/netfs.sh
+++ b/rgmanager/src/resources/netfs.sh
@@ -80,14 +80,14 @@ do_metadata()
<content type="string"/>
</parameter>
- <parameter name="fstype" required="0">
+ <parameter name="fstype">
<longdesc lang="en">
File System type (nfs, nfs4 or cifs)
</longdesc>
<shortdesc lang="en">
File System Type
</shortdesc>
- <content type="string"/>
+ <content type="string" default="nfs"/>
</parameter>
<parameter name="no_unmount" required="0">
@@ -464,11 +464,6 @@ do_force_unmount() {
populate_defaults()
{
- if [ -z "$OCF_RESKEY_fstype" ]; then
- export OCF_RESKEY_fstype=nfs
- fi
-
-
case $OCF_RESKEY_fstype in
nfs|nfs4)
export OCF_RESKEY_device="$OCF_RESKEY_host:$OCF_RESKEY_export"
diff --git a/rgmanager/src/resources/nfsserver.sh b/rgmanager/src/resources/nfsserver.sh
index 4d98230..65885e6 100644
--- a/rgmanager/src/resources/nfsserver.sh
+++ b/rgmanager/src/resources/nfsserver.sh
@@ -117,6 +117,17 @@ meta_data()
<content type="integer" default=""/>
</parameter>
+ <parameter name="krbhost">
+ <longdesc lang="en">
+ This is the Kerberos hostname, which should be set according
+ to the floating IP.
+ </longdesc>
+ <shortdesc lang="en">
+ This is the Kerberos hostname.
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
</parameters>
<actions>
@@ -206,25 +217,53 @@ nfs_daemons()
case $1 in
start)
ocf_log info "Starting NFS daemons"
- /etc/init.d/nfs start
- if [ $? -ne 0 ]; then
+ if [ -z "$OCF_RESKEY_krbhost" ]; then
+ /etc/init.d/nfs start
+ rv=$?
+ else
+ unshare -u /bin/bash -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/nfs start"
+ rv=$? # nfs start status; checked after this if/else
+ unshare -u /bin/bash -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/rpcgssd start"
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to start rpcgssd"
+ return $OCF_ERR_GENERIC
+ fi
+ unshare -u /bin/bash -c "hostname $OCF_RESKEY_krbhost; /etc/init.d/rpcidmapd start"
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to start rpcidmapd"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ if [ $rv -ne 0 ]; then
ocf_log err "Failed to start NFS daemons"
- return 1
+ return $OCF_ERR_GENERIC
fi
ocf_log debug "NFS daemons are running"
- return 0
+ return $OCF_SUCCESS
;;
stop)
ocf_log info "Stopping NFS daemons"
+ if [ -n "$OCF_RESKEY_krbhost" ]; then # Kerberos helpers are only stopped when krbhost is set
+ if ! /etc/init.d/rpcidmapd stop; then
+ ocf_log err "Failed to stop rpcidmapd"
+ return $OCF_ERR_GENERIC
+ fi
+ if ! /etc/init.d/rpcgssd stop; then
+ ocf_log err "Failed to stop rpcgssd"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
if ! /etc/init.d/nfs stop; then
ocf_log err "Failed to stop NFS daemons"
- return 1
+ return $OCF_ERR_GENERIC
fi
ocf_log debug "NFS daemons are stopped"
- return 0
+ return $OCF_SUCCESS
;;
status|monitor)
declare recoverydir="$OCF_RESKEY_path/$OCF_RESKEY_nfspath/v4recovery"
@@ -233,11 +272,23 @@ nfs_daemons()
[ "$val" = "$recoverydir" ] || ocf_log warning \
"NFSv4 recovery directory is $val instead of $recoverydir"
/etc/init.d/nfs status
- if [ $? -eq 0 ]; then
- ocf_log debug "NFS daemons are running"
- return 0
+ if [ $? -ne 0 ]; then
+ ocf_log err "NFS is not running"
+ return $OCF_NOT_RUNNING
fi
- return $OCF_NOT_RUNNING
+ /etc/init.d/rpcgssd status
+ if [ $? -ne 0 ]; then
+ ocf_log err "rpcgssd is not running"
+ return $OCF_NOT_RUNNING
+ fi
+ /etc/init.d/rpcidmapd status
+ if [ $? -ne 0 ]; then
+ ocf_log err "rpcidmapd is not running"
+ return $OCF_NOT_RUNNING
+ fi
+
+ ocf_log debug "NFS daemons are running"
+ return $OCF_SUCCESS
;;
esac
}
diff --git a/rgmanager/src/resources/oradg.metadata b/rgmanager/src/resources/oradg.metadata
new file mode 100644
index 0000000..2f8f044
--- /dev/null
+++ b/rgmanager/src/resources/oradg.metadata
@@ -0,0 +1,107 @@
+<?xml version="1.0" ?>
+<!-- $Id: oradg.metadata 58 2009-06-29 05:15:12Z hevirtan $ -->
+
+<!-- Resource metadata for Oracle DB agent -->
+<resource-agent name="oradg" version="rgmanager 2.0">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ Oracle Data Guard Failover Instance
+ </longdesc>
+ <shortdesc lang="en">
+ Oracle Data Guard Failover Instance
+ </shortdesc>
+
+ <parameters>
+ <parameter name="name" primary="1">
+ <longdesc lang="en">
+ Instance name (SID) of oracle instance
+ </longdesc>
+ <shortdesc lang="en">
+ Oracle SID
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="user" required="1">
+ <longdesc lang="en">
+ Oracle user name. This is the user name of the Oracle
+ user which the Oracle instance runs as.
+ </longdesc>
+ <shortdesc lang="en">
+ Oracle User Name
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="home" required="1">
+ <longdesc lang="en">
+ This is the Oracle database home directory.
+ This is configured when you install Oracle.
+ </longdesc>
+ <shortdesc lang="en">
+ Oracle Home Directory
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="listeners">
+ <longdesc lang="en">
+ List of Oracle listeners which will be started with
+ the database instance. Listener names are separated by
+ whitespace. Defaults to empty which disables listeners.
+ </longdesc>
+ <shortdesc lang="en">
+ Oracle listeners
+ </shortdesc>
+ <content type="string" default=""/>
+ </parameter>
+
+ <parameter name="vhost">
+ <longdesc lang="en">
+ Virtual hostname for DB Console startup
+ </longdesc>
+ <shortdesc lang="en">
+ Virtual hostname for DB Console
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="lockfile">
+ <longdesc lang="en">
+ Location for lockfile which will be used for checking if
+ the Oracle should be running or not. Defaults to location
+ under /tmp.
+ </longdesc>
+ <shortdesc lang="en">
+ Pathname for lockfile
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="tns_admin" required="0" unique="1">
+ <longdesc lang="en">
+ Full path to the directory that contains the Oracle
+ listener tnsnames.ora configuration file. The shell
+ variable TNS_ADMIN is set to the value provided.
+ </longdesc>
+ <shortdesc lang="en">
+ Full path to the directory containing tnsnames.ora
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+ </parameters>
+
+ <actions>
+ <action name="start" timeout="900"/>
+ <action name="stop" timeout="90"/>
+
+ <!-- Note: status check will also perform recover
+ (Oracle DB restart) if the check fails -->
+ <action name="status" timeout="900" interval="1m"/>
+ <action name="monitor" timeout="900" interval="1m"/>
+
+ <action name="meta-data" timeout="0"/>
+ <action name="verify-all" timeout="0"/>
+ </actions>
+</resource-agent>
diff --git a/rgmanager/src/resources/orainstance.sh b/rgmanager/src/resources/oradg.sh
similarity index 83%
copy from rgmanager/src/resources/orainstance.sh
copy to rgmanager/src/resources/oradg.sh
index b083224..7787126 100755
--- a/rgmanager/src/resources/orainstance.sh
+++ b/rgmanager/src/resources/oradg.sh
@@ -35,7 +35,7 @@
# All other trademarks are property of their respective owners.
#
#
-# $Id: orainstance.sh 127 2009-08-21 09:17:52Z hevirtan $
+# $Id: oradg.sh 127 2009-08-21 09:17:52Z hevirtan $
#
# Original version is distributed with RHCS. The modifications include
# the following minor changes:
@@ -65,13 +65,15 @@ ORACLE_SID=$OCF_RESKEY_name
# Optional parameters with default values
LISTENERS=$OCF_RESKEY_listeners
LOCKFILE="$ORACLE_HOME/.orainstance-${ORACLE_SID}.lock"
+[ -n "$OCF_RESKEY_vhost" ] && ORACLE_HOSTNAME=$OCF_RESKEY_vhost
[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile
-export LISTENERS ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE
+export LISTENERS ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_HOSTNAME
export LD_LIBRARY_PATH=$ORACLE_HOME/lib
export PATH=$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin
-declare -i RESTART_RETRIES=3
+#declare -i RESTART_RETRIES=3
+declare -i RESTART_RETRIES=0
declare -r DB_PROCNAMES="pmon"
declare -r LSNR_PROCNAME="tnslsnr"
@@ -98,10 +100,38 @@ start_db() {
# Set up our sqlplus script. Basically, we're trying to
# capture output in the hopes that it's useful in the case
# that something doesn't work properly.
- startup_cmd="set heading off;\nstartup;\nquit;\n"
- startup_stdout=$(echo -e "$startup_cmd" | sqlplus -S "/ as sysdba")
+
+ startup_stdout=$(sqlplus "/ as sysdba" << EOF
+set serveroutput on
+startup mount;
+
+declare
+ rol varchar(20);
+begin
+ select database_role into rol from v\$database;
+
+ dbms_output.put_line('Database role is ' || rol);
+ if (rol = 'PHYSICAL STANDBY') then
+ return;
+ end if;
+
+ execute immediate 'alter database open';
+end;
+/
+
+select database_role, open_mode from v\$database;
+set heading off;
+set serveroutput off;
+spool /tmp/dgstatus.${ORACLE_SID};
+select open_mode from v\$database;
+spool off;
+EOF
+)
rv=$?
+ # Data Guard Modification 2 - Remove deprecated parameter error from startup_stdout
+ startup_stdout=$(echo $startup_stdout | sed 's/ORA-32004//g')
+
# Dump output to syslog for debugging
ocf_log debug "[$ORACLE_SID] [$rv] sent $startup_cmd"
ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout"
@@ -184,7 +214,7 @@ force_cleanup() {
ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing"
- pids=`ps ax | grep "ora_.*_${ORACLE_SID}" | grep -v grep | awk '{print $1}'`
+ pids=`ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}'`
for pid in $pids; do
kill -9 $pid
@@ -207,7 +237,7 @@ exit_idle() {
declare -i n=0
ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..."
- while ps ax | grep $ORACLE_SID | grep -v grep | grep -q -v $LSNR_PROCNAME; do
+ while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v $LSNR_PROCNAME; do
if [ $n -ge 90 ]; then
ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate"
force_cleanup
@@ -433,6 +463,24 @@ start_oracle() {
fi
done
+ if [ -n "$ORACLE_HOSTNAME" -a -s /tmp/dgstatus.${ORACLE_SID} ]; then
+ # Start DB Console if vhost defined and database_role is READ WRITE
+ if cat /tmp/dgstatus.${ORACLE_SID} 2>/dev/null | grep "READ WRITE"; then
+ ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID"
+ emctl start dbconsole
+ if [ $? -ne 0 ]; then
+ ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed"
+ ocf_log error "Starting service $ORACLE_SID failed"
+ # Force good return status
+ #return 1
+ return 0
+ else
+ ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded"
+ fi
+ fi
+ rm -f /tmp/dgstatus.${ORACLE_SID}
+ fi
+
if [ -n "$LOCKFILE" ]; then
touch "$LOCKFILE"
fi
@@ -464,12 +512,35 @@ stop_oracle() {
ocf_log info "Stopping listener $LISTENER for $ORACLE_SID"
lsnrctl_stdout=$(lsnrctl stop "$LISTENER")
rv=$?
- if [ $? -ne 0 ]; then
+ if [ $rv -ne 0 ]; then
ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout"
- # XXX - failure?
+
+ pid=`ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}'`
+ kill -9 $pid
+ rv=$?
+ if [ $rv -eq 0 ]; then
+ ocf_log info "Cleanup $LISTENER Killed PID $pid"
+ else
+ ocf_log error "Cleanup $LISTENER Kill PID $pid failed: $rv"
+ fi
fi
done
+ if [ -n "$ORACLE_HOSTNAME" ]; then
+ # Stop DB Console if vhost defined
+ ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID"
+ emctl stop dbconsole
+ if [ $? -ne 0 ]; then
+ ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed"
+ ocf_log error "Stopping service $ORACLE_SID failed"
+ # Force good return status
+ #return 1
+ return 0
+ else
+ ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded"
+ fi
+ fi
+
exit_idle
if [ $? -ne 0 ]; then
@@ -528,6 +599,8 @@ status_oracle() {
last=$?
done
+ # No status for DB Console (ORACLE_HOSTNAME)
+
# No lock file, but everything's running. Put the lock
# file back. XXX - this kosher?
if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then
@@ -543,6 +616,16 @@ status_oracle() {
# Do some real work... #
########################
+# Data Guard Modification 1 - Debug Logging
+case $1 in
+stop | start | status | restart | recover | monitor )
+[ $(id -u) = 0 ] && exec > "/tmp/oradg_${ORACLE_SID}_$1.log" 2>&1
+set -x
+date
+echo $@
+printenv
+esac
+
case $1 in
meta-data)
cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'`
diff --git a/rgmanager/src/resources/orainstance.sh b/rgmanager/src/resources/orainstance.sh
index b083224..3504a53 100755
--- a/rgmanager/src/resources/orainstance.sh
+++ b/rgmanager/src/resources/orainstance.sh
@@ -116,6 +116,15 @@ start_db() {
# Troubleshooting:
# ORA-00845 - Try rm -f /dev/shm/ora_*
# ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba"
+ # We need to ignore some non-fatal errors
+
+ ignore_error=(ORA-32004)
+
+ for error in ${ignore_error[*]}
+ do
+ startup_stdout=$(echo "$startup_stdout" | sed "s/${error}//g")
+ done
+
if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then
ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout"
return 1
@@ -184,7 +193,7 @@ force_cleanup() {
ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing"
- pids=`ps ax | grep "ora_.*_${ORACLE_SID}" | grep -v grep | awk '{print $1}'`
+ pids=`ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}'`
for pid in $pids; do
kill -9 $pid
@@ -207,7 +216,7 @@ exit_idle() {
declare -i n=0
ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..."
- while ps ax | grep $ORACLE_SID | grep -v grep | grep -q -v $LSNR_PROCNAME; do
+ while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v $LSNR_PROCNAME; do
if [ $n -ge 90 ]; then
ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate"
force_cleanup
@@ -464,9 +473,17 @@ stop_oracle() {
ocf_log info "Stopping listener $LISTENER for $ORACLE_SID"
lsnrctl_stdout=$(lsnrctl stop "$LISTENER")
rv=$?
- if [ $? -ne 0 ]; then
+ if [ $rv -ne 0 ]; then
ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout"
- # XXX - failure?
+
+ pid=`ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}'`
+ kill -9 $pid
+ rv=$?
+ if [ $rv -eq 0 ]; then
+ ocf_log info "Cleanup $LISTENER Killed PID $pid"
+ else
+ ocf_log error "Cleanup $LISTENER Kill PID $pid failed: $rv"
+ fi
fi
done
diff --git a/rgmanager/src/resources/postgres-8.metadata b/rgmanager/src/resources/postgres-8.metadata
index e7ae14f..c9f752c 100644
--- a/rgmanager/src/resources/postgres-8.metadata
+++ b/rgmanager/src/resources/postgres-8.metadata
@@ -51,18 +51,6 @@
<content type="string" default="-D /var/lib/pgsql/data"/>
</parameter>
- <parameter name="shutdown_wait">
- <longdesc lang="en">
- Wait X seconds for correct end of service shutdown.
- This option is ignored in current release.
- </longdesc>
- <shortdesc lang="en">
- Wait X seconds for correct end of service shutdown
- This option is ignored in current release.
- </shortdesc>
- <content type="integer" />
- </parameter>
-
<parameter name="startup_wait">
<longdesc lang="en">
Wait X seconds for correct end of service startup
diff --git a/rgmanager/src/resources/utils/fs-lib.sh b/rgmanager/src/resources/utils/fs-lib.sh
index 6288c09..9d91c79 100644
--- a/rgmanager/src/resources/utils/fs-lib.sh
+++ b/rgmanager/src/resources/utils/fs-lib.sh
@@ -763,7 +763,7 @@ do_post_unmount() {
# Agent-specific force unmount logic, if required
-# return = nonzero if successful, or 0 if unsuccessful
+# return = 0 if successful, or nonzero if unsuccessful
# (unsuccessful = try harder)
do_force_unmount() {
return 1
@@ -782,6 +782,19 @@ start_filesystem() {
declare mount_options=""
#
+ # Check if fstype is supported
+ #
+ verify_fstype
+ case $? in
+ $OCF_ERR_ARGS)
+ ocf_log err "File system type $OCF_RESKEY_fstype not supported"
+ return $OCF_ERR_ARGS
+ ;;
+ *)
+ ;;
+ esac
+
+ #
# Check if mount point was specified. If not, no need to continue.
#
case "$mp" in
diff --git a/rgmanager/src/resources/utils/member_util.sh b/rgmanager/src/resources/utils/member_util.sh
index 02fba77..51b1bce 100644
--- a/rgmanager/src/resources/utils/member_util.sh
+++ b/rgmanager/src/resources/utils/member_util.sh
@@ -30,6 +30,9 @@
#
is_node_member_clustat()
{
+ local node="$1"
+ local output_list
+
# Still having a tag while (a) online but (b) not running pacemaker
# (e.g. crm_node) or rgmanager not considered adequate for things like
# the LVM agent - so we use corosync-quorumtool instead. The function
@@ -51,8 +54,19 @@ is_node_member_clustat()
# 1 1 rhel7-1.priv.redhat.com
# 2 1 rhel7-2.priv.redhat.com
#
- corosync-quorumtool -l | grep -v "^Nodeid" | grep -i " $1\$" &> /dev/null
- return $?
+
+ output_list=$(corosync-quorumtool -l | grep -v "^Nodeid")
+
+ # first try searching for the node in the output as both a FQDN or shortname
+ echo "$output_list" | grep -i -e " $node\$" -e " $node\..*\$" &> /dev/null && return 0
+
+ # if the node was not found in the quorum list, try any known aliases found in /etc/hosts
+ for alias in $(cat /etc/hosts | grep -e "\s$node\s" -e "\s$node\$" | tail -n 1 | sed 's/\t/ /g' | cut -f2- -d " ");
+ do
+ echo "$output_list" | grep -i -e " $alias\$" &> /dev/null && return 0
+ done
+
+ return 1
}
diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh
index d1494d5..4c602ff 100755
--- a/rgmanager/src/resources/vm.sh
+++ b/rgmanager/src/resources/vm.sh
@@ -164,6 +164,16 @@ meta_data()
<content type="string" default="live"/>
</parameter>
+ <parameter name="migrate_options">
+ <longdesc lang="en">
+ Extra options for the guest live migration.
+ </longdesc>
+ <shortdesc lang="en">
+ Extra options for the guest live migration.
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
<parameter name="tunnelled">
<longdesc lang="en">
Tunnel data over ssh to securely migrate virtual machines.
@@ -980,16 +990,16 @@ virsh_migrate()
# Xen and qemu have different migration mechanisms
#
if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
- cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $OCF_RESKEY_hypervisor_uri $(printf $OCF_RESKEY_migration_uri $target)"
+ cmd="virsh migrate $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $OCF_RESKEY_hypervisor_uri $(printf $OCF_RESKEY_migration_uri $target)"
ocf_log debug "$cmd"
err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
rv=$?
elif [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then
if [ -z "$tunnelled_opt" ]; then
- cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)"
+ cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)"
else
- cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target)"
+ cmd="virsh migrate $tunnelled_opt $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target)"
fi
ocf_log debug "$cmd"
@@ -1031,7 +1041,7 @@ xm_migrate()
# migrate() function sets target using migration_mapping;
# no need to do it here anymore
- cmd="xm migrate $migrate_opt $OCF_RESKEY_name $target"
+ cmd="xm migrate $migrate_opt $OCF_RESKEY_migrate_options $OCF_RESKEY_name $target"
ocf_log debug "$cmd"
err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]})
diff --git a/tools/ocft/Filesystem b/tools/ocft/Filesystem
index 0b4d781..2b22d1b 100644
--- a/tools/ocft/Filesystem
+++ b/tools/ocft/Filesystem
@@ -2,31 +2,23 @@
# by dejan at suse.de on
# Tue Feb 15 18:50:04 CET 2011
#
-# NB: Replace /var/run below with /run if your system mounts /run
-# as tmpfs!
-
CONFIG
Agent Filesystem
AgentRoot /usr/lib/ocf/resource.d/heartbeat
HangTimeout 20
VARIABLE
- OCFT_fs=/var/run/resource-agents/ocft-Filesystem-fs
- OCFT_loop=/dev/loop7
- OCFT_dir=/var/run/resource-agents/ocft-Filesystem-mnt
+ OCFT_rundir="`get_rundir`"
+ OCFT_fs="$OCFT_rundir/resource-agents/ocft-Filesystem-fs"
+ OCFT_dir="$OCFT_rundir/resource-agents/ocft-Filesystem-mnt"
+ OCFT_loop="`loopbackeddev make $OCFT_fs 16M`"
SETUP-AGENT
- losetup $OCFT_loop 2>/dev/null && exit 1
- rmdir $OCFT_dir 2>/dev/null || true
- mkdir $OCFT_dir
- dd if=/dev/zero of=$OCFT_fs bs=1 count=0 seek=16M 2>/dev/null
- mke2fs -j -Fq -m 0 $OCFT_fs
- losetup $OCFT_loop $OCFT_fs
+ mke2fs -j -Fq -m 0 $OCFT_loop
CLEANUP-AGENT
+ loopbackeddev unmake $OCFT_fs
rmdir $OCFT_dir
- rm $OCFT_fs
- losetup -d $OCFT_loop
CASE-BLOCK required_args
Env OCF_RESKEY_device=$OCFT_loop
diff --git a/tools/ocft/LVM b/tools/ocft/LVM
index 4bd2b22..00b69ee 100644
--- a/tools/ocft/LVM
+++ b/tools/ocft/LVM
@@ -8,15 +8,13 @@ CONFIG
HangTimeout 20
VARIABLE
- OCFT_pv=/var/run/resource-agents/ocft-LVM-pv
+ OCFT_rundir="`get_rundir`"
+ OCFT_pv=$OCFT_rundir/resource-agents/ocft-LVM-pv
OCFT_vg=ocft-vg
OCFT_lv=ocft-lv
- OCFT_loop=/dev/loop7
+ OCFT_loop="`loopbackeddev make $OCFT_pv 16M`"
SETUP-AGENT
- losetup $OCFT_loop 2>/dev/null && exit 1
- dd if=/dev/zero of=$OCFT_pv bs=1 count=0 seek=16M 2>/dev/null
- losetup $OCFT_loop $OCFT_pv
pvcreate $OCFT_loop
vgcreate -s 4K $OCFT_vg $OCFT_loop
lvcreate -n $OCFT_lv -L 600K $OCFT_vg
@@ -26,8 +24,7 @@ CLEANUP-AGENT
lvremove -f /dev/$OCFT_vg/$OCFT_lv
vgremove -f $OCFT_vg
pvremove $OCFT_loop
- losetup -d $OCFT_loop
- rm $OCFT_pv
+ loopbackeddev unmake $OCFT_pv
CASE-BLOCK required_args
Env OCF_RESKEY_volgrpname=$OCFT_vg
diff --git a/tools/ocft/Makefile.am b/tools/ocft/Makefile.am
index 6218bc3..8191c11 100644
--- a/tools/ocft/Makefile.am
+++ b/tools/ocft/Makefile.am
@@ -56,5 +56,8 @@ ocftcfgs_DATA = apache \
ocftdir = $(datadir)/$(PACKAGE_NAME)/ocft
ocft_DATA = README \
README.zh_CN \
- caselib
+ caselib \
+ helpers.sh \
+ runocft.prereq
+ocft_SCRIPTS = runocft
diff --git a/tools/ocft/Raid1 b/tools/ocft/Raid1
index 1c239c8..462b9b9 100644
--- a/tools/ocft/Raid1
+++ b/tools/ocft/Raid1
@@ -9,31 +9,20 @@ CONFIG
HangTimeout 20
VARIABLE
- OCFT_disk0=/var/run/resource-agents/ocft-Raid1-disk0
- OCFT_disk1=/var/run/resource-agents/ocft-Raid1-disk1
- OCFT_disk2=/var/run/resource-agents/ocft-Raid1-disk2
- OCFT_disk3=/var/run/resource-agents/ocft-Raid1-disk3
- OCFT_raidconf=/var/run/resource-agents/ocft-mdadm.conf
+ OCFT_rundir="`get_rundir`"
+ OCFT_disk0=$OCFT_rundir/resource-agents/ocft-Raid1-disk0
+ OCFT_disk1=$OCFT_rundir/resource-agents/ocft-Raid1-disk1
+ OCFT_disk2=$OCFT_rundir/resource-agents/ocft-Raid1-disk2
+ OCFT_disk3=$OCFT_rundir/resource-agents/ocft-Raid1-disk3
+ OCFT_raidconf=$OCFT_rundir/resource-agents/ocft-mdadm.conf
OCFT_raiddev=/dev/md8
OCFT_raiddev2=/dev/md9
- OCFT_loop0=/dev/loop6
- OCFT_loop1=/dev/loop7
- OCFT_loop2=/dev/loop4
- OCFT_loop3=/dev/loop5
+ OCFT_loop0="`loopbackeddev make $OCFT_disk0 16M`"
+ OCFT_loop1="`loopbackeddev make $OCFT_disk1 16M`"
+ OCFT_loop2="`loopbackeddev make $OCFT_disk2 16M`"
+ OCFT_loop3="`loopbackeddev make $OCFT_disk3 16M`"
SETUP-AGENT
- losetup $OCFT_loop0 2>/dev/null && exit 1
- losetup $OCFT_loop1 2>/dev/null && exit 1
- losetup $OCFT_loop2 2>/dev/null && exit 1
- losetup $OCFT_loop3 2>/dev/null && exit 1
- dd if=/dev/zero of=$OCFT_disk0 bs=1 count=0 seek=16M 2>/dev/null
- dd if=/dev/zero of=$OCFT_disk1 bs=1 count=0 seek=16M 2>/dev/null
- dd if=/dev/zero of=$OCFT_disk2 bs=1 count=0 seek=16M 2>/dev/null
- dd if=/dev/zero of=$OCFT_disk3 bs=1 count=0 seek=16M 2>/dev/null
- losetup $OCFT_loop0 $OCFT_disk0
- losetup $OCFT_loop1 $OCFT_disk1
- losetup $OCFT_loop2 $OCFT_disk2
- losetup $OCFT_loop3 $OCFT_disk3
mdadm --create $OCFT_raiddev -l 0 --raid-devices=2 $OCFT_loop0 $OCFT_loop1
mdadm --create $OCFT_raiddev2 -l 0 --raid-devices=2 $OCFT_loop2 $OCFT_loop3
echo DEVICE $OCFT_loop0 $OCFT_loop1 > $OCFT_raidconf
@@ -48,12 +37,11 @@ CLEANUP-AGENT
mdadm --zero-superblock $OCFT_loop3
mdadm --remove $OCFT_raiddev 2>/dev/null
mdadm --remove $OCFT_raiddev2 2>/dev/null
- losetup -d $OCFT_loop0
- losetup -d $OCFT_loop1
- losetup -d $OCFT_loop2
- losetup -d $OCFT_loop3
- rm $OCFT_disk0 $OCFT_disk1 $OCFT_raidconf
- rm $OCFT_disk2 $OCFT_disk3
+ loopbackeddev unmake $OCFT_disk0
+ loopbackeddev unmake $OCFT_disk1
+ loopbackeddev unmake $OCFT_disk2
+ loopbackeddev unmake $OCFT_disk3
+ rm -f $OCFT_raidconf
CASE-BLOCK required_args
Env OCF_RESKEY_raidconf=$OCFT_raidconf
diff --git a/tools/ocft/Xinetd b/tools/ocft/Xinetd
index ba9c85c..53f4f65 100644
--- a/tools/ocft/Xinetd
+++ b/tools/ocft/Xinetd
@@ -5,6 +5,14 @@ CONFIG
AgentRoot /usr/lib/ocf/resource.d/heartbeat
InstallPackage xinetd
+SETUP-AGENT
+ sed -i /disable/s/yes/no/ /etc/xinetd.d/echo
+ if which /etc/init.d/xinetd >/dev/null 2>&1; then
+ /etc/init.d/xinetd start
+ elif systemctl list-unit-files | grep -qs xinetd; then
+ systemctl start xinetd
+ fi
+
CASE-BLOCK required_args
Env OCF_RESKEY_service=discard
diff --git a/tools/ocft/apache b/tools/ocft/apache
index b2219ec..fe4f193 100644
--- a/tools/ocft/apache
+++ b/tools/ocft/apache
@@ -8,8 +8,13 @@ CONFIG
HangTimeout 20
SETUP-AGENT
- rcapache2 start
- rcapache2 stop
+ if systemctl list-unit-files 2>/dev/null | fgrep -q apache2.service; then
+ systemctl start apache2.service
+ systemctl stop apache2.service
+ else
+ /etc/init.d/apache2 start
+ /etc/init.d/apache2 stop
+ fi
CASE-BLOCK default_status
AgentRun stop
diff --git a/tools/ocft/caselib.in b/tools/ocft/caselib.in
index 01b108f..2c5735a 100644
--- a/tools/ocft/caselib.in
+++ b/tools/ocft/caselib.in
@@ -47,40 +47,22 @@ agent_install()
for pkg in "$@"; do
if [ -e /etc/SuSE-release ]; then
- if ! rpm -ql "$pkg" >/dev/null 2>&1; then
- echo "${__OCFT__showhost}Installing $pkg ..."
- zypper -q install -y "$pkg" >/dev/null 2>&1
- if ! rpm -ql "$pkg" >/dev/null 2>&1; then
- echo
- echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
- quit 2
- fi
- echo "done"
+ if ! zypper -q install -y "$pkg" >/dev/null 2>&1; then
echo
+ echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
+ quit 2
fi
elif [ -e /etc/debian_version ]; then
- if ! dpkg -L "$pkg" >/dev/null 2>&1; then
- echo "${__OCFT__showhost}Installing $pkg ..."
- apt-get -y install "$pkg" >/dev/null 2>&1
- if ! dpkg -L "$pkg" >/dev/null 2>&1; then
- echo
- echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
- quit 2
- fi
- echo "done"
+ if ! apt-get -y install "$pkg" >/dev/null 2>&1; then
echo
+ echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
+ quit 2
fi
elif [ -e /etc/redhat-release ]; then
- if ! rpm -ql "$pkg" >/dev/null 2>&1; then
- echo "${__OCFT__showhost}Installing $pkg ..."
- yum -y install "$pkg" >/dev/null 2>&1
- if ! rpm -ql "$pkg" >/dev/null 2>&1; then
- echo
- echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
- quit 2
- fi
- echo "done"
+ if ! yum -y install "$pkg" >/dev/null 2>&1; then
echo
+ echo "${__OCFT__showhost}ERROR: Install '$pkg' failed."
+ quit 2
fi
else
echo "${__OCFT__showhost}ERROR: Cannot detect your OS type."
@@ -106,6 +88,9 @@ agent_run()
set_ocf_env $agent
+ export OCF_RESKEY_CRM_meta_timeout
+ : ${OCF_RESKEY_CRM_meta_timeout:=$timeout}
+
aroot=${__OCFT__MYROOT:-$__OCFT__AGENT_ROOT}
setsid $aroot/$agent $cmd >/tmp/.ocft_runlog 2>&1 &
@@ -270,6 +255,9 @@ export OCF_ROOT=@OCF_ROOT_DIR@
export OCF_LIB=@OCF_LIB_DIR@/heartbeat
__OCFT__AGENT_ROOT=@OCF_RA_DIR@/heartbeat
__OCFT__CASES_DIR=/var/lib/@PACKAGE_NAME@/ocft/cases
+OCFT_DIR=@datadir@/@PACKAGE_NAME@/ocft
+
+. $OCFT_DIR/helpers.sh
__OCFT__atexit_num=0
@@ -294,6 +282,11 @@ export HA_SBIN_DIR=$__OCFT__fakebin
quit 3
}
+. $OCF_LIB/ocf-directories || {
+ echo "${__OCFT__showhost}ERROR: $OCF_LIB/ocf-directories not found."
+ quit 3
+}
+
while read __OCFT__line; do
if [ -n "$__OCFT__line" ]; then
__OCFT__retn=${__OCFT__line%%=*}
diff --git a/tools/ocft/exportfs b/tools/ocft/exportfs
index c83520d..6186273 100644
--- a/tools/ocft/exportfs
+++ b/tools/ocft/exportfs
@@ -5,22 +5,23 @@
CONFIG
Agent exportfs
AgentRoot /usr/lib/ocf/resource.d/heartbeat
- HangTimeout 40
+ InstallPackage nfs-kernel-server
+ HangTimeout 40
SETUP-AGENT
# nothing
CASE-BLOCK set_testenv
- Env OCF_RESKEY_directory=/usr
- Env OCF_RESKEY_fsid=105
- Env OCF_RESKEY_clientspec="*"
+ Env OCF_RESKEY_directory=/usr
+ Env OCF_RESKEY_fsid=105
+ Env OCF_RESKEY_clientspec="*"
Env OCF_RESKEY_CRM_meta_timeout=30000
CASE-BLOCK default_status
AgentRun stop
CASE-BLOCK prepare
- Include set_testenv
+ Include set_testenv
Include default_status
CASE "check base env"
@@ -37,6 +38,11 @@ CASE "check base env: invalid 'OCF_RESKEY_directory'"
Env OCF_RESKEY_directory=/no_such
AgentRun start OCF_ERR_INSTALLED
+CASE "check base env: non-decimal 'OCF_RESKEY_fsid'"
+ Include prepare
+ Env OCF_RESKEY_fsid="4f838db14f838db14f838db14f838db1"
+ AgentRun start OCF_SUCCESS
+
CASE "unimplemented command"
Include prepare
AgentRun no_cmd OCF_ERR_UNIMPLEMENTED
diff --git a/tools/ocft/exportfs-multidir b/tools/ocft/exportfs-multidir
index 72551c6..2e35e79 100644
--- a/tools/ocft/exportfs-multidir
+++ b/tools/ocft/exportfs-multidir
@@ -5,22 +5,23 @@
CONFIG
Agent exportfs
AgentRoot /usr/lib/ocf/resource.d/heartbeat
- HangTimeout 40
+ InstallPackage nfs-kernel-server
+ HangTimeout 40
SETUP-AGENT
- # nothing
+ # nothing
CASE-BLOCK set_testenv
- Env OCF_RESKEY_directory="/usr /var"
- Env OCF_RESKEY_fsid=105
- Env OCF_RESKEY_clientspec="*"
+ Env OCF_RESKEY_directory="/usr /var"
+ Env OCF_RESKEY_fsid=105
+ Env OCF_RESKEY_clientspec="*"
Env OCF_RESKEY_CRM_meta_timeout=30000
CASE-BLOCK default_status
AgentRun stop
CASE-BLOCK prepare
- Include set_testenv
+ Include set_testenv
Include default_status
CASE "check base env"
diff --git a/tools/ocft/helpers.sh b/tools/ocft/helpers.sh
new file mode 100644
index 0000000..9848934
--- /dev/null
+++ b/tools/ocft/helpers.sh
@@ -0,0 +1,43 @@
+# get_rundir: print the directory under which test fixtures are created.
+#
+# Prints "/run" when the mount table lists a file system mounted exactly on
+# /run, otherwise "/var/run".
+get_rundir() {
+	local rundir
+	# Compare the mount point field exactly and stop at the first hit, so
+	# that mounts such as /var/run or /mnt/run, or several matching lines,
+	# cannot produce a wrong or multi-line result.
+	rundir=`mount | awk -v mp=/run '$3 == mp { print $3; exit }'`
+	echo "${rundir:-/var/run}"
+}
+
+loopbackeddev() {
+ local action file size ctlfile
+
+ action="$1"
+ file="$2"
+ size="$3"
+ ctlfile=$HA_RSCTMP/`echo $file | tr / _`
+
+ case "$action" in
+ start|setup|make)
+ if [ ! -f "$ctlfile" ]; then
+ if [ -z "$size" ]; then
+ echo "usage: $0 action file size" >&2
+ exit 1
+ fi
+ loopdev=`losetup -f`
+ if ! dd if=/dev/zero of=$file bs=1 count=0 seek=$size 2>/dev/null; then
+ echo "$0: dd failed" >&2
+ exit 1
+ fi
+ if ! losetup $loopdev $file; then
+ echo "$0: losetup failed" >&2
+ exit 1
+ fi
+ echo $loopdev | tee $ctlfile
+ else
+ cat $ctlfile
+ fi
+ ;;
+ stop|undo|unmake)
+ if [ -f "$ctlfile" ]; then
+ losetup -d `cat $ctlfile`
+ rm -f $file $ctlfile
+ fi
+ ;;
+ esac
+}
diff --git a/tools/ocft/iscsi b/tools/ocft/iscsi
index 7b0452a..3c5d524 100644
--- a/tools/ocft/iscsi
+++ b/tools/ocft/iscsi
@@ -3,26 +3,52 @@
CONFIG
Agent iscsi
AgentRoot /usr/lib/ocf/resource.d/heartbeat
- InstallPackage open-iscsi
- InstallPackage iscsitarget
+ InstallPackage open-iscsi targetcli
HangTimeout 20
VARIABLE
- OCFT_disk=/var/run/resource-agents/ocft-iscsi
- OCFT_target="iqn.2011-03.ocft.localhost:disk0"
+ OCFT_rundir="`get_rundir`"
+ OCFT_disk="$OCFT_rundir/resource-agents/ocft-iscsi"
+ OCFT_target="iqn.2003-01.org.linux-iscsi:ocft"
OCFT_portal="127.0.0.1:3260"
+ OCFT_loop="`loopbackeddev make $OCFT_disk 16M`"
+ OCF_RESKEY_iqn=$OCFT_target
+ OCF_RESKEY_portals=$OCFT_portal
+ OCF_RESKEY_target_iqn=$OCFT_target
+ OCF_RESKEY_path=$OCFT_loop
+ OCF_RESKEY_lun=1
SETUP-AGENT
- dd if=/dev/zero of=$OCFT_disk bs=1024k count=1 2>/dev/null
- echo Target $OCFT_target >> /etc/ietd.conf
- echo " Lun 0 Path=$OCFT_disk,Type=fileio" >> /etc/ietd.conf
- rciscsitarget start
- rcopen-iscsi start
- rciscsitarget restart
+ if systemctl list-unit-files 2>/dev/null | fgrep -q iscsid.service; then
+ systemctl start iscsid.service
+ else
+ /etc/init.d/open-iscsi start
+ fi
+ if systemctl list-unit-files 2>/dev/null | fgrep -q target.service; then
+ systemctl start target.service
+ else
+ /etc/init.d/target start
+ fi
+ export OCF_RESKEY_iqn=$OCFT_target
+ export OCF_RESKEY_portals=$OCFT_portal
+ export OCF_RESKEY_target_iqn=$OCFT_target
+ export OCF_RESKEY_path=$OCFT_loop
+ export OCF_RESKEY_lun=1
+ /usr/lib/ocf/resource.d/heartbeat/iSCSITarget start
+ /usr/lib/ocf/resource.d/heartbeat/iSCSILogicalUnit start
CLEANUP-AGENT
- rm -f $OCFT_disk
- sed -i "/^Target $OCFT_target/,+1d" /etc/ietd.conf
+ export OCF_RESKEY_iqn=$OCFT_target
+ export OCF_RESKEY_portals=$OCFT_portal
+ export OCF_RESKEY_target_iqn=$OCFT_target
+ export OCF_RESKEY_path=$OCFT_loop
+ export OCF_RESKEY_lun=1
+ /usr/lib/ocf/resource.d/heartbeat/iSCSILogicalUnit stop
+ /usr/lib/ocf/resource.d/heartbeat/iSCSITarget stop
+ if systemctl list-unit-files 2>/dev/null | fgrep -q target.service; then
+ systemctl stop target.service
+ fi
+ loopbackeddev unmake $OCFT_disk
CASE-BLOCK required_args
Env OCF_RESKEY_portal=$OCFT_portal
diff --git a/tools/ocft/mysql b/tools/ocft/mysql
index bae0dc7..305a900 100644
--- a/tools/ocft/mysql
+++ b/tools/ocft/mysql
@@ -7,8 +7,13 @@ CONFIG
HangTimeout 20
SETUP-AGENT
- rcmysql start
- rcmysql stop
+ if systemctl list-unit-files 2>/dev/null | fgrep -q mysql.service; then
+ systemctl start mysql.service
+ systemctl stop mysql.service
+ else
+ /etc/init.d/mysql start
+ /etc/init.d/mysql stop
+ fi
CASE-BLOCK crm_setting
Env OCF_RESKEY_CRM_meta_timeout=15000
diff --git a/tools/ocft/named b/tools/ocft/named
index 15024a8..90a4351 100644
--- a/tools/ocft/named
+++ b/tools/ocft/named
@@ -10,8 +10,8 @@ CONFIG
InstallPackage bind-utils
SETUP-AGENT
- rcnamed start
- rcnamed stop
+ /etc/init.d/named start
+ /etc/init.d/named stop
CASE-BLOCK crm_setting
Env OCF_RESKEY_CRM_meta_timeout=15000
diff --git a/tools/ocft/nfsserver b/tools/ocft/nfsserver
index cd73164..cf2ac89 100644
--- a/tools/ocft/nfsserver
+++ b/tools/ocft/nfsserver
@@ -6,11 +6,15 @@ CONFIG
InstallPackage nfs-kernel-server
HangTimeout 20
+SETUP-AGENT
+ INITSCRIPT=""
+ if test -f /etc/init.d/nfsserver; then
+ INITSCRIPT="/etc/init.d/nfsserver"
+ fi
+ true
+
CASE-BLOCK required_args
- Env OCF_RESKEY_nfs_init_script=/etc/init.d/nfsserver
- Env OCF_RESKEY_nfs_ip=127.0.0.1
- Env OCF_RESKEY_nfs_shared_infodir=/var/lib/nfs
- Env OCF_RESKEY_nfs_notify_cmd=/usr/sbin/sm-notify
+ Env OCF_RESKEY_nfs_init_script=$INITSCRIPT
CASE-BLOCK default_status
AgentRun stop
@@ -28,16 +32,6 @@ CASE "check base env: invalid 'OCF_RESKEY_nfs_init_script'"
Env OCF_RESKEY_nfs_init_script=no_such_script
AgentRun start OCF_ERR_INSTALLED
-CASE "check base env: unset 'OCF_RESKEY_nfs_ip'"
- Include prepare
- Unenv OCF_RESKEY_nfs_ip
- AgentRun start OCF_ERR_CONFIGURED
-
-CASE "check base env: unset 'OCF_RESKEY_nfs_shared_infodir'"
- Include prepare
- Unenv OCF_RESKEY_nfs_shared_infodir
- AgentRun start OCF_ERR_CONFIGURED
-
CASE "check base env: invalid 'OCF_RESKEY_nfs_notify_cmd'"
Include prepare
Env OCF_RESKEY_nfs_notify_cmd=no_such_program
diff --git a/tools/ocft/ocft.in b/tools/ocft/ocft.in
index bfa9dcb..3f7dfcf 100644
--- a/tools/ocft/ocft.in
+++ b/tools/ocft/ocft.in
@@ -637,6 +637,7 @@ start_test()
local sh shs testsh agents line ret
local rc=0
local varlib
+ local rc_f
if ! cd $CASES_DIR >/dev/null 2>&1; then
die "cases directory not found."
@@ -671,7 +672,8 @@ start_test()
echo "RA trace on, output in $varlib/trace_ra"
fi
- for sh in $testsh; do
+ rc_f=`mktemp`
+ (for sh in $testsh; do
if [ -r "$sh" ]; then
if [ -n "$opt_trace_ra" ]; then
export OCF_RESOURCE_INSTANCE="`echo $sh | sed 's/_.*//'`"
@@ -681,11 +683,11 @@ start_test()
case "$sh" in
setup*)
+ rc=$((rc|ret))
if [ $ret -ne 0 ]; then
warn "SETUP failed, break all tests of '$shs'."
break
fi
- rc=$((rc|ret))
;;
cleanup*)
if [ $ret -ne 0 ]; then
@@ -703,12 +705,14 @@ start_test()
;;
esac
fi
- done 2>&1 | while read -r line; do
+ done 2>&1; echo $rc > $rc_f) | while read -r line; do
echo "$line"
echo "$(date '+%F %T'): $line" | cat -A |
sed -r 's/\^\[\[[0-9]+m|\^I|.$//g' >>logs/$shs.log
done
done
+ rc=`cat $rc_f`
+ rm -f $rc_f
return $rc
}
diff --git a/tools/ocft/pgsql b/tools/ocft/pgsql
index abab33f..9944b09 100644
--- a/tools/ocft/pgsql
+++ b/tools/ocft/pgsql
@@ -7,8 +7,8 @@ CONFIG
HangTimeout 20
SETUP-AGENT
- rcpostgresql start
- rcpostgresql stop
+ /etc/init.d/postgresql start
+ /etc/init.d/postgresql stop
CASE-BLOCK crm_setting
Env OCF_RESKEY_CRM_meta_timeout=15000
diff --git a/tools/ocft/runocft b/tools/ocft/runocft
new file mode 100755
index 0000000..f66b6a4
--- /dev/null
+++ b/tools/ocft/runocft
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# runocft: build and run the ocft test cases listed in runocft.prereq.
+#
+# Each line of the list is "<case> [<type>:<argument>]".  A case that names a
+# prerequisite is run only if prereq_<type> succeeds for <argument>.
+# Names of failed cases are collected in ./ocft.FAILED; the exit status is 1
+# if any case failed and 0 otherwise.
+
+OCFTDIR=/usr/share/resource-agents/ocft
+CONFDIR=$OCFTDIR/configs
+
+# "run:" prerequisite: the argument is a shell command line that must succeed.
+prereq_run() {
+	eval "$@"
+}
+# "prog:" prerequisite: the named program must be found by which.
+prereq_prog() {
+	which "$@"
+}
+
+# Check the prerequisite held in $prereq by dispatching to prereq_<type>.
+# All output of the check is discarded; only its exit status matters.
+test_prereq() {
+	local tp arg
+	tp=${prereq%%:*}
+	arg=${prereq#*:}
+	# $arg is split on purpose: it may hold a whole command line
+	prereq_$tp $arg >/dev/null 2>&1
+}
+
+rm -f ocft.FAILED
+rc=0
+while read -r f prereq; do
+	# blank and comment lines do not name a test case
+	case "$f" in
+	''|'#'*) continue ;;
+	esac
+	if [ -n "$prereq" ] && ! test_prereq; then
+		echo "$f: prerequisite not fulfilled, skipping"
+		continue
+	fi
+	ocft make "$f"
+	if ! ocft test "$f"; then
+		echo "$f" >> ocft.FAILED
+		rc=1
+	fi
+done < "$OCFTDIR/runocft.prereq"
+
+if [ -f ocft.FAILED ]; then
+	echo "The following ocft tests failed:"
+	cat ocft.FAILED
+fi
+
+exit $rc
diff --git a/tools/ocft/runocft.prereq b/tools/ocft/runocft.prereq
new file mode 100644
index 0000000..3bc222c
--- /dev/null
+++ b/tools/ocft/runocft.prereq
@@ -0,0 +1,30 @@
+apache
+db2 prog:~db2inst1/sqllib/bin/db2
+drbd.linbit run:false
+exportfs
+exportfs-multidir
+Filesystem
+IPaddr2 run:ip addr show dev eth0
+IPaddr2v4 run:ip addr show dev eth0 && ip addr show dev eth1
+IPaddr2v6 run:ip addr show dev eth0 && ip addr show dev eth1
+IPsrcaddr run:false
+IPv6addr run:ip addr show dev eth0 && ip addr show dev eth1
+iscsi
+jboss run:false
+LVM
+MailTo
+mysql
+mysql-proxy run:false
+named
+nfsserver
+oracle prog:sqlplus
+pgsql
+portblock
+postfix
+Raid1
+SendArp
+sg_persist run:false
+tomcat run:false
+VirtualDomain run:false
+Xen run:false
+Xinetd
diff --git a/tools/send_arp.linux.c b/tools/send_arp.linux.c
index e1c1960..477100a 100644
--- a/tools/send_arp.linux.c
+++ b/tools/send_arp.linux.c
@@ -7,6 +7,23 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet at ms2.inr.ac.ru>
+ * YOSHIFUJI Hideaki <yoshfuji at linux-ipv6.org>
+ */
+
+/* Andrew Beekhof, Lars Ellenberg:
+ * Based on arping from iputils,
+ * adapted to the command line conventions established by the libnet based
+ * send_arp tool as used by the IPaddr and IPaddr2 resource agents.
+ * The libnet based send_arp, and its command line argument convention,
+ * was first added to the heartbeat project by Matt Soffen.
+ *
+ * Latest "resync" with iputils as of:
+ * git://git.linux-ipv6.org/gitroot/iputils.git
+ * 511f8356e22615479c3cc16bca64d72d204f6df3
+ * Fri Jul 24 10:48:47 2015
+ * To get various bugfixes and support for infiniband and other link layer
+ * addresses which do not fit into plain "sockaddr_ll", and broadcast addresses
+ * that may be different from memset(,0xff,).
*/
#include <stdlib.h>
@@ -16,12 +33,17 @@
#include <sys/file.h>
#include <sys/time.h>
#include <sys/signal.h>
+#include <signal.h>
#include <sys/ioctl.h>
-#include <linux/if.h>
+#include <net/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if_arp.h>
#include <sys/uio.h>
+#ifdef CAPABILITIES
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#endif
#include <netdb.h>
#include <unistd.h>
@@ -32,40 +54,85 @@
#include <netinet/in.h>
#include <arpa/inet.h>
-static void usage(void) __attribute__((noreturn));
+#ifdef USE_SYSFS
+#include <sysfs/libsysfs.h>
+struct sysfs_devattr_values;
+#endif
-static int quit_on_reply;
-static char *device;
-static int ifindex;
-static char *source;
-static struct in_addr src, dst;
-static char *target;
-static int dad = 0, unsolicited = 0, advert = 0;
-static int quiet = 0;
-static int count = -1;
-static int timeout = 0;
-static int unicasting = 0;
-static int s = 0;
-static int broadcast_only = 0;
+#ifndef WITHOUT_IFADDRS
+#include <ifaddrs.h>
+#endif
-static struct sockaddr_ll me;
-static struct sockaddr_ll he;
+#ifdef USE_IDN
+#include <idna.h>
+#include <locale.h>
+#endif
-static struct timeval start, last;
+static char SNAPSHOT[] = "s20121221";
-static int sent, brd_sent;
-static int received, brd_recv, req_recv;
+static void usage(void) __attribute__((noreturn));
+
+#ifndef DEFAULT_DEVICE
+#define DEFAULT_DEVICE "eth0"
+#endif
+#ifdef DEFAULT_DEVICE
+# define DEFAULT_DEVICE_STR DEFAULT_DEVICE
+#else
+# define DEFAULT_DEVICE NULL
+#endif
+/* Describes the network interface the tool operates on. */
+struct device {
+ const char *name; /* interface name */
+ int ifindex; /* interface index; 0 until an appropriate device has been found */
+#ifndef WITHOUT_IFADDRS
+ struct ifaddrs *ifa; /* presumably the matching getifaddrs() entry - TODO confirm */
+#endif
+#ifdef USE_SYSFS
+ struct sysfs_devattr_values *sysfs; /* presumably the attributes read via sysfs - TODO confirm */
+#endif
+};
+
+int quit_on_reply=0;
+struct device device = {
+ .name = DEFAULT_DEVICE,
+};
+char *source;
+struct in_addr src, dst;
+char *target;
+int dad, unsolicited, advert;
+int quiet;
+int count=-1;
+int timeout;
+int unicasting;
+int s;
+int broadcast_only;
+
+struct sockaddr_storage me;
+struct sockaddr_storage he;
+
+struct timeval start, last;
+
+int sent, brd_sent;
+int received, brd_recv, req_recv;
+
+#ifndef CAPABILITIES
+static uid_t euid;
+#endif
#define MS_TDIFF(tv1,tv2) ( ((tv1).tv_sec-(tv2).tv_sec)*1000 + \
((tv1).tv_usec-(tv2).tv_usec)/1000 )
-static void print_hex(unsigned char *p, int len);
-static int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM);
-static void set_signal(int signo, void (*handler)(void));
-static int send_pack(int s, struct in_addr src, struct in_addr dst,
- struct sockaddr_ll *ME, struct sockaddr_ll *HE);
-static void finish(void);
-static void catcher(void);
+#define OFFSET_OF(name,ele) ((size_t)&((name *)0)->ele)
+
+/*
+ * Size to use for a struct sockaddr_ll that carries a hardware address of
+ * halen bytes: the offset of sll_addr plus halen, but never less than
+ * sizeof(struct sockaddr_ll).
+ */
+static socklen_t sll_len(size_t halen)
+{
+	socklen_t wanted = OFFSET_OF(struct sockaddr_ll, sll_addr) + halen;
+
+	if (wanted >= sizeof(struct sockaddr_ll))
+		return wanted;
+	return sizeof(struct sockaddr_ll);
+}
+
+#define SLL_LEN(hln) sll_len(hln)
void usage(void)
{
@@ -80,14 +147,18 @@ void usage(void)
" -V : print version and exit\n"
" -c count : how many packets to send\n"
" -w timeout : how long to wait for a reply\n"
- " -I device : which ethernet device to use (eth0)\n"
+ " -I device : which ethernet device to use"
+#ifdef DEFAULT_DEVICE_STR
+ " (" DEFAULT_DEVICE_STR ")"
+#endif
+ "\n"
" -s source : source ip address\n"
" destination : ask for what ip address\n"
);
exit(2);
}
-void set_signal(int signo, void (*handler)(void))
+static void set_signal(int signo, void (*handler)(void))
{
struct sigaction sa;
@@ -97,7 +168,126 @@ void set_signal(int signo, void (*handler)(void))
sigaction(signo, &sa, NULL);
}
-int send_pack(int s, struct in_addr src, struct in_addr dst,
+#ifdef CAPABILITIES
+static const cap_value_t caps[] = { CAP_NET_RAW, };
+static cap_flag_value_t cap_raw = CAP_CLEAR;
+#endif
+/*
+ * Reduce the privileges held by the process.
+ *
+ * With CAPABILITIES: if CAP_NET_RAW is in the permitted set, replace the
+ * capability state by one that has only CAP_NET_RAW permitted, then switch
+ * to the real uid with PR_SET_KEEPCAPS enabled around the setuid() call.
+ * Without CAPABILITIES: only remember the effective uid in euid.
+ *
+ * A failing cap_get_proc(), cap_clear(), prctl() or setuid() ends the
+ * process with exit(-1); a failing cap_set_proc() does so unless errno is
+ * EPERM.
+ */
+static void limit_capabilities(void)
+{
+#ifdef CAPABILITIES
+ cap_t cap_p;
+ /* fetch the current capability state of the process */
+ cap_p = cap_get_proc();
+ if (!cap_p) {
+ perror("arping: cap_get_proc");
+ exit(-1);
+ }
+ /* cap_raw records whether CAP_NET_RAW is permitted */
+ cap_get_flag(cap_p, CAP_NET_RAW, CAP_PERMITTED, &cap_raw);
+
+ if (cap_raw != CAP_CLEAR) {
+ if (cap_clear(cap_p) < 0) {
+ perror("arping: cap_clear");
+ exit(-1);
+ }
+ /* after clearing everything, mark only caps[] as permitted */
+ cap_set_flag(cap_p, CAP_PERMITTED, 1, caps, CAP_SET);
+
+ if (cap_set_proc(cap_p) < 0) {
+ perror("arping: cap_set_proc");
+ if (errno != EPERM)
+ exit(-1);
+ }
+ }
+ /* presumably keeps the permitted set across the uid change - confirm against prctl(2) */
+ if (prctl(PR_SET_KEEPCAPS, 1) < 0) {
+ perror("arping: prctl");
+ exit(-1);
+ }
+
+ if (setuid(getuid()) < 0) {
+ perror("arping: setuid");
+ exit(-1);
+ }
+
+ if (prctl(PR_SET_KEEPCAPS, 0) < 0) {
+ perror("arping: prctl");
+ exit(-1);
+ }
+
+ cap_free(cap_p);
+#else
+ euid = geteuid(); /* read back by modify_capability_raw() */
+#endif
+}
+
+/*
+ * Turn the privilege used for raw sockets on (on != 0) or off (on == 0).
+ *
+ * With CAPABILITIES: set or clear CAP_NET_RAW in the effective set.  If
+ * cap_raw is not CAP_SET nothing is changed: turning on fails with -1,
+ * turning off succeeds with 0.
+ * Without CAPABILITIES: setuid() to the uid stored in euid (on) or to the
+ * real uid (off).
+ *
+ * Returns 0 on success, -1 on failure after reporting it with perror().
+ */
+static int modify_capability_raw(int on)
+{
+#ifdef CAPABILITIES
+	cap_t cap_p;
+	int rc = 0;
+
+	if (cap_raw != CAP_SET)
+		return on ? -1 : 0;
+
+	cap_p = cap_get_proc();
+	if (!cap_p) {
+		perror("arping: cap_get_proc");
+		return -1;
+	}
+
+	cap_set_flag(cap_p, CAP_EFFECTIVE, 1, caps, on ? CAP_SET : CAP_CLEAR);
+
+	if (cap_set_proc(cap_p) < 0) {
+		perror("arping: cap_set_proc");
+		rc = -1;
+	}
+
+	/* release the working copy on the failure path as well */
+	cap_free(cap_p);
+	return rc;
+#else
+	if (setuid(on ? euid : getuid())) {
+		perror("arping: setuid");
+		return -1;
+	}
+	return 0;
+#endif
+}
+/* Shorthand for modify_capability_raw(1); returns its result. */
+static int enable_capability_raw(void)
+{
+ return modify_capability_raw(1);
+}
+/* Shorthand for modify_capability_raw(0); returns its result. */
+static int disable_capability_raw(void)
+{
+ return modify_capability_raw(0);
+}
+
+/*
+ * Give up the privileges of the process.
+ *
+ * With CAPABILITIES the capability state obtained from cap_init() is
+ * installed with cap_set_proc(); otherwise the uid is set to the real uid.
+ * Any failing step is reported with perror() and ends the process with
+ * exit(-1).
+ */
+static void drop_capabilities(void)
+{
+#ifdef CAPABILITIES
+	cap_t blank;
+
+	blank = cap_init();
+	if (blank == NULL) {
+		perror("arping: cap_init");
+		exit(-1);
+	}
+	if (cap_set_proc(blank) < 0) {
+		perror("arping: cap_set_proc");
+		exit(-1);
+	}
+	cap_free(blank);
+#else
+	if (setuid(getuid()) < 0) {
+		perror("arping: setuid");
+		exit(-1);
+	}
+#endif
+}
+
+static int send_pack(int s, struct in_addr src, struct in_addr dst,
struct sockaddr_ll *ME, struct sockaddr_ll *HE)
{
int err;
@@ -130,7 +320,7 @@ int send_pack(int s, struct in_addr src, struct in_addr dst,
p+=4;
gettimeofday(&now, NULL);
- err = sendto(s, buf, p-buf, 0, (struct sockaddr*)HE, sizeof(*HE));
+ err = sendto(s, buf, p-buf, 0, (struct sockaddr*)HE, SLL_LEN(ah->ar_hln));
if (err == p-buf) {
last = now;
sent++;
@@ -140,7 +330,7 @@ int send_pack(int s, struct in_addr src, struct in_addr dst,
return err;
}
-void finish(void)
+static void finish(void)
{
if (!quiet) {
printf("Sent %d probes (%d broadcast(s))\n", sent, brd_sent);
@@ -158,40 +348,43 @@ void finish(void)
printf("\n");
fflush(stdout);
}
-
- if (dad) {
- fflush(stdout);
- exit(!!received);
- }
-
+ fflush(stdout);
+ if (dad)
+ exit(!!received);
if (unsolicited)
exit(0);
-
- fflush(stdout);
exit(!received);
}
-void catcher(void)
+static void catcher(void)
{
- struct timeval tv;
+ struct timeval tv, tv_s, tv_o;
gettimeofday(&tv, NULL);
if (start.tv_sec==0)
start = tv;
- if (count-- == 0 || (timeout && MS_TDIFF(tv,start) > timeout*1000 + 500))
+ timersub(&tv, &start, &tv_s);
+ tv_o.tv_sec = timeout;
+ tv_o.tv_usec = 500 * 1000;
+
+ if (count-- == 0 || (timeout && timercmp(&tv_s, &tv_o, >)))
finish();
- if (last.tv_sec==0 || MS_TDIFF(tv,last) > 500) {
- send_pack(s, src, dst, &me, &he);
+ timersub(&tv, &last, &tv_s);
+ tv_o.tv_sec = 0;
+
+ if (last.tv_sec==0 || timercmp(&tv_s, &tv_o, >)) {
+ send_pack(s, src, dst,
+ (struct sockaddr_ll *)&me, (struct sockaddr_ll *)&he);
if (count == 0 && unsolicited)
finish();
}
alarm(1);
}
-void print_hex(unsigned char *p, int len)
+static void print_hex(unsigned char *p, int len)
{
int i;
for (i=0; i<len; i++) {
@@ -201,7 +394,7 @@ void print_hex(unsigned char *p, int len)
}
}
-int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
+static int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
{
struct timeval tv;
struct arphdr *ah = (struct arphdr*)buf;
@@ -231,7 +424,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
return 0;
if (ah->ar_pln != 4)
return 0;
- if (ah->ar_hln != me.sll_halen)
+ if (ah->ar_hln != ((struct sockaddr_ll *)&me)->sll_halen)
return 0;
if (len < sizeof(*ah) + 2*(4 + ah->ar_hln))
return 0;
@@ -242,7 +435,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
return 0;
if (src.s_addr != dst_ip.s_addr)
return 0;
- if (memcmp(p+ah->ar_hln+4, &me.sll_addr, ah->ar_hln))
+ if (memcmp(p+ah->ar_hln+4, ((struct sockaddr_ll *)&me)->sll_addr, ah->ar_hln))
return 0;
} else {
/* DAD packet was:
@@ -260,7 +453,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
*/
if (src_ip.s_addr != dst.s_addr)
return 0;
- if (memcmp(p, &me.sll_addr, me.sll_halen) == 0)
+ if (memcmp(p, ((struct sockaddr_ll *)&me)->sll_addr, ((struct sockaddr_ll *)&me)->sll_halen) == 0)
return 0;
if (src.s_addr && src.s_addr != dst_ip.s_addr)
return 0;
@@ -276,7 +469,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
printf("for %s ", inet_ntoa(dst_ip));
s_printed = 1;
}
- if (memcmp(p+ah->ar_hln+4, me.sll_addr, ah->ar_hln)) {
+ if (memcmp(p+ah->ar_hln+4, ((struct sockaddr_ll *)&me)->sll_addr, ah->ar_hln)) {
if (!s_printed)
printf("for ");
printf("[");
@@ -299,16 +492,78 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM)
brd_recv++;
if (ah->ar_op == htons(ARPOP_REQUEST))
req_recv++;
- if (quit_on_reply)
+ if (quit_on_reply || (count == 0 && received == sent))
finish();
if(!broadcast_only) {
- memcpy(he.sll_addr, p, me.sll_halen);
+ memcpy(((struct sockaddr_ll *)&he)->sll_addr, p, ((struct sockaddr_ll *)&me)->sll_halen);
unicasting=1;
}
return 1;
}
-#include <signal.h>
+#ifdef USE_SYSFS /* types and lookup table for reading device attributes via sysfs */
+union sysfs_devattr_value { /* one attribute value: a number or a pointer */
+ unsigned long ulong;
+ void *ptr;
+};
+/* Attribute indices, used for value[] and sysfs_devattrs[] below. */
+enum {
+ SYSFS_DEVATTR_IFINDEX,
+ SYSFS_DEVATTR_FLAGS,
+ SYSFS_DEVATTR_ADDR_LEN,
+#if 0
+ SYSFS_DEVATTR_TYPE,
+ SYSFS_DEVATTR_ADDRESS,
+#endif
+ SYSFS_DEVATTR_BROADCAST,
+ SYSFS_DEVATTR_NUM /* number of attributes; sizes both arrays */
+};
+/* Attribute values collected for one interface name. */
+struct sysfs_devattr_values
+{
+ char *ifname;
+ union sysfs_devattr_value value[SYSFS_DEVATTR_NUM];
+};
+/* Handlers referenced by the table; presumably each parses ptr into v->value[idx] - TODO confirm */
+static int sysfs_devattr_ulong_dec(char *ptr, struct sysfs_devattr_values *v, unsigned idx);
+static int sysfs_devattr_ulong_hex(char *ptr, struct sysfs_devattr_values *v, unsigned idx);
+static int sysfs_devattr_macaddr(char *ptr, struct sysfs_devattr_values *v, unsigned idx);
+/* Per-attribute descriptor: attribute name and the handler to apply to it. */
+struct sysfs_devattrs {
+ const char *name;
+ int (*handler)(char *ptr, struct sysfs_devattr_values *v, unsigned int idx);
+ int free; /* set only for the macaddr entries; presumably "value.ptr must be freed" - TODO confirm */
+} sysfs_devattrs[SYSFS_DEVATTR_NUM] = {
+ [SYSFS_DEVATTR_IFINDEX] = {
+ .name = "ifindex",
+ .handler = sysfs_devattr_ulong_dec,
+ },
+ [SYSFS_DEVATTR_ADDR_LEN] = {
+ .name = "addr_len",
+ .handler = sysfs_devattr_ulong_dec,
+ },
+ [SYSFS_DEVATTR_FLAGS] = {
+ .name = "flags",
+ .handler = sysfs_devattr_ulong_hex,
+ },
+#if 0
+ [SYSFS_DEVATTR_TYPE] = {
+ .name = "type",
+ .handler = sysfs_devattr_ulong_dec,
+ },
+ [SYSFS_DEVATTR_ADDRESS] = {
+ .name = "address",
+ .handler = sysfs_devattr_macaddr,
+ .free = 1,
+ },
+#endif
+ [SYSFS_DEVATTR_BROADCAST] = {
+ .name = "broadcast",
+ .handler = sysfs_devattr_macaddr,
+ .free = 1,
+ },
+};
+#endif
static void byebye(int nsig)
{
@@ -317,26 +572,477 @@ static void byebye(int nsig)
exit(nsig);
}
+/*
+ * find_device()
+ *
+ * This function checks 1) if the device (if given) is okay for ARP,
+ * or 2) finds the first appropriate device on the system.
+ *
+ * Return value:
+ * >0 : Succeeded, and appropriate device not found.
+ * device.ifindex remains 0.
+ * 0 : Succeeded, and appropriate device found.
+ * device.ifindex is set.
+ * <0 : Failed. Support not found, or other
+ * : system error. Try other method.
+ *
+ * If an appropriate device found, it is recorded inside the
+ * "device" variable for later reference.
+ *
+ * We have several implementations for this.
+ * by_ifaddrs(): requires getifaddrs() in glibc, and rtnetlink in
+ * kernel. default and recommended for recent systems.
+ * by_sysfs(): requires libsysfs , and sysfs in kernel.
+ * by_ioctl(): unable to list devices without ipv4 address; this
+ * means, you need to supply the device name for
+ * DAD purpose.
+ */
+/*
+ * check_ifflags() - common suitability test on interface flags.
+ *
+ * Returns 0 when the interface is up and can do ARP, -1 otherwise.
+ * With "fatal" set the function does not return on failure: it prints a
+ * message (unless "quiet") and exits with status 2 for a down interface,
+ * or with (dad ? 0 : 2) for a NOARP/loopback interface.
+ */
+static int check_ifflags(unsigned int ifflags, int fatal)
+{
+	const int is_down = !(ifflags & IFF_UP);
+	const int not_arpable = (ifflags & (IFF_NOARP | IFF_LOOPBACK)) != 0;
+
+	if (!is_down && !not_arpable)
+		return 0;
+
+	if (!fatal)
+		return -1;
+
+	/* "down" takes precedence over "not ARPable" */
+	if (is_down) {
+		if (!quiet)
+			printf("Interface \"%s\" is down\n", device.name);
+		exit(2);
+	}
+
+	if (!quiet)
+		printf("Interface \"%s\" is not ARPable\n", device.name);
+	exit(dad ? 0 : 2);
+}
+
+/*
+ * find_device_by_ifaddrs() - select the ARP device using getifaddrs().
+ *
+ * Walks the AF_PACKET entries, filtered by device.name when one is given,
+ * and accepts an entry that passes check_ifflags(), has a link-layer
+ * address and a broadcast address.
+ *
+ * Returns 0 when exactly one device matched (device.ifa, device.ifindex
+ * and device.name are set), 1 when none or more than one matched, and -1
+ * on system error or when built with WITHOUT_IFADDRS.
+ *
+ * On success the getifaddrs() list is deliberately kept allocated because
+ * device.ifa and device.name point into it.  On every other path the list
+ * is released and device.ifa is cleared so that no dangling pointer can be
+ * picked up later by set_device_broadcast_ifaddrs_one().
+ */
+static int find_device_by_ifaddrs(void)
+{
+#ifndef WITHOUT_IFADDRS
+	struct ifaddrs *ifa0, *ifa;
+	int n_found = 0;	/* not "count": that would shadow the global */
+
+	if (getifaddrs(&ifa0)) {
+		perror("getifaddrs");
+		return -1;
+	}
+
+	for (ifa = ifa0; ifa; ifa = ifa->ifa_next) {
+		if (!ifa->ifa_addr)
+			continue;
+		if (ifa->ifa_addr->sa_family != AF_PACKET)
+			continue;
+		if (device.name && ifa->ifa_name && strcmp(ifa->ifa_name, device.name))
+			continue;
+
+		/* fatal (exits) only when the user named the device */
+		if (check_ifflags(ifa->ifa_flags, device.name != NULL) < 0)
+			continue;
+
+		if (!((struct sockaddr_ll *)ifa->ifa_addr)->sll_halen)
+			continue;
+		if (!ifa->ifa_broadaddr)
+			continue;
+
+		device.ifa = ifa;
+
+		/* a second match makes the choice ambiguous; stop looking */
+		if (n_found++)
+			break;
+	}
+
+	if (n_found == 1 && device.ifa) {
+		device.ifindex = if_nametoindex(device.ifa->ifa_name);
+		if (!device.ifindex) {
+			perror("arping: if_nametoindex");
+			device.ifa = NULL;
+			freeifaddrs(ifa0);
+			return -1;
+		}
+		device.name = device.ifa->ifa_name;
+		return 0;
+	}
+
+	/* nothing usable (or ambiguous): drop the list and any reference to it */
+	device.ifa = NULL;
+	freeifaddrs(ifa0);
+	return 1;
+#else
+	return -1;
+#endif
+}
+
+#ifdef USE_SYSFS
+/*
+ * sysfs_devattr_values_init() - reset *v to all zeroes.
+ *
+ * When do_free is non-zero, the interface name and every value whose
+ * table entry is marked ".free" are released first.
+ */
+static void sysfs_devattr_values_init(struct sysfs_devattr_values *v, int do_free)
+{
+	if (do_free) {
+		int idx = 0;
+
+		free(v->ifname);
+		while (idx < SYSFS_DEVATTR_NUM) {
+			if (sysfs_devattrs[idx].free)
+				free(v->value[idx].ptr);
+			idx++;
+		}
+	}
+
+	memset(v, 0, sizeof(*v));
+}
+
+/*
+ * sysfs_devattr_ulong() - parse an unsigned number in the given base.
+ *
+ * The text must start with the number (no leading white space), contain
+ * at least one digit, and be followed only by end-of-string or a newline.
+ *
+ * Returns 0 and stores the result in v->value[idx].ulong on success;
+ * returns -1 and leaves the slot untouched on malformed input or overflow.
+ */
+static int sysfs_devattr_ulong(char *ptr, struct sysfs_devattr_values *v, unsigned int idx,
+			       unsigned int base)
+{
+	unsigned long parsed;
+	char *ep;
+
+	if (!ptr || !v)
+		return -1;
+
+	/* strtoul() would silently skip leading blanks; treat them as malformed */
+	if (*ptr && isspace(*ptr & 0xff))
+		return -1;
+
+	errno = 0;
+	parsed = strtoul(ptr, &ep, base);
+
+	/* ep == ptr: nothing was converted (e.g. empty string) */
+	if (errno || ep == ptr || (*ep != '\0' && *ep != '\n'))
+		return -1;
+
+	v->value[idx].ulong = parsed;
+	return 0;
+}
+
+/* Attribute handler: parse a decimal number into v->value[idx].ulong. */
+static int sysfs_devattr_ulong_dec(char *ptr, struct sysfs_devattr_values *v, unsigned int idx)
+{
+	return sysfs_devattr_ulong(ptr, v, idx, 10);
+}
+
+/* Attribute handler: parse a hexadecimal number into v->value[idx].ulong. */
+static int sysfs_devattr_ulong_hex(char *ptr, struct sysfs_devattr_values *v, unsigned int idx)
+{
+	return sysfs_devattr_ulong(ptr, v, idx, 16);
+}
+
+/*
+ * sysfs_devattr_macaddr() - parse a "xx:xx:...:xx" hardware address.
+ *
+ * The number of bytes expected is taken from the already-parsed
+ * SYSFS_DEVATTR_ADDR_LEN slot of *v, so that attribute must have been
+ * handled before this one.
+ *
+ * Returns 0 and stores a malloc()ed byte array in v->value[idx].ptr on
+ * success (ownership passes to *v); returns -1 on malformed or truncated
+ * input, or when memory cannot be allocated.
+ */
+static int sysfs_devattr_macaddr(char *ptr, struct sysfs_devattr_values *v, unsigned int idx)
+{
+	unsigned char *m;
+	unsigned int i;
+	unsigned int addrlen;
+
+	if (!ptr || !v)
+		return -1;
+
+	addrlen = v->value[SYSFS_DEVATTR_ADDR_LEN].ulong;
+
+	/*
+	 * addrlen bytes need 3 * addrlen - 1 characters.  Check up front so
+	 * the loop below never reads past the string terminator.
+	 */
+	if ((strlen(ptr) + 1) / 3 < addrlen)
+		return -1;
+
+	m = malloc(addrlen);
+	if (!m && addrlen)
+		return -1;
+
+	for (i = 0; i < addrlen; i++) {
+		/* every byte but the first is preceded by a ':' separator */
+		if (i && *(ptr + i * 3 - 1) != ':')
+			goto out;
+		if (sscanf(ptr + i * 3, "%02hhx", &m[i]) != 1)
+			goto out;
+	}
+
+	v->value[idx].ptr = m;
+	return 0;
+out:
+	free(m);
+	return -1;
+}
+#endif
+
+/*
+ * find_device_by_sysfs() - select the ARP device through libsysfs.
+ *
+ * Iterates over the devices of the "net" sysfs class (only the one named
+ * device.name when given), reads every attribute listed in sysfs_devattrs[]
+ * and accepts a device that passes check_ifflags() and has a non-zero
+ * address length.  The accepted values are moved into device.sysfs.
+ *
+ * Returns 0 when a device was selected (device.ifindex / device.name set),
+ * 1 when none or more than one matched, and -1 on error or when built
+ * without USE_SYSFS.
+ */
+static int find_device_by_sysfs(void)
+{
+	int rc = -1;
+#ifdef USE_SYSFS
+	struct sysfs_class *cls_net;
+	struct dlist *dev_list;
+	struct sysfs_class_device *dev;
+	struct sysfs_attribute *dev_attr;
+	struct sysfs_devattr_values sysfs_devattr_values;
+	int n_found = 0;	/* not "count": that would shadow the global */
+
+	if (!device.sysfs) {
+		device.sysfs = malloc(sizeof(*device.sysfs));
+		if (!device.sysfs) {
+			perror("malloc");
+			return -1;
+		}
+		sysfs_devattr_values_init(device.sysfs, 0);
+	}
+
+	cls_net = sysfs_open_class("net");
+	if (!cls_net) {
+		perror("sysfs_open_class");
+		return -1;
+	}
+
+	dev_list = sysfs_get_class_devices(cls_net);
+	if (!dev_list) {
+		perror("sysfs_get_class_devices");
+		goto out;
+	}
+
+	sysfs_devattr_values_init(&sysfs_devattr_values, 0);
+
+	dlist_for_each_data(dev_list, dev, struct sysfs_class_device) {
+		int i;
+		int attr_rc = -1;	/* result of reading this device's attributes */
+
+		if (device.name && strcmp(dev->name, device.name))
+			goto do_next;
+
+		sysfs_devattr_values_init(&sysfs_devattr_values, 1);
+
+		/* attributes are parsed in index order; ADDR_LEN precedes BROADCAST */
+		for (i = 0; i < SYSFS_DEVATTR_NUM; i++) {
+
+			dev_attr = sysfs_get_classdev_attr(dev, sysfs_devattrs[i].name);
+			if (!dev_attr) {
+				perror("sysfs_get_classdev_attr");
+				attr_rc = -1;
+				break;
+			}
+			if (sysfs_read_attribute(dev_attr)) {
+				perror("sysfs_read_attribute");
+				attr_rc = -1;
+				break;
+			}
+			attr_rc = sysfs_devattrs[i].handler(dev_attr->value, &sysfs_devattr_values, i);
+
+			if (attr_rc < 0)
+				break;
+		}
+
+		if (attr_rc < 0)
+			goto do_next;
+
+		/* fatal (exits) only when the user named the device */
+		if (check_ifflags(sysfs_devattr_values.value[SYSFS_DEVATTR_FLAGS].ulong,
+				  device.name != NULL) < 0)
+			goto do_next;
+
+		if (!sysfs_devattr_values.value[SYSFS_DEVATTR_ADDR_LEN].ulong)
+			goto do_next;
+
+		/* a previously recorded device that is IFF_RUNNING is kept */
+		if (device.sysfs->value[SYSFS_DEVATTR_IFINDEX].ulong) {
+			if (device.sysfs->value[SYSFS_DEVATTR_FLAGS].ulong & IFF_RUNNING)
+				goto do_next;
+		}
+
+		sysfs_devattr_values.ifname = strdup(dev->name);
+		if (!sysfs_devattr_values.ifname) {
+			perror("malloc");
+			/* do not leak the broadcast buffer parsed above */
+			sysfs_devattr_values_init(&sysfs_devattr_values, 1);
+			goto out;
+		}
+
+		/* move the candidate into device.sysfs; ownership of the pointers goes with it */
+		sysfs_devattr_values_init(device.sysfs, 1);
+		memcpy(device.sysfs, &sysfs_devattr_values, sizeof(*device.sysfs));
+		sysfs_devattr_values_init(&sysfs_devattr_values, 0);
+
+		/* a second match makes the choice ambiguous; stop looking */
+		if (n_found++)
+			break;
+
+		continue;
+do_next:
+		sysfs_devattr_values_init(&sysfs_devattr_values, 1);
+	}
+
+	if (n_found == 1) {
+		device.ifindex = device.sysfs->value[SYSFS_DEVATTR_IFINDEX].ulong;
+		device.name = device.sysfs->ifname;
+	}
+	rc = !device.ifindex;
+out:
+	sysfs_close_class(cls_net);
+#endif
+	return rc;
+}
+
+/*
+ * check_device_by_ioctl() - validate the interface named in ifr->ifr_name.
+ *
+ * Queries the interface flags, runs them through check_ifflags() (fatal
+ * when the user named the device), then fetches the interface index into
+ * *ifr.
+ *
+ * Returns 0 when the device is usable (ifr->ifr_ifindex is valid),
+ * 1 when its flags rule it out, and -1 when an ioctl fails.
+ */
+static int check_device_by_ioctl(int s, struct ifreq *ifr)
+{
+	if (ioctl(s, SIOCGIFFLAGS, ifr) < 0) {
+		perror("ioctl(SIOCGIFFLAGS)");
+		return -1;
+	}
+
+	if (check_ifflags(ifr->ifr_flags, device.name != NULL) < 0)
+		return 1;
+
+	/* overwrites the flags: both live in the same union inside struct ifreq */
+	if (ioctl(s, SIOCGIFINDEX, ifr) < 0) {
+		perror("ioctl(SIOCGIFINDEX)");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * find_device_by_ioctl() - select the ARP device with netdevice ioctls.
+ *
+ * With device.name set, only that interface is checked.  Otherwise the
+ * interfaces reported by SIOCGIFCONF are probed one by one.
+ *
+ * Returns 0 when exactly one device was selected (device.ifindex and
+ * device.name are set; the name points into a static buffer), 1 when none
+ * or more than one matched, and -1 on error.
+ */
+static int find_device_by_ioctl(void)
+{
+	int s;
+	int rc = -1;
+	struct ifreq *ifr0 = NULL;
+	size_t ifrsize = sizeof(*ifr0);
+	struct ifconf ifc;
+	static struct ifreq ifrbuf;	/* static: device.name may point into it */
+	int n_found = 0;		/* not "count": that would shadow the global */
+
+	s = socket(AF_INET, SOCK_DGRAM, 0);
+	if (s < 0) {
+		perror("socket");
+		return -1;
+	}
+
+	memset(&ifrbuf, 0, sizeof(ifrbuf));
+
+	if (device.name) {
+		/* ifrbuf was zeroed above, so the name stays NUL-terminated */
+		strncpy(ifrbuf.ifr_name, device.name, sizeof(ifrbuf.ifr_name) - 1);
+		if (check_device_by_ioctl(s, &ifrbuf))
+			goto out;
+		n_found++;
+	} else {
+		size_t n_ifr, i;
+
+		/* grow the buffer until SIOCGIFCONF leaves comfortable slack in it */
+		do {
+			ifr0 = malloc(ifrsize);
+			if (!ifr0) {
+				perror("malloc");
+				goto out;
+			}
+
+			ifc.ifc_buf = (char *)ifr0;
+			ifc.ifc_len = ifrsize;
+
+			if (ioctl(s, SIOCGIFCONF, &ifc) < 0) {
+				perror("ioctl(SIOCGIFCONF)");
+				goto out;
+			}
+
+			if (ifc.ifc_len + sizeof(*ifr0) + sizeof(struct sockaddr_storage) - sizeof(struct sockaddr) <= ifrsize)
+				break;
+			ifrsize *= 2;
+			free(ifr0);
+			ifr0 = NULL;
+		} while (ifrsize < INT_MAX / 2);
+
+		if (!ifr0) {
+			fprintf(stderr, "arping: too many interfaces!?\n");
+			goto out;
+		}
+
+		n_ifr = ifc.ifc_len > 0 ? (size_t)ifc.ifc_len / sizeof(*ifr0) : 0;
+		for (i = 0; i < n_ifr; i++) {
+			struct ifreq probe;
+
+			/*
+			 * Probe the candidate in a scratch request so that a
+			 * rejected interface cannot clobber an accepted one.
+			 */
+			memset(&probe, 0, sizeof(probe));
+			memcpy(probe.ifr_name, ifr0[i].ifr_name, sizeof(probe.ifr_name));
+			if (check_device_by_ioctl(s, &probe))
+				continue;
+
+			ifrbuf = probe;
+
+			/* a second match makes the choice ambiguous; stop looking */
+			if (n_found++)
+				break;
+		}
+	}
+
+	if (n_found == 1) {
+		device.ifindex = ifrbuf.ifr_ifindex;
+		device.name = ifrbuf.ifr_name;
+	}
+	rc = !device.ifindex;
+out:
+	free(ifr0);
+	close(s);
+	return rc;
+}
+
+/*
+ * find_device() - try each lookup method in turn.
+ *
+ * A method returning a negative value means "could not be used", and the
+ * next one is tried: ifaddrs first, then sysfs, then ioctl.  The first
+ * non-negative result (or the ioctl result) is returned.
+ */
+static int find_device(void)
+{
+	int rc = find_device_by_ifaddrs();
+
+	if (rc < 0)
+		rc = find_device_by_sysfs();
+	if (rc < 0)
+		rc = find_device_by_ioctl();
+
+	return rc;
+}
+
+/*
+ * set_device_broadcast()
+ *
+ * This fills the device "broadcast address"
+ * based on information found by find_device() function.
+ */
+/*
+ * set_device_broadcast_ifaddrs_one() - copy the link-layer broadcast
+ * address recorded in device->ifa into ba.
+ *
+ * Returns 0 on success.  Returns -1 when no ifaddrs entry is recorded,
+ * when the stored address length differs from balen, or when built with
+ * WITHOUT_IFADDRS.  With "fatal" set, a length mismatch prints a message
+ * (unless "quiet") and exits with status 2 instead of returning.
+ */
+static int set_device_broadcast_ifaddrs_one(struct device *device, unsigned char *ba, size_t balen, int fatal)
+{
+#ifndef WITHOUT_IFADDRS
+	struct sockaddr_ll *bcast;
+
+	if (!device || !device->ifa)
+		return -1;
+
+	bcast = (struct sockaddr_ll *)device->ifa->ifa_broadaddr;
+	if (bcast->sll_halen == balen) {
+		memcpy(ba, bcast->sll_addr, bcast->sll_halen);
+		return 0;
+	}
+
+	if (fatal) {
+		if (!quiet)
+			printf("Address length does not match...\n");
+		exit(2);
+	}
+	return -1;
+#else
+	return -1;
+#endif
+}
+/*
+ * set_device_broadcast_sysfs() - copy the broadcast address gathered by
+ * the sysfs lookup into ba.
+ *
+ * Returns 0 on success; -1 when no sysfs data is recorded, when the
+ * recorded address length differs from balen, or when built without
+ * USE_SYSFS.
+ */
+static int set_device_broadcast_sysfs(struct device *device, unsigned char *ba, size_t balen)
+{
+#ifdef USE_SYSFS
+	struct sysfs_devattr_values *vals = device ? device->sysfs : NULL;
+
+	if (!vals || vals->value[SYSFS_DEVATTR_ADDR_LEN].ulong != balen)
+		return -1;
+
+	memcpy(ba, vals->value[SYSFS_DEVATTR_BROADCAST].ptr, balen);
+	return 0;
+#else
+	return -1;
+#endif
+}
+
+/*
+ * set_device_broadcast_fallback() - last resort: fill ba with balen bytes
+ * of 0xff, warning on stderr unless "quiet".  Always returns 0.
+ */
+static int set_device_broadcast_fallback(struct device *device, unsigned char *ba, size_t balen)
+{
+	if (!quiet) {
+		fprintf(stderr, "WARNING: using default broadcast address.\n");
+	}
+
+	memset(ba, 0xff, balen);
+	return 0;
+}
+
+/*
+ * set_device_broadcast() - fill ba with the device's broadcast address.
+ *
+ * Sources are tried in order: ifaddrs data, sysfs data, and finally the
+ * all-ones fallback when both of the others fail.
+ */
+static void set_device_broadcast(struct device *dev, unsigned char *ba, size_t balen)
+{
+	if (set_device_broadcast_ifaddrs_one(dev, ba, balen, 0) != 0 &&
+	    set_device_broadcast_sysfs(dev, ba, balen) != 0)
+		set_device_broadcast_fallback(dev, ba, balen);
+}
+
int
main(int argc, char **argv)
{
int socket_errno;
int ch;
- uid_t uid = getuid();
int hb_mode = 0;
signal(SIGTERM, byebye);
signal(SIGPIPE, byebye);
-
- device = strdup("eth0");
-
+
+ limit_capabilities();
+
+#ifdef USE_IDN
+ setlocale(LC_ALL, "");
+#endif
+
+ enable_capability_raw();
+
s = socket(PF_PACKET, SOCK_DGRAM, 0);
socket_errno = errno;
- if (setuid(uid)) {
- perror("arping: setuid");
- exit(-1);
- }
+ disable_capability_raw();
while ((ch = getopt(argc, argv, "h?bfDUAqc:w:s:I:Vr:i:p:")) != EOF) {
switch(ch) {
@@ -367,7 +1073,7 @@ main(int argc, char **argv)
timeout = atoi(optarg);
break;
case 'I':
- device = optarg;
+ device.name = optarg;
break;
case 'f':
quit_on_reply=1;
@@ -376,7 +1082,7 @@ main(int argc, char **argv)
source = optarg;
break;
case 'V':
- printf("send_arp utility\n");
+ printf("send_arp utility, based on arping from iputils-%s\n", SNAPSHOT);
exit(0);
case 'p':
case 'i':
@@ -405,7 +1111,7 @@ main(int argc, char **argv)
*/
unsolicited = 1;
- device = argv[optind];
+ device.name = argv[optind];
target = argv[optind+1];
} else {
@@ -417,10 +1123,8 @@ main(int argc, char **argv)
target = *argv;
}
- if (device == NULL) {
- fprintf(stderr, "arping: device (option -I) is required\n");
- usage();
- }
+ if (device.name && !*device.name)
+ device.name = NULL;
if (s < 0) {
errno = socket_errno;
@@ -428,39 +1132,42 @@ main(int argc, char **argv)
exit(2);
}
- if (1) {
- struct ifreq ifr;
- memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, device, IFNAMSIZ-1);
- if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
- fprintf(stderr, "arping: unknown iface %s\n", device);
- exit(2);
- }
- ifindex = ifr.ifr_ifindex;
+ if (find_device() < 0)
+ exit(2);
- if (ioctl(s, SIOCGIFFLAGS, (char*)&ifr)) {
- perror("ioctl(SIOCGIFFLAGS)");
+ if (!device.ifindex) {
+ if (device.name) {
+ fprintf(stderr, "arping: Device %s not available.\n", device.name);
exit(2);
}
- if (!(ifr.ifr_flags&IFF_UP)) {
- if (!quiet)
- printf("Interface \"%s\" is down\n", device);
- exit(2);
- }
- if (ifr.ifr_flags&(IFF_NOARP|IFF_LOOPBACK)) {
- if (!quiet)
- printf("Interface \"%s\" is not ARPable\n", device);
- exit(dad?0:2);
- }
+ fprintf(stderr, "arping: device (option -I) is required.\n");
+ usage();
}
if (inet_aton(target, &dst) != 1) {
struct hostent *hp;
- hp = gethostbyname2(target, AF_INET);
+ char *idn = target;
+#ifdef USE_IDN
+ int rc;
+
+ rc = idna_to_ascii_lz(target, &idn, 0);
+
+ if (rc != IDNA_SUCCESS) {
+ fprintf(stderr, "arping: IDN encoding failed: %s\n", idna_strerror(rc));
+ exit(2);
+ }
+#endif
+
+ hp = gethostbyname2(idn, AF_INET);
if (!hp) {
fprintf(stderr, "arping: unknown host %s\n", target);
exit(2);
}
+
+#ifdef USE_IDN
+ free(idn);
+#endif
+
memcpy(&dst, hp->h_addr, 4);
}
@@ -480,9 +1187,13 @@ main(int argc, char **argv)
perror("socket");
exit(2);
}
- if (device) {
- if (setsockopt(probe_fd, SOL_SOCKET, SO_BINDTODEVICE, device, strlen(device)+1) == -1)
+ if (device.name) {
+ enable_capability_raw();
+
+ if (setsockopt(probe_fd, SOL_SOCKET, SO_BINDTODEVICE, device.name, strlen(device.name)+1) == -1)
perror("WARNING: interface is ignored");
+
+ disable_capability_raw();
}
memset(&saddr, 0, sizeof(saddr));
saddr.sin_family = AF_INET;
@@ -514,9 +1225,9 @@ main(int argc, char **argv)
close(probe_fd);
};
- me.sll_family = AF_PACKET;
- me.sll_ifindex = ifindex;
- me.sll_protocol = htons(ETH_P_ARP);
+ ((struct sockaddr_ll *)&me)->sll_family = AF_PACKET;
+ ((struct sockaddr_ll *)&me)->sll_ifindex = device.ifindex;
+ ((struct sockaddr_ll *)&me)->sll_protocol = htons(ETH_P_ARP);
if (bind(s, (struct sockaddr*)&me, sizeof(me)) == -1) {
perror("bind");
exit(2);
@@ -529,18 +1240,20 @@ main(int argc, char **argv)
exit(2);
}
}
- if (me.sll_halen == 0) {
+ if (((struct sockaddr_ll *)&me)->sll_halen == 0) {
if (!quiet)
- printf("Interface \"%s\" is not ARPable (no ll address)\n", device);
+ printf("Interface \"%s\" is not ARPable (no ll address)\n", device.name);
exit(dad?0:2);
}
he = me;
- memset(he.sll_addr, -1, he.sll_halen);
+
+ set_device_broadcast(&device, ((struct sockaddr_ll *)&he)->sll_addr,
+ ((struct sockaddr_ll *)&he)->sll_halen);
if (!quiet) {
printf("ARPING %s ", inet_ntoa(dst));
- printf("from %s %s\n", inet_ntoa(src), device ? : "");
+ printf("from %s %s\n", inet_ntoa(src), device.name ? : "");
}
if (!src.s_addr && !dad) {
@@ -548,6 +1261,8 @@ main(int argc, char **argv)
exit(2);
}
+ drop_capabilities();
+
set_signal(SIGINT, finish);
set_signal(SIGALRM, catcher);
@@ -556,7 +1271,7 @@ main(int argc, char **argv)
while(1) {
sigset_t sset, osset;
unsigned char packet[4096];
- struct sockaddr_ll from;
+ struct sockaddr_storage from;
socklen_t alen = sizeof(from);
int cc;
@@ -565,11 +1280,12 @@ main(int argc, char **argv)
perror("arping: recvfrom");
continue;
}
+
sigemptyset(&sset);
sigaddset(&sset, SIGALRM);
sigaddset(&sset, SIGINT);
sigprocmask(SIG_BLOCK, &sset, &osset);
- recv_pack(packet, cc, &from);
+ recv_pack(packet, cc, (struct sockaddr_ll *)&from);
sigprocmask(SIG_SETMASK, &osset, NULL);
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-ha/resource-agents.git
More information about the Debian-HA-Commits
mailing list