Heartbeat-3-0-7e3a82377fa8/.hg_archival.txt0000644000000000000000000000013611576626513020265 0ustar00usergroup00000000000000repo: 8448b17e67437947c48c639c6faa4371c4a14b3b node: 7e3a82377fa8c88b4d9ee47e29020d4531f4629a Heartbeat-3-0-7e3a82377fa8/.hgignore0000644000000000000000000001247211576626513017010 0ustar00usergroup00000000000000syntax: glob # Autofoo entries *.o *.la *.lo *.pyc .libs .deps *.cache .cvsignore compile configure configure.status configure.lineno depcomp aclocal.m4 libtool ltmain.sh ltconfig libltdl mkinstalldirs install-sh missing py-compile autom4te* libtool.m4 ltdl.m4 libltdl.tar autoconf autoheader automake include/ha_version.h include/hb_config.h include/stamp-h1 ylwrap # BEAM Entries *.beam parser-messages MISC_ERRORS cscope.files cscope.out patches updates logs # OS and Editor Artifacts .DS_Store *.diff *.patch *~ # Entries generated by configure cim/mof/register_providers.sh cim/mof/unregister_providers.sh crm/admin/crm_primitive.py crm/admin/cluster cts/LSBDummy cts/*.py doc/cibadmin.8 doc/startstop fencing/test/STONITHDBasicSanityCheck heartbeat.spec heartbeat/init.d/heartbeat heartbeat/lib/BasicSanityCheck heartbeat/lib/ResourceManager heartbeat/lib/TestHeartbeatComm heartbeat/lib/ha_config heartbeat/lib/ha_propagate heartbeat/lib/hb_addnode heartbeat/lib/hb_delnode heartbeat/lib/hb_setsite heartbeat/lib/hb_setweight heartbeat/lib/hb_standby heartbeat/lib/hb_takeover heartbeat/lib/mach_down heartbeat/lib/req_resource heartbeat/rc.d/ask_resources heartbeat/rc.d/hb_takeover heartbeat/shellfuncs include/ha_config.h include/pils/plugin.h include/stamp-h2 ldirectord/init.d/ldirectord ldirectord/ldirectord lib/plugins/stonith/external/ibmrsa lib/plugins/stonith/external/riloe lib/plugins/stonith/external/ssh lib/plugins/stonith/ribcl.py linux-ha/config.h linux-ha/config.h.in linux-ha/stamp-h1 lrm/test/LRMBasicSanityCheck lrm/test/simple_ops mgmt/client/haclient.py mgmt/client/mgmtcmd.py pkg/InfoFiles/pkginfo pkg/InfoFiles/postinstall pkg/InfoFiles/preinstall port/heartbeat/pkg-deinstall port/heartbeat/pkg-descr port/heartbeat/pkg-install port/heartbeat/pkg-plist port/portMakefile resources/OCF/AudibleAlarm resources/OCF/ClusterMon resources/OCF/Delay resources/OCF/Dummy resources/OCF/eDir88 resources/OCF/Evmsd resources/OCF/Filesystem resources/OCF/ICP resources/OCF/IPaddr resources/OCF/IPaddr2 resources/OCF/IPsrcaddr resources/OCF/LVM resources/OCF/LinuxSCSI resources/OCF/MailTo resources/OCF/Pure-FTPd resources/OCF/Raid1 resources/OCF/ServeRAID resources/OCF/Stateful resources/OCF/SysInfo resources/OCF/VIPArip resources/OCF/WAS resources/OCF/WinPopup resources/OCF/Xen resources/OCF/Xinetd resources/OCF/apache resources/OCF/db2 resources/OCF/drbd resources/OCF/ocf-shellfuncs resources/OCF/oracle resources/OCF/oralsnr resources/OCF/pgsql resources/OCF/pingd resources/OCF/portblock resources/OCF/EvmsSCC resources/OCF/ManageRAID resources/OCF/ManageVE resources/OCF/SAPDatabase resources/OCF/SAPInstance resources/OCF/SendArp resources/OCF/WAS6 resources/OCF/mysql resources/OCF/rsyncd resources/heartbeat/AudibleAlarm resources/heartbeat/Delay resources/heartbeat/Filesystem resources/heartbeat/ICP resources/heartbeat/IPaddr resources/heartbeat/IPaddr2 resources/heartbeat/IPsrcaddr resources/heartbeat/IPv6addr resources/heartbeat/LVM resources/heartbeat/LVSSyncDaemonSwap resources/heartbeat/LinuxSCSI resources/heartbeat/MailTo resources/heartbeat/OCF resources/heartbeat/Raid1 resources/heartbeat/SendArp resources/heartbeat/ServeRAID resources/heartbeat/WAS 
resources/heartbeat/WinPopup resources/heartbeat/Xinetd resources/heartbeat/apache resources/heartbeat/db2 resources/heartbeat/hto-mapfuncs resources/heartbeat/portblock snmp_subagent/SNMPAgentSanityCheck tools/ccdv tools/haresources2cib.py tsa_plugin/linuxha-adapter tsa_plugin/testrun.sh # Project build targets contrib/ipfail/ipfail contrib/mlock/mlock crm/admin/ccm_tool crm/admin/cibadmin crm/admin/crm_attribute crm/admin/crm_diff crm/admin/crm_failcount crm/admin/crm_master crm/admin/crm_mon crm/admin/crm_resource crm/admin/crm_standby crm/admin/crm_uuid crm/admin/crm_verify crm/admin/crmadmin crm/admin/crm_commands.py crm/admin/crm_sh crm/admin/crm_utils.py crm/admin/iso8601 crm/cib/cib crm/cib/cibmon crm/crm.dtd crm/crmd/atest crm/crmd/crmd crm/crmd/fsa_actions_by_state.png crm/crmd/fsa_inputs.png crm/crmd/fsa_inputs_by_action.png crm/pengine/pengine crm/pengine/ptest crm/tengine/tengine crm/tengine/ttest doc/ChangeLog doc/GettingStarted.txt doc/HardwareGuide.txt doc/Requirements.txt doc/crm_resource.8 doc/faqntips.txt doc/heartbeat_api.txt doc/rsync.txt fencing/stonithd/stonithd fencing/test/apitest heartbeat/findif heartbeat/heartbeat heartbeat/libnet_util/send_arp ldirectord/ldirectord.8 lib/clplumbing/base64_md5_test lib/clplumbing/ipctest lib/clplumbing/ipctransientclient lib/clplumbing/ipctransientserver lib/hbclient/api_test lib/stonith/meatclient lib/stonith/stonith logd/ha_logd logd/ha_logger logd/logtest lrm/admin/lrmadmin lrm/lrmd/lrmd lrm/test/apitest lrm/test/callbacktest lrm/test/plugintest membership/ccm/ccm membership/ccm/ccm_testclient membership/ccm/clmtest membership/quorumd/quorumd membership/quorumd/quorumdtest telecom/apphbd/apphbd telecom/apphbd/apphbtest telecom/recoverymgrd/conf_lex.c telecom/recoverymgrd/conf_yacc.c telecom/recoverymgrd/conf_yacc.h telecom/recoverymgrd/recoverymgrd tools/attrd tools/attrd_updater tools/cl_respawn tools/cl_status tools/pingd tools/dopd tools/drbd-peer-outdater lib/mgmt/pymgmt.py lib/mgmt/pymgmt_wrap.c tsa_plugin/ha_mgmt_client_wrap.c # Misc GPATH GRTAGS GSYMS GTAGS HTML TAGS .gres.* *.orig .gdb_history # Entries better done as regexp's to avoid matching too broadly syntax: regexp ^config\.* README$ Makefile$ Makefile.in$ Heartbeat-3-0-7e3a82377fa8/.hgsigs0000644000000000000000000000043011576626513016461 0ustar00usergroup000000000000001ca510a87ba6e94ff21674d61fceb6e60245b6cb 0 iEYEABECAAYFAktnIVUACgkQp+S8HSIpHPDt8QCgmW939gG/IIppG5pdICspWBtVBJEAoKBKFN8qEEq50Ekt+htuGgj/mLbn 4c65e827f4810f25924396c4578bf8b2debd63d8 0 iEYEABECAAYFAk0CA0gACgkQp+S8HSIpHPCliQCgpJ4DFqPz2GcpMdjinJXNC4PcJHgAmwZKXvot2b55fNpykWyQKUsq7Rg3 Heartbeat-3-0-7e3a82377fa8/.hgtags0000644000000000000000000000600511576626513016456 0ustar00usergroup000000000000002072dd703c807cec32986d5b5c442f57b2f6487e STABLE-2.0.6 39ae63e8a5c5c39b8045a3e64b5b2d81cef31a90 STABLE-0.4.9c 4add4d1bd07559bef39c6b2b98bec9c254602be9 STABLE-0.4.9e 5cb1c5659e96c6f2a1733576bb59a24d210a15e9 STABLE-2.0.5 6ced60584c12d23dc8aaf89200aed906b5c1b96c Beta-0.4.9f 7e6bf9c061eacd192994b80df48e29ab23f42a1c DR-0.4.8k 91de0c9c401cad1495246172fcffa8d052395790 STABLE-2.0.7 a7354fcf024e506c0ae46f605ce9d2b7f50b76d1 STABLE-2.0.1 b900e278d896b16f22fc0c276a258069a6bcaf97 Beta-0.4.9a bcf6470a05c4694db50efe8f3f6c997a466045b6 Series-Root-1.0 d3abc17dd262da83ca64810884aa08fc717c338e STABLE-2.0.4 d41dff6877cf7b483b660889da12eaf43a1ac476 STABLE-2.0.0 d92c52afa6842d4aec5222918fc444a9448e1c7f STABLE-1.1.5 47df73aa08d5d100fd14320220905fc179621d70 STABLE-2.0.2 e409605d5b7014f14e27190f5f62688f600b05a1 Series-Root-0.4.9 
f019c8dd3baa41cbda80a847537f9e134270d284 STABLE-2.0.3 f4843049a10d69fb2a424b508201f0e2731d26bf STABLE-1.1.3 f77f6462ecf824fe3530f75b51b8355e5a4237c3 Series-Root-1.2 2d298bca0d0af320752bfa293ac96ed08e2c6463 STABLE-2.0.8 91de0c9c401cad1495246172fcffa8d052395790 SLES10-GA-2.0.7-1.5 558427e03930e8dd1af7a41c47ec41e8d15c1326 SLE10-SP1 70067cb78a6e607fc18cb1aa88d896f5c9c39a39 STABLE-2.1.0 b0639ee14ba359a8d06fba16071504f2b66310e2 STABLE-2.1.1 9a995a8a70520703bd3946a2f3bcec2b20701a0e STABLE-2.1.2 1ad8497bc55db2a7861c5472d20226079f30fc4e obs-2.1.2-1 098c0993b1d0a075ae5dd551f84628a33eb69e43 obs-2.1.2-2 702e4f418ca82cf5248976a8da95a39b125166b8 obs-2.1.2-4 c492f19cb5831b1d7ae69c4b369693fde4dfcb1c obs-2.1.2-15 ef1be5978d973aef6424626ce81dc4474bdeb314 obs-2.1.2-24 4a3eac571f442c7cfcefc18fcaad35314460c1f6 STABLE-2.1.3 020df9293b7c7c52d979131225f32e25a4cdd1ed beta-2.99.0 4bbe943cf36c9a200d3aec9b019e3f11b5842dc9 beta-2.99.1 6671a027ff2f42e21f6e0c37483dd98b2885bd43 beta-2.99.2 3ab000a8e140c9602ddcf5c3b188b54995c09a9b beta-2.99.2 7d5b329e13ca485b43826bfb80679ce6b9dc5324 beta-2.99.2 9dbbc2cbd414036ea5e4e3a901fc3f48f5d5f1ad sle11-beta6 69386699962181fa8f8d6a1f4ace39d4a46df2e7 sle11-rc2 cad9fde7630c548b626b14964c1c689ee8239cf4 sle11-rc3 90ff997faa7288248ac57583b0c03df4c8e41bda sle11-rc5 4403e840c159912b460806febdf160beca7c986e sle11-rc5 0c40371abfa1e286d8c65845b0769053c05922b8 sle11-rc7 36b85470e8db55e4e861787413b54035984f0f06 sle11-rc9 36b85470e8db55e4e861787413b54035984f0f06 sle11-rc9 805ece641bcfa71df6d75059511b9d8e717f78d8 sle11-rc9 8b0fe39de012f1cfa2176e9c41feee8a7274daf0 STABLE-3.0.2-rc1 85a5167809d5bcb4a77a5ffbcd9bad016f4fb544 STABLE-3.0.2-rc2 a62025bb7439065461a2577c81a64de886e0d7d0 STABLE-3.0.2 a62025bb7439065461a2577c81a64de886e0d7d0 STABLE-3.0.2 0000000000000000000000000000000000000000 STABLE-3.0.2 0000000000000000000000000000000000000000 STABLE-3.0.2 1ca510a87ba6e94ff21674d61fceb6e60245b6cb STABLE-3.0.2 1b11383a6618bf2cd538be55ba8ff7aee1b1840b STABLE-3.0.3-rc1 645cec2ec68eb0cd41aa12ce282a23df45885561 STABLE-3.0.3 c142655b6ffb1267e25ab3495db81a8d79a60233 STABLE-3.0.4-rc1 fcd56a9dd18c286a8c6ad639997a56b5ea40d441 STABLE-3.0.4 9cc86d61f55df7d3684aeda1c7df7eb5dd1c20e3 STABLE-3.0.5-rc1 545554f6fc0adef98cb289c1779daa5b63ea08e1 STABLE-3.0.5 Heartbeat-3-0-7e3a82377fa8/ConfigureMe0000755000000000000000000002026111576626513017331 0ustar00usergroup00000000000000#!/bin/sh # # ConfigureMe: apply appropriate default local configuration options # # Copyright: 2001 Alan Robertson # License: GNU General Public License (GPL) # Usage() { cat <<-! Usage: $0 {configure|make|install|dist|distcheck|package|flags|bootstrap} [--xxx=yyy] $0 is a wrapper to invoke GNU 'configure' with options that match common conventions associated with this machine (i.e. $CFENV) You may also, if you wish, supply additional 'configure' options in their usual '--xxx=yyy' form. It will also build, make, install or create packages for the build environment. $0 does not know how to create packages for every environment, nor is the information on "common conventions" necessarily correct. Patches to this process are solicited -- especially in these areas. ! if [ -x ./configure ] then echo "Legal configure arguments are:" ./configure --help fi exit 1 } # # The vast majority of cases here have not been tested yet... # If you don't think the treatment of your favorite OS is right, # then submit a patch. Some of these conventions were wild guesses... 
# # autoconf "--enable-XXX" options generally have their default {yes|no} # set, and perhaps acted upon, in "configure.in". # # But we will also allow many of them to take a "try" setting, # interpreted there as: # "try to act as if 'yes' had been specified, but if this proves # troublesome, then continue as if 'no' had been specified". # By using these from here in "ConfigureMe", this allows the beginner # to make rapid, successful progress (albeit suboptimal) with an # implicit sense of achievement, rather than the demoralisation from # an unnecessarily failed 'yes' specification. # (David Lee, 2005) cmd=$0 pathtotop=`dirname ${cmd}` PACKAGECMD="" ConfigureLinux() { DFLAGS="" if [ -f /etc/UnitedLinux-release -a -s /etc/UnitedLinux-release ] then distro="United Linux" PACKAGECMD="$MAKE_CMD rpm" DFLAGS="--with-ucd-snmp-devel=ucdsnmp --with-group-id=90 --with-ccmuser-id=90" elif [ -f /etc/SuSE-release -a -s /etc/SuSE-release ] then distro="SuSE Linux" PACKAGECMD="$MAKE_CMD rpm" # -fno-unit-at-a-time is replaced by -fno-toplevel-reorder in gcc4.2 # But apparently it shouldn't be required # http://www.gnu.org/software/gcc/gcc-4.2/changes.html #export CFLAGS="$CFLAGS -fno-unit-at-a-time" DFLAGS="--with-group-id=90 --with-ccmuser-id=90" R=`cat /etc/SuSE-release | grep 'VERSION *= *' | sed -e 's%.*= *%%'` case $R in [78].*) DFLAGS="$DFLAGS --mandir=/usr/share/man --disable-snmp-subagent --disable-swig --with-ucd-snmp-devel=ucdsnmp";; esac elif [ -f /etc/redhat-release -a -s /etc/redhat-release ] then distro="RedHat Linux" PACKAGECMD="$MAKE_CMD rpm" DFLAGS="--mandir=/usr/share/man" elif [ -f /etc/conectiva-release -a -s /etc/conectiva-release ] then distro="Conectiva Linux" PACKAGECMD="$MAKE_CMD rpm" DFLAGS="--with-group-id=17 --mandir=/usr/share/man --infodir=/usr/share/info --with-ccmuser-id=17" elif [ -f /etc/debian_version -a -s /etc/debian_version ] then distro="Debian GNU/Linux" PACKAGECMD="$MAKE_CMD deb" DFLAGS="--mandir=/usr/share/man" elif [ -f /etc/gentoo-release -a -s /etc/gentoo-release ] then distro="Gentoo Linux" PACKAGECMD="$MAKE_CMD dist" DFLAGS="--with-group-name=cluster --with-ccmuser-name=cluster --with-group-id=65 --with-ccmuser-id=65" else distro="Generic Linux" fi CFENV="$distro" FLAGS="--prefix=/usr --sysconfdir=/etc --localstatedir=/var $DFLAGS --disable-rpath" } ConfigureAIX() { CFENV="AIX (freeware toolbox)" FLAGS="--disable-ldirectord --prefix /opt/freeware" } ConfigureFreeBSD() { FLAGS="--prefix=/usr/local --sysconfdir=/usr/local/etc --localstatedir=/var --enable-all --with-group-id=90 --with-ccmuser-id=90 --disable-rpath" CFENV="FreeBSD" } ConfigureOpenBSD() { FLAGS="--prefix=/usr/local --sysconfdir=/etc --localstatedir=/var --with-group-id=584 --with-ccmuser-id=584 --disable-rpath --with-group-name=_heartbeat --with-ccmuser-name=_heartbeat --with-ocf-root=/usr/local/lib/ocf/ --enable-fatal-warnings=no" export LDFLAGS="-liconv -L/usr/local/lib/libnet-1.0" export LIBNETCONFIG=/usr/local/bin/libnet-config-1.0 export AUTOCONF_VERSION=2.61 CFENV="OpenBSD" } ConfigureNetBSD() { FLAGS="--disable-ldirectord --prefix=/usr/local --sysconfdir=/usr/local/etc --localstatedir=/var --with-group-id=90 --with-ccmuser-id=90 --disable-rpath" CFENV="NetBSD" } ConfigureGenericBSD() { FLAGS="--disable-ldirectord --prefix=/usr/local --sysconfdir=/usr/local/etc --localstatedir=/var --with-group-id=90 --with-ccmuser-id=90 --disable-rpath" CFENV="Generic BSD" } ConfigureSolaris() { # PKGNAME: see comment in "configure.in" PKGNAME="LXHAhb" FLAGS="--disable-ldirectord --prefix=/opt/$PKGNAME 
--sysconfdir=/etc/opt/$PKGNAME --localstatedir=/var/opt/$PKGNAME --with-pkgname=$PKGNAME --disable-rpath" CFENV="Solaris" PACKAGECMD="$MAKE_CMD pkg" } ConfigureDarwin() { for dir in / /sw /opt/local; do if [ -d $dir ]; then install_prefix=$dir fi done FLAGS="--prefix=${install_prefix}" FLAGS="$FLAGS --with-initdir=/private/etc/mach_init.d" FLAGS="$FLAGS --localstatedir=${install_prefix}/var" FLAGS="$FLAGS --with-group-name=admin --with-ccmuser-name=daemon" FLAGS="$FLAGS --enable-fatal-warnings=yes" FLAGS="$FLAGS --disable-rpath" export CFENV="Darwin" } ConfigureGenericUNIX() { echo "Configuring for generic UNIX system" FLAGS="--disable-ldirectord --prefix=/usr --sysconfdir=/etc --localstatedir=/var" CFENV="Generic UNIX" } lcase() { # Convert to lower-case in a portable way if [ X"`echo A | dd conv=lcase 2>/dev/null`" = Xa ] then dd conv=lcase 2>/dev/null else tr ['A-Z'] ['a-z'] fi } GetConfigureFLAGS() { if [ "X$MAKE" != "X" ] then MAKE_CMD="$MAKE" elif which gmake > /dev/null then MAKE_CMD="gmake" else MAKE_CMD="make" fi case $CROSSCOMPILE in yes) GetCrossConfigureFlags;; *) GetNativeConfigureFlags;; esac } GetNativeConfigureFlags() { case `uname -s | lcase` in linux) ConfigureLinux;; aix) ConfigureAIX;; freebsd) ConfigureFreeBSD;; openbsd) ConfigureOpenBSD;; netbsd) ConfigureOpenBSD;; *bsd) ConfigureGenericBSD;; sunos) ConfigureSolaris;; darwin) ConfigureDarwin;; *) ConfigureGenericUNIX;; esac } GetCrossConfigureFlags() { case $CC in # Don't force endianness on ARM - it can be either type *arm*) FLAGS="--prefix=/usr/local/arm-linux/arm-linux --sysconfdir=/etc --localstatedir=/var";; *) echo "Error: Unsupported cross-compiler: [$CC]"; exit 1;; esac } Run() { echo "Running $@" "$@" } PackageItUp() { if [ "X$PACKAGECMD" = X ] then echo "Do not know how to build a package for $CFENV" >&2 return 1 else Run $PACKAGECMD fi } do_configure () { # Do autotools bootstrap if needed. # Should only be needed by developers and geeks because any # distributed stable versions (tar.gz etc.) should already have # "configure" etc. set up. if [ ! -x ${pathtotop}/configure ] then Run ${pathtotop}/bootstrap "$@" else Run ${pathtotop}/configure "$@" fi } cmd=`echo $1 | lcase` case $cmd in cross-*) CROSSCOMPILE=yes; cmd=`echo $cmd |cut -c7-`;; *) CROSSCOMPILE=no;; esac GetConfigureFLAGS echo "" echo "Configure flags for $CFENV: $FLAGS" >&2 if [ $# -le 0 ] then Usage fi shift case $cmd in flags) echo $FLAGS $@ ;; cf|conf|configure) do_configure $FLAGS $@ ;; boot|bootstrap) rm -f ${pathtotop}/configure do_configure $FLAGS $@ ;; make|build) do_configure $FLAGS $@ && \ Run $MAKE_CMD;; install) do_configure $FLAGS $@ && \ Run $MAKE_CMD install ;; dist) do_configure $FLAGS $@ && \ Run $MAKE_CMD dist ;; distcheck) do_configure $FLAGS $@ && \ source ./heartbeat/lib/ha_config && \ Run $MAKE_CMD DESTDIR="$PWD/heartbeat-$VERSION/=inst" distcheck ;; pkg|package|rpm) do_configure $FLAGS $@ && \ PackageItUp ;; deb|dpkg) do_configure $FLAGS $@ && if which fakeroot > /dev/null; then PackageItUp else echo "" echo "Please install fakeroot if you want to build a deb." fi ;; *) Usage ;; esac Heartbeat-3-0-7e3a82377fa8/GNUmakefile0000644000000000000000000000267111576626513017257 0ustar00usergroup00000000000000# # Copyright (C) 2008 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # -include Makefile PROJECT = heartbeat TARFILE = $(PROJECT).tar.bz2 RPM_ROOT = $(shell pwd) RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ --define "_srcrpmdir $(RPM_ROOT)" \ --define "dist .$(DISTRO)" getdistro = $(shell test -e /etc/SuSE-release || echo fedora; test -e /etc/SuSE-release && echo suse) DISTRO ?= $(call getdistro) TAG ?= tip hgarchive: rm -f $(TARFILE) hg archive -t tbz2 -r $(TAG) $(TARFILE) echo `date`: Rebuilt $(TARFILE) srpm: hgarchive rm -f *.src.rpm rpmbuild -bs $(RPM_OPTS) $(PROJECT)-$(DISTRO).spec rpm: srpm @echo To create custom builds, edit the flags and options in $(PROJECT)-$(call getdistro).spec first rpmbuild $(RPM_OPTS) --rebuild $(RPM_ROOT)/*.src.rpm Heartbeat-3-0-7e3a82377fa8/Makefile.am0000644000000000000000000000724311576626513017241 0ustar00usergroup00000000000000# # linux-ha: Linux-HA code # # Copyright (C) 2002 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # ##auxdir = @ac_aux_dir@ ##AUX_DIST = $(auxdir)/install-sh $(auxdir)/missing \ ## $(auxdir)/mkinstalldirs ##AUX_DIST_EXTRA = $(auxdir)/readline.m4 $(auxdir)/sys_errlist.m4 \ ## $(auxdir)/sys_siglist.m4 EXTRA_DIST = bootstrap ConfigureMe README.in libltdl.tar RPMREL = @RPMREL@ RPM = @RPM@ RPMFLAGS = -ta @RPMTARGET@ TARFILE = @TARFILE@ AM_TAR = tar RPMDIR=$(HOME)/rpms WEBDIR=/home/alanr/ha-web/download HBWEBDIR=/home/alanr/ha-web/heartbeat RPMSRC=$(DESTDIR)$(RPMDIR)/SRPMS/$(PACKAGE)-$(VERSION)-$(RPMREL).src.rpm RPM386=$(DESTDIR)$(RPMDIR)/RPMS/i586/$(PACKAGE)-$(VERSION)-$(RPMREL).i586.rpm RPMstonith=$(DESTDIR)$(RPMDIR)/RPMS/i586/$(PACKAGE)-stonith-$(VERSION)-$(RPMREL).i586.rpm RPMpils=$(DESTDIR)$(RPMDIR)/RPMS/i586/$(PACKAGE)-pils-$(VERSION)-$(RPMREL).i586.rpm SHAREDIR=@HA_DATADIR@ HBSHAREDIR=@HA_NOARCHDATAHBDIR@ ALL_RPMS = $(RPMSRC) $(RPM386) $(RPMstonith) $(RPMpils) AUTOMAKE_OPTIONS = foreign ##ACLOCAL = aclocal -I $(auxdir) MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \ DRF/stamp-h.in libtool.m4 ltdl.m4 libltdl.tar coredir = @HA_COREDIR@ hauser = @HA_CCMUSER@ ## proc-ha is left out from SUBDIRS (completely obsolete) # 'buildtools' must be first in the list: tools to aid building of remainder. 
SUBDIRS = buildtools $(LIBLTDL_DIR) pkg port replace include lib \ heartbeat membership telecom resources tools doc cts \ config contrib HANDY_DOCS = doc/ChangeLog doc/GettingStarted.html doc/DirectoryMap.txt HBDOCS = doc/heartbeat_api.html # Pass these to configure when running "make distcheck" DISTCHECK_CONFIGURE_FLAGS = --with-initdir=prefix rpm: dist $(RPM) $(RPMFLAGS) $(TARFILE) February 2006 dnl dnl License: GNU General Public License (GPL) dnl AM_CHECK_PYTHON_HEADERS: Find location of python include files. dnl Taken from: dnl http://source.macgimp.org/ dnl which is GPL and is attributed to James Henstridge. dnl dnl AM_CHECK_PYTHON_HEADERS([ACTION-IF-POSSIBLE], [ACTION-IF-NOT-POSSIBLE]) dnl Imports: dnl $PYTHON dnl Exports: dnl PYTHON_INCLUDES AC_DEFUN([AM_CHECK_PYTHON_HEADERS], [AC_REQUIRE([AM_PATH_PYTHON]) AC_MSG_CHECKING(for headers required to compile python extensions) dnl deduce PYTHON_INCLUDES py_prefix=`$PYTHON -c "import sys; print sys.prefix"` py_exec_prefix=`$PYTHON -c "import sys; print sys.exec_prefix"` PYTHON_INCLUDES="-I${py_prefix}/include/python${PYTHON_VERSION}" if test "$py_prefix" != "$py_exec_prefix"; then PYTHON_INCLUDES="$PYTHON_INCLUDES -I${py_exec_prefix}/include/python${PYTHON_VERSION}" fi AC_SUBST(PYTHON_INCLUDES) dnl check if the headers exist: save_CPPFLAGS="$CPPFLAGS" CPPFLAGS="$CPPFLAGS $PYTHON_INCLUDES" AC_TRY_CPP([#include ],dnl [AC_MSG_RESULT(found) $1],dnl [AC_MSG_RESULT(not found) $2]) CPPFLAGS="$save_CPPFLAGS" ]) Heartbeat-3-0-7e3a82377fa8/autogen.sh0000755000000000000000000000011111576626513017171 0ustar00usergroup00000000000000#!/bin/sh echo Please run bootstrap instead of autogen.sh echo exit 1 Heartbeat-3-0-7e3a82377fa8/beam.tcl0000644000000000000000000003554211576626513016620 0ustar00usergroup00000000000000# Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2004 - 2005 International Business Machines, Inc. source beam_default_parms.tcl ####################################################################### # Project standards issues ####################################################################### set beam::allocation_may_return_null "yes" set beam::MISTAKE21::enabling_policy "unsafe" set beam::MISTAKE21::enabling_policy "nonportable" set beam::WARNING6::enabling_policy "always" set beam::WARNING10::enabling_policy "always" # set beam::WARNING14::enabling_policy "always" # We've disabled WARNING14 due to a 3.0 BEAM bug. #- WARNING14 /*incompatible types*/ >>>WARNING14_cib_client_connect_d323470040e402e4 #"callbacks.c", line 216: Comparing `client_callback' of type `gboolean (*)(IPC_Channel *, gpointer)' with `cib_null_callback' of type `gboolean (*)(IPC_Channel *, gpointer)' # The following two are BEAM bugs... Hope they get fixed soon... set beam::WARNING14::enabling_policy "" set beam::MISTAKE4::enabling_policy "" set beam::WARNING15::enabling_policy "same_line" set beam::PORTABILITY1::enabling_policy "always" set beam::PORTABILITY3::enabling_policy "always" set beam::PORTABILITY5::enabling_policy "always" beam::attribute_by_signature { advisory ( explanation = "Never use g_timeout_add() and friends. They are buggy. Use our Gmain_timeout_add() or Gmain_timeout_add_full() instead. They work correctly.", category = unsafe ) } "g_timeout_add" "g_timeout_add_full" beam::attribute_by_signature { advisory ( explanation = "Binary data is to be avoided except for very rare circumstances. 
It is not portable, and so should be avoided when at all possible.", category = nonportable ) } "cl_get_binary" "cl_msg_addbin" "cl_msg_modbin" "ha_get_binary" "ha_msg_addbin" "ha_msg_modbin" # #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use strnlen(3) instead.", # category = unsafe # ) #} "strlen" # #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use strncmp(3) instead.", # category = unsafe # ) #} "strcmp" # # #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use strncpy(3)instead.", # category = unsafe # ) #} "strcpy" #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use strncat(3) instead.", # category = unsafe # ) #} "strcat" #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use snprintf(3) instead.", # category = unsafe # ) #} "sprintf" #beam::attribute_by_signature { # advisory ( # explanation = "This function is not safe. Use vsnprintf(3) instead.", # category = unsafe # ) #} "vsprintf" beam::attribute_by_signature { advisory ( explanation = "This function is not safe. Use fgets(3) instead.", category = unsafe ) } "gets" ####################################################################### # useful project definitions... ####################################################################### beam::attribute_by_signature { noreturn } "exit" "cleanexit" "yy_fatal_error" "usage" ####################################################################### # Things broken outside of our control... ####################################################################### lappend beam::MISTAKE15::disabled_macros YYSTYPE XSRETURN LT_STMT_START __DBGTRACE set beam::ERROR33::disabled_files "/*/*glib*/glib.h" set beam::MISTAKE5::disabled_files "/*/*glib*/gstring.h" lappend beam::WARNING15::disabled_files "*/*glib*/*.h" "/usr/*include/*.h" set beam::ERROR7::disabled_files "*/lib/bindings/perl/cl_raw/cl_raw_wrap.c" set beam::disabled_files "*/libltdl/*" # I think this yydestruct problem is a BEAM bug... set beam::MISTAKE1::disabled_functions "yydestruct" set beam::ERROR33::disabled_functions "g_bit_nth_msf" lappend beam::WARNING15::disabled_files "conf_yacc.c" lappend beam::WARNING15::disabled_files "conf_lex.c" lappend beam::WARNING15::disabled_files "pymgmt_wrap.c" lappend beam::ERROR2::disabled_files "pymgmt_wrap.c" lappend beam::ERROR9::disabled_files "pymgmt_wrap.c" ####################################################################### # Stuff missing from glibc definitions ####################################################################### beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = return_null, resource = heap_memory ), property (index = return, num_dereference = 0, type = provides, property_name = "memory allocation source", property_value = "from malloc" ), advisory ( explanation = "This function is not safe. 
Use strndup() instead.", category = unsafe ) } "strdup" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = return_null, resource = heap_memory ), property (index = return, num_dereference = 0, type = provides, property_name = "memory allocation source", property_value = "from malloc" ), } "strndup" ####################################################################### beam::attribute_by_signature { allocator ( size_index = 1, return_index = return, initial_state = uninitialized, if_size_is_0 = error, if_size_is_negative = error, if_out_of_memory = return_null, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from cl_malloc" ) } "cl_malloc" beam::attribute_by_signature { allocator ( size_index = 1, multiplier_index = 2, return_index = return, initial_state = initialized_to_zero, if_size_is_0 = error, if_size_is_negative = error, if_out_of_memory = return_null, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from cl_malloc" ) } "cl_calloc" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = return_null, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from cl_malloc" ) } "cl_strdup" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, num_dereference = 0, property_name = "memory allocation source", property_value = "from cl_malloc" ) } "cl_free" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = ok, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from msg_to_op" ) } "msg_to_op" "lrm_op_new" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, num_dereference = 0, property_name = "memory allocation source", property_value = "from msg_to_op" ) } "free_op" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = ok, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_op_new" ) } "lrmd_op_new" "lrmd_op_copy" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_op_new" ) } "lrmd_op_destroy" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = ok, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_client_new" ) } "lrmd_client_new" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_client_new" ) } "lrmd_client_destroy" 
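#
# Illustration only (hypothetical C caller, not code from this tree): given
# the allocator/deallocator pairs declared above, BEAM tracks the returned
# pointer as a heap_memory resource and can flag a leak in code such as
#	client = lrmd_client_new();
#	if (client == NULL) return;
#	return;   /* reported: never released via lrmd_client_destroy() */
# since the "from lrmd_client_new" property attached to the return value is
# never consumed by the matching deallocator.
#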
beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = ok, resource = heap_memory ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_rsc_new" ) } "lrmd_rsc_new" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, num_dereference = 0, property_name = "memory allocation source", property_value = "from lrmd_rsc_new" ) } "lrmd_rsc_destroy" beam::resource_create { name = "cl_msg", display = "cl_msg", allocating_verb = "creating", allocated_verb = "created", freeing_verb = "destroying", freed_verb = "destroyed" } beam::attribute_by_signature { allocator ( size_index = 1, return_index = return, initial_state = initialized_to_unknown, if_size_is_0 = error, if_size_is_negative = error, if_out_of_memory = return_null, resource = cl_msg ), property (index = return, type = provides, num_dereference = 0, property_name = "memory allocation source", property_value = "from cl_msg_new" ) } "ha_msg_new" "cl_msg_new" "cl_msg_copy" "ha_msg_copy" "string2msg" "string2msg_ll" "wirefmt2msg" "wirefmt2msg_ll" "netstring2msg" "msgfromstream_string" "msgfromstream_netstring" "msgfromstream" "msgfromIPC" "msgfromIPC_noauth" "msgfromIPC_ll" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = cl_msg ), property (index = 1, type = requires, property_name = "memory allocation source", property_value = "from cl_msg_new" ) } "cl_msg_del" "ha_msg_del" # # glib memory malloc/free things # # Note that glib memory allocation will *never* fail. # # It will abort(3) instead. # # So regardless of what policies you have for other memory, # glib memory needs then have the if_out_of_memory = ok attribute on all # the allocators, and be shown as from a different source. # This is true for all glib data structures. 
# beam::attribute_by_signature { allocator ( size_index = 1, return_index = return, initial_state = uninitialized, if_size_is_0 = error, if_size_is_negative = error, if_out_of_memory = ok, ), property (index = return, type = provides, property_name = "memory allocation source", property_value = "from g_malloc" ) } "g_malloc" beam::attribute_by_signature { allocator ( size_index = 1, return_index = return, initial_state = initialized_to_zero, if_size_is_0 = error, if_size_is_negative = error, if_out_of_memory = ok, ), property (index = return, type = provides, property_name = "memory allocation source", property_value = "from g_malloc" ) } "g_malloc0" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = heap_memory ), property (index = 1, type = requires, property_name = "memory allocation source", property_value = "from g_malloc" ) } "g_free" beam::attribute_by_signature { allocator ( return_index = return, initial_state = initialized_to_unknown, if_out_of_memory = ok, resource = heap_memory ), property (index = return, num_dereference = 0, type = provides, property_name = "memory allocation source", property_value = "from g_malloc" ) } "g_strdup" # # Glib hash tables - GHashTable # beam::resource_create { name = "GHashTable", display = "GHashTable", allocating_verb = "creating", allocated_verb = "created", freeing_verb = "destroying", freed_verb = "destroyed" } beam::attribute_by_signature { allocator ( return_index = return, if_out_of_memory = ok, initial_state = initialized_to_unknown, resource = GHashTable ), property (index = return, type = provides, property_name = "memory allocation source", property_value = "from g_hash_table_new" ) } "g_hash_table_new" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = GHashTable ), property (index = 1, type = requires, property_name = "memory allocation source", property_value = "from g_hash_table_new" ) } "g_hash_table_destroy" # # Glib doubly linked lists - GList # beam::resource_create { name = "GList", display = "GList", allocating_verb = "creating", allocated_verb = "created", freeing_verb = "destroying", freed_verb = "destroyed" } beam::attribute_by_signature { allocator ( return_index = return, if_out_of_memory = ok, initial_state = initialized_to_unknown, resource = GList ), property (index = return, type = provides, property_name = "memory allocation source", property_value = "from g_list_alloc" ) } "g_list_alloc" beam::attribute_by_signature { deallocator ( pointer_index = 1, resource = GList ), property (index = 1, type = requires, property_name = "memory allocation source", property_value = "from g_list_alloc" ) } "g_list_free" "g_list_free1" # # Glib callback hooks - GHook # beam::resource_create { name = "GHook", display = "GHook", allocating_verb = "creating", allocated_verb = "created", freeing_verb = "destroying", freed_verb = "destroyed" } beam::attribute_by_signature { allocator ( return_index = return, if_out_of_memory = ok, initial_state = initialized_to_unknown, resource = GHook ), property (index = return, type = provides, property_name = "memory allocation source", property_value = "from g_hook_alloc" ) } "g_hook_alloc" beam::attribute_by_signature { deallocator ( pointer_index = 2, resource = GHook ), property (index = 1, type = requires, property_name = "memory allocation source", property_value = "from g_hook_alloc" ) } "g_hook_free" "g_hook_unref" Heartbeat-3-0-7e3a82377fa8/bootstrap0000755000000000000000000001451011576626513017143 0ustar00usergroup00000000000000#!/bin/sh # 
# License: GNU General Public License (GPL) # Copyright 2001 horms # (heavily mangled by alanr) # # bootstrap: set up the project and get it ready to make # # Basically, we run autoconf, automake and libtool in the # right way to get things set up for this environment. # # We also look and see if those tools are installed, and # tell you where to get them if they're not. # # Our goal is to not require dragging along anything # more than we need. If this doesn't work on your system, # (i.e., your /bin/sh is broken) send us a patch. # # This code loosely based on the corresponding named script in # enlightenment, and also on the sort-of-standard autoconf # bootstrap script. # Run this to generate all the initial makefiles, etc. testProgram() { cmd=$1 if [ -z "$cmd" ]; then return 1; fi arch=`uname -s` # Make sure the which is in an if-block... on some platforms it throws exceptions # # The ERR trap is not executed if the failed command is part # of an until or while loop, part of an if statement, part of a && # or || list. if which $cmd /dev/null 2>&1 then : else return 1 fi # The GNU standard is --version if $cmd --version /dev/null 2>&1 then return 0 fi # Maybe it suppports -V instead if $cmd -V /dev/null 2>&1 then return 0 fi # Nope, the program seems broken return 1 } srcdir=`dirname $0` CONFIG=$srcdir/configure if [ X$srcdir = "X" ] then srcdir=. fi case "$*" in --help) IsHelp=yes;; -?) IsHelp=yes; set -- --help;; *) IsHelp=no;; esac arch=`uname -s` # Disable the errors on FreeBSD until a fix can be found. if [ ! "$arch" = "FreeBSD" ]; then set -e # # All errors are fatal from here on out... # The shell will complain and exit on any "uncaught" error code. # # # And the trap will ensure sure some kind of error message comes out. # trap 'echo ""; echo "$0 exiting due to error (sorry!)." >&2' 0 fi HERE=`pwd` cd $srcdir RC=0 gnu="ftp://ftp.gnu.org/pub/gnu" # Check for Autoconf pkg="autoconf" URL=$gnu/$pkg/ for command in autoconf autoconf213 autoconf253 autoconf259 do if testProgram $command == 1 then : OK $pkg is installed autoconf=$command autoheader=`echo "$autoconf" | sed -e 's/autoconf/autoheader/'` autom4te=`echo "$autoconf" | sed -e 's/autoconf/autmo4te/'` autoreconf=`echo "$autoconf" | sed -e 's/autoconf/autoreconf/'` autoscan=`echo "$autoconf" | sed -e 's/autoconf/autoscan/'` autoupdate=`echo "$autoconf" | sed -e 's/autoconf/autoupdate/'` ifnames=`echo "$autoconf" | sed -e 's/autoconf/ifnames/'` fi done # Check to see if we got a valid command. if $autoconf --version /dev/null 2>&1 then echo "Autoconf package $autoconf found." else RC=$? cat <<-EOF >&2 You must have $pkg installed to compile the linux-ha package. Download the appropriate package for your system, or get the source tarball at: $URL EOF fi # Create local copy so that the incremental updates will work. rm -f ./autoconf ln -s `which $autoconf` ./autoconf # Check for automake pkg="automake" URL=$gnu/$pkg/ for command in automake automake14 automake-1.4 automake15 automake-1.5 automake17 automake-1.7 automake19 automake-1.9 do if testProgram $command then : OK $pkg is installed automake=$command aclocal=`echo "$automake" | sed -e 's/automake/aclocal/'` fi done # Check to see if we got a valid command. if $automake --version /dev/null 2>&1 then echo "Automake package $automake found." else RC=$? cat <<-EOF >&2 You must have $pkg installed to compile the linux-ha package. Download the appropriate package for your system, or get the source tarball at: $URL EOF fi # Create local copy so that the incremental updates will work. 
rm -f ./automake ln -s `which $automake` ./automake # Check for Libtool pkg="libtool" for command in libtool libtool14 libtool15 glibtool do URL=$gnu/$pkg/ if testProgram $command then : OK $pkg is installed libtool=$command libtoolize=`echo "$libtool" | sed -e 's/libtool/libtoolize/'` fi done # Check to see if we got a valid command. if $libtool --version /dev/null 2>&1 then echo "Libtool package $libtool found." else RC=$? cat <<-EOF >&2 You must have $pkg installed to compile the linux-ha package. Download the appropriate package for your system, or get the source tarball at: $URL EOF fi # Create local copy so that the incremental updates will work. rm -f ./libtool ln -s `which $libtool` ./libtool case $RC in 0) ;; *) exit $RC;; esac case $IsHelp in yes) $CONFIG "$@"; trap '' 0; exit 0;; esac oneline() { read x; echo "$x" } LT_version=`$libtool --version | oneline | sed -e 's%^[^0-9]*%%' -e s'% .*%%'` LT_majvers=`echo "$LT_version" | sed -e 's%\..*%%'` LT_minvers=`echo "$LT_version" | sed -e 's%^[^.]*\.%%' ` LT_minnum=`echo "$LT_minvers" | sed -e 's%[^0-9].*%%'` if [ $LT_majvers -lt 1 ] || [ $LT_majvers = 1 -a $LT_minnum -lt 4 ] then echo "Minimum version of libtool is 1.4. You have $LT_version installed." exit 1 fi echo $aclocal $ACLOCAL_FLAGS $aclocal $ACLOCAL_FLAGS # Create local copy so that the incremental updates will work. rm -f ./autoheader ln -s `which $autoheader` ./autoheader if echo $autoheader --version < /dev/null > /dev/null 2>&1 $autoheader --version < /dev/null > /dev/null 2>&1 then echo $autoheader $autoheader fi rm -rf libltdl libltdl.tar echo $libtoolize --ltdl --force --copy GREP_OPTIONS= $libtoolize --ltdl --force --copy || { if test -f /etc/debian_version; then echo "" echo "Running Debian? You probably need to install libltdl3-dev." fi exit 1 } echo $aclocal $ACLOCAL_FLAGS $aclocal $ACLOCAL_FLAGS # Emulate the old --ltdl-tar option... # If the libltdl directory is required we will unpack it later tar -cf libltdl.tar libltdl rm -rf libltdl echo $automake --add-missing --include-deps --copy $automake --add-missing --include-deps --copy echo $autoconf $autoconf test -f libtool.m4 || touch libtool.m4 test -f ltdl.m4 || touch ltdl.m4 cd $HERE if [ $# -lt 1 ] then echo Now run $CONFIG. else echo $CONFIG "$@" $CONFIG "$@" echo "Now type 'gmake' to compile the system, noting that" echo "'gmake' is often available as 'make'." echo fi trap '' 0 Heartbeat-3-0-7e3a82377fa8/buildtools/Makefile.am0000644000000000000000000000201511576626513021411 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # In the main build, under "--enable-pretty", ccdv is used as a front end # for the main C compiler. Bootstrapping this needs the real C compiler. 
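# For reference, ccdv's own documented usage pattern (see the Usage() text in
# ccdv.c below) wraps a compile rule like this; it is shown here only as an
# example and is not a rule defined in this Makefile:
#	.c.o:
#		@ccdv $(CC) $(CFLAGS) $(DEFS) $(CPPFLAGS) $< -c
# ccdv itself must therefore be built with the real compiler, hence: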
CC = $(REAL_CC) MAINTAINERCLEANFILES = Makefile.in noinst_PROGRAMS = ccdv ccdv_SOURCES = ccdv.c Heartbeat-3-0-7e3a82377fa8/buildtools/ccdv.c0000644000000000000000000002317611576626513020453 0ustar00usergroup00000000000000/* ccdv.c * * Copyright (C) 2002-2003, by Mike Gleason, NcFTP Software. * All Rights Reserved. * * Licensed under the GNU General Public License. */ #include #include #include #include #include #include #include #include #include #define SETCOLOR_SUCCESS (gANSIEscapes ? "\033\1331;32m" : "") #define SETCOLOR_FAILURE (gANSIEscapes ? "\033\1331;31m" : "") #define SETCOLOR_WARNING (gANSIEscapes ? "\033\1331;33m" : "") #define SETCOLOR_NORMAL (gANSIEscapes ? "\033\1330;39m" : "") #define TEXT_BLOCK_SIZE 8192 #define INDENT 2 #define TERMS "vt100:vt102:vt220:vt320:xterm:xterm-color:ansi:linux:scoterm:scoansi:dtterm:cons25:cygwin" static size_t gNBufUsed = 0, gNBufAllocated = 0; static char *gBuf = NULL; static int gCCPID; static char gAction[64] = ""; static char gTarget[64] = ""; static char gAr[32] = ""; static char gArLibraryTarget[64] = ""; static int gDumpCmdArgs = 0; static char gArgsStr[800]; static int gColumns = 80; static int gANSIEscapes = 0; static int gExitStatus = 95; int main(int argc, char **argv); static void DumpFormattedOutput(void) { char *cp; char spaces[8 + 1] = " "; char *saved; int curcol; int i; curcol = 0; saved = NULL; for (cp = gBuf + ((gDumpCmdArgs == 0) ? strlen(gArgsStr) : 0); ; cp++) { if (*cp == '\0') { if (saved != NULL) { cp = saved; saved = NULL; } else break; } if (*cp == '\r') continue; if (*cp == '\t') { saved = cp + 1; cp = spaces + 8 - (8 - ((curcol - INDENT - 1) % 8)); } if (curcol == 0) { for (i = INDENT; --i >= 0; ) putchar(' '); curcol = INDENT; } putchar(*cp); if (++curcol == (gColumns - 1)) { putchar('\n'); curcol = 0; } else if (*cp == '\n') curcol = 0; } free(gBuf); } /* DumpFormattedOutput */ #if PROGRESS_TWIRL /* Difftime(), only for timeval structures. */ static void TimeValSubtract(struct timeval *tdiff, struct timeval *t1, struct timeval *t0) { tdiff->tv_sec = t1->tv_sec - t0->tv_sec; tdiff->tv_usec = t1->tv_usec - t0->tv_usec; if (tdiff->tv_usec < 0) { tdiff->tv_sec--; tdiff->tv_usec += 1000000; } } /* TimeValSubtract */ #endif static void Wait(void) { int pid2, status; do { status = 0; pid2 = (int) waitpid(gCCPID, &status, 0); } while (((pid2 >= 0) && (! WIFEXITED(status))) || ((pid2 < 0) && (errno == EINTR))); if (WIFEXITED(status)) gExitStatus = WEXITSTATUS(status); } /* Wait */ static int SlurpProgress(int fd) { char s1[71]; char *newbuf; #if PROGRESS_TWIRL int nready; fd_set ss; #endif size_t ntoread; ssize_t nread; struct timeval now, tnext, tleft; fd_set ss2; const char *trail = "/-\\|", *trailcp; trailcp = trail; snprintf(s1, sizeof(s1), "%s%s%s... ", gAction, gTarget[0] ? 
" " : "", gTarget); printf("\r%-70s%-9s", s1, ""); fflush(stdout); gettimeofday(&now, NULL); tnext = now; tnext.tv_sec++; tleft.tv_sec = 1; tleft.tv_usec = 0; FD_ZERO(&ss2); FD_SET(fd, &ss2); for(;;) { if (gNBufUsed == (gNBufAllocated - 1)) { if ((newbuf = (char *) realloc(gBuf, gNBufAllocated + TEXT_BLOCK_SIZE)) == NULL) { perror("ccdv: realloc"); return (-1); } gNBufAllocated += TEXT_BLOCK_SIZE; gBuf = newbuf; } #if PROGRESS_TWIRL for (;;) { ss = ss2; nready = select(fd + 1, &ss, NULL, NULL, &tleft); if (nready == 1) break; if (nready < 0) { if (errno != EINTR) { perror("ccdv: select"); return (-1); } continue; } gettimeofday(&now, NULL); if ((now.tv_sec > tnext.tv_sec) || ((now.tv_sec == tnext.tv_sec) && (now.tv_usec >= tnext.tv_usec))) { tnext = now; tnext.tv_sec++; tleft.tv_sec = 1; tleft.tv_usec = 0; printf("\r%-71s%c%-7s", s1, *trailcp, ""); fflush(stdout); if (*++trailcp == '\0') trailcp = trail; } else { TimeValSubtract(&tleft, &tnext, &now); } } #endif ntoread = (gNBufAllocated - gNBufUsed - 1); nread = read(fd, gBuf + gNBufUsed, ntoread); if (nread < 0) { if (errno == EINTR) continue; perror("ccdv: read"); return (-1); } else if (nread == 0) { break; } gNBufUsed += nread; gBuf[gNBufUsed] = '\0'; } snprintf(s1, sizeof(s1), "%s%s%s: ", gAction, gTarget[0] ? " " : "", gTarget); Wait(); if (gExitStatus == 0) { printf("\r%-70s", s1); printf("[%s%s%s]", ((gNBufUsed - strlen(gArgsStr)) < 4) ? SETCOLOR_SUCCESS : SETCOLOR_WARNING, "OK", SETCOLOR_NORMAL); printf("%-5s\n", " "); } else { printf("\r%-70s", s1); printf("[%s%s%s]", SETCOLOR_FAILURE, "ERROR", SETCOLOR_NORMAL); printf("%-2s\n", " "); gDumpCmdArgs = 1; /* print cmd when there are errors */ } fflush(stdout); return (0); } /* SlurpProgress */ static int SlurpAll(int fd) { char *newbuf; size_t ntoread; ssize_t nread; printf("%s%s%s\n", gAction, gTarget[0] ? " " : "", gTarget); fflush(stdout); for(;;) { if (gNBufUsed == (gNBufAllocated - 1)) { if ((newbuf = (char *) realloc(gBuf, gNBufAllocated + TEXT_BLOCK_SIZE)) == NULL) { perror("ccdv: realloc"); return (-1); } gNBufAllocated += TEXT_BLOCK_SIZE; gBuf = newbuf; } ntoread = (gNBufAllocated - gNBufUsed - 1); nread = read(fd, gBuf + gNBufUsed, ntoread); if (nread < 0) { if (errno == EINTR) continue; perror("ccdv: read"); return (-1); } else if (nread == 0) { break; } gNBufUsed += nread; gBuf[gNBufUsed] = '\0'; } Wait(); gDumpCmdArgs = (gExitStatus != 0); /* print cmd when there are errors */ return (0); } /* SlurpAll */ static const char * Basename(const char *path) { const char *cp; cp = strrchr(path, '/'); if (cp == NULL) return (path); return (cp + 1); } /* Basename */ static const char * Extension(const char *path) { const char *cp = path; cp = strrchr(path, '.'); if (cp == NULL) return (""); return (cp); } /* Extension */ static void Usage(void) { fprintf(stderr, "Usage: ccdv /path/to/cc CFLAGS...\n\n"); fprintf(stderr, "I wrote this to reduce the deluge Make output to make finding actual problems\n"); fprintf(stderr, "easier. It is intended to be invoked from Makefiles, like this. Instead of:\n\n"); fprintf(stderr, "\t.c.o:\n"); fprintf(stderr, "\t\t$(CC) $(CFLAGS) $(DEFS) $(CPPFLAGS) $< -c\n"); fprintf(stderr, "\nRewrite your rule so it looks like:\n\n"); fprintf(stderr, "\t.c.o:\n"); fprintf(stderr, "\t\t@ccdv $(CC) $(CFLAGS) $(DEFS) $(CPPFLAGS) $< -c\n\n"); fprintf(stderr, "ccdv 1.1.0 is Free under the GNU Public License. 
Enjoy!\n"); fprintf(stderr, " -- Mike Gleason, NcFTP Software \n"); exit(96); } /* Usage */ int main(int argc, char **argv) { int pipe1[2]; int devnull; char emerg[256]; int fd; int nread; int i; int cc = 0, pch = 0; const char *quote; if (argc < 2) Usage(); snprintf(gAction, sizeof(gAction), "Running %s", Basename(argv[1])); memset(gArgsStr, 0, sizeof(gArgsStr)); for (i = 1; i < argc; i++) { quote = (strchr(argv[i], ' ') != NULL) ? "\"" : ""; snprintf(gArgsStr + strlen(gArgsStr), sizeof(gArgsStr) - strlen(gArgsStr), "%s%s%s%s%s", (i == 1) ? "" : " ", quote, argv[i], quote, (i == (argc - 1)) ? "\n" : ""); if ((strcmp(argv[i], "-o") == 0) && ((i + 1) < argc)) { if (strcasecmp(Extension(argv[i + 1]), ".o") != 0) { strcpy(gAction, "Linking"); snprintf(gTarget, sizeof(gTarget), "%s", Basename(argv[i + 1])); } } else if (strncasecmp(Extension(argv[i]), ".c", 2) == 0) { cc++; snprintf(gTarget, sizeof(gTarget), "%s", Basename(argv[i])); } else if ((strncasecmp(Extension(argv[i]), ".h", 2) == 0) && (cc == 0)) { pch++; snprintf(gTarget, sizeof(gTarget), "%s", Basename(argv[i])); } else if (strchr("-+/", (int) argv[i][0]) != NULL) { continue; } else if ((i == 1) && (strcmp(Basename(argv[i]), "ar") == 0)) { snprintf(gAr, sizeof(gAr), "%s", Basename(argv[i])); } else if ((gArLibraryTarget[0] == '\0') && (strcasecmp(Extension(argv[i]), ".a") == 0)) { snprintf(gArLibraryTarget, sizeof(gArLibraryTarget), "%s", Basename(argv[i])); } } if ((gAr[0] != '\0') && (gArLibraryTarget[0] != '\0')) { strcpy(gAction, "Creating library"); snprintf(gTarget, sizeof(gTarget), "%s", gArLibraryTarget); } else if (pch > 0) { strcpy(gAction, "Precompiling"); } else if (cc > 0) { strcpy(gAction, "Compiling"); } if (pipe(pipe1) < 0) { perror("ccdv: pipe"); exit(97); } (void) close(0); devnull = open("/dev/null", O_RDWR, 00666); if ((devnull != 0) && (dup2(devnull, 0) == 0)) close(devnull); gCCPID = (int) fork(); if (gCCPID < 0) { (void) close(pipe1[0]); (void) close(pipe1[1]); perror("ccdv: fork"); exit(98); } else if (gCCPID == 0) { /* Child */ (void) close(pipe1[0]); /* close read end */ if (pipe1[1] != 1) { /* use write end on stdout */ (void) dup2(pipe1[1], 1); (void) close(pipe1[1]); } (void) dup2(1, 2); /* use write end on stderr */ execvp(argv[1], argv + 1); perror(argv[1]); exit(99); } /* parent */ (void) close(pipe1[1]); /* close write end */ fd = pipe1[0]; /* use read end */ gColumns = (getenv("COLUMNS") != NULL) ? 
atoi(getenv("COLUMNS")) : 0; gANSIEscapes = (getenv("TERM") != NULL) && (strstr(TERMS, getenv("TERM")) != NULL); gBuf = (char *) malloc(TEXT_BLOCK_SIZE); if (gBuf == NULL) goto panic; gNBufUsed = 0; gNBufAllocated = TEXT_BLOCK_SIZE; if (strlen(gArgsStr) < (gNBufAllocated - 1)) { strcpy(gBuf, gArgsStr); gNBufUsed = strlen(gArgsStr); } if (isatty(1)) { if (SlurpProgress(fd) < 0) goto panic; } else { if (SlurpAll(fd) < 0) goto panic; } DumpFormattedOutput(); exit(gExitStatus); panic: gDumpCmdArgs = 1; /* print cmd when there are errors */ DumpFormattedOutput(); while ((nread = read(fd, emerg, (size_t) sizeof(emerg))) > 0) { ssize_t retval; retval = write(2, emerg, (size_t) nread); } Wait(); exit(gExitStatus); } /* main */ /* eof ccdv.c */ Heartbeat-3-0-7e3a82377fa8/c-config-i686-pc-linux-gnu.tcl0000644000000000000000000003761511576626513022422 0ustar00usergroup00000000000000############################################################ # Invocation of beam_configure: # # '/home/alanr/beam-3.0.1/bin/beam_configure' '--output' 'c-config-i686-pc-linux-gnu.tcl' '--c' '/usr/bin/gcc' # # Location of compiler: # # /usr/bin/gcc # ############################################################ # # This is BEAM configuration file that describes a compiler # and a target machine. This was generated with beam_configure # version "1.1 (June 2005)". # # This information will help BEAM emulate this compiler's # features, macros, and header file locations, so that BEAM # can compile the same source code that the original compiler # could compile, and understand it with respect to the machine's # sizes and widths of types. # # The file format is Tcl, so basic Tcl knowledge may be beneficial # for anything more than the simplest of modifications. # # A quick Tcl primer: # - Lines starting with "#" or ";#" are comments # - Things inside balanced curly braces are literal strings {one string literal} # - Things in square brackets that aren't in curly braces are function calls, # and will be expanded inline automatically. This causes the most problems in # double-quoted strings: "this is a function call: [some_func]" # # This file contains these sections: # # 1) Source language dialect # 2) Default include paths # 3) Target machine configuration # 4) Predefined macros # 5) Miscellaneous options # # Each section has variables that help configure BEAM. They should # each be commented well. For additional documentation, please # refer to the local documentation in the install point. # # Note that the order of the sections is not important, # and variables may be set in any order. # ############################################################ ### This sets up the namespace that the rest of the file will ### live in. BEAM will look in certain namespaces depending ### on the source file that is being compiled. ### ### For C compilers, set the namespace to "::beam::compiler::c" ### For C++, set it to "::beam::compiler::cpp" # set ::namesp ::beam::compiler::c # set ::namesp ::beam::compiler::cpp set ::namesp ::beam::compiler::c namespace eval $::namesp { ### This should be first. It initializes the namespace ### correctly for BEAM's use. ::beam::compiler::init_settings ### This tells BEAM which pre-canned settings to load. ### BEAM comes with some function attributes and argument ### mappers for gcc, xlc, and vac. If unsure, set this to ### "default". 
set cc "gcc" ############################################################ # Section 1: Source language dialect ############################################################ ### The language_dialect variable selects among the available ### dialects of C and C++. ### ### By default, C files are set up as: ### ### set language_dialect c ### set c99_mode 0 ### set strict_mode 0 ### set gnu_mode 0 ### ### and C++ files are set up as: ### ### set language_dialect c++ ### set c99_mode 0 ### set strict_mode 0 ### set gnu_mode 0 ### ### Note that the dialect must match the namespace. ### Don't set up the C++ language in the C namespace or ### things will probably fail. ### ### This defaults to be the same as the language being ### compiled (based on the source file extension). ### Normally, it should not be set. # set language_dialect old_c ;# K&R # set language_dialect c ;# ANSI # set language_dialect c++ ### In addition to simply using C or C++, different ### modes are provided to enable or disable language ### extensions. Some modes are incompatible with eachother ### or with the language_dialect above, and will produce ### errors. ### C99 mode enables C99 extensions in C code. It is not ### compatible with C++ code. This overrides old_c, and ### instead forces regular C. # set c99_mode 0 # set c99_mode 1 ### Strict mode disables all non-ANSI/ISO features. It ### is compatible with C and C++ code, but not with old_c. # set strict_mode 0 # set strict_mode 1 ### GNU mode enables GNU C extensions in C code and ### GNU C++ extensions in C++ code. This overrides ### old_c, and instead forces regular C. ### ### The value should be a 5 digit number representing ### the version of GCC to emulate. It is of this format: ### ### major_version_num * 10000 + ### minor_version_num * 100 + ### patch_version_num ### ### so, GCC version "3.4.3" should be "30403". ### ### The minimum allowable value is "30200". # set gnu_mode 30200 set gnu_mode 30305 ### Other miscellaneous language settings. The values shown ### here are the defaults if they remain unset. # set language_friend_injection_enabled 0 # set language_use_nonstandard_for_init_scope 0 # set language_string_literals_are_const 1 # set language_allow_dollar_in_id_chars 1 # set language_end_of_line_comments_allowed 0 # set language_allow_spaces_in_include_directive 0 ############################################################ # Section 2: Default include paths ############################################################ ### The system_include_path variable is a list of directories ### that will be searched in for system headers. Parser warnings ### are suppressed in these directories. These will come ### after any directories specified with -I on the command line. # lappend system_include_path {/usr/include} # lappend system_include_path {/usr/vacpp/include} ### Maybe your include paths are part of the environment # if { [::info exists ::env(MY_INCLUDE_PATH)] } { # set system_include_path [concat $system_include_path \ # [split $::env(MY_INCLUDE_PATH) ":"]] # } lappend system_include_path {/usr/lib/gcc-lib/i586-suse-linux/3.3.5/include} lappend system_include_path {/usr/include} ############################################################ # Section 3: Target machine configuration ############################################################ ### These variables control the target machine and ### a few individual language options. ### ### Note: These examples do not cover all of the available ### options. For a complete list, refer to the BEAM documentation. 
### ### Examples appear below the auto-configured ones. set target_alignof_double {4} set target_alignof_float {4} set target_alignof_int {4} set target_alignof_long {4} set target_alignof_long_double {4} set target_alignof_long_long {4} set target_alignof_pointer {4} set target_alignof_short {2} set target_char_bit {8} set target_dbl_max_exp {1024} set target_dbl_min_exp {-1021} set target_enum_bit_fields_are_always_unsigned {0} set target_flt_max_exp {128} set target_flt_min_exp {-125} set target_ldbl_max_exp {16384} set target_ldbl_min_exp {-16381} set target_little_endian {1} set target_plain_char_is_unsigned {0} set target_plain_int_bit_field_is_unsigned {0} set target_size_t_int_kind {unsigned int} set target_sizeof_double {8} set target_sizeof_float {4} set target_sizeof_int {4} set target_sizeof_long {4} set target_sizeof_long_double {12} set target_sizeof_long_long {8} set target_sizeof_pointer {4} set target_sizeof_short {2} set target_sizeof_size_t {4} set target_sizeof_wchar_t {4} set target_wchar_t_int_kind {long int} set target_wchar_t_is_unsigned {0} ### Examples ### ### The number of bits in a char # set target_char_bit 8 ### Default signedness options # set target_plain_char_is_unsigned 0 # set target_plain_char_is_unsigned 1 # # set target_plain_int_bit_field_is_unsigned 0 # set target_plain_int_bit_field_is_unsigned 1 # # set target_enum_bit_fields_are_always_unsigned 0 # set target_enum_bit_fields_are_always_unsigned 1 ### Endianness of target machine # set target_little_endian 0 # set target_little_endian 1 ### Sizes of basic types in multiples of char. Since ### a char is defined to have size 1, it is not a ### configuration option. # set target_sizeof_short 2 # set target_sizeof_int 4 # set target_sizeof_long 4 # set target_sizeof_long_long 8 # set target_sizeof_float 4 # set target_sizeof_double 8 # set target_sizeof_long_double 12 # set target_sizeof_pointer 4 ### Alignments of basic types in multiples of char. Since ### a char is defined to have alignment 1, it is not a ### configuration option. # set target_alignof_short 2 # set target_alignof_int 4 # set target_alignof_long 4 # set target_alignof_long_long 4 # set target_alignof_float 4 # set target_alignof_double 4 # set target_alignof_long_double 4 # set target_alignof_pointer 4 ### Special types # set target_sizeof_size_t 4 # set target_size_t_int_kind {unsigned int} # # set target_sizeof_wchar_t 4 # set target_wchar_t_int_kind {int} # set target_wchar_t_is_unsigned 0 ### Floating-point characteristics. The default ### values for these variables depend on the sizes ### set for the types. The examples shown here ### are appropriate if float is size 4, double is ### size 8, and long double is size 12. ### ### Note that these values do not have to be exact ### because BEAM currently has limited floating-point ### support. # set target_flt_max_exp 128 # set target_flt_min_exp -125 # set target_dbl_max_exp 1024 # set target_dbl_min_exp -1021 # set target_ldbl_max_exp 16384 # set target_ldbl_min_exp -16381 ### Other miscellaneous options. The values ### shown here are the default values. 
# set target_bit_field_container_size -1 # set target_zero_width_bit_field_alignment -1 # set target_zero_width_bit_field_affects_struct_alignment 0 # set target_unnamed_bit_field_affects_struct_alignment 0 ############################################################ # Section 4: Predefined macros ############################################################ ### The predefined_macro variable is an associated array that ### maps the name of a macro to the value. Be sure that the ### value contains quotes inside the curly braces if the ### expansion should also contain quotes. ### ### Curly braces are allowed in the expansion text as long ### as they are properly balanced. ### ### There is no limit to the number of predefined macros that ### you can define. # set predefined_macro(identifier1) {some_literal_value} # set predefined_macro(identifier2) {"some string value with quotes"} # set predefined_macro(identifier3(x,y)) { do { code; } while((x) && (y)) } set predefined_macro(__BEAM__) {1} set predefined_macro(__CHAR_BIT__) {8} set predefined_macro(__DBL_DIG__) {15} set predefined_macro(__DBL_EPSILON__) {2.2204460492503131e-16} set predefined_macro(__DBL_MANT_DIG__) {53} set predefined_macro(__DBL_MAX_10_EXP__) {308} set predefined_macro(__DBL_MAX_EXP__) {1024} set predefined_macro(__DBL_MAX__) {1.7976931348623157e+308} set predefined_macro(__DBL_MIN_10_EXP__) {(-307)} set predefined_macro(__DBL_MIN_EXP__) {(-1021)} set predefined_macro(__DBL_MIN__) {2.2250738585072014e-308} set predefined_macro(__DECIMAL_DIG__) {21} set predefined_macro(__FLT_DIG__) {6} set predefined_macro(__FLT_EPSILON__) {1.19209290e-7F} set predefined_macro(__FLT_EVAL_METHOD__) {2} set predefined_macro(__FLT_MANT_DIG__) {24} set predefined_macro(__FLT_MAX_10_EXP__) {38} set predefined_macro(__FLT_MAX_EXP__) {128} set predefined_macro(__FLT_MAX__) {3.40282347e+38F} set predefined_macro(__FLT_MIN_10_EXP__) {(-37)} set predefined_macro(__FLT_MIN_EXP__) {(-125)} set predefined_macro(__FLT_MIN__) {1.17549435e-38F} set predefined_macro(__FLT_RADIX__) {2} set predefined_macro(__GNUC_MINOR__) {3} set predefined_macro(__GNUC_PATCHLEVEL__) {5} set predefined_macro(__GNUC__) {3} set predefined_macro(__INT_MAX__) {2147483647} set predefined_macro(__LDBL_DIG__) {18} set predefined_macro(__LDBL_EPSILON__) {1.08420217248550443401e-19L} set predefined_macro(__LDBL_MANT_DIG__) {64} set predefined_macro(__LDBL_MAX_10_EXP__) {4932} set predefined_macro(__LDBL_MAX_EXP__) {16384} set predefined_macro(__LDBL_MAX__) {1.18973149535723176502e+4932L} set predefined_macro(__LDBL_MIN_10_EXP__) {(-4931)} set predefined_macro(__LDBL_MIN_EXP__) {(-16381)} set predefined_macro(__LDBL_MIN__) {3.36210314311209350626e-4932L} set predefined_macro(__LONG_LONG_MAX__) {9223372036854775807LL} set predefined_macro(__LONG_MAX__) {2147483647L} set predefined_macro(__NO_INLINE__) {1} set predefined_macro(__PTRDIFF_TYPE__) {int} set predefined_macro(__SCHAR_MAX__) {127} set predefined_macro(__SHRT_MAX__) {32767} set predefined_macro(__SIZE_TYPE__) {unsigned int} set predefined_macro(__WCHAR_TYPE__) {long int} set predefined_macro(__WINT_TYPE__) {unsigned int} set predefined_macro(__i386) {1} set predefined_macro(__i386__) {1} set predefined_macro(__linux__) {1} set predefined_macro(__unix) {1} set predefined_macro(__unix__) {1} set predefined_macro(i386) {1} set predefined_macro(linux) {1} set predefined_macro(unix) {1} ### You can also suppress the standard EDG predefined macros ### like __STDC__ if you set this pattern. 
By default, ### the pattern is "*", which allows all EDG predefined ### macros to get defined. Setting this to something ### like "* - __STDC__" would suppress the __STDC__ ### macro from being defined by default. This does ### not affect any predefined macros set up in this ### file; it only affects the basic EDG predefined macros. # set standard_predefined_macros "*" ############################################################ # Section 5: Miscellaneous options ############################################################ ### The extern variable is an associated array that maps ### unknown extern "string" values to known ones. For example, ### to force BEAM to treat ### ### extern "builtin" void func(); ### ### as ### ### extern "C" void func(); ### ### you should set this option: ### ### set extern(builtin) "C" ### ### There is no limit to the number of strings that you can ### map to the built-in strings of "C" or "C++". ### Some compilers define macro-like symbols that are being replaced ### with the name of the function they appear in. Below are the symbols ### EDG recognizes. Set to 1, if the symbol is replaced with a character ### string (as opposed to a variable). If in doubt define it as "1" ### which is more flexible. ### ### set function_name_is_string_literal(__PRETTY_FUNCTION__) 1 ### set function_name_is_string_literal(__FUNCTION__) 1 ### set function_name_is_string_literal(__FUNCDNAME__) 1 ### set function_name_is_string_literal(__func__) 1 set function_name_is_string_literal(__func__) 0 set function_name_is_string_literal(__FUNCTION__) 1 set function_name_is_string_literal(__PRETTY_FUNCTION__) 1 ############################################################ # End of the file and end of the namespace ############################################################ } Heartbeat-3-0-7e3a82377fa8/config/Makefile.am0000644000000000000000000000151111576626513020476 0ustar00usergroup00000000000000# # Copyright (C) 2005 Guochun Shi (gshi@ncsa.uiuc.edu) # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = pidtest.c byteorder_test.c Heartbeat-3-0-7e3a82377fa8/config/byteorder_test.c0000644000000000000000000000033111576626513021643 0ustar00usergroup00000000000000#include <stdio.h> int main () { unsigned int a = 0x1234; if ( (unsigned int) ( ((unsigned char *)&a)[0]) == 0x34 ) { printf("little-endian\n"); return 0; } else { printf("big-endian\n"); return 1; } } Heartbeat-3-0-7e3a82377fa8/config/pidtest.c0000644000000000000000000000521011576626513020262 0ustar00usergroup00000000000000 /* Linux-HA: pid test code * * Author: Jia Ming Pan * Modified by Guochun Shi * * Copyright (c) 2005 International Business Machines * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <pthread.h> #include <sys/types.h> #include <sys/wait.h> #define SAME 1 #define TRUE 1 #define FALSE 0 int childflag = 0; int grandchildflag = 0; int pidconsistent = TRUE; void * grandchild_func(void * data) { pid_t pid = (long) data; if (pid == getpid()){ grandchildflag = SAME; } if (grandchildflag ^ childflag){ pidconsistent = FALSE; printf("Inconsistency detected\n"); } return NULL; } void * child_func(void * data) { pid_t pid = (long) data; pthread_t thread_id; if (pid == getpid()){ childflag = SAME; } pthread_create(&thread_id, NULL, grandchild_func, (void*)(long)getpid()); return NULL; } int main() { pthread_t thread_id; pthread_attr_t tattr; int firsttime = 1; pid_t pid; int status; pthread_attr_init(&tattr); pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED); again: pid = fork(); if ( pid == 0 ) { childflag = 0; grandchildflag = 0; if (pthread_create(&thread_id, &tattr, child_func, (void*)(long)getpid()) != 0){ printf("%s: creating thread failed", __FUNCTION__); } usleep(500000); if (firsttime){ firsttime=0; goto again; } if (pidconsistent){ return 0; }else{ return 1; } } if (waitpid(pid, &status, 0) <= 0){ printf("ERROR: wait for child %d failed\n",pid); } if (WIFEXITED(status)){ return (WEXITSTATUS(status)); }else{ printf("child process %d does not exit normally\n",pid); } return 0; } Heartbeat-3-0-7e3a82377fa8/configure.in0000644000000000000000000021760311576626513017511 0ustar00usergroup00000000000000dnl dnl autoconf for heartbeat dnl Started by David Lee December 2000 dnl automake stuff dnl added by Michael Moerz February 2001 dnl dnl License: GNU General Public License (GPL) dnl Initialise, with sanity check of a unique file in the hierarchy AC_INIT(GNUmakefile) AC_PREREQ(2.53) AC_CONFIG_AUX_DIR(.) AC_REVISION($Revision: 1.552 $) dnl cvs revision AC_CANONICAL_HOST dnl Where #defines go (e.g.
`AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/hb_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/hb_config.h.in to have configure include dnl new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/hb_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(hapkgversion, [ --with-hapkgversion=name Override package version (if you're a packager needing to pretend) ], [ HAPKGVERSION="$withval" ], [ HAPKGVERSION="" ], ) if test -z "$HAPKGVERSION" ; then HAPKGVERSION="3.0.5" fi AM_INIT_AUTOMAKE(heartbeat, $HAPKGVERSION) RPMREL=1 AC_SUBST(RPMREL) HB_PKG=heartbeat AC_SUBST(HB_PKG) DISTDIR=$HB_PKG-$VERSION TARFILE=$DISTDIR.tar.gz AC_SUBST(DISTDIR) AC_SUBST(TARFILE) CC_IN_CONFIGURE=yes export CC_IN_CONFIGURE INIT_EXT="" USE_MODULES=0 echo Our Host OS: $host_os/$host dnl This OS-based decision-making is poor autotools practice; dnl feature-based mechanisms are strongly preferred. dnl dnl So keep this section to a bare minimum; regard as a "necessary evil". pf_argv_set="" case "$host_os" in *bsd*) LIBS="-L/usr/local/lib" CPPFLAGS="$CPPFLAGS -I/usr/local/include" INIT_EXT=".sh" REBOOT_OPTIONS="-f" POWEROFF_OPTIONS="-f" ;; *solaris*) pf_argv_set="PF_ARGV_NONE" REBOOT_OPTIONS="-n" POWEROFF_OPTIONS="-n" ;; *linux*) USE_MODULES=1 REBOOT_OPTIONS="-nf" POWEROFF_OPTIONS="-nf" ;; dnl anything? darwin*) AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) LIBS="$LIBS -L${prefix}/lib" CFLAGS="$CFLAGS -I${prefix}/include" REBOOT_OPTIONS="-f" POWEROFF_OPTIONS="-f" ;; esac AC_SUBST(INIT_EXT) AC_SUBST(USE_MODULES) AC_SUBST(REBOOT_OPTIONS) AC_SUBST(POWEROFF_OPTIONS) AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options) AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options) AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility) dnl Info for building/packaging systems. dnl dnl "pkg" (typically Solaris) generally suggests package names of the form: dnl COMPname dnl where: dnl COMP: (upper case) resembles a four character company "stock ticker"; dnl name: (lower case) is short-form (few character) product name. dnl dnl It is also conventional for the name to be closely related to the dnl installation location, typically "/opt/COMPname". dnl dnl See "linux-ha-dev" discussion, "heartbeat package name", from 12/Oct/2005. dnl "LXHAhb" seems the least evil compromise for a default. dnl Any site or packager may, of course, override this. 
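dnl
dnl Illustrative example only (the name "SUNWhb" is hypothetical, not a
dnl project default): a Solaris packager could run
dnl   ./configure --with-pkgname=SUNWhb --prefix=/opt/SUNWhb
dnl and the chosen name is then exported to the packaging templates via
dnl AC_SUBST(PKGNAME) below.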
dnl AC_ARG_WITH(pkgname, [ --with-pkgname=name name for pkg (typically for Solaris) ], [ PKGNAME="$withval" ], [ PKGNAME="LXHAhb" ], ) AC_SUBST(PKGNAME) MISSINGTHINGS="" MISSINGOPTIONALS="" dnl =============================================== dnl Helpers dnl =============================================== extract_header_define() { AC_MSG_CHECKING(for $2 in $1) Cfile=/tmp/extract_define.$2.${$} printf "#include \n" > ${Cfile}.c printf "#include <%s>\n" $1 >> ${Cfile}.c printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c $CC $CFLAGS ${Cfile}.c -o ${Cfile} value=`${Cfile}` AC_MSG_RESULT($value) printf $value rm -f ${Cfile}.c ${Cfile} } FatalMissingThing() { if test X"$MISSINGTHINGS" = X; then MISSINGTHINGS="$MISSINGTHINGS $1" else MISSINGTHINGS="$MISSINGTHINGS, $1" fi shift AC_MSG_RESULT(configure: ERROR: $1 ====================) shift for j in "$@" do if test "X$j" != X-; then AC_MSG_RESULT(configure: $j ==) fi done } WarnMissingThing() { if test X"$MISSINGOPTIONALS" = X; then MISSINGOPTIONALS="$MISSINGOPTIONALS $1" else MISSINGOPTIONALS="$MISSINGOPTIONALS, $1" fi shift AC_MSG_RESULT(configure: WARNING: $1 ====================) shift for j in "$@" do if test "X$j" != X-; then AC_MSG_RESULT(configure: $j ==) fi done } CheckMissingThings() { if test "X$MISSINGOPTIONALS" != "X" then AC_MSG_WARN(The following recommended components noted earlier are missing: $MISSINGOPTIONALS We will continue but you may have lost some non-critical functionality.) fi if test "X$MISSINGTHINGS" != "X" then AC_MSG_ERROR(The following required components noted earlier are missing: $MISSINGTHINGS Please supply them and try again.) fi } dnl dnl dnl Don't ya just hate working around buggy code? dnl dnl At least code that doesn't do what you want... dnl dnl This is to make substitutions work right in RPM specfiles. dnl dnl Horms says "This is pretty ugly". dnl Alanr says: "It works. s/ ugly//" dnl dnl Patches are being accepted... dnl dnl Keep copy of original (default) prefix prefix_orig="$prefix" prefix=`eval echo "$prefix"` case $prefix in NONE) prefix=/usr/local;; esac var() { case $1 in *'${'*) res=`eval echo "$1"`;; *) res="$1";; esac case "$res" in ""|NONE) echo "$2";; *) echo "$res";; esac } dnl Keep copy of original (default) localstatedir localstatedir_orig="$localstatedir" exec_prefix=`var "$exec_prefix" "$prefix"` bindir=`var "$bindir" "$exec_prefix/bin"` sbindir=`var "$sbindir" "$exec_prefix/sbin"` datarootdir=`var "$datarootdir" "$prefix/share"` datadir=`var "$datadir" "$prefix/share"` sysconfdir=`var "$sysconfdir" "$prefix/etc"` sharedstatedir=`var "$sharedstatedir" "$prefix/com"` localstatedir=`var "$localstatedir" "$prefix/var"` includedir=`var "$includedir" "$exec_prefix/include"` oldincludedir=`var "$oldincludedir" "$exec_prefix/include"` infodir=`var "$infodir" "$prefix/info"` mandir=`var "$mandir" "$exec_prefix/man"` dnl docdir is available in autoconf 2.60+, for older versions preseed dnl with the same value that 2.60+ uses docdir=`var "$docdir" "${datadir}/doc/${PACKAGE_NAME}"` libdir=`var "$libdir" "$exec_prefix/lib"` libexecdir=`var "$libexecdir" "$exec_prefix/libexec"` noarchlibdir=`var "$noarchlibdir" "$prefix/lib"` LDD=ldd dnl Which C compiler? dnl Defaults to GNU C compiler if available. dnl Always tries to set the compiler to ANSI C via options (AM) dnl Can force other with environment variable "CC". 
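dnl For example (illustrative invocation only):
dnl   CC=clang CFLAGS="-g -O2" ./configure
dnl would override the compiler that AC_PROG_CC picks up below.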
AC_PROG_CC AC_PROG_CC_STDC AM_PROG_CC_C_O dnl ************************************************************************ dnl Override the RPM target architecture on those platforms that need it... dnl ************************************************************************ case "$host_cpu" in ppc64|powerpc64) RPMTARGET="--target ppc";; *) RPMTARGET="";; esac AC_SUBST(RPMTARGET) dnl ************************************************************************ dnl Test to see whether library directories should be lib or lib64... dnl ************************************************************************ AC_MSG_CHECKING(for proper library directory suffix) tmpdir=/tmp tmpCfile=${tmpdir}/$$test.c tmpOutfile=${tmpdir}/$$test echo 'int main(int argc, char** argv) { return(1);}' >$tmpCfile if ${CC} ${CFLAGS} ${tmpCfile} -o ${tmpOutfile} then LIBC=`${LDD} ${tmpOutfile} | grep libc | sed -e 's%.*=> *%%' -e 's% .*$%%'` LibCdir=`dirname $LIBC` dirlist=`echo $LibCdir | tr '/' ' '` LibDirSuffix=unknown for dir in $dirlist do case $dir in *lib*) LibDirSuffix=$dir; break;; *);; esac done case $LibDirSuffix in unknown) LibDirSuffix=`basename $LibCdir`;; esac OutFileType=`file $tmpOutfile` rm -f $tmpCfile $tmpOutfile else AC_MSG_ERROR([Cannot Compile trivial C program]) fi # # The code above doesn't work right everywhere # (like Fedora and OpenBSD) # case ${LibDirSuffix} in *lib*) : Cool;; *) : Sigh... case $OutFileType in *64-bit*) case $host_os in openbsd*) LibDirSuffix=lib;; *) LibDirSuffix=lib64;; esac;; *32-bit*) LibDirSuffix=lib;; *) LibDirSuffix=lib;; esac;; esac # # This may not yet be quite right for PPC where the default # is to produce 32-bit binaries, even though the OS is 64-bit # or for that matter for system Z, But, it's a lot better than # it used to be. # AC_MSG_RESULT($LibDirSuffix) case $libdir in */*${LibDirSuffix}) : Cool ;; *) : Uh Oh... libdir=`dirname $libdir`/$LibDirSuffix AC_MSG_WARN([Overriding libdir to: $libdir]);; esac case $libexecdir in */$LibDirSuffix) : Cool ;; *) : Uh Oh... libexecdir=`dirname $libexecdir`/$LibDirSuffix AC_MSG_WARN([Overriding libexecdir to: $libexecdir]);; esac for j in exec_prefix bindir sbindir datadir sysconfdir localstatedir \ includedir oldincludedir mandir docdir stdocdir libdir noarchlibdir do dirname=`eval echo '${'${j}'}'` if test ! -d "$dirname" then AC_MSG_WARN([$j directory ($dirname) does not exist!]) fi done dnl The GNU conventions for installation directories don't always dnl sit well with this software. In particular, GNU's stated: dnl dnl '$(localstatedir)' should normally be '/usr/local/var', ... dnl dnl is poor for us: much better would be somewhere under '/var'. dnl dnl Here within "configure" it would be poor practice for us to interfere dnl with such values, irrespective of our opinion: dnl 1. user perspective: we would have gone behind their back; dnl 2. autoconf perspective: autoconf should avoid any OS-specific mindset. dnl dnl So if localstatedir still has its default value, we issue an advisory dnl warning and inform folk of our own "ConfigureMe", which is ideally dnl suited for setting such aspects (by user, and per-OS). dnl dnl [ Another option would be to detect, and to warn of, (post-expansion) dnl non-"/var/[...]" values: something like: dnl if test `expr "$localstatedir" : '^/var/.*'` -ge '5' \ dnl -o `expr "$localstatedir" : '^/var.*'` -eq '4' dnl then else fi dnl ] # If original localstatedir had defaulted then sanity-check the result. 
if test "x$localstatedir_orig" = 'x${prefix}/var' && # Note quote types test "x$prefix_orig" = xNONE then SNOOZING=10 AC_MSG_WARN(value/default "--localstatedir=$localstatedir" is poor.) AC_MSG_WARN("/var/something" is strongly recommended.) AC_MSG_WARN(We also recommend using "ConfigureMe".) AC_MSG_WARN(Sleeping for $SNOOZING seconds.) sleep $SNOOZING fi AC_CHECK_HEADERS(heartbeat/glue_config.h) GLUE_HEADER=none if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then GLUE_HEADER=heartbeat/glue_config.h else AC_MSG_FAILURE(Core development headers were not found) fi CFLAGS="$CFLAGS -I${prefix}/include/heartbeat" AC_CHECK_HEADERS(glue_config.h) if test "$ac_cv_header_glue_config_h" != "yes"; then AC_MSG_FAILURE(Core development headers were not found) fi AC_DEFINE_UNQUOTED(HA_SYSCONFDIR, "$sysconfdir", Location of system configuration files) HA_URLBASE="http://linux-ha.org/wiki/" AC_SUBST(HA_URLBASE) AC_DEFINE_UNQUOTED(HA_URLBASE, "$HA_URLBASE", Web site base URL) HA_LIBDIR="${libdir}" AC_SUBST(HA_LIBDIR) AC_DEFINE_UNQUOTED(HA_LIBDIR,"$HA_LIBDIR", lib directory) HA_DATADIR="${datadir}" AC_SUBST(HA_DATADIR) AC_DEFINE_UNQUOTED(HA_DATADIR,"$HA_DATADIR", data (arch-independent) directory) HA_NOARCHDATAHBDIR="$HA_DATADIR/${HB_PKG}" AC_SUBST(HA_NOARCHDATAHBDIR) AC_DEFINE_UNQUOTED(HA_NOARCHDATAHBDIR,"$HA_NOARCHDATAHBDIR", $HB_PKG noarch data directory) HA_LIBHBDIR=`extract_header_define $GLUE_HEADER HA_LIBHBDIR` AC_SUBST(HA_LIBHBDIR) HA_VARRUNDIR=`extract_header_define $GLUE_HEADER HA_VARRUNDIR` AC_SUBST(HA_VARRUNDIR) HA_VARRUNHBDIR="$HA_VARRUNDIR/${HB_PKG}" AC_SUBST(HA_VARRUNHBDIR) HA_VARRUNHBRSCDIR="$HA_VARRUNHBDIR/rsctmp" AC_SUBST(HA_VARRUNHBRSCDIR) HA_VARLIBDIR="${localstatedir}/lib" AC_SUBST(HA_VARLIBDIR) HA_VARLIBHBDIR=`extract_header_define $GLUE_HEADER HA_VARLIBHBDIR` AC_SUBST(HA_VARLIBHBDIR) HA_COREDIR=`extract_header_define $GLUE_HEADER HA_COREDIR` AC_SUBST(HA_COREDIR) base_includedir="${includedir}" saf_includedir="${includedir}/saf" AC_SUBST(base_includedir) AC_SUBST(saf_includedir) AC_SUBST(docdir) AC_SUBST(stdocdir) # # Other interesting variables: ${host_vendor} and ${host_os} # sample values: suse linux # dnl We use this in the RPM specfile... AC_SUBST(ac_configure_args) cleaned_configure_args="" for j in ${ac_configure_args} do case $j in *--libdir=*|*--libexecdir=*) ;; *) cleaned_configure_args="$cleaned_configure_args $j";; esac done AC_SUBST(cleaned_configure_args) dnl ************************************************************************* PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin" export PATH dnl checks for programs dnl AC_PROG_YACC AC_DECL_YYTEXT AM_PROG_LEX AM_PATH_PYTHON AC_LIBTOOL_DLOPEN dnl Enable dlopen support... AC_LIBLTDL_CONVENIENCE dnl make libltdl a convenience lib AC_PROG_LIBTOOL dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL dnl was NOT being expanded all the time thus causing things to fail. AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13) AC_MSG_CHECKING(for glibtool or libtool*) if test x"${LIBTOOL}" = x""; then FatalMissingThing "libtool" "You need libtool to build heartbeat." \ "You can get the source from ftp://www.gnu.org/pub/gnu/" \ "or you can locate it via http://www.gnu.org/software/libtool" else AC_MSG_RESULT($LIBTOOL has been found.) fi AC_CHECK_PROGS(MAKE, gmake make) AC_MSG_CHECKING(for gmake or make) if test x"${MAKE}" = x""; then FatalMissingThing "gmake" "You need gmake to build heartbeat." 
\ "You can get the source from ftp://www.gnu.org/pub/gnu/" \ "or you can locate it via http://www.gnu.org/software/make/" else AC_MSG_RESULT($MAKE has been found.) fi AC_SYS_LARGEFILE AC_PATH_PROGS(HTML2TXT, lynx w3m) case $HTML2TXT in */*) ;; *) HTML2TXT="";; esac AC_PATH_PROGS(POD2MAN, pod2man, pod2man) AC_PATH_PROGS(ROUTE, route) AC_PATH_PROGS(RPM, rpmbuild) if test x"${RPM}" = x""; then AC_PATH_PROGS(RPM, rpm) fi AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) AC_PATH_PROGS(NETSTAT, netstat, /sbin/netstat) AC_DEFINE_UNQUOTED(NETSTAT, "$NETSTAT", path to the netstat command) AC_PATH_PROGS(PING, ping, /bin/ping) AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(PYTHON, python) dnl Need a "mail" program which can handle "-s subject" flag. dnl Ideally would write a suitable autoconf test here. dnl Pragmatically observe: dnl Solaris: "mail" doesn't offer "-s", but "mailx" does. Want "mailx". dnl Linux: "mail" offers "-s". Some releases have "mailx" symlink to "mail". dnl *BSD: Online man pages suggest similar to Linux. dnl HP-UX: Online man pages suggest similar to Solaris. dnl Thus "mailx", if it exists, should always be good; look for it first. AC_PATH_PROGS(MAILCMD, mailx mail) AC_SUBST(MAILCMD) AC_DEFINE_UNQUOTED(IFCONFIG, "$IFCONFIG", path to the ifconfig command) AC_PATH_PROGS(GETENT, getent) AC_PATH_PROGS(IP2UTIL, ip, /sbin/ip) AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot) AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff) AC_SUBST(REBOOT) AC_SUBST(POWEROFF_CMD) AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command) AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command) dnl "whoami", if it exists, may be hidden away on some System-V (e.g. Solaris) AC_PATH_PROGS(WHOAMI, whoami, , ${PATH}:/usr/ucb) AC_PATH_PROGS(SWIG, swig) AC_SUBST(SWIG) AC_PATH_PROGS(EGREP, egrep) AC_SUBST(EGREP) AC_PATH_PROGS(MSGFMT, msgfmt, [msgfmt not found],) AC_SUBST(MSGFMT) AC_PATH_PROGS(HG, hg, /usr/local/hg) AC_SUBST(HG) AC_PATH_PROGS(GZIP_PROG, gzip) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_SUBST(GZIP_PROG) AC_SUBST(TAR) AC_SUBST(MD5) dnl The "test" program can be different from the "sh" builtin. AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PKGCONFIG, pkg-config) dnl xsltproc is required for building the man pages AC_PATH_PROGS(XSLTPROC, xsltproc) dnl ************************************************************************ dnl Check whether non-root user can chown. dnl ************************************************************************ if test -n "$WHOAMI"; then IAM=`$WHOAMI` fi AC_MSG_CHECKING(if chown works for non-root) F="./.chown-test.$$" touch $F if case "$IAM" in root) chown nobody $F; su nobody -c "chown root $F";; *) chown root $F esac >/dev/null 2>&1 then nonroot_chown=yes AC_MSG_RESULT(yes) else nonroot_chown=no AC_MSG_RESULT(no) fi rm -f $F AM_CONDITIONAL(NONROOT_CHOWN, test "$nonroot_chown" = yes ) dnl ************************************************************************ dnl checks for libraries dnl ************************************************************************ AC_CHECK_LIB(posix4, sched_getscheduler) dnl -lposix4 AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc... AC_CHECK_LIB(dl, dlopen) dnl -ldl (for Linux) AC_CHECK_LIB(rt, sched_getscheduler) dnl -lrt (for Tru64) AC_CHECK_LIB(gnugetopt, getopt_long) dnl -lgnugetopt ( if available ) # Not sure what OSes need this... Linux and AIX don't... # and AIX barfs on it... 
if test "$DLPREOPEN" = yes; then DLOPEN_FORCE_FLAGS="-dlpreopen force -dlopen-self" AC_DEFINE(DLPREOPEN, 1, [enable -dlpreopen flag]) else DLOPEN_FORCE_FLAGS="" fi AC_SUBST(DLOPEN_FORCE_FLAGS) dnl ************ uuid ********************** AC_ARG_WITH(uuid, [ --with-uuid=UUID mechanism for uuid {e2fsprogs|ossp|any|no} "e2fsprogs": e2fsprogs, typically Linux "ossp": not yet implemented "any": (default) any of the above, fallback to inbuilt "no": use inbuilt ], [ uuidimpl="$withval" ], [ uuidimpl="any" ], ) case $uuidimpl in e2fsprogs) AC_CHECK_LIB(uuid, uuid_parse,, AC_MSG_ERROR([e2fsprogs uuid library was explicitly requested but not found]) ) ;; ossp) AC_CHECK_LIB(uuid, uuid_create,, AC_MSG_ERROR([ossp uuid library was explicitly requested but not found]) ) ;; no) # Do not define anything; so use inbuilt (replace) implementation. ;; any) # Default: try to discover an implementation that we can use. AC_CHECK_LIB(uuid, uuid_parse) dnl e2fsprogs dnl AC_CHECK_LIB(uuid, uuid_create) dnl ossp ;; *) AC_MSG_ERROR([An invalid uuid library was explicitly requested]) ;; esac case "$host_os" in darwin*) dnl Recent Darwin versions do not need to link against a uuid library dnl Maybe this can be moved up into the previous block but it also might dnl break things (ie. the later check for uuid_parse) AC_CHECK_FUNCS(uuid_parse) esac AC_MSG_CHECKING(uuid implementation:) if test "$ac_cv_lib_uuid_uuid_parse" = yes; then AC_MSG_RESULT(e2fsprogs) elif test "$ac_cv_func_uuid_parse" = yes; then AC_MSG_RESULT(native) elif test "$ac_cv_lib_uuid_uuid_create" = yes; then AC_MSG_RESULT(ossp) AC_MSG_ERROR([heartbeat does not yet support ossp implementation of uuid]) else AC_MSG_RESULT(inbuilt) if test x"$uuidimpl" != x"no"; then WarnMissingThing "uuid library" \ "e2fsprogs uuid library is recommended" \ "On Linux this is the e2fsprogs-devel package" \ "See also http://sourceforge.net/projects/e2fsprogs/" fi fi dnl ************ uuid ********************** EXTRAGLIBMSG="-" if test "X${PKGCONFIG}" = "X"; then AC_MSG_RESULT(not found) FatalMissingThing "pkgconfig" "Package pkgconfig is required" \ "See http://pkgconfig.sourceforge.net/" EXTRALIBMSG="(this message might be bogus because pkgconfig is missing)" fi if test "x${enable_thread_safe}" = "xyes"; then GPKGNAME="gthread-2.0" else GPKGNAME="glib-2.0" fi if test "X${PKGCONFIG}" != "X" && $PKGCONFIG --exists $GPKGNAME; then GLIBCONFIG="$PKGCONFIG $GPKGNAME" else set -x echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH $PKGCONFIG --exists $GPKGNAME; echo $? $PKGCONFIG --cflags $GPKGNAME; echo $? $PKGCONFIG $GPKGNAME; echo $? set +x FatalMissingThing "glib2-devel" \ "Package glib2-devel is missing." \ "You can get the source from ftp://ftp.gtk.org/pub/gtk/" \ "or you can locate it via http://www.gtk.org/download/" "$EXTRALIBMSG" fi AC_MSG_RESULT(using $GLIBCONFIG) sendarp_linux=0 case $host_os in *Linux*|*linux*) sendarp_linux=1;; esac AC_MSG_CHECKING(where is python installed) if test "x${PYTHON}" = x; then PYTHON="/usr/bin/env python"; fi AC_MSG_RESULT(using $PYTHON); AC_MSG_CHECKING(if netstat supports -f inet flag) $NETSTAT -rn -f inet > /dev/null 2>&1 if test x"$?" 
= x"0"; then AC_DEFINE(NETSTATPARM, "-rn -f inet ", parameters to netstat to retrieve route information) AC_MSG_RESULT(yes) else AC_DEFINE(NETSTATPARM, "-rn ", parameters to netstat to retrieve route information) AC_MSG_RESULT(no) fi if test X${PING} = X then FatalMissingThing "ping" "ping command is mandantory" fi dnl Determine if we use -w1 or -t1 for ping (in PYTHON Scripts) AC_MSG_CHECKING(ping option for timeout - w or t) for PING_TIMEOUT_OPT in "-w1" "-t1" do $PING -nq -c1 $PING_TIMEOUT_OPT localhost > /dev/null 2>&1 if test "$?" = 0 then AC_DEFINE_UNQUOTED(PING_TIMEOUT_OPT, "$PING_TIMEOUT_OPT", option for ping timeout) AC_MSG_RESULT($PING_TIMEOUT_OPT) break fi done AC_SUBST(PING_TIMEOUT_OPT) dnl Determining a route (particularly for "findif"). dnl There are various mechanisms on different systems. dnl Some mechanisms require root access to evaluate, but configure is often dnl (indeed, some say should be always) running non-root. dnl dnl Therefore much of the determination has to be run-time. dnl So the principle here, at configure time, is to explore what might be dnl available, and offer as much as possible to run-time. dnl AC_DEFINE(ROUTEPARM, "-n get", paramters for route to retrieve route information) AC_DEFINE(PROCROUTE, "/proc/net/route", path were to find route information in proc) AC_MSG_CHECKING(ifconfig option to list interfaces) for IFCONFIG_A_OPT in "-A" "-a" "" do $IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1 if test "$?" = 0 then AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command) AC_MSG_RESULT($IFCONFIG_A_OPT) break fi done AC_SUBST(IFCONFIG_A_OPT) AC_SUBST(WHOAMI) AC_SUBST(HTML2TXT) AC_SUBST(POD2MAN) AC_SUBST(ROUTEPARM) AC_SUBST(PROCROUTE) dnl Locales check - is a real ugly workaround for now til I find dnl something more useful dnl dnl "Eric Z. Ayers" wrote: dnl > dnl > Here are some more data points: dnl > dnl > SUN: /usr/lib/locale/ dnl > HP : /usr/lib/nls/loc/ dnl > OSF: /usr/lib/nls/loc/ dnl > LINUX: /usr/share/locale/ dnl > UNIXWARE: /usr/lib/locale/ dnl > FREEBSD: /usr/share/locale dnl > VMS: just kidding dnl FindADir() { type="$1" whatfor="$2" shift; shift; AC_MSG_CHECKING(for $whatfor directory) for dir do if test -d "$dir" then AC_MSG_RESULT($dir) echo $dir return 0 fi done AC_MSG_RESULT(not found) return 1 } locale_locations="/usr/share/locale /usr/lib/locale /usr/lib/nls/loc" LOCALE=`FindADir error "locale" $locale_locations` if test "X$LOCALE" = X then FatalMissingThing "Locale directory" "Locale directory is mandantory." fi RPMDIR=`FindADir warn "RPM" /usr/src/packages /usr/src/redhat` if test x"${HAVE_LIBRT}" = x""; then LIBRT="" else LIBRT=-lrt fi AC_SUBST(LIBRT) # # Where is dlopen? # if test "$ac_cv_lib_c_dlopen" = yes; then LIBADD_DL="" elif test "$ac_cv_lib_dl_dlopen" = yes; then LIBADD_DL=-ldl else LIBADD_DL=${lt_cv_dlopen_libs} fi dnl dnl Check for location of gettext dnl dnl On at least Solaris 2.x, where it is in libc, specifying lintl causes dnl grief. Ensure minimal result, not the sum of all possibilities. dnl And do libc first. dnl Known examples: dnl c: Linux, Solaris 2.6+ dnl intl: BSD, AIX FunIsInLib() { fun=$1 lib=$2 lib_var1="ac_cv_lib_${lib}_$fun" lib_var2="ac_cv_lib_${lib}___$fun" for v in $lib_var1 $lib_var2 do var=`eval echo '${'${v}'}'` case $var in yes) return 0;; no) return 1;; esac done return 0 } for gt_test_lib in c intl do AC_CHECK_LIB($gt_test_lib, gettext) if FunIsInLib gettext $gt_test_lib; then break fi done # # Where is gettext()? 
# if FunIsInLib gettext c ; then LIBADD_INTL="" elif FunIsInLib gettext intl ; then LIBADD_INTL=-lintl elif test -f /usr/local/lib/libintl.so -a -s /usr/local/lib/libintl.so; then # This was added for FreeBSD LIBADD_INTL="-lintl" elif test -f /sw/lib/libintl.a -a -s /sw/lib/libintl.la -a -s /sw/lib/libintl.dylib; then # This was added for Darwin + Fink LIBADD_INTL="-lintl" else FatalMissingThing "gettext function" "no library providing gettext found" fi dnl dnl Glib allows its headers/libraries to be installed anywhere. dnl And they provide a command to let you know where they are. dnl This is nice, but having them in standard places is nice too ;-) dnl if test "X$GLIBCONFIG" != X; then AC_MSG_CHECKING(for special glib includes: ) GLIBHEAD=`$GLIBCONFIG --cflags` AC_MSG_RESULT($GLIBHEAD) CPPFLAGS="$CPPFLAGS $GLIBHEAD" AC_SUBST(GLIBHEAD) dnl Note: Not bundling "GLIBLIB" with general "LIBS". dnl 1. Only very few programs require GLIBLIB dnl (This isn't true anymore -- AlanR) dnl 2. Danger of creating run-time dependency on build-time LD_LIBRARY_PATH AC_MSG_CHECKING(for glib library flags) GLIBLIB=`$GLIBCONFIG --libs` AC_MSG_RESULT($GLIBLIB) AC_SUBST(GLIBLIB) fi dnl ************************************************************************ dnl checks for header files dnl dnl check for ANSI *.h files first dnl asm/page.h: Linux, for system PAGE_SIZE AC_HEADER_STDC AC_CHECK_HEADERS(db.h) AC_CHECK_HEADERS(asm/page.h) AC_CHECK_HEADERS(time.h) AC_CHECK_HEADERS(stdarg.h) AC_CHECK_HEADERS(tcpd.h) AC_CHECK_HEADERS(uuid.h) AC_CHECK_HEADERS(uuid/uuid.h) AC_CHECK_HEADERS(sys/param.h) AC_CHECK_HEADERS(netinet/in.h) AC_CHECK_HEADERS([stdint.h unistd.h]) AC_CHECK_HEADERS(sys/termios.h) AC_CHECK_HEADERS(sys/reboot.h) AC_CHECK_HEADERS(termios.h) dnl ************************************************************************ dnl FreeBSD requires sys/param.h and in.h to compile test netinet headers. dnl ************************************************************************ if test "$ac_cv_header_sys_param_h" -a "$ac_cv_header_netinet_in_h" = no; then AC_CHECK_HEADERS(netinet/in_systm.h) AC_CHECK_HEADERS(netinet/ip.h) AC_CHECK_HEADERS(netinet/ip_var.h) AC_CHECK_HEADERS(netinet/ip_compat.h) AC_CHECK_HEADERS(netinet/ip_fw.h) else AC_CHECK_HEADERS(netinet/in_systm.h,[],[],[#include #include ]) if test "$ac_cv_header_in_systm_h" = no; then AC_CHECK_HEADERS(netinet/ip.h,[],[],[#include #include ]) else AC_CHECK_HEADERS(netinet/ip.h,[],[],[#include #include #include ]) fi AC_CHECK_HEADERS(netinet/ip_var.h,[],[],[#include #include ]) AC_CHECK_HEADERS(netinet/ip_compat.h,[],[],[#include #include ]) AC_CHECK_HEADERS(netinet/ip_fw.h,[],[],[#include #include ]) fi AC_CHECK_HEADERS(sys/sockio.h) AC_CHECK_HEADERS(libintl.h) AC_CHECK_HEADERS(sys/types.h) AC_CHECK_HEADERS(sys/socket.h) AC_CHECK_HEADERS(arpa/inet.h) AC_CHECK_HEADERS(net/ethernet.h) AC_CHECK_HEADERS(malloc.h) AC_CHECK_HEADERS(termio.h) AC_CHECK_HEADERS(getopt.h) AC_CHECK_HEADERS(sys/prctl.h) AC_CHECK_HEADERS(linux/watchdog.h,[],[],[#include ]) dnl Sockets are our preferred and supported comms mechanism. But the dnl implementation needs to be able to convey credentials: some don't. dnl So on a few OSes, credentials-carrying streams might be a better choice. dnl dnl Solaris releases up to and including "9" fall into this category dnl (its sockets don't carry credentials; streams do). dnl dnl At Solaris 10, "getpeerucred()" is available, for both sockets and dnl streams, so it should probably use (preferred) socket mechanism. 
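dnl
dnl Illustrative sketch only (not part of the build; it merely assumes the
dnl ucred.h header and getpeerucred() function probed for below): on
dnl Solaris 10 a daemon could read a peer's effective uid over either a
dnl socket or a stream roughly like this:
dnl
dnl   #include <sys/types.h>
dnl   #include <ucred.h>
dnl
dnl   static uid_t peer_euid(int fd)
dnl   {
dnl       ucred_t *uc = NULL;
dnl       uid_t uid = (uid_t)-1;
dnl       if (getpeerucred(fd, &uc) == 0) {
dnl           uid = ucred_geteuid(uc);   /* effective uid of the peer */
dnl           ucred_free(uc);            /* credential is caller-freed */
dnl       }
dnl       return uid;
dnl   }
dnl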
AC_CHECK_HEADERS(stropts.h) dnl streams available (fallback option) AC_CHECK_HEADERS(ucred.h) dnl e.g. Solaris 10 decl. of "getpeerucred()" AC_CHECK_FUNCS(getpeerucred) dnl ************************************************************************ dnl checks for headers needed by clplumbing On BSD AC_CHECK_HEADERS(sys/syslimits.h) if test "$ac_cv_header_sys_param_h" = no; then AC_CHECK_HEADERS(sys/ucred.h) else AC_CHECK_HEADERS(sys/ucred.h,[],[],[#include ]) fi dnl ************************************************************************ dnl checks for headers needed by clplumbing On Solaris AC_CHECK_HEADERS(sys/cred.h xti.h) dnl ************************************************************************ dnl checks for headers needed by clplumbing On FreeBSD/Solaris AC_CHECK_HEADERS(sys/filio.h) # # We've had this new API for a very long time # AC_DEFINE(HAVE_NEW_HB_API, 1, [have new heartbeat api]) dnl *************************************************************************** dnl Enable distro-style RPMs dnl *************************************************************************** AC_ARG_ENABLE([distro-rpm], [ --enable-distro-rpm Enable distro-style RPMs [default=no]], [], [enable_distro_rpm=no]) case $enable_distro_rpm in yes) DISTRO_STYLE_RPMS=1;; *) DISTRO_STYLE_RPMS=0;; esac AC_SUBST(DISTRO_STYLE_RPMS) dnl *************************************************************************** dnl RDS communication module dnl *************************************************************************** AC_ARG_ENABLE([rds], [ --enable-rds Enable known-to-be-broken proof-of-concept RDS module, [default=no]], [enable_rds=yes], []) AM_CONDITIONAL(BUILD_RDS_MODULE, test "x${enable_rds}" = "xyes") dnl *************************************************************************** dnl TIPC communication module dnl *************************************************************************** AC_ARG_ENABLE([tipc], [ --enable-tipc Enable TIPC Communication module, [default=try]], [], [enable_tipc=try]) AC_ARG_WITH([tipc], [ --with-tipc-source TIPC source code directory], [], []) TIPC_DEV_ROOT=${with_tipc_source} tipc_headers_found=yes if test "x${TIPC_DEV_ROOT}" != "x"; then dnl checking tipc.h in specified directory TIPC_HEADER_DIR=${TIPC_DEV_ROOT}/include AC_CHECK_HEADER([${TIPC_HEADER_DIR}/net/tipc/tipc.h], [], \ [tipc_headers_found=no] ) TIPC_INCLUDE="-I${TIPC_HEADER_DIR}" else dnl checking tipc.h in standard include directory AC_CHECK_HEADERS(net/tipc/tipc.h, [], [tipc_headers_found=no]) fi AC_SUBST(TIPC_INCLUDE) AM_CONDITIONAL(BUILD_TIPC_MODULE, test "x${tipc_headers_found}" = "xyes" && test "x${enable_tipc}" != "xno") dnl *************************************************************************** dnl Thread safe configuration dnl *************************************************************************** AM_CONDITIONAL(ENABLE_THREAD_SAFE, test "x${enable_thread_safe}" = "xyes") dnl ************************************************************************ dnl Handy function for checking for typedefs or struct defs dnl ************************************************************************ check_for_type() { type="$1" headers="" shift for arg do headers="${headers}${arg} " done program="if ((${type} *) 0) return 0; if (sizeof(${type})) return 0; return 0;" have="HAVE_`echo "$type" | tr ' ' '_' | dd conv=ucase 2>/dev/null`" varhave="heartbeat_cv_$have" AC_CACHE_CHECK([for type $type ],$varhave,[ AC_TRY_COMPILE([$headers], [$program], eval $varhave=yes, eval $varhave=no , eval $varhave=cross) ]) if test x"`eval 
echo '${'$varhave'}'`" = xyes; then return 0 fi return 1 } check_for_type_member() { type="$1" member="$2" headers="" shift shift for arg do headers="${headers}${arg} " done program="${type} foo; if ((${type} *) 0) return 0; if (sizeof(${type})) return 0; if (sizeof(foo)) return 0; (void*)foo.${member}; return 0;" have="HAVE_`echo "$type" | tr ' ' '_' | dd conv=ucase 2>/dev/null`" varhave="heartbeat_cv_$have" AC_CACHE_CHECK([for type $type ],$varhave,[ AC_TRY_COMPILE([$headers], [$program], eval $varhave=yes, eval $varhave=no , eval $varhave=cross) ]) if test x"`eval echo '${'$varhave'}'`" = xyes; then return 0 fi return 1 } dnl ************************************************************************ dnl checks for typedefs dnl dnl if not known on this system, #define size_t unsigned AC_TYPE_SIZE_T dnl dnl Check poll.h for nfds_t (Linux Only), if not define it as an unsigned long int. dnl if check_for_type "nfds_t" "#include "; then AC_DEFINE(HAVE_NFDS_T,1,[Do we have nfds_t?]) fi dnl ************************************************************************ dnl checks for structures dnl # # Look for all the variants of local/UNIX socket credentials # # Include all of these headers that we can find... # headers="" for hdr in "sys/param.h" "sys/socket.h" "sys/ucred.h" do hdrvar=ac_cv_header_`echo $hdr | sed -e 's%\.%_%' -e 's%/%_%'` if test x"`eval echo '${'$hdrvar'}'`" = xyes; then headers="$headers #include <$hdr>" fi done if check_for_type_member "struct ucred" "cr_ref" "$headers"; then AC_DEFINE(HAVE_STRUCT_UCRED_DARWIN,1,[Do we have the Darwin version of struct ucred?]) fi if check_for_type "struct ucred" "$headers"; then AC_DEFINE(HAVE_STRUCT_UCRED,1,[Do we have struct ucred?]) fi if check_for_type "struct cmsgcred" "$headers" ; then AC_DEFINE(HAVE_STRUCT_CMSGCRED,1,[Do we have struct cmsgcred?]) fi if check_for_type "struct fcred" "$headers"; then AC_DEFINE(HAVE_STRUCT_FCRED,1,[Do we have struct fcred?]) fi if check_for_type "struct cred" "$headers"; then AC_DEFINE(HAVE_STRUCT_CRED,1,[Do we have struct cred?]) fi if check_for_type "struct sockcred" "$headers"; then AC_DEFINE(HAVE_STRUCT_SOCKCRED,1,[Do we have struct sockcred?]) fi dnl Check TERMIOS for components (e.g. c_line not present on Solaris) dnl AC_CACHE_CHECK([for c_line in termios],samba_cv_HAVE_TERMIOS_C_LINE,[ AC_TRY_COMPILE([#include #include ], [struct termios ti; ti.c_line = 'a';], samba_cv_HAVE_TERMIOS_C_LINE=yes,samba_cv_HAVE_TERMIOS_C_LINE=no,samba_cv_HAVE_TERMIOS_C_LINE=cross)]) if test x"$samba_cv_HAVE_TERMIOS_C_LINE" = x"yes"; then AC_DEFINE(HAVE_TERMIOS_C_LINE,1,[ ]) fi dnl Check sockaddr_in for components (e.g. sin_len not present on Solaris) dnl AC_CACHE_CHECK([for sin_len in sockaddr_in],samba_cv_HAVE_SOCKADDR_IN_SIN_LEN,[ AC_TRY_COMPILE([#include #include ], [struct sockaddr_in si; si.sin_len = 1;], samba_cv_HAVE_SOCKADDR_IN_SIN_LEN=yes,samba_cv_HAVE_SOCKADDR_IN_SIN_LEN=no,samba_cv_HAVE_SOCKADDR_IN_SIN_LEN=cross)]) if test x"$samba_cv_HAVE_SOCKADDR_IN_SIN_LEN" = x"yes"; then AC_DEFINE(HAVE_SOCKADDR_IN_SIN_LEN,1,[ ]) fi dnl Check msghdr for components (e.g. 
msg_control/msg_controlen not present on Solaris) dnl AC_CACHE_CHECK([for msg_control in msghdr],samba_cv_HAVE_MSG_CONTROL_MSGHDR,[ AC_TRY_COMPILE([#include #include ], [struct msghdr mh; mh.msg_control = (void *)0;], samba_cv_HAVE_MSG_CONTROL_MSGHDR=yes,samba_cv_HAVE_MSG_CONTROL_MSGHDR=no,samba_cv_HAVE_MSG_CONTROL_MSGHDR=cross)]) if test x"$samba_cv_HAVE_MSG_CONTROL_MSGHDR" = x"yes"; then MSGHDR_TYPE="msghdr" IPCSOCKET_C="ipcsocket.c" IPCSOCKET_LO="ipcsocket.lo" else MSGHDR_TYPE="nmsghdr" IPCSOCKET_C="" IPCSOCKET_LO="" fi dnl Check syslog.h for 'facilitynames' table dnl AC_CACHE_CHECK([for facilitynames in syslog.h],ac_cv_HAVE_SYSLOG_FACILITYNAMES,[ AC_TRY_COMPILE([ #define SYSLOG_NAMES #include #include ], [ void *fnames; fnames = facilitynames; ], ac_cv_HAVE_SYSLOG_FACILITYNAMES=yes,ac_cv_HAVE_SYSLOG_FACILITYNAMES=no,ac_cv_HAVE_SYSLOG_FACILITYNAMES=cross)]) if test x"$ac_cv_HAVE_SYSLOG_FACILITYNAMES" = x"yes"; then AC_DEFINE(HAVE_SYSLOG_FACILITYNAMES,1,[ ]) fi dnl 'reboot()' system call: one argument (e.g. Linux) or two (e.g. Solaris)? dnl AC_CACHE_CHECK([number of arguments in reboot system call], ac_cv_REBOOT_ARGS,[ AC_TRY_COMPILE( [#include ], [(void)reboot(0);], ac_cv_REBOOT_ARGS=1, [AC_TRY_COMPILE( [#include ], [(void)reboot(0,(void *)0);], ac_cv_REBOOT_ARGS=2, ac_cv_REBOOT_ARGS=0 )], ac_cv_REBOOT_ARGS=0 ) ] ) dnl Argument count of 0 suggests no known 'reboot()' call. if test "$ac_cv_REBOOT_ARGS" -ge "1"; then AC_DEFINE_UNQUOTED(REBOOT_ARGS,$ac_cv_REBOOT_ARGS,[number of arguments for reboot system call]) fi dnl dnl Check for ALIGN in /sys/param.h dnl AC_MSG_CHECKING("for ALIGN in sys/param.h") AC_EGREP_CPP(FoundAlign, [#include #ifdef ALIGN FoundAlign #endif ], [ ALIGN="ALIGN" AC_MSG_RESULT(Yes) ], AC_MSG_RESULT("No")) AC_MSG_CHECKING("for T_ALIGN in xti.h") AC_EGREP_CPP(FoundAlign, [#include #ifdef T_ALIGN FoundAlign #endif ], [ ALIGN="T_ALIGN" AC_MSG_RESULT(Yes) ], AC_MSG_RESULT(No)) AC_SUBST(IPCSOCKET_C) AC_SUBST(IPCSOCKET_LO) AC_SUBST(ALIGN) AC_SUBST(MSGHDR_TYPE) dnl ************************************************************************ dnl checks for compiler characteristics dnl dnl Warnings for C compilers. Note: generic, portable warnings only. dnl Things likely to be specific to a particular OS or module should be dnl carefully handled afterwards. AC_C_STRINGIZE dnl ********************************************************************** dnl time-related declarations etc. AC_STRUCT_TIMEZONE if check_for_type_member "struct tm" "tm_gmtoff" "#include "; then AC_DEFINE(HAVE_TM_GMTOFF,1,[Do we have structure member tm_gmtoff?]) fi dnl ********************************************************************** dnl Check the size of the integer types dnl So we can have integers of known sizes as needed dnl AC_CHECK_SIZEOF(char) AC_CHECK_SIZEOF(short) AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(long long) AC_CHECK_SIZEOF(clock_t, [], [#include ]) AC_ARG_ENABLE([all], [ --enable-all Activate ALL features [default=no]]) AC_ARG_ENABLE([rpath], [ --enable-rpath Enable RPATH in libtool [default=yes]]) AC_ARG_ENABLE([ansi], [ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers. 
[default=yes]]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]], [], [enable_fatal_warnings=unknown]) AC_ARG_ENABLE([times-kludge], [ --enable-times-kludge enables a kludge to workaround a bug in glibc's times(2) call [default=yes]], [], [enable_times_kludge=unknown]) AC_ARG_ENABLE([no-long-long], [ --enable-no-long-long removes no long long warning for gcc [default=yes]], [], [enable_no_long_long=yes]) AC_ARG_ENABLE([traditional-warnings], [ --enable-traditional-warnings enable traditional warnings gcc (-Wtraditional) [default=no]]) AC_ARG_ENABLE([pretty], [ --enable-pretty Pretty-print compiler output unless there is an error [default=no]]) AC_ARG_ENABLE([quiet], [ --enable-quiet Supress make output unless there is an error [default=no]]) AC_ARG_ENABLE([thread-safe], [ --enable-thread-safe Enable some client libraries to be thread safe. [default=no]]) CC_WARNINGS="" enable_crm=no dnl - If requested, enable ALL subsystems. if test "${enable_all}" = "yes" ; then echo "Enabling all optional features." enable_ansi=yes; enable_fatal_warnings=yes; enable_quorumd=try; fi AC_ARG_ENABLE([bundled-ltdl], [ --enable-bundled-ltdl Configure, build and install the standalone ltdl library bundled with Heartbeat [default=no]]) LTDL_LIBS="" dnl Check before we enable -Wstrict-prototypes as it causes the test to fail AC_CHECK_LIB(ltdl, lt_dlopen, [LTDL_foo=1]) if test "x${enable_bundled_ltdl}" = "xyes"; then if test $ac_cv_lib_ltdl_lt_dlopen = yes; then AC_MSG_NOTICE([Disabling usage of installed ltdl]) fi ac_cv_lib_ltdl_lt_dlopen=no fi LIBLTDL_DIR="" if test $ac_cv_lib_ltdl_lt_dlopen != yes ; then AC_MSG_NOTICE([Installing local ltdl]) LIBLTDL_DIR=libltdl ( cd $srcdir ; $TAR -xvf libltdl.tar ) if test "$?" -ne 0; then AC_MSG_ERROR([$TAR of libltdl.tar in $srcdir failed]) fi AC_CONFIG_SUBDIRS(libltdl) else LIBS="$LIBS -lltdl" AC_MSG_NOTICE([Using installed ltdl]) INCLTDL="" LIBLTDL="" fi dnl libltdl additions AC_SUBST(INCLTDL) AC_SUBST(LIBLTDL) AC_SUBST(LIBLTDL_DIR) AC_MSG_CHECKING(if clock_t is long enough) if test $ac_cv_sizeof_clock_t -ge 8; then AC_MSG_RESULT(yes) AC_DEFINE(CLOCK_T_IS_LONG_ENOUGH, 1, [Set if CLOCK_T is adequate by itself for the "indefinite future" (>= 100 years)]) else AC_MSG_RESULT(no) fi dnl ************ curses ********************** dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses". dnl Many non-Linux deliver "curses"; sites may add "ncurses". dnl dnl However, the source-code recommendation for both is to #include "curses.h" dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h"). dnl dnl Andrew Beekhof (author of heartbeat code that uses this functionality) dnl wishes "ncurses" to take precedence. So the following ordering has dnl been devised to implement this. dnl dnl Look first for the headers, then set the libraries accordingly. dnl (Normally autoconf suggests looking for libraries first.) dnl AC_CHECK_HEADERS(curses.h) AC_CHECK_HEADERS(curses/curses.h) AC_CHECK_HEADERS(ncurses.h) AC_CHECK_HEADERS(ncurses/ncurses.h) dnl Although n-library is preferred, only look for it if the n-header was found. 
CURSESLIBS='' if test "$ac_cv_header_ncurses_h" = "yes"; then AC_CHECK_LIB(ncurses, printw, [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)] ) fi if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then AC_CHECK_LIB(ncurses, printw, [CURSESLIBS='-lncurses'; AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)] ) fi dnl Only look for non-n-library if there was no n-library. if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then AC_CHECK_LIB(curses, printw, [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)] ) fi dnl Only look for non-n-library if there was no n-library. if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then AC_CHECK_LIB(curses, printw, [CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)] ) fi AC_SUBST(CURSESLIBS) dnl ************ curses ********************** if test "$GCC" = yes; then dnl dnl We make sure warnings are carefully scrubbed out of the output if dnl you're running on some platforms. You can enable that behavior with dnl the option "fatal-warnings", by specifying --enable-fatal-warnings. dnl Or, you can disable it with --disable-fatal-warnings. dnl dnl Horms 10th August 2001 dnl Don't do this, it seems to cause configure in dnl the libltdl/ directory to die under Debian Woody dnl I'm suspecting it will be a problem on other systems too. dnl For this reason it now defaults to off. dnl AlanR 11 August 2001 dnl Show no mercy to broken OSes and other software. If you have broken dnl software, turn this feature off. dnl NO warnings WHATSOVER will be tolerated without good reason. dnl A distribution being broken isn't a good reason. dnl The cure for that is fix the distribution, not destroy the integrity dnl of the entire project by defaulting it to "off". dnl In my experience, there are ways of making individual warnings go dnl away. dnl The only way I know to keep them out is to make them an absolute dnl pain to deal with. Otherwise they're a pain to fix. dnl This policy is not an accident, nor was it chosen without significant dnl thought and experience. dnl cc_supports_flag() { AC_MSG_CHECKING(whether $CC supports "$@") Cfile=/tmp/foo${$} touch ${Cfile}.c $CC -c "$@" ${Cfile}.c -o ${Cfile}.o >/dev/null 2>&1 rc=$? rm -f ${Cfile}.c ${Cfile}.o case $rc in 0) AC_MSG_RESULT(yes);; *) AC_MSG_RESULT(no);; esac return $rc } dnl ************ printw ********************** if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual \ && cc_supports_flag -Werror; then dnl Check for printw() prototype compatibility dnl FIXME: We can check che prototype compatibility only if $CC supports dnl -Wcast-qual and -Werror AC_MSG_CHECKING(whether printw() requires argument of "const char *") ac_save_LIBS=$LIBS LIBS="$CURSESLIBS $LIBS" ac_save_CFLAGS=$CFLAGS CFLAGS="-Wcast-qual -Werror" AC_LINK_IFELSE( [AC_LANG_PROGRAM( [ #if defined(HAVE_CURSES_H) # include #elif defined(HAVE_NCURSES_H) # include #endif ], [printw((const char *)"Test");] )], [ac_cv_compatible_printw=yes], [ac_cv_compatible_printw=no] ) LIBS=$ac_save_LIBS CFLAGS=$ac_save_CFLAGS AC_MSG_RESULT([$ac_cv_compatible_printw]) if test "$ac_cv_compatible_printw" = no; then AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. 
You can get the library from http://www.gnu.org/software/ncurses/.]) AC_MSG_NOTICE([Disabling curses]) AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?]) dnl AC_DEFINE(HAVE_CURSES_H, 0) dnl AC_DEFINE(HAVE_NCURSES_H, 0) fi fi dnl ************ printw ********************** EXTRA_WARNINGS="" # We had to eliminate -Wnested-externs because of libtool changes WARNLIST="all missing-prototypes missing-declarations strict-prototypes declaration-after-statement pointer-arith write-strings cast-qual cast-align bad-function-cast inline missing-format-attribute format=2 format-security format-nonliteral no-long-long no-strict-aliasing" for j in $WARNLIST do if cc_supports_flag -W$j then case $j in "no-long-long") if test "${enable_no_long_long}" = "yes"; then EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j" fi;; *) EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";; esac fi done dnl Add any system specific options here. if test "${enable_ansi}" = "unknown"; then enable_ansi=yes fi case "$host_os" in *linux*|*bsd*) if test "${enable_fatal_warnings}" = "unknown"; then enable_fatal_warnings=yes fi ;; *solaris*) ;; esac case "$host_os" in *linux*) if test "${enable_times_kludge}" = "unknown"; then enable_times_kludge=yes fi ;; esac if test "${enable_ansi}" = yes && cc_supports_flag -std=iso9899:199409 ; then echo "Enabling ANSI Compatibility on this platform" ANSI="-ansi -D_GNU_SOURCE -DANSI_ONLY" fi if test "${enable_fatal_warnings}" = yes && cc_supports_flag -Werror ; then echo "Enabling Fatal Warnings (-Werror) on this platform" FATAL_WARNINGS="-Werror" fi if test "$enable_traditional_warning" = yes && \ cc_supports_flag -Wtraditional; then echo "Enabling traditional warnings" EXTRA_WARNINGS="$EXTRA_WARNINGS -Wtraditional" fi CC_WARNINGS="$EXTRA_WARNINGS $FATAL_WARNINGS $ANSI" NON_FATAL_CC_WARNINGS="$EXTRA_WARNINGS" dnl Inline semantics are to be changed as of gcc v4.3. Since dnl 4.1.3 gcc issues a warning in case there are inlines. That's the dnl case with glib2. ver2num() { awk -F. 
'{print $3+($2+($1*100))*100;}' } GCCVER=`gcc -v 2>&1 | awk 'END{print $3}' | ver2num` firstver=40103 # 4.1.3 lastver=40300 # 4.3.0 if test $GCCVER -ge $firstver -a $GCCVER -lt $lastver; then CFLAGS="$CFLAGS -fgnu89-inline" fi fi if test "${enable_times_kludge}" = no; then echo "Disabling times(2) kludge" AC_DEFINE(DISABLE_TIMES_KLUDGE, 1, [disable times(2) kludge]) fi AC_SUBST(DISABLE_TIMES_KLUDGE) AC_MSG_CHECKING(which init (rc) directory to use) INITDIR="" for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir AC_MSG_RESULT($INITDIR); break fi done AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ if test x"$withval" = xprefix; then INITDIR=${prefix}; else INITDIR="$withval"; fi ]) if test "X$INITDIR" = X then FatalMissingThing "init directory" "Could not locate init directory" fi AC_SUBST(INITDIR) dnl Checking for init defaults directory dnl Linux: /etc/sysconfig dnl Solaris: /etc/default AC_MSG_CHECKING(which init defaults directory to use) INITDEFDIR="" for iddir in /etc/sysconfig /etc/default do if test -d $iddir then INITDEFDIR=$iddir AC_MSG_RESULT($INITDEFDIR); break fi done AC_ARG_WITH(iddir, [ --with-initdefdir=DIR directory for init defaults [${INITDEFDIR}]], [ if test x"$withval" = xprefix; then INITDEFDIR=${prefix}; else INITDEFDIR="$withval"; fi ]) if test "X$INITDEFDIR" = X then WarnMissingThing "init defaults directory" "Could not locate init defaults directory" fi AC_SUBST(INITDEFDIR) dnl ********************************************************************** dnl checks for group to install fifos as dnl dnl These checks aren't right. We need to locate the lowest dnl unused group id if haclient isn't in the /etc/group file dnl and no one has overridden group-id with a --with-group-id dnl option. dnl if test -z "${IP2UTIL}" -o ! -x "${IP2UTIL}" then IP2UTIL=/sbin/ip fi HA_CCMUSER=`extract_header_define $GLUE_HEADER HA_CCMUSER` AC_SUBST(HA_CCMUSER) HA_APIGROUP=`extract_header_define $GLUE_HEADER HA_APIGROUP` AC_SUBST(HA_APIGROUP) if test -z "${GETENT}" -o ! -x "${GETENT}" then GETENT=getent getent() { grep "^${2}:" /etc/$1 } fi AC_ARG_WITH( group-id, [ --with-group-id=GROUP_ID GROUP_ID to run our programs as. [default=60] ], [ HA_APIGID="$withval" ], [ HA_APIGID=65 ], ) if getent group "$HA_APIGROUP" > /dev/null then HA_APIGID=`getent group "$HA_APIGROUP" | cut -d: -f3` fi AC_SUBST(HA_APIGID) AC_DEFINE_UNQUOTED(HA_APIGID, $HA_APIGID, id for api group) AC_ARG_WITH( ccmuser-id, [ --with-ccmuser-id=HA_HA_CCMUSER_ID USER_ID to run privileged non-root things as. [default=17] ], [ HA_CCMUID="$withval" ], [ HA_CCMUID=17 ], ) if getent passwd "$HA_CCMUSER" >/dev/null then HA_CCMUID=`getent passwd "$HA_CCMUSER" | cut -d: -f3` fi AC_SUBST(HA_CCMUID) AC_DEFINE_UNQUOTED(HA_CCMUID, $HA_CCMUID, id for ccm user) # # Priority for starting via init startup scripts # AC_ARG_WITH( start-init-priority, [ --with-start-init-priority=number Init start priority. [default=75] ], [ HB_INITSTARTPRI="$withval" ], [ HB_INITSTARTPRI=75 ], ) AC_SUBST(HB_INITSTARTPRI) AC_DEFINE_UNQUOTED(HB_INITSTARTPRI,"$HB_INITSTARTPRI", init start priority) # # Priority for stopping via init shutdown scripts # AC_ARG_WITH( stop-init-priority, [ --with-stop-init-priority=number Init stop priority. 
[default=5] ], [ HB_INITSTOPPRI="$withval" ], [ HB_INITSTOPPRI=05 ], ) AC_SUBST(HB_INITSTOPPRI) AC_DEFINE_UNQUOTED(HB_INITSTOPPRI,"$HB_INITSTOPPRI", init stop priority) AC_ARG_WITH( logd-stop-init-priority, [ --with-logd-stop-init-priority=number Init stop priority. [default=75] ], [ LOGD_INITSTOPPRI="$withval" ], [ LOGD_INITSTOPPRI=75 ], ) AC_SUBST(LOGD_INITSTOPPRI) AC_DEFINE_UNQUOTED(LOGD_INITSTOPPRI,"$LOGD_INITSTOPPRI", init stop priority) dnl ************************************************************************ dnl checks for library functions to replace them dnl dnl alphasort: Only on BSD. dnl System-V systems may have it, but hidden and/or deprecated. dnl A replacement function is supplied for it. dnl dnl NoSuchFunctionName: dnl is a dummy function which no system supplies. It is here to make dnl the system compile semi-correctly on OpenBSD, which doesn't know dnl how to create an empty archive. dnl dnl scandir: Only on BSD. dnl System-V systems may have it, but hidden and/or deprecated. dnl A replacement function is supplied for it. dnl dnl setenv: is some bsdish function that should also be avoided (use dnl putenv instead). dnl On the other hand, putenv doesn't provide the right API for the dnl code and has memory leaks designed in (sigh...), so dnl a replacement function is supplied for setenv instead. dnl dnl setproctitle: sets the process title to a given string dnl dnl strerror: returns a string that corresponds to an errno. dnl A replacement function is supplied for it. dnl dnl unsetenv: is some bsdish function that should also be avoided. dnl A replacement function is supplied for it. dnl dnl strnlen: is a gnu function similar to strlen, but safer. dnl We wrote a tolerably-fast replacement function for it. dnl dnl strndup: is a gnu function similar to strdup, but safer. dnl We wrote a tolerably-fast replacement function for it. dnl dnl daemon: is a GNU function. The daemon() function is for programs wishing to dnl detach themselves from the controlling terminal and run in the dnl background as a system daemon. dnl A replacement function is supplied for it. dnl dnl Check Only dnl dnl getopt: If this is valid, define HAVE_DECL_GETOPT to make the getopt.h header compile cleanly.
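dnl
dnl Editor's sketch (illustrative only, not part of the Heartbeat sources):
dnl code that includes the getopt.h replacement header can key off the macro
dnl defined below, roughly like this:
dnl   #ifndef HAVE_DECL_GETOPT
dnl   extern int getopt(int argc, char * const argv[], const char *optstring);
dnl   #endif
dnl The guard name is real; the placement shown here is only an example.
dnl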
dnl AC_REPLACE_FUNCS(alphasort inet_pton NoSuchFunctionName scandir setenv strerror unsetenv strnlen strndup daemon uuid_parse strlcpy strlcat) dnl AC_CHECK_FUNCS(alphasort inet_pton NoSuchFunctionName scandir setenv strerror unsetenv strnlen strndup daemon uuid_parse) AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function])) AC_CHECK_FUNCS(fcntl) AC_CHECK_FUNCS(flock) AC_CHECK_FUNCS(inet_aton) AC_CHECK_FUNCS(mallinfo) AC_CHECK_FUNCS(mallopt) AC_CHECK_FUNCS(__default_morecore) AC_CHECK_FUNCS(seteuid) AC_CHECK_FUNCS(setegid) AC_CHECK_FUNCS(getpeereid) dnl ********************************************************************** dnl Check for various argv[] replacing functions on various OSs dnl dnl Borrowed from Proftpd dnl Proftpd is Licenced under the terms of the GNU General Public Licence dnl and is available from http://www.proftpd.org/ dnl AC_CHECK_FUNCS(setproctitle) AC_CHECK_HEADERS(libutil.h) AC_CHECK_LIB(util, setproctitle, [AC_DEFINE(HAVE_SETPROCTITLE,1,[ ]) ac_cv_func_setproctitle="yes" ; LIBS="$LIBS -lutil"]) if test "$ac_cv_func_setproctitle" = "yes"; then pf_argv_set="PF_ARGV_NONE" fi if test "$pf_argv_set" = ""; then AC_CHECK_HEADERS(sys/pstat.h) if test "$ac_cv_header_pstat_h" = "yes"; then AC_CHECK_FUNCS(pstat) if test "$ac_cv_func_pstat" = "yes"; then pf_argv_set="PF_ARGV_PSTAT" else pf_argv_set="PF_ARGV_WRITEABLE" fi fi if test "$pf_argv_set" = ""; then AC_EGREP_HEADER([#define.*PS_STRINGS.*],sys/exec.h, have_psstrings="yes",have_psstrings="no") if test "$have_psstrings" = "yes"; then pf_argv_set="PF_ARGV_PSSTRINGS" fi fi if test "$pf_argv_set" = ""; then AC_CACHE_CHECK(whether __progname and __progname_full are available, pf_cv_var_progname, AC_TRY_LINK([extern char *__progname, *__progname_full;], [__progname = "foo"; __progname_full = "foo bar";], pf_cv_var_progname="yes", pf_cv_var_progname="no")) if test "$pf_cv_var_progname" = "yes"; then AC_DEFINE(HAVE___PROGNAME,1,[ ]) fi AC_CACHE_CHECK(which argv replacement method to use, pf_cv_argv_type, AC_EGREP_CPP(yes,[ #if defined(__GNU_HURD__) yes #endif ],pf_cv_argv_type="new", pf_cv_argv_type="writeable")) if test "$pf_cv_argv_type" = "new"; then pf_argv_set="PF_ARGV_NEW" fi if test "$pf_argv_set" = ""; then pf_argv_set="PF_ARGV_WRITEABLE" fi fi fi AC_DEFINE_UNQUOTED(PF_ARGV_TYPE, $pf_argv_set, mechanism to pretty-print ps output: setproctitle-equivalent) dnl End of tests borrowed from Proftpd dnl check if header file and lib are there for hbaping hbaping_build="yes" AC_CHECK_HEADERS(time.h hbaapi.h, , [hbaping_build="no"],[[#if HAVE_TIME_H #include #endif]]) AC_CHECK_LIB(HBAAPI, HBA_SendScsiInquiry, , [hbaping_build="no"]) AM_CONDITIONAL(HBAAPI_BUILD, test "x${hbaping_build}" = "xyes") dnl check if header file and lib are there for zlib zlib_installed="yes" AC_CHECK_HEADERS(zlib.h, , [zlib_installed="no"],) AC_CHECK_LIB(z, compress , , [zlib_installed="no"]) AM_CONDITIONAL(BUILD_ZLIB_COMPRESS_MODULE, test "x${zlib_installed}" = "xyes") if test "x${zlib_installed}" = "xno"; then FatalMissingThing "zlib" \ "The zlib library is missing" fi dnl check if header file and lib are there for zlib bz2_installed="yes" AC_CHECK_HEADERS(bzlib.h, , [bz2_installed="no"],) AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress , , [bz2_installed="no"]) AM_CONDITIONAL(BUILD_BZ2_COMPRESS_MODULE, test "x${bz2_installed}" = "xyes") dnl check if header file and lib are there for openais module openais_installed="yes" AC_CHECK_HEADERS(evs.h, , [openais_installed="no"],) AC_CHECK_LIB(evs, evs_initialize , , [openais_installed="no"]) 
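dnl
dnl Editor's note: the AM_CONDITIONALs set in this area are consumed by
dnl automake in the corresponding Makefile.am files; contrib/Makefile.am, for
dnl instance, wraps its subdirectory in
dnl   if BUILD_DRBD_OUTDATE_PEER
dnl   SUBDIRS += drbd-outdate-peer
dnl   endif
dnl A hypothetical Makefile.am could guard an optional compression plugin the
dnl same way with BUILD_BZ2_COMPRESS_MODULE; the exact target names are
dnl whatever that Makefile.am defines, not something fixed here.
dnl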
AM_CONDITIONAL(BUILD_OPENAIS_MODULE, test "x${openais_installed}" = "xyes") dnl check if there are getpid() inconsistency dnl Note: reduce LIBS; in particular, ltdl can cause confusion. dnl Possibly better: move 'LIBS="$LIBS -lltdl"' from above to beyond here. dnl AC_MSG_CHECKING(for getpid() consistency in multi-process/threads program) ac_save_LIBS=$LIBS LIBS="-lpthread" AC_TRY_RUN(`cat $srcdir/config/pidtest.c`, AC_MSG_RESULT(ok), [AC_MSG_RESULT(fail); AC_DEFINE(GETPID_INCONSISTENT, 1 , [pid inconsistent])],) LIBS=$ac_save_LIBS dnl check byte order AC_MSG_CHECKING(for byteorder) AC_TRY_RUN(`cat $srcdir/config/byteorder_test.c`, [AC_MSG_RESULT(little-endian); AC_DEFINE(CONFIG_LITTLE_ENDIAN, 1, [little-endian])], [AC_MSG_RESULT(big-endian); AC_DEFINE(CONFIG_BIG_ENDIAN, 1, [big-endian])],) dnl dnl Lex and yacc can't be trusted to produce code that won't produce dnl warnings dnl NON_FATAL_CFLAGS="$CFLAGS $NON_FATAL_CC_WARNINGS" AC_SUBST(NON_FATAL_CFLAGS) dnl dnl We reset CFLAGS to include our warnings *after* all function dnl checking goes on, so that our warning flags don't keep the dnl AC_*FUNCS() calls above from working. In particular, -Werror will dnl *always* cause us troubles if we set it before here. dnl dnl CFLAGS="$CFLAGS $CC_WARNINGS" dnl NOTE: dnl This check should only be done after CFLAGS is set. Otherwise dnl linux box will complain because of a warning of the undefined dnl function sigignore(). dnl dnl In theory, all function checks should be done after the CFLAGS is dnl set since we are enforcing the -Werror. But this would have a big dnl impact on the whole source tree so I am only moving the dnl sigignore for now. A bit of a hack. dnl dnl sigignore: Only on Solaris. dnl it is a solaris replacement for signal(s,SIG_IGN). dnl AC_CHECK_FUNCS(sigignore) dnl dnl Make sure that CFLAGS is not exported. If the user did dnl not have CFLAGS in their environment then this should have dnl no effect. However if CFLAGS was exported from the user's dnl environment, then the new CFLAGS will also be exported dnl to sub processes. This causes a problem when configure dnl is run in the libltdl directory. if export | fgrep " CFLAGS=" > /dev/null; then SAVED_CFLAGS="$CFLAGS" unset CFLAGS CFLAGS="$SAVED_CFLAGS" unset SAVED_CFLAGS fi if test "$GCC" = yes; then CFLAGS="$CFLAGS -ggdb3" if cc_supports_flag -funsigned-char then CFLAGS="$CFLAGS -funsigned-char" fi else CFLAGS="$CFLAGS -g" fi dnl AC_SUBST(CC_WARNINGS) dnl ************************************************************************ dnl pre AC_OUTPUT stuff dnl dnl th aux dir (for holding config & autogenerated stuff) dnl AC_SUBST(ac_aux_dir) AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff... AC_SUBST(LOCALE) HA_HBCONF_DIR=$sysconfdir/ha.d AC_SUBST(HA_HBCONF_DIR) AC_DEFINE_UNQUOTED(HA_HBCONF_DIR,"$HA_HBCONF_DIR", Heartbeat configuration directory) HA_RC_DIR=$HA_HBCONF_DIR/rc.d AC_SUBST(HA_RC_DIR) AC_DEFINE_UNQUOTED(HA_RC_DIR,"$HA_RC_DIR", heartbeat v1 script directory) dnl ************************************************************************ dnl management and quorum daemons. AC_CHECK_HEADERS(gnutls/gnutls.h) dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program. dnl If no 'libgnutls-config', try traditional autoconf means. 
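dnl For reference (an editor's illustration; actual values depend on the
dnl installation), a libgnutls-config run typically looks like:
dnl   $ libgnutls-config --cflags   ->  -I/usr/include
dnl   $ libgnutls-config --libs     ->  -lgnutls
dnl and those strings become GNUTLSHEAD and GNUTLSLIBS below.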
AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config) if test -n "$LIBGNUTLS_CONFIG"; then AC_MSG_CHECKING(for gnutls header flags) GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`"; AC_MSG_RESULT($GNUTLSHEAD) AC_MSG_CHECKING(for gnutls library flags) GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`"; AC_MSG_RESULT($GNUTLSLIBS) else AC_CHECK_LIB(gnutls, gnutls_init) fi AC_SUBST(GNUTLSHEAD) AC_SUBST(GNUTLSLIBS) if test "x${enable_openhpi}" != "xno"; then RPM_ENABLE_OPENHPI=1 else RPM_ENABLE_OPENHPI=0 fi AC_SUBST(RPM_ENABLE_OPENHPI) dnl quorum server configuration AC_ARG_ENABLE([quorumd], [ --enable-quorumd Compile the quorum server. [default=no]], [], [enable_quorumd=no]) if test "x${enable_quorumd}" != "xno"; then if test "x${enable_quorumd}" = "xtry"; then MISSING_FN="WarnMissingThing" else MISSING_FN="FatalMissingThing" fi enable_quorumd="yes" QUORUMD_ENABLED=1 if test "$ac_cv_header_gnutls_gnutls_h" = "no"; then $MISSING_FN "gnutls/gnutls.h" \ "The quorum server module needs GNU/TLS header files" enable_quorumd="no" QUORUMD_ENABLED=0 fi fi AC_SUBST(QUORUMD_ENABLED) AM_CONDITIONAL(QUORUMD_BUILD, test "x${enable_quorumd}" != "xno") AC_MSG_NOTICE([whether to enable the quorum server... ${enable_quorumd}]) dnl ********************************************** dnl drbd peer outdate plugin configuration dnl ********************************************** case "$host_os" in *linux*) AC_ARG_ENABLE([dopd], [ --enable-dopd Compile the Drbd Outdate Peer Daemon and tools. [default=yes]], [], [enable_dopd=yes]) ;; *) enable_dopd=no ;; esac AM_CONDITIONAL(BUILD_DRBD_OUTDATE_PEER, test "x${enable_dopd}" != "xno") hb_libdir="${libdir}/${HB_PKG}" AC_SUBST(hb_libdir) HA_PLUGIN_DIR=`extract_header_define $GLUE_HEADER HA_PLUGIN_DIR` AC_SUBST(HA_PLUGIN_DIR) HB_RA_DIR=`extract_header_define $GLUE_HEADER HB_RA_DIR` AC_SUBST(HB_RA_DIR) OCF_ROOT_DIR=`extract_header_define $GLUE_HEADER OCF_ROOT_DIR` AC_SUBST(OCF_ROOT_DIR) OCF_RA_DIR=`extract_header_define $GLUE_HEADER OCF_RA_DIR` AC_SUBST(OCF_RA_DIR) LSB_RA_DIR=`extract_header_define $GLUE_HEADER LSB_RA_DIR` AC_SUBST(LSB_RA_DIR) AC_ARG_ENABLE([valgrind], [ --enable-valgrind "Run selected heartbeat components using Valgrind." [default=no]], [], [enable_valgrind=no]) AC_ARG_WITH(valgrind-log, [ --with-valgrind-log=options Logging options to pass to valgrind], [ VALGRIND_LOG="$withval" ], []) AC_ARG_WITH(valgrind-suppress, [ --with-valgrind-suppress=file Name of a suppression file to pass to Valgrind [default=/dev/null]], [ VALGRIND_SUPP="$withval" ], [ VALGRIND_SUPP="/dev/null" ]) if test "x" = "x$VALGRIND_LOG"; then VALGRIND_LOG="--log-socket=127.0.0.1:1234" AC_MSG_NOTICE(Set default Valgrind options to: $VALGRIND_LOG) AC_MSG_NOTICE(Remember to start a receiver on localhost:1234) fi AC_PATH_PROG(VALGRIND_BIN, valgrind) AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command) AC_DEFINE_UNQUOTED(VALGRIND_LOG, "$VALGRIND_LOG", Valgrind logging options) AC_DEFINE_UNQUOTED(VALGRIND_SUPP, "$VALGRIND_SUPP", Name of a suppression file to pass to Valgrind) dnl ********************************************************************** dnl 'AWK' had been determined via 'aclocal.m4' as the simple name, using dnl the current PATH (i.e. in the context of 'configure'). dnl dnl Things within heartbeat will use 'AWK', but from a different context, dnl so we should determine, and substitute, the full path. dnl dnl Note: Even that may have a flaw, e.g. if 'configure' finds (say) 'gawk', dnl which we here convert to '/path/to/gawk', but the run-time machine lacks it.
dnl We won't worry about that for now. dnl (David Lee; March 2007) AC_PATH_PROGS([AWK], $AWK) dnl ********************************************************************** dnl Enable optional, experimental directories here... dnl XPERIMENTALDIRS="" AC_SUBST(XPERIMENTALDIRS) CheckMissingThings dnl Options for cleaning up the compiler output dnl In 'buildtools' build a front-end 'ccdv' to the C compiler. PRETTY_CC="" REAL_CC="${CC}" QUIET_LIBTOOL_OPTS="" QUIET_MAKE_OPTS="" if test x"${enable_pretty}" = "xyes"; then enable_quiet="yes" echo "install_sh: ${install_sh}" PRETTY_CC="`pwd`/buildtools/ccdv" CC="\$(PRETTY_CC) ${CC}" fi if test "x${enable_quiet}" = "xyes"; then QUIET_LIBTOOL_OPTS="--quiet" QUIET_MAKE_OPTS="--quiet" fi AC_MSG_RESULT(Supressing make details: ${enable_quiet}) AC_MSG_RESULT(Pretty printing of compiler output: ${enable_pretty}) dnl Put the above variables to use LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)" MAKE="${MAKE} \$(QUIET_MAKE_OPTS)" AC_SUBST(CC) AC_SUBST(MAKE) AC_SUBST(LIBTOOL) AC_SUBST(PRETTY_CC) AC_SUBST(REAL_CC) AC_SUBST(QUIET_MAKE_OPTS) AC_SUBST(QUIET_LIBTOOL_OPTS) dnl *** "echo" adjustments (begin) *** dnl Some run-time scripts require options to "echo". dnl This configure is already determining and using "ac_n" and "ac_c" dnl for internal use, so make available externally. dnl (Not sure how "future proof" this is, but it at least seems clean.) dnl dnl This must be close to the end of "configure.in" otherwise it interferes dnl with output from the AC_MSG_*() macros. ECHO_N="$ac_n" ECHO_C="$ac_c" case $ac_n in -n) ECHO_E="-e";; *) ECHO_E="";; esac ECHO_CMD="echo" if test -x /usr/linux/bin/echo then # This is for AIX. I'm not sure it's necessary... ECHO_CMD="/usr/linux/bin/echo" ECHO_N="-n" ECHO_E="-e" fi AC_SUBST(ECHO_N) AC_SUBST(ECHO_C) AC_SUBST(ECHO_E) AC_SUBST(ECHO_CMD) dnl *** "echo" adjustments (end) *** dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ README \ buildtools/Makefile \ config/Makefile \ cts/Makefile \ cts/CM_fs.py \ cts/CM_hb.py \ cts/CTS.py \ cts/CTSaudits.py \ cts/CTSlab.py \ cts/CTStests.py \ cts/CM_LinuxHAv2.py \ cts/CTSproxy.py \ cts/extracttests.py \ cts/getpeinputs.sh \ cts/OCFIPraTest.py \ cts/CIB.py \ cts/LSBDummy \ doc/Makefile \ doc/startstop \ doc/heartbeat.xml \ doc/cl_status.xml \ doc/apphbd.xml \ doc/hb_addnode.xml \ doc/hb_delnode.xml \ doc/hb_takeover.xml \ doc/hb_standby.xml \ doc/ha.cf.xml \ doc/authkeys.xml \ contrib/Makefile \ contrib/ipfail/Makefile \ contrib/mlock/Makefile \ contrib/drbd-outdate-peer/Makefile \ include/Makefile \ include/ocf/Makefile \ include/saf/Makefile \ replace/Makefile \ lib/Makefile \ lib/apphb/Makefile \ lib/hbclient/Makefile \ lib/plugins/Makefile \ lib/plugins/HBauth/Makefile \ lib/plugins/HBcomm/Makefile \ lib/plugins/HBcompress/Makefile \ lib/plugins/quorum/Makefile \ lib/plugins/quorumd/Makefile \ lib/plugins/tiebreaker/Makefile \ heartbeat/Makefile \ heartbeat/rc.d/Makefile \ heartbeat/rc.d/ask_resources \ heartbeat/rc.d/hb_takeover \ heartbeat/init.d/Makefile \ heartbeat/init.d/heartbeat \ heartbeat/lib/Makefile \ heartbeat/lib/BasicSanityCheck \ heartbeat/lib/ha_config \ heartbeat/lib/ha_propagate \ heartbeat/lib/hb_standby \ heartbeat/lib/mach_down \ heartbeat/lib/req_resource \ heartbeat/lib/ResourceManager \ heartbeat/lib/TestHeartbeatComm \ heartbeat/lib/hb_takeover \ heartbeat/lib/hb_addnode \ heartbeat/lib/hb_delnode \ heartbeat/lib/hb_setweight \ heartbeat/lib/hb_setsite \ heartbeat/logrotate.d/Makefile \ membership/Makefile \ membership/ccm/Makefile \ 
membership/quorumd/Makefile \ pkg/Makefile \ pkg/InfoFiles/pkginfo \ pkg/InfoFiles/preinstall \ pkg/InfoFiles/postinstall \ port/Makefile \ port/portMakefile \ port/heartbeat/pkg-deinstall \ port/heartbeat/pkg-descr \ port/heartbeat/pkg-install \ port/heartbeat/pkg-plist \ telecom/Makefile \ telecom/apphbd/Makefile \ tools/Makefile \ resources/Makefile \ resources/heartbeat/Makefile \ resources/heartbeat/apache \ resources/heartbeat/AudibleAlarm \ resources/heartbeat/Delay \ resources/heartbeat/db2 \ resources/heartbeat/Filesystem \ resources/heartbeat/hto-mapfuncs \ resources/heartbeat/ICP \ resources/heartbeat/ids \ resources/heartbeat/IPaddr \ resources/heartbeat/IPaddr2 \ resources/heartbeat/IPsrcaddr \ resources/heartbeat/IPv6addr \ resources/heartbeat/LinuxSCSI \ resources/heartbeat/LVM \ resources/heartbeat/MailTo \ resources/heartbeat/OCF \ resources/heartbeat/portblock \ resources/heartbeat/Raid1 \ resources/heartbeat/ServeRAID \ resources/heartbeat/SendArp \ resources/heartbeat/WAS \ resources/heartbeat/WinPopup \ resources/heartbeat/Xinetd \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl subpackages configuration - perhaps configure it properly some other time dnl when it has been discussed at linux-ha-dev dnl AC_CONFIG_SUBDIRS(stonith heartbeat) dnl ***************** dnl Configure summary dnl ***************** eval my_datadir="`eval echo ${datadir}`" eval my_includedir="`eval echo ${includedir}`" eval my_initdir="${INITDIR}" eval my_libdir="`eval echo ${libdir}`" eval my_localstatedir="`eval echo ${localstatedir}`" eval my_mandir="`eval echo ${mandir}`" eval my_sbindir="`eval echo ${sbindir}`" eval my_docdir="`eval echo ${docdir}`" eval my_sysconfdir="`eval echo ${sysconfdir}`" eval my_initdefdir="`eval echo ${INITDEFDIR}`" AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = "$VERSION"]) AC_MSG_RESULT([ Executables = "$my_sbindir"]) AC_MSG_RESULT([ Man pages = "$my_mandir"]) AC_MSG_RESULT([ Libraries = "$my_libdir"]) AC_MSG_RESULT([ Header files = "$my_includedir"]) AC_MSG_RESULT([ Arch-independent files = "$my_datadir"]) AC_MSG_RESULT([ Documentation files = "$my_docdir"]) AC_MSG_RESULT([ State information = "$my_localstatedir"]) AC_MSG_RESULT([ System configuration = "$my_sysconfdir"]) AC_MSG_RESULT([ Init (rc) scripts = "$my_initdir"]) AC_MSG_RESULT([ Init (rc) defaults = "$my_initdefdir"]) AC_MSG_RESULT([ Use system LTDL = "${ac_cv_lib_ltdl_lt_dlopen}"]) AC_MSG_RESULT([ HA group name = "${HA_APIGROUP}"]) AC_MSG_RESULT([ HA group id = "${HA_APIGID}"]) AC_MSG_RESULT([ HA user name = "${HA_CCMUSER}"]) AC_MSG_RESULT([ HA user user id = "${HA_CCMUID}"]) AC_MSG_RESULT([ Build dopd plugin = "${enable_dopd}"]) AC_MSG_RESULT([ Enable times kludge = "${enable_times_kludge}"]) AC_SUBST(DISABLE_TIMES_KLUDGE) AC_MSG_RESULT([ CC_WARNINGS = "${CC_WARNINGS}"]) AC_MSG_RESULT([ Mangled CFLAGS = "${CFLAGS}"]) AC_MSG_RESULT([ Libraries = "${LIBS}"]) AC_MSG_RESULT([ RPATH enabled = "${enable_rpath}"]) AC_MSG_RESULT([ Distro-style RPMs = "${enable_distro_rpm}"]) AC_MSG_RESULT([ ]) AC_MSG_RESULT([Note: If you use the 'make install' method for installation you]) AC_MSG_RESULT([also need to adjust '/etc/passwd' and '/etc/group' manually.]) AC_MSG_RESULT([ ]) Heartbeat-3-0-7e3a82377fa8/contrib/Makefile.am0000644000000000000000000000166711576626513020705 0ustar00usergroup00000000000000# # heartbeat contrib directory: Linux-HA code # # Copyright (C) 2001,2002 Alan Robertson # # This program is free software; 
you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in ## Subdirectories... SUBDIRS = ipfail mlock if BUILD_DRBD_OUTDATE_PEER SUBDIRS += drbd-outdate-peer endif Heartbeat-3-0-7e3a82377fa8/contrib/drbd-outdate-peer/Makefile.am0000644000000000000000000000326011576626513024203 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(libdir)/@HB_PKG@ havarlibdir = $(localstatedir)/lib/@HB_PKG@ dopddir = $(HA_VARRUNDIR)/$(HB_PKG)/dopd halib_PROGRAMS = dopd drbd-peer-outdater ## SOURCES dopd_SOURCES = dopd.c dopd.h dopd_LDADD = \ -lplumb \ $(top_builddir)/lib/hbclient/libhbclient.la $(GLIBLIB) drbd_peer_outdater_SOURCES = drbd-peer-outdater.c drbd_peer_outdater_LDADD = -lplumb $(GLIBLIB) ## additional Makefile targets # additional installations not covered normally install-exec-local: $(mkinstalldirs) $(DESTDIR)$(dopddir) -chgrp $(HA_APIGROUP) $(DESTDIR)/$(dopddir) -chown $(HA_CCMUSER) $(DESTDIR)/$(dopddir) chmod 750 $(DESTDIR)/$(dopddir) uninstall-local: rm -fr $(DESTDIR)$(dopddir) Heartbeat-3-0-7e3a82377fa8/contrib/drbd-outdate-peer/dopd.c0000644000000000000000000004125711576626513023251 0ustar00usergroup00000000000000/* drbd outdate peer daemon * Copyright (C) 2006 LINBIT * Written by Rasto Levrinc * * based on ipfail.c and attrd.c * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* linux specific; well, so is dopd. */ #include #include #include #include #include #include #include #include #include const char *node_name; /* The node we are connected to */ int quitnow = 0; /* Allows a signal to break us out of loop */ GMainLoop *mainloop; /* Reference to the mainloop for events */ ll_cluster_t *dopd_cluster_conn; GHashTable *connections = NULL; pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER; /* only one client can be connected at a time */ typedef struct dopd_client_s { char *id; char *drbd_res; IPC_Channel *channel; GCHSource *source; } dopd_client_t; /* send_message_to_the_peer() * send message with drbd resource to other node. */ static gboolean send_message_to_the_peer(const char *drbd_peer, const char *drbd_resource) { HA_Message *msg = NULL; cl_log(LOG_INFO, "sending start_outdate message to the other node %s -> %s", node_name, drbd_peer); msg = ha_msg_new(3); ha_msg_add(msg, F_TYPE, "start_outdate"); ha_msg_add(msg, F_ORIG, node_name); ha_msg_add(msg, F_DOPD_RES, drbd_resource); cl_log(LOG_DEBUG, "sending [start_outdate res: %s] to node: %s", drbd_resource, drbd_peer); dopd_cluster_conn->llc_ops->sendnodemsg(dopd_cluster_conn, msg, drbd_peer); ha_msg_del(msg); return TRUE; } static void send_to_client(const dopd_client_t *client, const char *rc_string) { IPC_Channel *channel = client->channel; HA_Message *msg_out; msg_out = ha_msg_new(3); ha_msg_add(msg_out, F_TYPE, "outdater_rc"); ha_msg_add(msg_out, F_ORIG, node_name); ha_msg_add(msg_out, F_DOPD_VALUE, rc_string); if (msg2ipcchan(msg_out, channel) != HA_OK) { cl_log(LOG_ERR, "Could not send message to the client"); } } /* msg_start_outdate() * got start_outdate message with resource from other node. Execute drbd * outdate command, convert return code and send message to other node * with return code. * * Conversion of return codes of "drbdadm outdate ": * 0 => 4 (was successfully outdated) * 5 => 3 (is inconsistent, anyways) * 17 => 6 (is primary, cannot be outdated) * other => 20 (which is "officially undefined", * unspecified error, could not be outdated) * * since we do not stonith, * we cannot return "7" peer got stonithed [ node fencing ]. * and since we have obviously been reached, * we must not return "5" (down/unreachable). 
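 *
 * (Editor's example, not from the original sources: for a hypothetical
 * resource "r0" this runs "drbdadm outdate r0"; an exit code of 0 is then
 * reported back to the requesting node as the string "4".)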
 */ void msg_start_outdate(struct ha_msg *msg, void *private) { ll_cluster_t *hb = (ll_cluster_t *)private; int rc = 20; int command_ret; char rc_string[4]; HA_Message *msg2 = NULL; const char *drbd_resource = ha_msg_value(msg, F_DOPD_RES); char *command = NULL; /* execute outdate command */ command = malloc(strlen(OUTDATE_COMMAND) + 1 + strlen(drbd_resource) + 1); strcpy(command, OUTDATE_COMMAND); strcat(command, " "); strcat(command, drbd_resource); cl_log(LOG_DEBUG, "msg_start_outdate: command: %s", command); command_ret = system(command); if (WIFEXITED(command_ret)) { /* normal exit */ command_ret = WEXITSTATUS(command_ret); /* convert return code */ if (command_ret == 0) rc = 4; else if (command_ret == 5) rc = 3; else if (command_ret == 17) rc = 6; else cl_log(LOG_INFO, "unknown exit code from %s: %i", command, command_ret); } else { /* something went wrong */ if (WIFSIGNALED(command_ret)) { cl_log(LOG_INFO, "killed by signal %i: %s", WTERMSIG(command_ret), command); } else { cl_log(LOG_INFO, "strange status code from %s: 0x%x", command, command_ret); } } free(command); cl_log(LOG_DEBUG, "msg_start_outdate: %s, command rc: %i, rc: %i", ha_msg_value(msg, F_ORIG), command_ret, rc); sprintf(rc_string, "%i", rc); cl_log(LOG_INFO, "sending return code: %s, %s -> %s\n", rc_string, node_name, ha_msg_value(msg, F_ORIG)); /* send return code to other node */ msg2 = ha_msg_new(4); ha_msg_add(msg2, F_TYPE, "outdate_rc"); ha_msg_add(msg2, F_DOPD_VALUE, rc_string); ha_msg_add(msg2, F_DOPD_RES, drbd_resource); ha_msg_add(msg2, F_ORIG, node_name); hb->llc_ops->sendnodemsg(hb, msg2, ha_msg_value(msg, F_ORIG)); ha_msg_del(msg2); } /* msg_outdate_rc() * got outdate_rc message with return code from other node. Send the * return code to the outdater client. */ void msg_outdate_rc(struct ha_msg *msg_in, void *private) { const char *rc_string = ha_msg_value(msg_in, F_DOPD_VALUE); const char *rc_res = ha_msg_value(msg_in, F_DOPD_RES); dopd_client_t *client = g_hash_table_lookup(connections, rc_res); cl_log(LOG_DEBUG, "msg_outdate_rc: %s %s", rc_res, rc_string); if (client == NULL) return; send_to_client(client, rc_string); } /* check_drbd_peer() * walk the nodes and return * FALSE if peer is not found, not a "normal" node, or "dead" * (no point in trying to reach those nodes). * TRUE if peer is found to be alive and "normal".
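 *
 * (Descriptive note: ping nodes have a node type other than "normal", so the
 * "normal" test in the walk below keeps outdate requests from being addressed
 * to them; only a live, normal cluster peer is worth contacting.)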
*/ gboolean check_drbd_peer(const char *drbd_peer) { const char *node; gboolean found = FALSE; if (!strcasecmp(drbd_peer, node_name)) { cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer); return FALSE; } cl_log(LOG_DEBUG, "Starting node walk"); if (dopd_cluster_conn->llc_ops->init_nodewalk(dopd_cluster_conn) != HA_OK) { cl_log(LOG_WARNING, "Cannot start node walk"); cl_log(LOG_WARNING, "REASON: %s", dopd_cluster_conn->llc_ops->errmsg(dopd_cluster_conn)); return FALSE; } while((node = dopd_cluster_conn->llc_ops->nextnode(dopd_cluster_conn)) != NULL) { const char *status = dopd_cluster_conn->llc_ops->node_status(dopd_cluster_conn, node); if (!strcmp(status, "dead")) { cl_log(LOG_WARNING, "Cluster node: %s: status: %s", node, status); return FALSE; } /* Look for the peer */ if (!strcmp("normal", dopd_cluster_conn->llc_ops->node_type(dopd_cluster_conn, node)) && !strcasecmp(node, drbd_peer)) { cl_log(LOG_DEBUG, "node %s found\n", node); found = TRUE; break; } } if (dopd_cluster_conn->llc_ops->end_nodewalk(dopd_cluster_conn) != HA_OK) { cl_log(LOG_INFO, "Cannot end node walk"); cl_log(LOG_INFO, "REASON: %s", dopd_cluster_conn->llc_ops->errmsg(dopd_cluster_conn)); } if (found == FALSE) cl_log(LOG_WARNING, "drbd peer %s was not found\n", drbd_peer); return found; } /* outdater_callback() * got message from outdater client with drbd resource, it will be sent * to the other node. */ static gboolean outdater_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; const char *drbd_peer = NULL; const char *drbd_resource = NULL; dopd_client_t *curr_client = (dopd_client_t*)user_data; gboolean stay_connected = TRUE; cl_log(LOG_DEBUG, "invoked: %s", curr_client->id); while (IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { cl_log(LOG_DEBUG, "%s: no message this time", curr_client->id); continue; } lpc++; cl_log(LOG_DEBUG, "Processing msg from %s", curr_client->id); cl_log(LOG_DEBUG, "Got message from (%s). (peer: %s, res :%s)", ha_msg_value(msg, F_ORIG), ha_msg_value(msg, F_OUTDATER_PEER), ha_msg_value(msg, F_OUTDATER_RES)); drbd_resource = ha_msg_value(msg, F_OUTDATER_RES); drbd_peer = ha_msg_value(msg, F_OUTDATER_PEER); if (check_drbd_peer(drbd_peer)) { dopd_client_t *entry; pthread_mutex_lock(&conn_mutex); entry = g_hash_table_lookup(connections, drbd_resource); if (entry == NULL) { curr_client->drbd_res = strdup(drbd_resource); if (entry == NULL) g_hash_table_insert(connections, curr_client->drbd_res, curr_client); pthread_mutex_unlock(&conn_mutex); send_message_to_the_peer(drbd_peer, drbd_resource); } else if (entry != curr_client) { pthread_mutex_unlock(&conn_mutex); cl_log(LOG_DEBUG, "one client with %s already " "connected", drbd_resource); send_to_client(curr_client, "21"); } else pthread_mutex_unlock(&conn_mutex); } else { /* peer "dead" or not in node list. 
* return "peer could not be reached" */ send_to_client(curr_client, "5"); } ha_msg_del(msg); msg = NULL; if(client->ch_status != IPC_CONNECT) { break; } } cl_log(LOG_DEBUG, "Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) stay_connected = FALSE; return stay_connected; } /* outdater_ipc_connection_destroy() * clean client struct */ static void outdater_ipc_connection_destroy(gpointer user_data) { dopd_client_t *client = (dopd_client_t*)user_data; if (client == NULL) return; cl_log(LOG_DEBUG, "destroying connection: %s\n", client->drbd_res); if (client->source != NULL) { if (client->drbd_res != NULL) { dopd_client_t *entry = g_hash_table_lookup(connections, client->drbd_res); if (entry == client) g_hash_table_remove(connections, (gpointer)client->drbd_res); } cl_log(LOG_DEBUG, "Deleting %s (%p) from mainloop", client->id, client->source); G_main_del_IPC_Channel(client->source); client->source = NULL; } free(client->id); free(client); return; } /* outdater_client_connect() * outdater is connected set outdater_callback. */ static gboolean outdater_client_connect(IPC_Channel *channel, gpointer user_data) { dopd_client_t *new_client = malloc(sizeof(dopd_client_t)); cl_log(LOG_DEBUG, "Connecting channel"); if(channel == NULL) { cl_log(LOG_ERR, "Channel was NULL"); return FALSE; } else if(channel->ch_status != IPC_CONNECT) { cl_log(LOG_ERR, "Channel was disconnected"); return FALSE; } memset(new_client, 0, sizeof(dopd_client_t)); new_client->channel = channel; new_client->id = malloc(10); strcpy(new_client->id, "outdater"); new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, outdater_callback, new_client, outdater_ipc_connection_destroy); cl_log(LOG_DEBUG, "Client %s (%p) connected", new_client->id, new_client->source); return TRUE; } static void outdater_client_destroy(gpointer user_data) { cl_log(LOG_INFO, "ipc server destroy"); } /* set_callbacks() * set callbacks for communication between two nodes */ void set_callbacks(ll_cluster_t *hb) { /* Add each of the callbacks we use with the API */ if (hb->llc_ops->set_msg_callback(hb, "start_outdate", msg_start_outdate, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg_start_outdate callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(2); } if (hb->llc_ops->set_msg_callback(hb, "outdate_rc", msg_outdate_rc, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg_outdate_rc callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(2); } } void set_signals(ll_cluster_t *hb) { /* Setup the various signals */ CL_SIGINTERRUPT(SIGINT, 1); CL_SIGNAL(SIGINT, gotsig); CL_SIGINTERRUPT(SIGTERM, 1); CL_SIGNAL(SIGTERM, gotsig); cl_log(LOG_DEBUG, "Setting message signal"); if (hb->llc_ops->setmsgsignal(hb, 0) != HA_OK) { cl_log(LOG_ERR, "Cannot set message signal"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(13); } } void gotsig(int nsig) { (void)nsig; quitnow = 1; } /* Used to handle the API in the gmainloop */ gboolean dopd_dispatch(IPC_Channel* ipc, gpointer user_data) { struct ha_msg *reply; ll_cluster_t *hb = user_data; reply = hb->llc_ops->readmsg(hb, 0); if (reply != NULL) { ha_msg_del(reply); reply=NULL; return TRUE; } return TRUE; } void dopd_dispatch_destroy(gpointer user_data) { return; } gboolean dopd_timeout_dispatch(gpointer user_data) { ll_cluster_t *hb = user_data; if (quitnow) { g_main_quit(mainloop); return FALSE; } if (hb->llc_ops->msgready(hb)) { return dopd_dispatch(NULL, user_data); } return TRUE; } /* Sign in to the API */ void open_api(ll_cluster_t 
*hb) { cl_log(LOG_DEBUG, "Signing in with heartbeat"); if (hb->llc_ops->signon(hb, "dopd")!= HA_OK) { cl_log(LOG_ERR, "Cannot sign on with heartbeat"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(1); } } /* Log off of the API and clean up */ void close_api(ll_cluster_t *hb) { if (hb->llc_ops->signoff(hb, FALSE) != HA_OK) { cl_log(LOG_ERR, "Cannot sign off from heartbeat."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(14); } if (hb->llc_ops->delete(hb) != HA_OK) { cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_log(LOG_ERR, "Cannot delete API object."); exit(15); } } static IPC_WaitConnection * dopd_channel_init(void) { IPC_WaitConnection *wait_ch; mode_t mask; char path[] = IPC_PATH_ATTR; GHashTable * attrs; attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, dopd_socket); mask = umask(0); wait_ch = ipc_wait_conn_constructor(IPC_ANYTYPE, attrs); if (wait_ch == NULL) { cl_perror("Can't create wait channel of type %s", IPC_ANYTYPE); exit(1); } mask = umask(mask); g_hash_table_destroy(attrs); return wait_ch; } int main(int argc, char **argv) { unsigned fmask; char pid[10]; char *bname, *parameter; IPC_Channel *apiIPC; IPC_WaitConnection *wait_ch; /* Get the name of the binary for logging purposes */ bname = strdup(argv[0]); cl_log_set_entity(bname); cl_log_set_facility(HA_LOG_FACILITY); cl_log_set_logd_channel_source(NULL, NULL); cl_inherit_logging_environment(500); cl_set_corerootdir(HA_COREDIR); cl_cdtocoredir(); dopd_cluster_conn = ll_cluster_new("heartbeat"); memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); cl_log(LOG_DEBUG, "PID=%s", pid); open_api(dopd_cluster_conn); /* Obtain our local node name */ node_name = dopd_cluster_conn->llc_ops->get_mynodeid(dopd_cluster_conn); if (node_name == NULL) { cl_log(LOG_ERR, "Cannot get my nodeid"); cl_log(LOG_ERR, "REASON: %s", dopd_cluster_conn->llc_ops->errmsg(dopd_cluster_conn)); exit(19); } cl_log(LOG_DEBUG, "[We are %s]", node_name); /* See if we should drop cores somewhere odd... 
*/ parameter = dopd_cluster_conn->llc_ops->get_parameter(dopd_cluster_conn, KEY_COREROOTDIR); if (parameter) { cl_set_corerootdir(parameter); cl_cdtocoredir(); } cl_cdtocoredir(); set_callbacks(dopd_cluster_conn); fmask = LLC_FILTER_DEFAULT; cl_log(LOG_DEBUG, "Setting message filter mode"); if (dopd_cluster_conn->llc_ops->setfmode(dopd_cluster_conn, fmask) != HA_OK) { cl_log(LOG_ERR, "Cannot set filter mode"); cl_log(LOG_ERR, "REASON: %s", dopd_cluster_conn->llc_ops->errmsg(dopd_cluster_conn)); exit(8); } connections = g_hash_table_new_full( g_str_hash, g_str_equal, NULL, NULL); set_signals(dopd_cluster_conn); cl_log(LOG_DEBUG, "Waiting for messages..."); errno = 0; mainloop = g_main_new(TRUE); apiIPC = dopd_cluster_conn->llc_ops->ipcchan(dopd_cluster_conn); /* Watch the API IPC for input */ G_main_add_IPC_Channel(G_PRIORITY_HIGH, apiIPC, FALSE, dopd_dispatch, (gpointer)dopd_cluster_conn, dopd_dispatch_destroy); Gmain_timeout_add_full(G_PRIORITY_DEFAULT, 1000, dopd_timeout_dispatch, (gpointer)dopd_cluster_conn, dopd_dispatch_destroy); wait_ch = dopd_channel_init(); if (wait_ch == NULL) { cl_log(LOG_ERR, "Could not start IPC server"); } else { G_main_add_IPC_WaitConnection( G_PRIORITY_LOW, wait_ch, NULL, FALSE, outdater_client_connect, strdup(T_OUTDATER), outdater_client_destroy); } g_main_run(mainloop); g_main_destroy(mainloop); g_hash_table_destroy(connections); if (!quitnow && errno != EAGAIN && errno != EINTR) { cl_log(LOG_ERR, "read_hb_msg returned NULL"); cl_log(LOG_ERR, "REASON: %s", dopd_cluster_conn->llc_ops->errmsg(dopd_cluster_conn)); } close_api(dopd_cluster_conn); return 0; } Heartbeat-3-0-7e3a82377fa8/contrib/drbd-outdate-peer/dopd.h0000644000000000000000000000316611576626513023253 0ustar00usergroup00000000000000/* drbd outdate peer daemon * Copyright (C) 2006 LINBIT * * Written by Rasto Levrinc * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define T_OUTDATER "outdater" #define F_OUTDATER_PEER "outdater_peer" #define F_OUTDATER_RES "outdater_res" #define F_DOPD_VALUE "dop_value" #define F_DOPD_RES "dop_res" #define OUTDATE_COMMAND "drbdadm outdate" static char dopd_socket[] = HA_VARRUNDIR "/heartbeat/dopd/outdater"; #include /* Prototypes */ void node_walk(ll_cluster_t *); gboolean check_drbd_peer(const char *); void set_signals(ll_cluster_t *); void gotsig(int); void set_callbacks(ll_cluster_t *); void open_api(ll_cluster_t *); void close_api(ll_cluster_t *); gboolean dopd_dispatch(IPC_Channel *, gpointer); void dopd_dispatch_destroy(gpointer); gboolean dopd_timeout_dispatch(gpointer); void msg_start_outdate(struct ha_msg *, void *); void msg_outdate_rc(struct ha_msg *, void *); Heartbeat-3-0-7e3a82377fa8/contrib/drbd-outdate-peer/drbd-peer-outdater.c0000644000000000000000000001712111576626513026005 0ustar00usergroup00000000000000/* drbd-peer-outdater * Copyright (C) 2006 LINBIT * * Written by Rasto Levrinc * * based on attrd * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hVt:p:r:" #define DEFAULT_TIMEOUT 60 /* timeout in seconds */ typedef struct dop_client_s { int timeout; GMainLoop *mainloop; int rc; } dop_client_t; static const char *crm_system_name = "drbd-peer-outdater"; static void usage(const char* cmd, int exit_status); static void dop_exit(dop_client_t *client) { int rc; if (client == NULL) exit(5); rc = client->rc; free(client); exit(rc); } static gboolean outdate_callback(IPC_Channel * server, gpointer user_data) { dop_client_t *client = (dop_client_t *)user_data; HA_Message *msg = NULL; const char *rc_string; char *ep; int rc; msg = msgfromIPC_noauth(server); if (!msg) { cl_log(LOG_WARNING, "no message from server or other " "instance is running\n"); if (client->mainloop != NULL && g_main_is_running(client->mainloop)) g_main_quit(client->mainloop); return FALSE; } cl_log(LOG_DEBUG, "message: %s, %s\n", ha_msg_value(msg, F_TYPE), ha_msg_value(msg, F_ORIG) ); rc_string = ha_msg_value(msg, F_DOPD_VALUE); errno = 0; rc = strtol(rc_string, &ep, 10); if (errno != 0 || *ep != EOS) { cl_log(LOG_WARNING, "unknown message: %s from server", rc_string); client->rc = 20; /* "officially undefined", unspecified error */ ha_msg_del(msg); if (client->mainloop != NULL && g_main_is_running(client->mainloop)) g_main_quit(client->mainloop); return FALSE; } ha_msg_del(msg); /* ok, peer returned something useful */ client->rc = rc; if (client->mainloop != NULL && g_main_is_running(client->mainloop)) { g_main_quit(client->mainloop); } else 
dop_exit(client); return TRUE; } static void outdater_dispatch_destroy(gpointer user_data) { return; } static gboolean outdater_timeout_dispatch(gpointer user_data) { dop_client_t *client = (dop_client_t *)user_data; cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds" ": timeout reached\n", client->timeout); if (client->mainloop != NULL && g_main_is_running(client->mainloop)) g_main_quit(client->mainloop); return FALSE; } static void dopd_connection_destroy(gpointer user_data) { return; } static GCHSource* init_dopd_client_ipc_comms(const char *channel_name, gboolean (*dispatch)( IPC_Channel* source_data, gpointer user_data), void *client_data, IPC_Channel **out_ch) { IPC_Channel *ch; GHashTable *attrs; GCHSource *the_source = NULL; void *callback_data = client_data; static char path[] = IPC_PATH_ATTR; attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, dopd_socket); ch = ipc_channel_constructor(IPC_ANYTYPE, attrs); g_hash_table_destroy(attrs); if (ch == NULL) { cl_log(LOG_ERR, "Could not access channel on: %s", dopd_socket); return NULL; } else if (ch->ops->initiate_connection(ch) != IPC_OK) { cl_log(LOG_DEBUG, "Could not init comms on: %s", dopd_socket); ch->ops->destroy(ch); return NULL; } *out_ch = ch; the_source = G_main_add_IPC_Channel( G_PRIORITY_HIGH, ch, FALSE, dispatch, callback_data, dopd_connection_destroy); return the_source; } int main(int argc, char ** argv) { HA_Message *update = NULL; IPC_Channel *ipc_server = NULL; int argerr = 0; int flag; char *drbd_peer = NULL; char *drbd_resource = NULL; int timeout = DEFAULT_TIMEOUT; int i; dop_client_t *new_client = NULL; GCHSource *src = NULL; cl_log_set_entity(crm_system_name); cl_log_set_facility(HA_LOG_FACILITY); cl_log_set_logd_channel_source(NULL, NULL); cl_inherit_logging_environment(500); cl_set_corerootdir(HA_COREDIR); cl_cdtocoredir(); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': debug_level++; break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'p': drbd_peer = strdup(optarg); break; case 'r': drbd_resource = strdup(optarg); break; case 't': timeout = atoi(optarg); break; default: ++argerr; break; } } /* the caller drbdadm sets DRBD_PEER env variable, use it if * -p option was not specified */ if ((drbd_peer == NULL) && !(drbd_peer = getenv("DRBD_PEER"))) { ++argerr; } /* the caller drbdadm sets DRBD_RESOURCE env variable, use it if * -r option was not specified */ if ((drbd_resource == NULL) && !(drbd_resource = getenv("DRBD_RESOURCE"))) { ++argerr; } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } for (i = 0; i < strlen(drbd_peer); i++) drbd_peer[i] = tolower(drbd_peer[i]); cl_log(LOG_DEBUG, "drbd peer: %s\n", drbd_peer); cl_log(LOG_DEBUG, "drbd resource: %s\n", drbd_resource); new_client = malloc(sizeof(dop_client_t)); memset(new_client, 0, sizeof(dop_client_t)); new_client->timeout = timeout; new_client->mainloop = g_main_new(FALSE); new_client->rc = 5; /* default: down/unreachable */ /* Connect to the IPC server */ src = init_dopd_client_ipc_comms(T_OUTDATER, outdate_callback, (gpointer)new_client, &ipc_server); if (ipc_server == NULL) { cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n"); dop_exit(new_client); /* unreachable */ } /* send message with drbd resource to dopd */ update = ha_msg_new(3); ha_msg_add(update, F_TYPE, T_OUTDATER); ha_msg_add(update, F_ORIG, crm_system_name); ha_msg_add(update, F_OUTDATER_PEER, drbd_peer); 
ha_msg_add(update, F_OUTDATER_RES, drbd_resource); if (msg2ipcchan(update, ipc_server) != HA_OK) { cl_log(LOG_WARNING, "Could not send message\n"); dop_exit(new_client); } Gmain_timeout_add_full(G_PRIORITY_DEFAULT, new_client->timeout * 1000, outdater_timeout_dispatch, (gpointer)new_client, outdater_dispatch_destroy); g_main_run(new_client->mainloop); dop_exit(new_client); return 20; /* not reached */ } static void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s -r <drbd resource> -p <drbd peer> [-t <timeout>]\n", cmd); fprintf(stream, "\t-p <drbd peer>\tdrbd peer\n"); fprintf(stream, "\t-r <drbd resource>\tdrbd resource\n"); fprintf(stream, "\t-t <timeout>\ttimeout in seconds; default: %d\n\n", DEFAULT_TIMEOUT); fprintf(stream, "The drbd peer and drbd resource have to be specified either on the\n" "command line using the -p and -r options, or using the $DRBD_PEER and\n" "$DRBD_RESOURCE environment variables. $DRBD_RESOURCE and $DRBD_PEER\n" "will be ignored if the command line options are used.\n"); fflush(stream); exit(exit_status); } Heartbeat-3-0-7e3a82377fa8/contrib/ipfail/Makefile.am0000644000000000000000000000261511576626513022143 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(libdir)/@HB_PKG@ havarlibdir = $(localstatedir)/lib/@HB_PKG@ apigid = @HA_APIGID@ ipfailuid = @HA_CCMUID@ halib_PROGRAMS = ipfail ## SOURCES ipfail_SOURCES = ipfail.c ipfail.h ipfail_LDADD = \ -lplumb \ $(top_builddir)/lib/hbclient/libhbclient.la $(GLIBLIB) ## additional Makefile targets # additional installations not covered normally install-exec-local: uninstall-local: Heartbeat-3-0-7e3a82377fa8/contrib/ipfail/ipfail.c0000644000000000000000000005220711576626513021521 0ustar00usergroup00000000000000/* ipfail: IP Failover plugin for Linux-HA * * Copyright (C) 2002-2004 Kevin Dwyer * * This plugin uses ping nodes to determine a failure in an * interface's connectivity and forces a hb_standby. It is based on the * api_test.c program included with Linux-HA. * * Setup: In your ha.cf file make sure you have a ping node setup for each * interface. Choosing something like the switch that you are connected * to is a good idea. Choosing your win95 reboot-o-matic is a bad idea. * * The way this works is by taking note of when a ping node dies. * When a death is detected, it communicates with the other side to see * if the other side saw it die (sort of). If it didn't, then we know * who deserves to have the resources. * * There are ways to improve this, and I'm working on them.
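 *
 * (Summary note: each node counts the ping nodes it can still reach; when a
 * link goes dead the two sides compare those counts, and the side that can
 * still see more ping nodes is the one that should end up with the resources.)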
* */ /* * api_test: Test program for testing the heartbeat API * * Copyright (C) 2000 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ipfail.h" #include /* ICK! global vars. */ const char *node_name; /* The node we are connected to */ char other_node[SYS_NMLN]; /* The remote node in the pair */ int node_stable; /* Other node stable? */ int need_standby; /* Are we waiting for stability? */ int quitnow = 0; /* Allows a signal to break us out of loop */ int auto_failback; /* How is our auto_failback configured? */ GMainLoop *mainloop; /* Reference to the mainloop for events */ guint delay_giveup_tag = 0;/* Our delay timer */ int main(int argc, char **argv) { unsigned fmask; ll_cluster_t *hb; char pid[10]; char *bname, *parameter; IPC_Channel *apiIPC; cl_log_enable_stderr(TRUE); /* Get the name of the binary for logging purposes */ bname = strdup(argv[0]); cl_log_set_entity(basename(bname)); cl_log_set_facility(HA_LOG_FACILITY); cl_inherit_logging_environment(0); hb = ll_cluster_new("heartbeat"); memset(other_node, 0, sizeof(other_node)); need_standby = 0; memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); cl_log(LOG_DEBUG, "PID=%s", pid); open_api(hb); node_stable = is_stable(hb); if (node_stable == -1) { cl_log(LOG_ERR, "No managed resources"); exit(100); } /* Obtain our local node name */ node_name = hb->llc_ops->get_mynodeid(hb); if (node_name == NULL) { cl_log(LOG_ERR, "Cannot get my nodeid"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(19); } cl_log(LOG_DEBUG, "[We are %s]", node_name); /* Check to see if we should engage auto_failback tactics */ parameter = hb->llc_ops->get_parameter(hb, "auto_failback"); if (parameter) { /* This is equivalent to nice_failback off */ if (!strcmp(parameter, "legacy")) { cl_log(LOG_ERR, "auto_failback set to " "incompatible legacy option."); exit(100); } if(cl_str_to_boolean(parameter, &auto_failback) != HA_OK){ cl_log(LOG_ERR, " invalid auto_faiback value(%s)", parameter); exit(100); } cl_log(LOG_DEBUG, "auto_failback -> %i (%s)", auto_failback, parameter); free(parameter); }else{ cl_log(LOG_ERR, "Couldn't get auto_failback setting."); } /* See if we should drop cores somewhere odd... 
*/ parameter = hb->llc_ops->get_parameter(hb, KEY_COREROOTDIR); if (parameter) { cl_set_corerootdir(parameter); cl_cdtocoredir(); } cl_cdtocoredir(); set_callbacks(hb); fmask = LLC_FILTER_DEFAULT; cl_log(LOG_DEBUG, "Setting message filter mode"); if (hb->llc_ops->setfmode(hb, fmask) != HA_OK) { cl_log(LOG_ERR, "Cannot set filter mode"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(8); } node_walk(hb); set_signals(hb); cl_log(LOG_DEBUG, "Waiting for messages..."); errno = 0; cl_log_enable_stderr(FALSE); /* We will sit in a glib loop waiting for inputs, or making decisions * for failover */ mainloop = g_main_new(TRUE); apiIPC = hb->llc_ops->ipcchan(hb); /* Watch the API IPC for input */ G_main_add_IPC_Channel(G_PRIORITY_HIGH, apiIPC, FALSE, ipfail_dispatch, (gpointer)hb, ipfail_dispatch_destroy); Gmain_timeout_add_full(G_PRIORITY_DEFAULT, 1000, ipfail_timeout_dispatch, (gpointer)hb, ipfail_dispatch_destroy); g_main_run(mainloop); g_main_destroy(mainloop); if (!quitnow && errno != EAGAIN && errno != EINTR) { cl_perror("read_hb_msg returned NULL"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); } close_api(hb); return 0; } int is_stable(ll_cluster_t *hb) { const char *resources = hb->llc_ops->get_resources(hb); if (!resources) /* Heartbeat is not providing resource management */ return -1; if (!strcmp(resources, "transition")) return 0; return 1; } void node_walk(ll_cluster_t *hb) { const char *node; /* const char *intf; --Out until ifwalk is fixed */ cl_log(LOG_DEBUG, "Starting node walk"); if (hb->llc_ops->init_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot start node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(9); } while((node = hb->llc_ops->nextnode(hb)) != NULL) { cl_log(LOG_DEBUG, "Cluster node: %s: status: %s", node , hb->llc_ops->node_status(hb, node)); /* Look for our partner */ if (!strcmp("normal", hb->llc_ops->node_type(hb, node)) && strcmp(node, node_name)) { strlcpy(other_node, node, sizeof(other_node)); cl_log(LOG_DEBUG, "[They are %s]", other_node); } /* ifwalking is broken for ping nodes. I don't think we even need it at this point. 
if (hb->llc_ops->init_ifwalk(hb, node) != HA_OK) { cl_log(LOG_ERR, "Cannot start if walk"); cl_log(LOG_ERR, "REASON: %s" , hb->llc_ops->errmsg(hb)); exit(10); } while ((intf = hb->llc_ops->nextif(hb))) { cl_log(LOG_DEBUG, "\tnode %s: intf: %s ifstatus: %s" , node, intf , hb->llc_ops->if_status(hb, node, intf)); } if (hb->llc_ops->end_ifwalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end if walk"); cl_log(LOG_ERR, "REASON: %s" , hb->llc_ops->errmsg(hb)); exit(11); } -END of ifwalkcode */ } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(12); } } void set_callbacks(ll_cluster_t *hb) { /* Add each of the callbacks we use with the API */ if (hb->llc_ops->set_msg_callback(hb, T_APICLISTAT, msg_ipfail_join, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg_ipfail_join callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(2); } if (hb->llc_ops->set_msg_callback(hb, T_RESOURCES, msg_resources, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg_resources callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(18); } if (hb->llc_ops->set_msg_callback(hb, "num_ping_nodes", msg_ping_nodes, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(3); } if (hb->llc_ops->set_msg_callback(hb, "abort_giveup", msg_abort_giveup, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set msg_abort_giveup callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(4); } if (hb->llc_ops->set_msg_callback(hb, "you_are_dead", i_am_dead, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set i_am_dead callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(5); } if (hb->llc_ops->set_nstatus_callback(hb, NodeStatus, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set node status callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(6); } if (hb->llc_ops->set_ifstatus_callback(hb, LinkStatus, hb) != HA_OK) { cl_log(LOG_ERR, "Cannot set if status callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(7); } } void set_signals(ll_cluster_t *hb) { /* Setup the various signals */ CL_SIGINTERRUPT(SIGINT, 1); CL_SIGNAL(SIGINT, gotsig); CL_SIGINTERRUPT(SIGTERM, 1); CL_SIGNAL(SIGTERM, gotsig); cl_log(LOG_DEBUG, "Setting message signal"); if (hb->llc_ops->setmsgsignal(hb, 0) != HA_OK) { cl_log(LOG_ERR, "Cannot set message signal"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(13); } } void NodeStatus(const char *node, const char *status, void *private) { /* Callback for node status changes */ cl_log(LOG_INFO, "Status update: Node %s now has status %s" , node, status); if (strcmp(status, DEADSTATUS) == 0) { if (ping_node_status(private)) { cl_log(LOG_INFO, "NS: We are still alive!"); } else { cl_log(LOG_INFO, "NS: We are dead. :<"); } } else if (strcmp(status, PINGSTATUS) == 0) { /* A ping node just came up, if we died, request resources? * If so, that would emulate the primary/secondary type of * High-Availability, instead of nice_failback mode */ /* Lets make sure we weren't both down, and now half up. 
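 * When a ping node (re)appears we recount the ping nodes we can reach
 * and start the count-comparison exchange with the peer
 * (ask_ping_nodes below), so that the side with better outside
 * connectivity ends up holding the resources.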
*/ int num_ping; cl_log(LOG_INFO, "A ping node just came up."); num_ping = ping_node_status(private); ask_ping_nodes(private, num_ping); } } void LinkStatus(const char *node, const char *lnk, const char *status, void *private) { /* Callback for Link status changes */ int num_ping=0; cl_log(LOG_INFO, "Link Status update: Link %s/%s now has status %s" , node, lnk, status); if (strcmp(status, DEADSTATUS) == 0) { /* If we can still see pinging node, request resources */ if ((num_ping = ping_node_status(private))) { ask_ping_nodes(private, num_ping); cl_log(LOG_INFO, "Checking remote count" " of ping nodes."); } else { cl_log(LOG_INFO, "We are dead. :<"); ask_ping_nodes(private, num_ping); } } } int ping_node_status(ll_cluster_t *hb) { /* ping_node_status: Takes the hearbeat cluster as input, * returns number of ping nodes found to be in the cluster, * and therefore alive. */ const char *node; int found=0; /* Number of ping nodes found */ if (hb->llc_ops->init_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot start node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(16); } while((node = hb->llc_ops->nextnode(hb))!= NULL) { if (!strcmp(PINGSTATUS, hb->llc_ops->node_status(hb, node))) { cl_log(LOG_DEBUG, "Found ping node %s!", node); found++; } } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(17); } return found; } gboolean giveup(gpointer user_data) { /* Giveup: Takes the heartbeat cluster as input and the type of * resources to give up. Returning FALSE causes the timer to die. * Forces the local node to release a particular class of resources. */ struct giveup_data *gd = user_data; ll_cluster_t *hb = gd->hb; const char *res_type = gd->res_type; struct ha_msg *msg; char pid[10]; cl_log(LOG_INFO, "giveup() called (timeout worked)"); if (is_stable(hb)) { memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); msg = ha_msg_new(3); ha_msg_add(msg, F_TYPE, T_ASKRESOURCES); ha_msg_add(msg, F_RESOURCES, res_type); ha_msg_add(msg, F_ORIG, node_name); ha_msg_add(msg, F_COMMENT, "me"); hb->llc_ops->sendclustermsg(hb, msg); cl_log(LOG_DEBUG, "Message [" T_ASKRESOURCES "] sent."); ha_msg_del(msg); need_standby = 0; } else { need_standby = 1; } return FALSE; } void delay_giveup(ll_cluster_t *hb, const char *res_type, int mseconds) { struct giveup_data *gd; gd = malloc(sizeof(struct giveup_data)); if (gd == NULL) { cl_log(LOG_ERR, "Out of memory, can't giveup."); return; } gd->hb = hb; gd->res_type = res_type; /* Set mseconds to -1 to use default. (twice the keepalive) */ if (mseconds < 0) { mseconds = hb->llc_ops->get_keepalive(hb) * 2; } cl_log(LOG_INFO, "Delayed giveup in %i seconds.", mseconds / 1000); if (delay_giveup_tag) { /* A timer exists already? */ cl_log(LOG_DEBUG, "Detected existing delay timer, overriding"); Gmain_timeout_remove(delay_giveup_tag); delay_giveup_tag = 0; } /* We are going to call giveup in mseconds/1000 Seconds. 
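 * giveup() returns FALSE, so this GLib timer fires at most once;
 * giveup_destroy() then frees the giveup_data and clears
 * delay_giveup_tag.  Until it fires, abort_giveup() (possibly triggered
 * by an "abort_giveup" message from the peer) can cancel it.
 * For example, delay_giveup(hb, HB_ALL_RESOURCES, -1) schedules a
 * one-shot giveup of all resources after twice the keepalive interval.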
*/ delay_giveup_tag = Gmain_timeout_add_full(G_PRIORITY_DEFAULT, mseconds, giveup, (gpointer)gd, giveup_destroy); } void giveup_destroy(gpointer user_data) { /* Clean up the struct giveup_data that we were using */ free(user_data); delay_giveup_tag = 0; cl_log(LOG_DEBUG, "giveup timeout has been destroyed."); } void abort_giveup() { if (delay_giveup_tag) { cl_log(LOG_INFO, "Aborted delayed giveup (%u)", delay_giveup_tag); Gmain_timeout_remove(delay_giveup_tag); delay_giveup_tag = 0; } else { cl_log(LOG_INFO, "No giveup timer to abort."); } } void send_abort_giveup(ll_cluster_t *hb) { struct ha_msg *msg; msg = ha_msg_new(2); ha_msg_add(msg, F_TYPE, "abort_giveup"); ha_msg_add(msg, F_ORIG, node_name); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Abort message sent."); ha_msg_del(msg); } void msg_abort_giveup(struct ha_msg *msg, void *private) { abort_giveup(); } void msg_ipfail_join(struct ha_msg *msg, void *private) { /* msg_ipfail_join: When another ipfail client sends a join * message, call ask_ping_nodes() to compare ping node counts. * Callback for the T_APICLISTAT message. */ /* If this is a join message from ipfail on a different node.... */ if (!strcmp(ha_msg_value(msg, F_STATUS), JOINSTATUS) && !strcmp(ha_msg_value(msg, F_FROMID), "ipfail") && strcmp(ha_msg_value(msg, F_ORIG), node_name)) { cl_log(LOG_DEBUG, "Got join message from another ipfail client. (%s)", ha_msg_value(msg, F_ORIG)); ask_ping_nodes(private, ping_node_status(private)); } } void msg_resources(struct ha_msg *msg, void *private) { const char * orig = ha_msg_value(msg, F_ORIG); const char * isstable = ha_msg_value(msg, F_ISSTABLE); /* msg_resources: Catch T_RESOURCES messages, so that we can * find out when stability is achieved among the cluster */ if (!orig || !isstable) { return; /* No stability info in this message... */ } /* Right now there are two stable messages sent out, we are * only concerned with the one that has no info= line on it. */ if (!strcmp(orig, other_node) && !ha_msg_value(msg, F_COMMENT) && !strcmp(isstable, "1")) { cl_log(LOG_DEBUG, "Other side is now stable."); node_stable = 1; /* There may be a pending standby */ if (need_standby) { /* Gratuitious ARPs take some time, is there a * way to know when they're finished? I don't * want this sleep here, even if it only is during * startup. */ /* This value is prone to be wrong for different * situations. We need the resource stability * message to be delayed until the resource scripts * finish, and then we can stop waiting. */ sleep(10); /* If the resource message stuff is solved, we could * safely giveup() here. However, since we're waiting * for arbitrary amounts of time it may be wise to * recheck the assumptions of the cluster and count * ping nodes. */ ask_ping_nodes(private, ping_node_status(private)); /* giveup(private); */ /* The ask_ping_nodes message will sort out whether * a standby is necessary. */ need_standby = 0; } } else if (!strcmp(orig, other_node) && !strcmp(isstable, "0")) { cl_log(LOG_DEBUG, "Other side is unstable."); node_stable = 0; } } void ask_ping_nodes(ll_cluster_t *hb, int num_ping) { /* ask_ping_nodes: Takes the heartbeat cluster and the number of * ping nodes we can see alive as input, returning nothing. * It asks the other node for the number of ping nodes it can see. 
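 * Message layout (heartbeat ha_msg fields as used below):
 *   F_TYPE    = "num_ping_nodes"
 *   F_ORIG    = our node name
 *   F_NUMPING = number of ping nodes we can currently see
 * The peer answers from msg_ping_nodes(): if it sees more ping nodes it
 * sends us "you_are_dead", if it sees fewer it schedules a delayed
 * giveup of all its resources, and if the counts match it sends
 * "abort_giveup".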
*/ struct ha_msg *msg; char pid[10], np[5]; cl_log(LOG_INFO, "Asking other side for ping node count."); memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); memset(np, 0, sizeof(np)); snprintf(np, sizeof(np), "%d", num_ping); msg = ha_msg_new(3); ha_msg_add(msg, F_TYPE, "num_ping_nodes"); ha_msg_add(msg, F_ORIG, node_name); ha_msg_add(msg, F_NUMPING, np); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Message [" F_NUMPING "] sent."); ha_msg_del(msg); } void msg_ping_nodes(struct ha_msg *msg, void *private) { /* msg_ping_nodes: Takes the message and heartbeat cluster as input; * returns nothing. Callback for the num_ping_nodes message. */ int num_nodes=0; ll_cluster_t *hb = private; cl_log(LOG_DEBUG, "Got asked for num_ping."); num_nodes = ping_node_status(hb); if (num_nodes > atoi(ha_msg_value(msg, F_NUMPING))) { cl_log(LOG_INFO, "Telling other node that we have more visible ping " "nodes."); you_are_dead(hb); } else if (num_nodes < atoi(ha_msg_value(msg, F_NUMPING))) { cl_log(LOG_INFO, "Giving up because we have less visible ping nodes."); delay_giveup(hb, HB_ALL_RESOURCES, -1); } else { cl_log(LOG_INFO, "Ping node count is balanced."); send_abort_giveup(hb); if (delay_giveup_tag) { /* We've got a delayed giveup, and we're now balanced*/ /* BUG? We don't want to do this if we have an auto_failback pending, I think. */ abort_giveup(); } else if (auto_failback && is_stable(hb)) { /* We're balanced, so make sure we don't have foreign * stuff */ cl_log(LOG_INFO, "Giving up foreign resources (auto_failback)."); delay_giveup(hb, HB_FOREIGN_RESOURCES, -1); } } } void you_are_dead(ll_cluster_t *hb) { /* you_are_dead: Takes the heartbeat cluster as input; returns nothing. * Sends the you_are_dead message to the dead node. */ struct ha_msg *msg; char pid[10]; cl_log(LOG_DEBUG, "Sending you_are_dead."); memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); msg = ha_msg_new(1); ha_msg_add(msg, F_TYPE, "you_are_dead"); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Message [you_are_dead] sent."); ha_msg_del(msg); } void i_am_dead(struct ha_msg *msg, void *private) { /* i_am_dead: Takes the you_are_dead message and the heartbeat cluster * as input; returns nothing. * Callback for the you_are_dead message. */ cl_log(LOG_INFO, "Giving up because we were told that we have less ping nodes."); delay_giveup(private, HB_ALL_RESOURCES, -1); } void gotsig(int nsig) { (void)nsig; quitnow = 1; } /* Used to handle the API in the gmainloop */ gboolean ipfail_dispatch(IPC_Channel* ipc, gpointer user_data) { struct ha_msg *reply; ll_cluster_t *hb = user_data; /* if (hb->llc_ops->msgready(hb)) cl_log(LOG_DEBUG, "Msg ready!"); cl_log(LOG_DEBUG, "Reading a message!"); */ reply = hb->llc_ops->readmsg(hb, 0); if (reply != NULL) { /* cl_log_message(reply); */ ha_msg_del(reply); reply=NULL; return TRUE; } /* else return FALSE; */ return TRUE; } void ipfail_dispatch_destroy(gpointer user_data) { return; } gboolean ipfail_timeout_dispatch(gpointer user_data) { ll_cluster_t *hb = user_data; if (quitnow) { g_main_quit(mainloop); return FALSE; } if (hb->llc_ops->msgready(hb)) { /* cl_log(LOG_DEBUG, "Msg ready! 
[2]"); */ return ipfail_dispatch(NULL, user_data); } return TRUE; } void open_api(ll_cluster_t *hb) { /* Sign in to the API */ cl_log(LOG_DEBUG, "Signing in with heartbeat"); if (hb->llc_ops->signon(hb, "ipfail")!= HA_OK) { cl_log(LOG_ERR, "Cannot sign on with heartbeat"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(1); } } void close_api(ll_cluster_t *hb) { /* Log off of the API and clean up */ if (hb->llc_ops->signoff(hb, FALSE) != HA_OK) { cl_log(LOG_ERR, "Cannot sign off from heartbeat."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(14); } if (hb->llc_ops->delete(hb) != HA_OK) { cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_log(LOG_ERR, "Cannot delete API object."); exit(15); } } Heartbeat-3-0-7e3a82377fa8/contrib/ipfail/ipfail.h0000644000000000000000000000406311576626513021523 0ustar00usergroup00000000000000/* ipfail.h: ipfail header file * * Copyright (C) 2003 Kevin Dwyer * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Various defualts */ #define F_NUMPING "num_ping" #define HB_LOCAL_RESOURCES "local" #define HB_FOREIGN_RESOURCES "foreign" #define HB_ALL_RESOURCES "all" /* Data structures */ struct giveup_data { ll_cluster_t *hb; const char *res_type; }; /* Prototypes */ void node_walk(ll_cluster_t *); void set_signals(ll_cluster_t *); void NodeStatus(const char *, const char *, void *); void LinkStatus(const char *, const char *, const char *, void *); void msg_ipfail_join(struct ha_msg *, void *); void msg_ping_nodes(struct ha_msg *, void *); void i_am_dead(struct ha_msg *, void *); void msg_resources(struct ha_msg *, void *); void gotsig(int); gboolean giveup(gpointer); void you_are_dead(ll_cluster_t *); int ping_node_status(ll_cluster_t *); void ask_ping_nodes(ll_cluster_t *, int); void set_callbacks(ll_cluster_t *); void open_api(ll_cluster_t *); void close_api(ll_cluster_t *); gboolean ipfail_dispatch(IPC_Channel *, gpointer); void ipfail_dispatch_destroy(gpointer); gboolean ipfail_timeout_dispatch(gpointer); void delay_giveup(ll_cluster_t *, const char *, int); void giveup_destroy(gpointer); void abort_giveup(void); void send_abort_giveup(ll_cluster_t *); void msg_abort_giveup(struct ha_msg *, void *); int is_stable(ll_cluster_t *); Heartbeat-3-0-7e3a82377fa8/contrib/mlock/Makefile.am0000644000000000000000000000265311576626513022006 0ustar00usergroup00000000000000# # heartbeat: multi clients lock test code # # Copyright (C) 2004 Guochun Shi(gshi@ncsa.uiuc.edu) # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(libdir)/@HB_PKG@ havarlibdir = $(localstatedir)/lib/@HB_PKG@ halib_PROGRAMS = mlock ## SOURCES mlock_SOURCES = mlock.c mlock_LDADD =$(GLIBLIB) $(top_builddir)/replace/libreplace.la ## additional Makefile targets # additional installations not covered normally install-exec-local: uninstall-local: Heartbeat-3-0-7e3a82377fa8/contrib/mlock/mlock.c0000644000000000000000000004566211576626513021232 0ustar00usergroup00000000000000/* * * multi-clients NFS lock test code * * Copyright (C) 2004 Guochun Shi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MYPORT 5000 #define PASS 0 #define WARNING 1 #define FATAL 2 #define END 0 #define output printf struct test_task{ int num; int sec; int func; off_t offset; off_t length; int pass; int fail; }; #define MAX_FN_LEN 32 struct global_info{ char filename[MAX_FN_LEN]; int clientID; int sockfd; int fd; int fd_save; int testpass; int testwarn; int testfail; pid_t forkpid; }; enum{ OPEN, MANOPEN, CLOSE, CLOSE_SAVEFD, WAIT, SEND, RUN, READ, WRITE, SLEEP, PUSHFD, POPFD, TRUNC, FORK, KILL, OUTPUT }; struct run_param{ int num; int sec; int func; off_t offset; off_t length; int pass; int fail; }; struct io_param{ int num; int sec; int start; int whichmsg; }; struct sleep_param{ int time; }; struct task{ int op; struct run_param run_param; struct io_param io_param; struct sleep_param sleep_param; }; struct global_info gi; static void pushfd(void) { if ((gi.fd_save = dup(gi.fd)) == -1 ){ perror("dup"); exit(1); } return; } static void popfd(void) { if (dup2(gi.fd_save, gi.fd) == -1){ perror("dup2"); exit(1); } return; } static void close_savefd(void) { if (close(gi.fd_save) == -1){ perror("close"); exit(1); } return; } static void hb_trunc(void ) { if (ftruncate(gi.fd, 0) == -1){ perror("ftruncate"); exit(1); } return; } static int report (int result, struct test_task *tt) { if (tt->pass == result){ output("PASSED.\n"); gi.testpass++; } else if ((tt->pass == EAGAIN && result == EACCES) || (tt->pass == EACCES && result == EAGAIN) || tt->fail == WARNING){ output("warning\n"); gi.testwarn++; } else { output("test failed, result =%d\n", 
result); gi.testfail++; exit(1); } return 0; } static void test_exit(void ) { if( gi.testwarn == 0 && gi.testfail == 0 ){ output("All tests finished successfully!\n"); exit(1); } else { output(" ********There are %d warnings, %d errors******", gi.testwarn, gi.testfail); exit(1); } } static void manopen_testfile(void) { if ((gi.fd = open(gi.filename,O_RDWR|O_CREAT, 02666)) == -1){ perror("open"); exit(1); } return ; } static void open_testfile(void) { if ((gi.fd = open(gi.filename,O_RDWR|O_CREAT, 0666)) == -1){ perror("open"); exit(1); } return ; } static void close_testfile(void) { if( close(gi.fd) == -1){ perror("close"); exit(1); } /* unlink(gi.filename); */ return; } #define MSGONE 1 #define MSGTWO 2 #define MSGA "abcde" #define MSGB "edcba" #define WR_START 4*1024 - 4 #define LOCKLEN 6 static void write_testfile(struct task* task) { const char* msg; int result; output("%d-%d:\t", task->io_param.num, task->io_param.sec); if (task->io_param.whichmsg == 1){ msg = MSGA; } else { msg = MSGB; } if (lseek(gi.fd, task->io_param.start, SEEK_SET) == -1){ perror("lseek"); exit(1); } if ((result = write(gi.fd, msg, LOCKLEN)) == -1){ perror("write"); exit(1); } output("PASSED.\n"); } static void read_testfile( struct task* task) { const char* msg; char buf[LOCKLEN]; output("%d-%d:\t", task->io_param.num, task->io_param.sec); if (task->io_param.whichmsg == 1){ msg = MSGA; } else { msg = MSGB; } if (lseek(gi.fd, task->io_param.start, SEEK_SET) == -1){ perror("lseek"); exit(1); } if (read(gi.fd, buf, LOCKLEN) == -1){ perror("read"); exit(1); } if(memcmp(buf, msg, LOCKLEN) != 0){ output("\nread content is not matched\n"); output("conent in file is =%s,\n" "the msg compared with is %s\n", buf, msg); exit(1); } output("PASSED.\n"); } static int do_test( struct test_task* tt) { int result = PASS; struct flock flock; int cmd; flock.l_whence = SEEK_SET; flock.l_start = tt->offset; flock.l_len = tt->length; flock.l_type = F_WRLCK; switch(tt->func){ case F_TEST: cmd = F_GETLK; break; case F_TLOCK: cmd = F_SETLK; break; case F_LOCK: cmd = F_SETLKW; break; case F_ULOCK: flock.l_type = F_UNLCK; cmd = F_SETLK; break; default: output("wrong func in task! 
\n"); exit(1); } output("%d-%d:\t", tt->num, tt->sec); if(lseek(gi.fd, tt->offset, 0) < 0) { result = errno; } if (result == 0) { /* if (result = lockf(gi.fd, tt->func, tt->length) != 0){ */ if ((result = fcntl(gi.fd, cmd, &flock)) != 0 ){ result = errno; }else if ( cmd == F_GETLK && flock.l_type != F_UNLCK){ result = EACCES; } } return report(result, tt); } static int test(int num, int sec, int func, off_t offset, off_t length, int pass, int fail){ struct test_task tt ; tt.num = num; tt.sec = sec; tt.func = func; tt.offset = offset; tt.length = length; tt.pass = pass; tt.fail = fail; return do_test(&tt); } static int waitnotice(void) { int numbytes; char buf; if ((numbytes = recv(gi.sockfd, &buf, 1, 0)) == -1){ perror("recv"); close(gi.sockfd); exit(1); } else if (numbytes == 0){ output("socket broken\n"); exit(1); } return 0; } static int sendnotice(void) { int numbytes; char buf; if ((numbytes = send(gi.sockfd, &buf, 1, 0)) == -1){ perror("send"); close(gi.sockfd); exit(1); } return 0; } #if 0 static int send_msg(void* msg, int len) { if (send(gi.sockfd, msg, len, 0) != len){ perror("send"); close(gi.sockfd); exit(1); } return 0; } static int recv_msg(void* buf, int len) { int numbytes = 0; while (len > 0 ){ if (( numbytes = recv(gi.sockfd, buf, len, 0)) == -1){ perror("recv"); close(gi.sockfd); exit(1); } len -= numbytes; } return 0; } #endif static void init_comm(const char* servername) { struct sockaddr_in their_addr; socklen_t sin_size; if (gi.clientID == 0 ){ int sockfd; struct sockaddr_in my_addr; int yes = 1; if((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1){ perror("socket"); exit(1); } my_addr.sin_family = AF_INET; my_addr.sin_port = htons(MYPORT); my_addr.sin_addr.s_addr = INADDR_ANY; memset(&(my_addr.sin_zero), '\0', 8); if (setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { perror("setsockopt"); exit(1); } if (bind(sockfd, (struct sockaddr *)&my_addr, sizeof(struct sockaddr)) == -1){ perror("bind"); exit(1); } if (listen(sockfd, 10) == -1){ perror("listen"); exit(1); } sin_size = sizeof(their_addr); if(( gi.sockfd = accept(sockfd, (struct sockaddr *)&their_addr, (socklen_t *)&sin_size)) == -1) { perror("accept"); exit(1); } close(sockfd); } else{ struct hostent *he; if (!servername){ printf("servername is NULL\n"); exit(1); } if ((he=gethostbyname(servername)) == NULL){ output("gethostbyname: Error, servername =%s \n", servername); exit(1); } if(( gi.sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1){ perror("socket"); exit(1); } their_addr.sin_family = AF_INET; their_addr.sin_port = htons(MYPORT); memcpy(&their_addr.sin_addr, he->h_addr, sizeof(struct in_addr)); memset(&(their_addr.sin_zero), '\0', 8); if(connect(gi.sockfd, (struct sockaddr *) &their_addr, sizeof(struct sockaddr)) == -1){ perror("connect"); exit(1); } } } static GSList * generate_task_list(void) { GSList* task_list = NULL; size_t i; int task_table[][9] = { /*test 1: testing lock function in only one machine*/ {0, OPEN}, {0, WAIT}, {0, RUN, 1, 1, F_TEST, 0, 1, PASS}, {0, RUN, 1, 2, F_TEST, 0, END, PASS}, {0, RUN, 1, 3, F_TEST, 1, 1, PASS}, {0, RUN, 1, 4, F_TEST, 1, END, PASS}, {0, SEND}, {0, CLOSE}, {1, SEND}, {1, WAIT}, /*test 2: node 1 locks the whole file node 2 tries to lock different regions of the same file*/ {0, OPEN}, {0 ,WAIT}, {0, RUN, 2, 0, F_TLOCK, 0, END, PASS}, {0, SEND}, {0, WAIT}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 2, 1, F_TEST, 0, 1, EACCES}, {1, RUN, 2, 2, F_TEST, 0, END, EACCES}, {1, RUN, 2, 3, F_TEST, 1, 1, EACCES}, {1, RUN, 2, 4, F_TEST, 1, END, 
EACCES}, {1, SEND}, {1, CLOSE}, /* test 3: node 1 locks the 1st byte. node 2 tries to lock different regions*/ {0, OPEN}, {0, WAIT}, {0, RUN, 3, 0, F_TLOCK, 0, 1, PASS}, {0, SEND}, {0, WAIT}, {0, RUN, 3, 5, F_ULOCK, 0, 1, PASS}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 3, 1, F_TEST, 0, 1, EACCES}, {1, RUN, 3, 2, F_TEST, 0, END, EACCES}, {1, RUN, 3, 3, F_TEST, 1, 1, PASS}, {1, RUN, 3, 4, F_TEST, 1, END, PASS}, {1, SEND}, {1, CLOSE}, /* test 4: node 1 locks the second byte node 2 tries to lock different regions */ {0, OPEN}, {0, WAIT}, {0, RUN, 4, 0, F_TLOCK, 1, 1, PASS}, {0, SEND}, {0, WAIT}, {0, RUN, 4, 10, F_ULOCK, 1, 1, PASS}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 4, 1, F_TEST, 0, 1, PASS}, {1, RUN, 4, 2, F_TEST, 0, 2, EACCES}, {1, RUN, 4, 3, F_TEST, 0, END, EACCES}, {1, RUN, 4, 4, F_TEST, 1, 1, EACCES}, {1, RUN, 4, 5, F_TEST, 1, 2, EACCES}, {1, RUN, 4, 6, F_TEST, 1, END, EACCES}, {1, RUN, 4, 7, F_TEST, 2, 1, PASS}, {1, RUN, 4, 8, F_TEST, 2, 2, PASS}, {1, RUN, 4, 9, F_TEST, 2, END, PASS}, {1, SEND}, {1, CLOSE}, /* test 5: node 1 locks the 1st and 3rd bytes, node 2 tries to lock different regions */ {0, OPEN}, {0, WAIT}, {0, RUN, 5, 0, F_TLOCK, 0, 1, PASS}, {0, RUN, 5, 1, F_TLOCK, 2, 1, PASS}, {0, SEND}, {0, WAIT}, {0, RUN, 5, 14, F_ULOCK, 0, 1, PASS}, {0, RUN, 5, 15, F_ULOCK, 2, 1, PASS}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 5, 2, F_TEST, 0, 1, EACCES}, {1, RUN, 5, 3, F_TEST, 0, 2, EACCES}, {1, RUN, 5, 4, F_TEST, 0, END, EACCES}, {1, RUN, 5, 5, F_TEST, 1, 1, PASS}, {1, RUN, 5, 6, F_TEST, 1, 2, EACCES}, {1, RUN, 5, 7, F_TEST, 1, END, EACCES}, {1, RUN, 5, 8, F_TEST, 2, 1, EACCES}, {1, RUN, 5, 9, F_TEST, 2, 2, EACCES}, {1, RUN, 5, 10, F_TEST, 2, END, EACCES}, {1, RUN, 5, 11, F_TEST, 3, 1, PASS}, {1, RUN, 5, 12, F_TEST, 3, 2, PASS}, {1, RUN, 5, 13, F_TEST, 3, END, PASS}, {1, SEND}, {1, CLOSE}, /*test 6 : about maxof , ignored now */ /*test 7: test nodes' mutual exclusion. */ {0, OPEN}, {0, WAIT}, {0, RUN, 7, 0, F_TLOCK, WR_START, LOCKLEN, PASS}, {0, WRITE, 7, 1, WR_START, MSGONE}, {0, SEND}, {0, READ, 7, 2, WR_START, MSGONE}, {0, RUN, 7, 3, F_ULOCK, WR_START, LOCKLEN, PASS}, {0, WAIT}, {0, RUN, 7, 7, F_LOCK, WR_START, LOCKLEN, PASS}, {0, READ, 7, 8, WR_START, MSGTWO}, {0, RUN, 7, 9, F_ULOCK, WR_START, LOCKLEN, PASS}, {0, SEND}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 7, 4, F_LOCK, WR_START, LOCKLEN, PASS}, {1, SEND}, {1, WRITE, 7, 5, WR_START, MSGTWO}, {1, RUN, 7, 6, F_ULOCK, WR_START, LOCKLEN, PASS}, {1, WAIT}, {1, CLOSE}, /*test 8: rate test, ignored now */ /*test 9: Test mandatory locking. 
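 (Note: mandatory locking on Linux normally also requires the file's
 set-group-ID bit without group execute -- hence the 02666 mode used by
 manopen_testfile() above -- and a filesystem mounted with "-o mand";
 whether that is why this test is disabled is left open here.)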
FIXME: this testing cannot work yet */ #if 0 {0, MANOPEN}, {0, SLEEP, 20}, {0, WAIT}, {0, RUN, 9, 0, F_TLOCK, 0, LOCKLEN, PASS}, {0, WRITE, 9, 1, 0, MSGONE}, {0, SEND}, {0, READ, 9, 2, 0, MSGONE}, {0, RUN, 9, 3, F_ULOCK, 0, LOCKLEN, PASS}, {0, WAIT}, {0, READ, 9, 5, 0, MSGTWO}, {0, SEND}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 9, 4, F_TEST, 0, LOCKLEN, EACCES}, {1, WRITE, 9, 4, 0, MSGTWO}, {1, SEND}, {1, WAIT}, {1, CLOSE}, #endif /* test 10: Make sure a locked region is split properly */ {0, OPEN}, {0, WAIT}, {0, RUN, 10, 0, F_TLOCK, 0, 3, PASS}, {0, RUN, 10, 1, F_ULOCK, 1, 1, PASS}, {0, SEND}, {0, WAIT}, {0, RUN, 10, 6, F_ULOCK, 0, 1, PASS}, {0, RUN, 10, 7, F_ULOCK, 2, 1, PASS}, {0, SEND}, {0, WAIT}, /* {0, RUN, 10, 9, F_ULOCK, 0, 1, PASS}, */ {0, RUN, 10, 10, F_TLOCK, 1, 3, PASS}, {0, RUN, 10, 11, F_ULOCK, 2, 1, PASS}, {0, SEND}, {0, WAIT}, {0, CLOSE}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 10, 2, F_TEST, 0, 1, EACCES}, {1, RUN, 10, 3, F_TEST, 2, 1, EACCES}, {1, RUN, 10, 4, F_TEST, 3, END, PASS}, {1, RUN, 10, 5, F_TEST, 1, 1, PASS}, {1, SEND}, {1, WAIT}, {1, RUN, 10, 8, F_TEST, 0, 3, PASS}, {1, SEND}, {1, WAIT}, {1, RUN, 10, 12, F_TEST, 1, 1, EACCES}, {1, RUN, 10, 13, F_TEST, 3, 1, EACCES}, {1, RUN, 10, 14, F_TEST, 4, END, PASS}, {1, RUN, 10, 15, F_TEST, 2, 1, PASS}, {1, RUN, 10, 16, F_TEST, 0, 1, PASS}, {1, SEND}, {1, CLOSE}, /* test 11: make sure close() releases the process's locks */ {0, OPEN}, {0, WAIT}, {0, PUSHFD}, {0, RUN, 11, 0, F_TLOCK, 0, 0, PASS}, {0, CLOSE}, {0, SEND}, {0, WAIT}, {0, POPFD}, {0, RUN, 11, 3, F_TLOCK, 29, 1463, PASS}, {0, RUN, 11, 4, F_TLOCK, 0X2000, 87, PASS}, {0, CLOSE}, {0, SEND}, {0, WAIT}, {0, POPFD}, {0, WRITE, 11, 7, 0, MSGONE}, {0, RUN, 11, 8, F_TLOCK, 0, 0, PASS}, {0, CLOSE}, {0, SEND}, {0, WAIT}, {0, POPFD}, {0, WRITE, 11, 11, 0, MSGTWO}, {0, RUN, 11, 12, F_TLOCK, 0, 0, PASS}, {0, TRUNC}, {0, CLOSE}, {0, SEND}, {0, WAIT}, {0, CLOSE_SAVEFD}, {1, OPEN}, {1, SEND}, {1, WAIT}, {1, RUN, 11, 1, F_TLOCK, 0, 0, PASS}, {1, RUN, 11, 2, F_ULOCK, 0, 0, PASS}, {1, SEND}, {1, WAIT}, {1, RUN, 11, 5, F_TLOCK, 0, 0, PASS}, {1, RUN, 11, 6, F_ULOCK, 0, 0, PASS}, {1, SEND}, {1, WAIT}, {1, RUN, 11, 9, F_TLOCK, 0, 0, PASS}, {1, RUN, 11, 10, F_ULOCK, 0, 0, PASS}, {1, SEND}, {1, WAIT}, {1, RUN, 11, 13, F_TLOCK, 0, 0, PASS}, {1, RUN, 11, 14, F_ULOCK, 0, 0, PASS}, {1, SEND}, {1, CLOSE}, /* test 12: Signalled process should release locks. 
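 Node 1 forks a child that takes a whole-file write lock and then kills
 it with SIGINT; node 0 must subsequently be able to take the same lock,
 which shows the kernel released the dead child's locks.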
*/ {0, OPEN}, {0, WAIT}, {0, SLEEP, 1}, {0, RUN, 12, 1, F_TLOCK, 0, 0, PASS}, {0, SEND}, {0, CLOSE}, {1, OPEN}, {1, FORK, RUN, 12, 0, F_TLOCK, 0, 0, PASS}, {1, SLEEP, 1}, {1, KILL}, {1, SEND}, {1, WAIT}, {1, CLOSE}, }; for (i = 0; i < sizeof(task_table)/(sizeof(task_table[0])); i++ ){ if ( gi.clientID == task_table[i][0]){ struct task* task = g_malloc( sizeof(struct task)); int j = 2; task->op = task_table[i][1]; switch(task->op){ case WRITE: case READ: task->io_param.num = task_table[i][j++]; task->io_param.sec = task_table[i][j++]; task->io_param.start = task_table[i][j++]; task->io_param.whichmsg = task_table[i][j++]; break; case SLEEP: task->sleep_param.time = task_table[i][j++]; break; case FORK: j++; /* fall through */ case RUN: task->run_param.num = task_table[i][j++]; task->run_param.sec = task_table[i][j++]; task->run_param.func = task_table[i][j++]; task->run_param.offset = task_table[i][j++]; task->run_param.length = task_table[i][j++]; task->run_param.pass = task_table[i][j++]; break; } task_list = g_slist_append(task_list, task); } } return task_list; } static void remove_task_list(GSList* task_list) { size_t i; for (i = 0; i < g_slist_length(task_list); i++){ g_free(g_slist_nth_data(task_list, i)); } g_slist_free(task_list); } static void runtests(GSList* task_list) { size_t i; for (i = 0; i < g_slist_length(task_list); i++){ struct task* task; task = g_slist_nth_data(task_list, i); switch(task->op){ case OPEN: open_testfile(); break; case MANOPEN: manopen_testfile(); break; case CLOSE: close_testfile(); break; case CLOSE_SAVEFD: close_savefd(); break; case WAIT: waitnotice(); break; case SEND: sendnotice(); break; case WRITE: write_testfile(task); break; case READ: read_testfile(task); break; case RUN: test( task->run_param.num, task->run_param.sec, task->run_param.func, task->run_param.offset, task->run_param.length, task->run_param.pass, FATAL); break; case FORK:{ int subpid = fork(); if (subpid < 0){ perror("can't fork off child"); exit(1); } if (subpid == 0){ test( task->run_param.num, task->run_param.sec, task->run_param.func, task->run_param.offset, task->run_param.length, task->run_param.pass, FATAL); while(1) { sleep(1); } }else{ gi.forkpid = subpid; } break; } case KILL: if(kill(gi.forkpid, SIGINT) == -1){ perror("kill"); exit(1); } break; case SLEEP: sleep(task->sleep_param.time); break; case PUSHFD: pushfd(); break; case POPFD: popfd(); break; case TRUNC: hb_trunc(); break; } } } static void usage(const char* pgm) { output("Usage: this test need to run in two machines, the filesname and num have to be the same in both nodes\n"); output("node1:\n %s [-N num] \n", pgm); output("node2:\n %s [-N num] \n", pgm); return; } int main (int argc, char** argv) { char* servername = NULL; char* filename = NULL; GSList* task_list = NULL; int option; int num_ites; int i; extern char *optarg; /*default number of phase is 1*/ num_ites = 1; while ((option = getopt(argc, argv, "N:h")) != -1){ switch(option){ case 'N': if (sscanf(optarg, "%d", &num_ites) <= 0) { usage(argv[0]); exit(1); } break; case 'h': default: usage(argv[0]); exit(1); } } gi.clientID = 0; for (i = optind ; i < argc; i++) { if (filename == NULL){ filename = argv[i]; } else { servername = argv[i]; gi.clientID = 1; } } if (filename == NULL){ usage(argv[0]); exit(1); } gi.testpass = 0; gi.testwarn = 0; gi.testfail = 0; strlcpy(gi.filename, filename, sizeof(gi.filename)); init_comm(servername); for (i = 0; i < num_ites; i++){ output("Iteration %d: \n", i); task_list = generate_task_list(); runtests(task_list); 
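	/* generate_task_list() keeps only the task_table rows whose first
	 * field matches our clientID, so each side runs its own half of the
	 * script; the SEND/WAIT entries keep the two nodes in lock-step over
	 * the TCP connection set up in init_comm(). */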
remove_task_list(task_list); } test_exit(); return 0; } Heartbeat-3-0-7e3a82377fa8/cts/CIB.py.in0000644000000000000000000002477111576626513017357 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: CIB generator ''' __copyright__=''' Author: Jia Ming Pan Copyright (C) 2006 International Business Machines ''' from UserDict import UserDict import sys, time, types, syslog, os, struct, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM class CIB: cib_option_template = ''' ''' ipaddr_template = ''' ''' hb_ipaddr_template = ''' ''' lsb_resource = ''' ''' dummy_resource_template = ''' ''' clustermon_resource_template = ''' ''' clustermon_location_constraint = ''' ''' master_slave_resource = ''' ''' resource_group_template = '''%s %s %s''' per_node_constraint_template = ''' ''' stonith_resource_template = """ """ cib_template =''' %s %s %s ''' def NextIP(self): fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"]=ip return ip def __init__(self, CM): self.CM = CM #make up crm config cib_options = self.cib_option_template % CM.Env["DoFencing"] #create resources and their constraints resources = "" constraints = "" if self.CM.Env["DoBSC"] == 1: cib_options = cib_options + ''' ''' if self.CM.Env["CIBResource"] != 1: # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) return if self.CM.cluster_monitor == 1: resources += self.clustermon_resource_template constraints += self.clustermon_location_constraint ip1=self.NextIP() ip2=self.NextIP() ip3=self.NextIP() ip1_rsc = self.ipaddr_template % (ip1, ip1, ip1, ip1, ip1) ip2_rsc = self.hb_ipaddr_template % (ip2, ip2, ip2, ip2, ip2) ip3_rsc = self.ipaddr_template % (ip3, ip3, ip3, ip3, ip3) resources += self.resource_group_template % (ip1_rsc, ip2_rsc, ip3_rsc) # lsb resource resources += self.lsb_resource # Mirgator resources += self.dummy_resource_template % \ ("migrator", "migrator", "migrator", "migrator") # per node resource fields = string.split(self.CM.Env["IPBase"], '.') for node in self.CM.Env["nodes"]: ip = self.NextIP() per_node_resources = self.ipaddr_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, ip) per_node_constraint = self.per_node_constraint_template % \ ("rsc_"+node, "rsc_"+node, "rsc_"+node, "rsc_"+node, node) resources += per_node_resources constraints += per_node_constraint # fencing resource nodelist = "" len = 0 for node in self.CM.Env["nodes"]: nodelist += node + " " len = len + 1 stonith_resource = self.stonith_resource_template % \ (self.CM.Env["reset"].stonithtype, self.CM.Env["reset"].configName, self.CM.Env["reset"].configValue) resources += stonith_resource #master slave resource resources += self.master_slave_resource % (2*len, 2, len, 1) # generate cib self.cts_cib = self.cib_template % (cib_options, resources, constraints) def cib(self): return self.cts_cib Heartbeat-3-0-7e3a82377fa8/cts/CM_LinuxHAv2.py.in0000755000000000000000000006676611576626513021136 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... 
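This class drives a Heartbeat CRM (v2) cluster for CTS: it installs a
generated CIB (see CIB.py), starts and stops heartbeat on the test
nodes, and inspects cluster state through crmadmin/cibadmin/crm_standby.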
''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests, warnings from CTS import * from CM_hb import HeartbeatCM from CTSaudits import ClusterAudit from CTStests import * from CIB import * try: from xml.dom.minidom import * except ImportError: sys.__stdout__.write("Python module xml.dom.minidom not found\n") sys.__stdout__.write("Please install python-xml or similar before continuing\n") sys.__stdout__.flush() sys.exit(1) ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class LinuxHAv2(HeartbeatCM): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): HeartbeatCM.__init__(self, Environment, randseed=randseed) self.clear_cache = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 self.update({ "Name" : "linux-ha-v2", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : "@INITDIR@/heartbeat@INIT_EXT@ start > /dev/null 2>&1", "StopCmd" : "@INITDIR@/heartbeat@INIT_EXT@ stop > /dev/null 2>&1", "ElectionCmd" : "@sbindir@/crmadmin -E %s", "StatusCmd" : "@sbindir@/crmadmin -S %s 2>/dev/null", "EpocheCmd" : "@sbindir@/ccm_tool -e", "QuorumCmd" : "@sbindir@/ccm_tool -q", "CibQuery" : "@sbindir@/cibadmin -Ql", "ParitionCmd" : "@sbindir@/ccm_tool -p", "IsRscRunning" : "@sbindir@/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return", "ExecuteRscOp" : "@sbindir@/lrmadmin -n %s -E %s %s 0 %d EVERYTIME 2>/dev/null", "CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd2" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm break-communication %s>/dev/null 2>&1", "IsIPAddrRscRunning" : "", "StandbyCmd" : "@sbindir@/crm_standby -U %s -v %s 2>/dev/null", "UUIDQueryCmd" : "@sbindir@/crmadmin -N", "StandbyQueryCmd" : "@sbindir@/crm_standby -GQ -U %s 2>/dev/null", # Patterns to look for in the log files for various occasions... 
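        # These are regular expressions matched against the cluster log
        # (via CTS.LogWatcher); "%s" placeholders are filled in with a
        # node name before a pattern is used.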
"Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions # Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:They_started" : "Updating node state to member for %s", "Pat:We_started" : "%s crmd:.* State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ", "Pat:All_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"tengine.*is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ), }) del self["Standby"] if self.Env["DoBSC"]: del self["Pat:They_stopped"] del self["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.default_cts_cib=CIB(self).cib() self.debug(self.default_cts_cib) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crmadmin:", "ERROR: Message hist queue is filling up" ] return [] def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." 
% node) return None if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml") self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml.sig") self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml.last") self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBDIR@/heartbeat/crm/cib.xml.sig.last") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.debug("Installing Generated CIB on node %s" %(node)) warnings.filterwarnings("ignore") cib_file=os.tmpnam() warnings.resetwarnings() os.system("rm -f "+cib_file) self.debug("Creating new CIB for " + node + " in: " + cib_file) os.system("echo \'" + self.default_cts_cib + "\' > " + cib_file) if 0!=self.rsh.echo_cp(None, cib_file, node, "@HA_VARLIBDIR@/heartbeat/crm/cib.xml"): raise ValueError("Can not create CIB on %s "%node) os.system("rm -f "+cib_file) else: self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0!=self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) self.rsh.remote_py(node, "os", "system", "chown @HA_CCMUSER@ @HA_VARLIBDIR@/heartbeat/crm/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:They_started"]%node) idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats) idle_watch.setwatch() out=self.rsh.readaline(node, self["StatusCmd"]%node) ret= (string.find(out, 'ok') != -1) self.debug("Node %s status: %s" %(node, out)) if not ret: if self.ShouldBeStatus[node] == self["up"]: self.log( "Node status for %s is %s but we think it should be %s" %(node, self["down"], self.ShouldBeStatus[node])) self.ShouldBeStatus[node]=self["down"] return 0 if self.ShouldBeStatus[node] == self["down"]: self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, self["up"], self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]=self["up"] # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... 
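    # test_node_CM() answers with 0 (down), 1 (up but not yet stable) or
    # 2 (up and stable): StataCM() above only asks whether the node is up
    # (> 0), while node_stable() below insists on 2.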
def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def cluster_stable(self, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == self["up"]: self.rsh.readaline(node, (self["StatusCmd"] %node) ) any_up = 1 if any_up == 0: self.debug("Cluster is inactive") return 1 ret = idle_watch.look() if ret: self.debug(ret) return 1 self.log("Warn: Cluster Master not IDLE after %ds" % timeout) return None def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh.readaline(node, self["StatusCmd"]%node) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 if rc == 1: self.debug("%s _is_ the DC" % node) return rc def active_resources(self, node): (rc, output) = self.rsh.remote_py( node, "os", "system", """@sbindir@/crm_mon -1 | grep "Started %s" """ % node) resources = [] for line in output: fields = line.split() resources.append(fields[0]) return resources def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"): ''' Execute an operation on a resource ''' self.rsh.readaline(node, self["ExecuteRscOp"] % (app, resource, op, interval)) return self.rsh.lastrc def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == self["up"]: if self.ResourceOp(rid, "monitor", node) == 0: ResourceNodes.append(node) return ResourceNodes def isolate_node(self, node, allowlist): '''isolate the communication between the nodes''' rc = self.rsh(node, self["BreakCommCmd2"]%allowlist) if rc == 0: return 1 else: self.log("Could not break the communication from node: %s",node) return None def Configuration(self): if self.config: return self.config.getElementsByTagName('configuration')[0] warnings.filterwarnings("ignore") cib_file=os.tmpnam() warnings.resetwarnings() os.system("rm -f "+cib_file) if self.Env["ClobberCIB"] == 1: if self.Env["CIBfilename"] == None: self.debug("Creating new CIB in: " + cib_file) os.system("echo \'"+ self.default_cts_cib +"\' > "+ cib_file) else: os.system("cp "+self.Env["CIBfilename"]+" "+cib_file) else: if 0 != self.rsh.echo_cp( self.Env["nodes"][0], "@HA_VARLIBDIR@/heartbeat/crm/cib.xml", None, cib_file): raise ValueError("Can not copy file to %s, maybe permission denied"%cib_file) self.config = parse(cib_file) os.remove(cib_file) return self.config.getElementsByTagName('configuration')[0] def Resources(self): ResourceList = [] #read resources in cib configuration = self.Configuration() resources = configuration.getElementsByTagName('resources')[0] rscs = configuration.getElementsByTagName('primitive') incs = configuration.getElementsByTagName('clone') groups = configuration.getElementsByTagName('group') for rsc in rscs: if rsc in resources.childNodes: ResourceList.append(HAResource(self,rsc)) for grp in groups: for rsc in rscs: if rsc in grp.childNodes: if 
self.use_short_names: resource = HAResource(self,rsc) else: resource = HAResource(self,rsc,grp.getAttribute('id')) ResourceList.append(resource) for inc in incs: max = 0 inc_name = inc.getAttribute("id") instance_attributes = inc.getElementsByTagName('instance_attributes')[0] attributes = instance_attributes.getElementsByTagName('attributes')[0] nvpairs = attributes.getElementsByTagName('nvpair') for nvpair in nvpairs: if nvpair.getAttribute("name") == "clone_max": max = int(nvpair.getAttribute("value")) inc_rsc = inc.getElementsByTagName('primitive')[0] for i in range(0,max): rsc = HAResource(self,inc_rsc) rsc.inc_no = i rsc.inc_name = inc_name rsc.inc_max = max if self.use_short_names: rsc.rid = rsc.rid + ":%d"%i else: rsc.rid = inc_name+":"+rsc.rid + ":%d"%i rsc.Instance = rsc.rid ResourceList.append(rsc) return ResourceList def ResourceGroups(self): GroupList = [] #read resources in cib configuration = self.Configuration() groups = configuration.getElementsByTagName('group') rscs = configuration.getElementsByTagName('primitive') for grp in groups: group = [] GroupList.append(group) for rsc in rscs: if rsc in grp.childNodes: if self.use_short_names: resource = HAResource(self,rsc) else: resource = HAResource(self,rsc,grp.getAttribute('id')) group.append(resource) return GroupList def Dependencies(self): DependencyList = [] #read dependency in cib configuration=self.Configuration() constraints=configuration.getElementsByTagName('constraints')[0] rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc') for node in rsc_to_rscs: dependency = {} dependency["id"]=node.getAttribute('id') dependency["from"]=node.getAttribute('from') dependency["to"]=node.getAttribute('to') dependency["type"]=node.getAttribute('type') dependency["strength"]=node.getAttribute('strength') DependencyList.append(dependency) return DependencyList def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: self.debug("Retrieving partition details for %s" %node) if self.ShouldBeStatus[node] == self["up"]: partition = self.rsh.readaline(node, self["ParitionCmd"]) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" %(node, partition)) ccm_partitions.append(partition) else: self.log("bad partition details for %s" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... 
assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == self["up"]: quorum = self.rsh.readaline(node, self["QuorumCmd"]) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.log("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "crmd: .*Action A_RECOVER .* not supported", "ERROR: stonithd_op_result_ready: not signed on", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ "ERROR: stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(common_ignore) complist.append(Process("ccm", 0, [ "State transition S_IDLE", "CCM connection appears to have failed", "crmd: .*Action A_RECOVER .* not supported", "crmd: .*Input I_TERMINATE from do_recover", "Exiting to recover from CCM connection failure", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], [], common_ignore, 1, self)) complist.append(Process("cib", 0, [ "State transition S_IDLE", "Lost connection to the CIB service", "Connection to the CIB terminated...", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", ], [], common_ignore, 1, self)) complist.append(Process("lrmd", 0, [ "State transition S_IDLE", "LRM Connection failed", "crmd: .*I_ERROR.*lrm_dispatch", "State transition S_STARTING -> S_PENDING", ".*crmd .*exited with return code 2.", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], [], common_ignore, 1, self)) complist.append(Process("crmd", 0, [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition S_IDLE", "State transition S_STARTING -> S_PENDING", ], [ "tengine: .*ERROR: subsystem_msg_dispatch: The server .* has left us: Shutting down...NOW", "pengine: .*ERROR: subsystem_msg_dispatch: The server .* has left us: Shutting down...NOW", ], common_ignore, 1, self)) complist.append(Process("pengine", 1, [ "State transition S_IDLE", ".*crmd .*exited with return code 2.", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], [], common_ignore, 1, self)) complist.append(Process("tengine", 1, [ "State transition S_IDLE", ".*crmd .*exited with return code 2.", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not 
recover from internal error", ], [], common_ignore, 1, self)) if self.Env["DoFencing"] == 1 : complist.append(Process("stonithd", 0, [], [ "tengine_stonith_connection_destroy: Fencing daemon has left us", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], stonith_ignore, 0, self)) # complist.append(Process("heartbeat", 0, [], [], [], None, self)) return complist def NodeUUID(self, node): lines = self.rsh.readlines(node, self["UUIDQueryCmd"]) for line in lines: self.debug("UUIDLine:"+ line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): out=self.rsh.readaline(node, self["StandbyQueryCmd"]%node) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True class HAResource(Resource): def __init__(self, cm, node, group=None): ''' Get information from xml node ''' if group == None : self.rid = str(node.getAttribute('id')) else : self.rid = group + ":" + str(node.getAttribute('id')) self.rclass = str(node.getAttribute('class')) self.rtype = str(node.getAttribute('type')) self.inc_name = None self.inc_no = -1 self.inc_max = -1 self.rparameters = {} nvpairs = [] list = node.getElementsByTagName('instance_attributes') if len(list) > 0: attributes = list[0] list = attributes.getElementsByTagName('attributes') if len(list) > 0: parameters = list[0] nvpairs = parameters.getElementsByTagName('nvpair') for nvpair in nvpairs: name=nvpair.getAttribute('name') value=nvpair.getAttribute('value') self.rparameters[name]=value # This should normally be called first... FIXME! Resource.__init__(self, cm, self.rtype, self.rid) # resources that dont need quorum will have: # ops = node.getElementsByTagName('op') for op in ops: if op.getAttribute('name') == "start" and op.getAttribute('prereq') == "nothing": self.needs_quorum = 0 def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. We call the status operation for the resource script. ''' rc = self.CM.ResourceOp(self.rid, "monitor", nodename) return (rc == 0) def RunningNodes(self): return self.CM.ResourceLocation(self.rid) def Start(self, nodename): ''' This member function starts or activates the resource. ''' return self.CM.ResourceOp(self.rid, "start", nodename) def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' return self.CM.ResourceOp(self.rid, "stop", nodename) def IsWorkingCorrectly(self, nodename): return self.IsRunningOn(nodename) ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass Heartbeat-3-0-7e3a82377fa8/cts/CM_fs.py.in0000644000000000000000000000436211576626513017743 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: Failsafe dependent modules... Classes related to testing high-availability clusters... Lots of things are implemented. Lots of things are not implemented. We have many more ideas of what to do than we've implemented. ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. 
''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #import types, string, select, sys, time, re, os, random, struct #from os import system #from UserDict import UserDict #from syslog import * #from popen2 import Popen3 class FailSafeCM(ClusterManager): ''' The FailSafe cluster manager class. Not implemented yet. ''' def __init__(self, randseed=None): ClusterManager.__init__(self, randseed=randseed) self.update({ "Name" : "FailSafe", "StartCmd" : None, # Fix me! "StopCmd" : None, # Fix me! "StatusCmd" : None, # Fix me! "RereadCmd" : None, # Fix me! "TestConfigDir" : None, # Fix me! "LogFileName" : None, # Fix me! "Pat:We_started" : None, # Fix me! "Pat:They_started" : None, # Fix me! "Pat:We_stopped" : None, # Fix me! "Pat:They_stopped" : None, # Fix me! "BadRegexes" : None, # Fix me! }) self._finalConditions() def SyncTestConfigs(self): pass def SetClusterConfig(self): pass def ResourceGroups(self): raise ValueError("Forgot to write ResourceGroups()") Heartbeat-3-0-7e3a82377fa8/cts/CM_hb.py.in0000755000000000000000000005312711576626513017732 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: heartbeat dependent modules... Classes related to testing high-availability clusters... Lots of things are implemented. Lots of things are not implemented. We have many more ideas of what to do than we've implemented. ''' __copyright__=''' Copyright (C) 2000,2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from CTS import * class HeartbeatCM(ClusterManager): ''' The heartbeat cluster manager class. 
It implements the things we need to talk to and manipulate heartbeat clusters ''' def __init__(self, Environment, randseed=None): self.ResourceDirs = ["@sysconfdir@/ha.d/resource.d", "@sysconfdir@/rc.d/init.d", "@sysconfdir@/rc.d/"] self.ResourceFile = Environment["HAdir"] + "/haresources" self.ConfigFile = Environment["HAdir"]+ "/ha.cf" ClusterManager.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "heartbeat", "DeadTime" : 30, "StableTime" : 30, "StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; MALLOC_CHECK_=2 @libdir@/heartbeat/heartbeat >/dev/null 2>&1", "StopCmd" : "@libdir@/heartbeat/heartbeat -k", "StatusCmd" : "@libdir@/heartbeat/heartbeat -s", "RereadCmd" : "@libdir@/heartbeat/heartbeat -r", "StartDRBDCmd" : "@sysconfdir@/init.d/drbd start >/dev/null 2>&1", "StopDRBDCmd" : "@sysconfdir@/init.d/drbd stop", "StatusDRBDCmd" : "@sysconfdir@/init.d/drbd status", "DRBDCheckconf" : "@sysconfdir@/init.d/drbd checkconfig >/var/run/drbdconf 2>&1", "BreakCommCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm break-communication >/dev/null 2>&1", "FixCommCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm fix-communication >/dev/null 2>&1", "DelFileCommCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm delete-testingfile >/dev/null 2>&1", "SaveFileCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm save-testingfile /tmp/OnlyForTesting >/dev/null 2>&1", "ReduceCommCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm reduce-communication %s %s>/dev/null 2>&1", "RestoreCommCmd" : "@HA_NOARCHDATAHBDIR@/TestHeartbeatComm restore-communication /tmp/OnlyForTesting >/dev/null 2>&1", "IPaddrCmd" : "@sysconfdir@/ha.d/resource.d/IPaddr %s status", "Standby" : "@HA_NOARCHDATAHBDIR@/hb_standby >/dev/null 2>&1", "TestConfigDir" : "@sysconfdir@/ha.d/testconfigs", "LogFileName" : Environment["LogFileName"], # Patterns to look for in the log files for various occasions... "Pat:We_started" : " (%s) .* Initial resource acquisition complete", "Pat:They_started" : " (%s) .* Initial resource acquisition complete", "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s heartbeat.*node (%s).*: is dead", "Pat:They_dead" : "node (%s).*: is dead", "Pat:All_stopped" : " (%s).*heartbeat.*Heartbeat shutdown complete", "Pat:StandbyOK" : "Standby resource acquisition done", "Pat:StandbyNONE" : "No reply to standby request", "Pat:StandbyTRANSIENT" : "standby message.*ignored.*in flux", "Pat:Return_partition" : "Cluster node %s returning after partition", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"Shutting down\.", r"Forcing shutdown\.", r"Both machines own .* resources!", r"No one owns .* resources!", r", exiting\.", r"ERROR:", r"CRIT.*:", ), }) self.cf=HBConfig(Environment["HAdir"]) self._finalConditions() def SetClusterConfig(self, configpath="default", nodelist=None): '''Activate the named test configuration throughout the cluster. This code is specialized to heartbeat. ''' rc=1 Command=''' cd %s%s%s; : cd to test configuration directory for j in * do if [ -f "@sysconfdir@/ha.d/$j" ]; then if cmp $j @sysconfdir@/ha.d/$j >/dev/null 2>&1; then : Config file $j is already up to correct. 
else echo "Touching $j" cp $j @sysconfdir@/ha.d/$j fi fi done ''' % (self["TestConfigDir"], os.sep, configpath) if nodelist == None: nodelist=self.Env["nodes"] for node in nodelist: if not self.rsh(node, Command): rc=None self.rereadall() return rc def ResourceGroups(self): ''' Return the list of resources groups defined in this configuration. This code is specialized to heartbeat. We make the assumption that the resource file on the local machine is the same as that of a cluster member. We aren't necessarily a member of the cluster (In fact, we usually aren't). ''' RscGroups=[] file = open(self.ResourceFile, "r") while (1): line = file.readline() if line == "": break idx=string.find(line, '#') if idx >= 0: line=line[:idx] if line == "": continue line = string.strip(line) # Is this wrong? tokens = re.split("[ \t]+", line) # Ignore the default server for this resource group del tokens[0] Group=[] for token in tokens: if token != "": idx=string.find(token, "::") if idx > 0: tuple=string.split(token, "::") else: # # Is this an IPaddr default resource type? # if re.match("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$" , token): tuple=["IPaddr", token] else: tuple = [token, None] Resource = self.hbResource(tuple[0], tuple[1]) Group.append(Resource) RscGroups.append(Group) file.close() return RscGroups def InternalCommConfig(self): ''' Return a list of communication paths. Each path consists of a tuple like this: mediatype serial | ip interface/dev name eth0 | /dev/ttyS0... protocol tcp?? | udp | None port Number | None ''' Path = {"mediatype" : None, "interface": None, "protocol" : None, "port": None} cf = self.cf for cfp in cf.Parameters: if cfp == "serial": if Path["mediatype"] == None: Path["mediatype"] = ["serial"] else: Path["mediatype"].append("serial") if Path["interface"] == None: Path["interface"] = cf.Parameters["serial"] else: for serial in cf.Parameters["serial"]: Path["interface"].append(serial) if cfp == "bcast" or cfp == "mcast" or cfp == "ucast" : if Path["mediatype"] == None: Path["mediatype"] = ["ip"] else: Path["mediatype"].append("ip") if cfp == "bcast": interfaces = cf.Parameters[cfp] if cfp == "ucast": interfaces = [cf.Parameters[cfp][0]] if cfp == "mcast": Path["port"] = [cf.Parameters[cfp][0][2]] Path["protocol"] = "udp" interfaces = [cf.Parameters[cfp][0][0]] if Path["interface"] == None: Path["interface"] = interfaces else: for interface in interfaces: if interface not in Path["interface"]: Path["interface"].append(interface) if cfp == "udpport": Path["port"] = cf.Parameters["udpport"] Path["protocol"] = ["udp"] if Path["port"] == None: Path["port"] = [694] return Path def HasQuorum(self, node_list): ( '''Return TRUE if the cluster currently has quorum. According to current heartbeat code this means one node is up. ''') return self.upcount() >= 1 def hbResource(self, type, instance): ''' Our job is to create the right kind of resource. For most resources, we just create an HBResource object, but for IP addresses, we create an HBipResource instead. Some other types of resources may also be added as special cases. ''' if type == "IPaddr": return HBipResource(self, type, instance) return HBResource(self, type, instance) class HBResource(Resource): def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. We call the status operation for the resource script. 
''' return self._ResourceOperation("status", "OK|running", nodename) def _ResourceOperation(self, operation, pattern, nodename): ''' We call the requested operation for the resource script. We don't care what kind of operation we were called to do particularly. When we were created, we were bound to a cluster manager, which has its own remote execution method (which we use here). ''' if self.Instance == None: instance = "" else: instance = self.Instance Rlist = 'LIST="' for dir in self.CM.ResourceDirs: Rlist = Rlist + " " + dir Rlist = Rlist + '"; ' Script= Rlist + ''' T="''' + self.ResourceType + '''"; I="''' + instance + '''"; for dir in $LIST; do if [ -f "$dir/$T" -a -x "$dir/$T" ] then "$dir/$T" $I ''' + operation + ''' exit $? fi done 2>&1; exit 1;''' #print "Running " + Script + "\n" line = self.CM.rsh.readaline(nodename, Script) if operation == "status": if re.search(pattern, line): return 1 return self.CM.rsh.lastrc == 0 def Start(self, nodename): ''' This member function starts or activates the resource. ''' return self._ResourceOperation("start", None, nodename) def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' return self._ResourceOperation("stop", None, nodename) # def IsWorkingCorrectly(self, nodename): # "We default to returning TRUE for this one..." # if self.Instance == None: # self.CM.log("Faking out: " + self.ResourceType) # else: # self.CM.log("Faking out: " + self.ResourceType + self.Instance) # return 1 def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", "OK", nodename) class HBipResource(HBResource): ''' We are a specialized IP address resource which knows how to test to see if our resource type is actually being served. We are cheat and run the IPaddr resource script on the current machine, because it's a more interesting case. ''' def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", "OK", self.CM.OurNode) # # A heartbeat configuration class... # It reads and parses the heartbeat config # files # class HBConfig: # Which options have multiple words on the line? 
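    # (Keywords listed in MultiTokenKeywords below keep all tokens of a
    #  config line grouped together; everything else is flattened into one
    #  list.)  A minimal sketch of the resulting self.Parameters dictionary;
    #  the node names and addresses are illustrative assumptions only, not
    #  taken from any real ha.cf:
    #
    #    ha.cf:        node alpha beta
    #                  udpport 694
    #                  mcast eth0 225.0.0.1 694 1 0
    #    Parameters:   {"node":    ["alpha", "beta"],
    #                   "udpport": ["694"],
    #                   "mcast":   [["eth0", "225.0.0.1", "694", "1", "0"]]}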
MultiTokenKeywords = {"mcast" : None , "stonith_host": None} def __init__(self, configdir="/etc/ha.d"): self.Parameters = {} self.ResourceGroups = {} self._ReadConfig(os.path.join(configdir, "ha.cf")) FirstUp_NodeSelection() LastUp_NodeSelection() no_failback = NoAutoFailbackPolicy() auto_failback = AutoFailbackPolicy() # # We allow each resource group to have its own failover/back # policies # if self.Parameters.has_key("nice_failback") \ and self.Parameters["nice_failback"] == "on": HBConfig.DefaultFailbackPolicy = no_failback elif self.Parameters.has_key("auto_failback") \ and self.Parameters["auto_failback"] == "off": HBConfig.DefaultFailbackPolicy = no_failback else: HBConfig.DefaultFailbackPolicy = auto_failback HBConfig.DefaultNodeSelectionPolicy = NodeSelectionPolicies["FirstUp"] self._ReadResourceGroups(os.path.join(configdir, "haresources")) # Read ha.cf config file def _ReadConfig(self, ConfigFile): self.ConfigPath = ConfigFile; fp = open(ConfigFile) while 1: line=fp.readline() if not line: return line = re.sub("#.*", "", line) line = string.rstrip(line) if len(line) < 1: continue tokens = line.split() key = tokens[0] values = tokens[1:] if HBConfig.MultiTokenKeywords.has_key(key): # group items from this line together, and separate # from the items on other lines values = [values] if self.Parameters.has_key(key): if key == "node": self.Parameters[key].extend(values) else: self.Parameters[key].append(values[0]) else: self.Parameters[key] = values # Read a line from the haresources file... # - allow for \ continuations... def _GetRscLine(self, fp): linesofar = None continuation=1 while continuation: continuation = 0 line=fp.readline() if not line: break line = re.sub("#.*", "", line) if line[len(line)-2] == "\\": line = line[0:len(line)-2] + "\n" continuation=1 if linesofar == None: linesofar = line else: linesofar = linesofar + line return linesofar def _ReadResourceGroups(self, RscFile): self.RscPath = RscFile; fp = open(RscFile) thisline = "" while 1: line=self._GetRscLine(fp) if not line: return line = line.strip() if len(line) < 1: continue tokens = line.split() node = tokens[0] resources = tokens[1:] rscargs=[] for resource in resources: name=resource.split("::", 1) if len(name) > 1: args=name[1].split("::") else: args=None name = name[0] rscargs.append(Resource(name, args)) name = tokens[0] + "__" + tokens[1] assert not self.ResourceGroups.has_key(name) # # Create the resource group # self.ResourceGroups[name] = ResourceGroup(name \ , rscargs , node.split(",") # Provide default value , HBConfig.DefaultNodeSelectionPolicy , HBConfig.DefaultFailbackPolicy) # # Return the list of nodes in the cluster... 
# def nodes(self): result = self.Parameters["node"] result.sort() return result class ClusterState: pass class ResourceGroup: def __init__(self, name, resourcelist, possiblenodes , nodeselection_policy, failback_policy): self.name = name self.resourcelist = resourcelist self.possiblenodes = possiblenodes self.nodeselection_policy = nodeselection_policy self.failback_policy = failback_policy self.state = None self.attributes = {} self.history = [] def __str__(self): result = string.join(self.possiblenodes, ",") for rsc in self.resourcelist: result = result + " " + str(rsc) return result class Resource: def __init__(self, name, arguments=None): self.name = name self.arguments = arguments def __str__(self): result = self.name try: for arg in self.arguments: result = result + "::" + arg except TypeError: pass return result ####################################################################### # # Base class defining policies for where we put resources # when we're starting, or when a failure has occurred... # ####################################################################### NodeSelectionPolicies = {} class NodeSelectionPolicy: def __init__(self, name): self.name = name NodeSelectionPolicies[name] = self def name(self): return self.name # # nodenames: the list of nodes eligible to run this resource # ResourceGroup: the group to be started... # ClusterState: Cluster state information # def SelectNode(self, nodenames, ResourceGroup, ClusterState): return None # # Choose the first node in the list... # class FirstUp_NodeSelection(NodeSelectionPolicy): def __init__(self): NodeSelectionPolicy.__init__(self, "FirstUp") def SelectNode(self, nodenames, ResourceGroup, ClusterState): return nodenames[0] # # Choose the last node in the list... # (kind of a dumb example) # class LastUp_NodeSelection(NodeSelectionPolicy): def __init__(self): NodeSelectionPolicy.__init__(self, "LastUp") def SelectNode(self, nodenames, ResourceGroup, ClusterState): return nodenames[len(nodenames)-1] ####################################################################### # # Failback policies... # # Where to locate a resource group when an eligible node rejoins # the cluster... # ####################################################################### FailbackPolicies = {} class FailbackPolicy: def __init__(self, name): self.name = name FailbackPolicies[name] = self def name(self): return self.name # # currentnode: The node the service is currently on # returningnode: The node which just rejoined # eligiblenodes: Permitted nodes which are up # SelectionPolicy: the normal NodeSelectionPolicy # Cluster state information... # def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup, ClusterState): return None # # This FailbackPolicy is like "normal failback" in heartbeat # class AutoFailbackPolicy(FailbackPolicy): def __init__(self): FailbackPolicy.__init__(self, "failback") def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup, ClusterState): # Select where it should run based on current normal policy # just as though we were starting it for the first time. return SelectionPolicy(eligiblenodes, ResourceGroup, ClusterState) # # This FailbackPolicy is like "nice failback" in heartbeat # class NoAutoFailbackPolicy(FailbackPolicy): def __init__(self): FailbackPolicy.__init__(self, "failuresonly") def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup): # Always leave the resource where it is... 
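        # (This mirrors heartbeat's nice_failback / "auto_failback off"
        #  behaviour: a rejoining node never takes the group back.
        #  AutoFailbackPolicy above instead re-runs the normal
        #  SelectionPolicy as though the group were being started fresh.)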
return currentnode ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': FirstUp_NodeSelection() LastUp_NodeSelection() no_failback = NoAutoFailbackPolicy() auto_failback = AutoFailbackPolicy() cf=HBConfig("/etc/ha.d") print "Cluster configuration:\n" print "Nodes:", cf.nodes(), "\n" print "Config Parameters:", cf.Parameters, "\n" for groupname in cf.ResourceGroups.keys(): print "Resource Group %s:\n\t%s\n" % (groupname, cf.ResourceGroups[groupname]) Heartbeat-3-0-7e3a82377fa8/cts/CTS.py.in0000755000000000000000000011725311576626513017414 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: Main module Classes related to testing high-availability clusters... Lots of things are implemented. Lots of things are not implemented. We have many more ideas of what to do than we've implemented. ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import types, string, select, sys, time, re, os, struct, os, signal import base64, pickle, binascii from UserDict import UserDict from syslog import * from popen2 import Popen3 class RemoteExec: '''This is an abstract remote execution class. It runs a command on another machine - somehow. The somehow is up to us. This particular class uses ssh. Most of the work is done by fork/exec of ssh or scp. 
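    A minimal usage sketch (the host name "node1" below is an illustrative
    assumption, not a real cluster node):

        rsh = RemoteExec()
        rc = rsh("node1", "uptime")                 # run remotely, rc == exit status
        line = rsh.readaline("node1", "date")       # capture one line of stdout
        rsh.cp("somefile", "node1:/tmp/somefile")   # copy a file across via scp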
''' def __init__(self): # -n: no stdin, -x: no X11 self.Command = "@SSH@ -l root -n -x" # -f: ssh to background self.CommandnoBlock = "@SSH@ -f -l root -n -x" # -B: batch mode, -q: no stats (quiet) self.CpCommand = "@SCP@ -B -q" self.OurNode=string.lower(os.uname()[1]) def setcmd(self, rshcommand): '''Set the name of the remote shell command''' self.Command = rshcommand def _fixcmd(self, cmd): return re.sub("\'", "'\\''", cmd) def _cmd(self, *args): '''Compute the string that will run the given command on the given remote system''' args= args[0] sysname = args[0] command = args[1] #print "sysname: %s, us: %s" % (sysname, self.OurNode) if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": ret = command else: ret = self.Command + " " + sysname + " '" + self._fixcmd(command) + "'" #print ("About to run %s\n" % ret) return ret def _cmd_noblock(self, *args): '''Compute the string that will run the given command on the given remote system''' args= args[0] sysname = args[0] command = args[1] #print "sysname: %s, us: %s" % (sysname, self.OurNode) if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": ret = command + " &" else: ret = self.CommandnoBlock + " " + sysname + " '" + self._fixcmd(command) + "'" #print ("About to run %s\n" % ret) return ret def __call__(self, *args): '''Run the given command on the given remote system If you call this class like a function, this is the function that gets called. It just runs it roughly as though it were a system() call on the remote machine. The first argument is name of the machine to run it on. ''' count=0; rc = 0; while count < 3: rc = os.system(self._cmd(args)) if rc == 0: return rc print "Retrying command %s" % self._cmd(args) count=count+1 return rc def popen(self, *args): '''popen the given remote command on the remote system. As in __call__, the first argument is name of the machine to run it on. ''' #print "Now running %s\n" % self._cmd(args) return Popen3(self._cmd(args), None) def readaline(self, *args): '''Run a command on the remote machine and capture 1 line of stdout from the given remote command As in __call__, the first argument is name of the machine to run it on. 
''' p = self.popen(args[0], args[1]) p.tochild.close() result = p.fromchild.readline() p.fromchild.close() self.lastrc = p.wait() return result def readlines(self, *args): p = self.popen(args[0], args[1]) p.tochild.close() result = p.fromchild.readlines() p.fromchild.close() self.lastrc = p.wait() return result def cp(self, *args): '''Perform a remote copy''' cpstring=self.CpCommand for arg in args: cpstring = cpstring + " \'" + arg + "\'" count=0; rc = 0; for i in range(3): rc = os.system(cpstring) if rc == 0: return rc print "Retrying command %s" % cpstring return rc def echo_cp(self, src_host, src_file, dest_host, dest_file): '''Perform a remote copy via echo''' (rc, lines) = self.remote_py(src_host, "os", "system", "cat %s" % src_file) if rc != 0: print "Copy of %s:%s failed" % (src_host, src_file) elif dest_host == None: fd = open(dest_file, "w") fd.writelines(lines) fd.close() else: big_line="" for line in lines: big_line = big_line + line (rc, lines) = self.remote_py(dest_host, "os", "system", "echo '%s' > %s" % (big_line, dest_file)) return rc def noBlock(self, *args): '''Perform a remote execution without waiting for it to finish''' sshnoBlock = self._cmd_noblock(args) count=0; rc = 0; for i in range(3): rc = os.system(sshnoBlock) if rc == 0: return rc print "Retrying command %s" % sshnoBlock return rc def remote_py(self, node, module, func, *args): '''Execute a remote python function If the call success, lastrc == 0 and return result. If the call fail, lastrc == 1 and return the reason (string) ''' encode_args = binascii.b2a_base64(pickle.dumps(args)) encode_cmd = string.join(["@HA_NOARCHDATAHBDIR@/cts/CTSproxy.py",module,func,encode_args]) #print "%s: %s.%s %s" % (node, module, func, repr(args)) result = self.readlines(node, encode_cmd) if result != None: result.pop() if self.lastrc == 0: last_line="" if result != None: array_len = len(result) if array_len > 0: last_line=result.pop() #print "result: %s" % repr(last_line) return pickle.loads(binascii.a2b_base64(last_line)), result return -1, result class LogWatcher: '''This class watches logs for messages that fit certain regular expressions. Watching logs for events isn't the ideal way to do business, but it's better than nothing :-) On the other hand, this class is really pretty cool ;-) The way you use this class is as follows: Construct a LogWatcher object Call setwatch() when you want to start watching the log Call look() to scan the log looking for the patterns ''' def __init__(self, log, regexes, timeout=10, debug=None): '''This is the constructor for the LogWatcher class. It takes a log name to watch, and a list of regular expressions to watch for." ''' # Validate our arguments. Better sooner than later ;-) for regex in regexes: assert re.compile(regex) self.regexes = regexes self.filename = log self.debug=debug self.whichmatch = -1 self.unmatched = None if self.debug: print "Debug now on for for log", log self.Timeout = int(timeout) self.returnonlymatch = None if not os.access(log, os.R_OK): raise ValueError("File [" + log + "] not accessible (r)") def setwatch(self, frombeginning=None): '''Mark the place to start watching the log from. ''' self.file = open(self.filename, "r") self.size = os.path.getsize(self.filename) if not frombeginning: self.file.seek(0,2) def ReturnOnlyMatch(self, onlymatch=1): '''Mark the place to start watching the log from. ''' self.returnonlymatch = onlymatch def look(self, timeout=None): '''Examine the log looking for the given patterns. It starts looking from the place marked by setwatch(). 
This function looks in the file in the fashion of tail -f. It properly recovers from log file truncation, but not from removing and recreating the log. It would be nice if it recovered from this as well :-) We return the first line which matches any of our patterns. ''' last_line=None first_line=None if timeout == None: timeout = self.Timeout done=time.time()+timeout+1 if self.debug: print "starting search: timeout=%d" % timeout for regex in self.regexes: print "Looking for regex: ", regex while (timeout <= 0 or time.time() <= done): newsize=os.path.getsize(self.filename) if self.debug > 4: print "newsize = %d" % newsize if newsize < self.size: # Somebody truncated the log! if self.debug: print "Log truncated!" self.setwatch(frombeginning=1) continue if newsize > self.file.tell(): line=self.file.readline() if self.debug > 2: print "Looking at line:", line if line: last_line=line if not first_line: first_line=line if self.debug: print "First line: "+ line which=-1 for regex in self.regexes: which=which+1 if self.debug > 3: print "Comparing line to ", regex #matchobj = re.search(string.lower(regex), string.lower(line)) matchobj = re.search(regex, line) if matchobj: self.whichmatch=which if self.returnonlymatch: return matchobj.group(self.returnonlymatch) else: if self.debug: print "Returning line" return line newsize=os.path.getsize(self.filename) if self.file.tell() == newsize: if timeout > 0: time.sleep(0.025) else: if self.debug: print "End of file" if self.debug: print "Last line: "+last_line return None if self.debug: print "Timeout" if self.debug: print "Last line: "+last_line return None def lookforall(self, timeout=None): '''Examine the log looking for ALL of the given patterns. It starts looking from the place marked by setwatch(). We return when the timeout is reached, or when we have found ALL of the regexes that were part of the watch ''' if timeout == None: timeout = self.Timeout save_regexes = self.regexes returnresult = [] while (len(self.regexes) > 0): oneresult = self.look(timeout) if not oneresult: self.unmatched = self.regexes self.regexes = save_regexes return None returnresult.append(oneresult) del self.regexes[self.whichmatch] self.unmatched = None self.regexes = save_regexes return returnresult # In case we ever want multiple regexes to match a single line... 
#- del self.regexes[self.whichmatch] #+ tmp_regexes = self.regexes #+ self.regexes = [] #+ which = 0 #+ for regex in tmp_regexes: #+ matchobj = re.search(regex, oneresult) #+ if not matchobj: #+ self.regexes.append(regex) class NodeStatus: def __init__(self, Env): self.Env = Env self.rsh = RemoteExec() def IsNodeBooted(self, node): '''Return TRUE if the given node is booted (responds to pings)''' return os.system("@PING@ -nq -c1 @PING_TIMEOUT_OPT@ %s >/dev/null 2>&1" % node) == 0 def IsSshdUp(self, node): return self.rsh(node, "true") == 0; def WaitForNodeToComeUp(self, node, Timeout=300): '''Return TRUE when given node comes up, or None/FALSE if timeout''' timeout=Timeout anytimeouts=0 while timeout > 0: if self.IsNodeBooted(node) and self.IsSshdUp(node): if anytimeouts: # Fudge to wait for the system to finish coming up time.sleep(30) self.Env.log("Node %s now up" % node) return 1 time.sleep(1) if (not anytimeouts): self.Env.log("Waiting for node %s to come up" % node) anytimeouts=1 timeout = timeout - 1 self.Env.log("%s did not come up within %d tries" % (node, Timeout)) def WaitForAllNodesToComeUp(self, nodes, timeout=300): '''Return TRUE when all nodes come up, or FALSE if timeout''' for node in nodes: if not self.WaitForNodeToComeUp(node, timeout): return None return 1 class ClusterManager(UserDict): '''The Cluster Manager class. This is an subclass of the Python dictionary class. (this is because it contains lots of {name,value} pairs, not because it's behavior is that terribly similar to a dictionary in other ways.) This is an abstract class which class implements high-level operations on the cluster and/or its cluster managers. Actual cluster managers classes are subclassed from this type. One of the things we do is track the state we think every node should be in. ''' def __InitialConditions(self): #if os.geteuid() != 0: # raise ValueError("Must Be Root!") None def _finalConditions(self): for key in self.keys(): if self[key] == None: raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") def __init__(self, Environment, randseed=None): self.Env = Environment self.__InitialConditions() self.clear_cache = 0 self.TestLoggingLevel=0 self.data = { "up" : "up", # Status meaning up "down" : "down", # Status meaning down "StonithCmd" : "@sbindir@/stonith -t baytech -p '10.10.10.100 admin admin' %s", "DeadTime" : 30, # Max time to detect dead node... "StartTime" : 90, # Max time to start up # # These next values need to be overridden in the derived class. # "Name" : None, "StartCmd" : None, "StopCmd" : None, "StatusCmd" : None, "RereadCmd" : None, "StartDRBDCmd" : None, "StopDRBDCmd" : None, "StatusDRBDCmd" : None, "DRBDCheckconf" : None, "BreakCommCmd" : None, "FixCommCmd" : None, "TestConfigDir" : None, "LogFileName" : None, "Pat:We_started" : None, "Pat:They_started" : None, "Pat:We_stopped" : None, "Pat:They_stopped" : None, "BadRegexes" : None, # A set of "bad news" regexes # to apply to the log } self.rsh = RemoteExec() self.ShouldBeStatus={} self.OurNode=string.lower(os.uname()[1]) self.ShouldBeStatus={} self.ns = NodeStatus(self.Env) def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] def log(self, args): self.Env.log(args) def debug(self, args): self.Env.debug(args) def prepare(self): '''Finish the Initialization process. 
Prepare to test...''' for node in self.Env["nodes"]: if self.StataCM(node): self.ShouldBeStatus[node]=self["up"] else: self.ShouldBeStatus[node]=self["down"] def upcount(self): '''How many nodes are up?''' count=0 for node in self.Env["nodes"]: if self.ShouldBeStatus[node]==self["up"]: count=count+1 return count def TruncLogs(self): '''Truncate the log for the cluster manager so we can start clean''' if self["LogFileName"] != None: os.system("cp /dev/null " + self["LogFileName"]) def install_config(self, node): return None def clear_all_caches(self): if self.clear_cache: for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == self["down"]: self.debug("Removing cache file on: "+node) self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBHBDIR@/hostcache") else: self.debug("NOT Removing cache file on: "+node) def StartaCM(self, node): '''Start up the cluster manager on a given node''' self.debug("Starting %s on node %s" %(self["Name"], node)) ret = 1 if not self.ShouldBeStatus.has_key(node): self.ShouldBeStatus[node] = self["down"] if self.ShouldBeStatus[node] != self["down"]: return 1 patterns = [] # Technically we should always be able to notice ourselves starting if self.upcount() == 0: patterns.append(self["Pat:We_started"] % node) else: patterns.append(self["Pat:They_started"] % node) watch = LogWatcher( self["LogFileName"], patterns, timeout=self["StartTime"]+10) watch.setwatch() self.install_config(node) self.ShouldBeStatus[node] = "any" if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): self.log ("%s was already started" %(node)) return 1 # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBHBDIR@/hostcache") if self.rsh(node, self["StartCmd"]) != 0: self.log ("Warn: Start command failed on node %s" %(node)) return None self.ShouldBeStatus[node]=self["up"] watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.log ("Warn: Startup pattern not found: %s" %(regex)) if watch_result: #self.debug("Found match: "+ repr(watch_result)) self.cluster_stable(self["DeadTime"]) return 1 if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): return 1 self.log ("Warn: Start failed for node %s" %(node)) return None def StartaCMnoBlock(self, node): '''Start up the cluster manager on a given node with none-block mode''' self.debug("Starting %s on node %s" %(self["Name"], node)) # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh.remote_py(node, "os", "system", "rm -f @HA_VARLIBHBDIR@/hostcache") self.rsh.noBlock(node, self["StartCmd"]) self.ShouldBeStatus[node]=self["up"] return 1 def StopaCM(self, node): '''Stop the cluster manager on a given node''' self.debug("Stopping %s on node %s" %(self["Name"], node)) if self.ShouldBeStatus[node] != self["up"]: return 1 if self.rsh(node, self["StopCmd"]) == 0: self.ShouldBeStatus[node]=self["down"] self.cluster_stable(self["DeadTime"]) return 1 else: self.log ("Could not stop %s on node %s" %(self["Name"], node)) return None def StopaCMnoBlock(self, node): '''Stop the cluster manager on a given node with none-block mode''' self.debug("Stopping %s on node %s" %(self["Name"], node)) self.rsh.noBlock(node, self["StopCmd"]) self.ShouldBeStatus[node]=self["down"] return 1 def cluster_stable(self, timeout = None): time.sleep(self["StableTime"]) return 1 def node_stable(self, node): return 1 def 
RereadCM(self, node): '''Force the cluster manager on a given node to reread its config This may be a no-op on certain cluster managers. ''' rc=self.rsh(node, self["RereadCmd"]) if rc == 0: return 1 else: self.log ("Could not force %s on node %s to reread its config" % (self["Name"], node)) return None def StataCM(self, node): '''Report the status of the cluster manager on a given node''' out=self.rsh.readaline(node, self["StatusCmd"]) ret= (string.find(out, 'stopped') == -1) try: if ret: if self.ShouldBeStatus[node] == self["down"]: self.log( "Node status for %s is %s but we think it should be %s" % (node, self["up"], self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == self["up"]: self.log( "Node status for %s is %s but we think it should be %s" % (node, self["down"], self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node]=self["up"] else: self.ShouldBeStatus[node]=self["down"] return ret def startall(self, nodelist=None): '''Start the cluster manager on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.ShouldBeStatus[node] == self["down"]: if not self.StartaCM(node): ret = 0 return ret def stopall(self, nodelist=None): '''Stop the cluster managers on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == self["up"]: if not self.StopaCM(node): ret = 0 return ret def rereadall(self, nodelist=None): '''Force the cluster managers on every node in the cluster to reread their config files. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == self["up"]: self.RereadCM(node) def statall(self, nodelist=None): '''Return the status of the cluster managers in the cluster. We can do it on a subset of the cluster if nodelist is not None. 
''' result={} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.StataCM(node): result[node] = self["up"] else: result[node] = self["down"] return result def isolate_node(self, node): '''isolate the communication between the nodes''' rc = self.rsh(node, self["BreakCommCmd"]) if rc == 0: return 1 else: self.log("Could not break the communication between the nodes frome node: %s" % node) return None def unisolate_node(self, node): '''fix the communication between the nodes''' rc = self.rsh(node, self["FixCommCmd"]) if rc == 0: return 1 else: self.log("Could not fix the communication between the nodes from node: %s" % node) return None def reducecomm_node(self,node): '''reduce the communication between the nodes''' rc = self.rsh(node, self["ReduceCommCmd"]%(self.Env["XmitLoss"],self.Env["RecvLoss"])) if rc == 0: return 1 else: self.log("Could not reduce the communication between the nodes from node: %s" % node) return None def savecomm_node(self,node): '''save current the communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"])!=0 or float(self.Env["RecvLoss"])!=0 : rc = self.rsh(node, self["SaveFileCmd"]); if rc == 0: return 1 else: self.log("Could not save the communication between the nodes from node: %s" % node) return None def restorecomm_node(self,node): '''restore the saved communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"])!=0 or float(self.Env["RecvLoss"])!=0 : rc = self.rsh(node, self["RestoreCommCmd"]); if rc == 0: return 1 else: self.log("Could not restore the communication between the nodes from node: %s" % node) return None def SyncTestConfigs(self): '''Synchronize test configurations throughout the cluster. This one's a no-op for FailSafe, since it does that by itself. ''' fromdir=self["TestConfigDir"] if not os.access(fromdir, os.F_OK | os.R_OK | os.W_OK): raise ValueError("Directory [" + fromdir + "] not accessible (rwx)") for node in self.Env["nodes"]: if node == self.OurNode: continue self.log("Syncing test configurations on " + node) # Perhaps I ought to use rsync... self.rsh.cp("-r", fromdir, node + ":" + fromdir) def SetClusterConfig(self, configpath="default", nodelist=None): '''Activate the named test configuration throughout the cluster. It would be useful to implement this :-) ''' pass return 1 def ResourceGroups(self): "Return a list of resource type/instance pairs for the cluster" raise ValueError("Abstract Class member (ResourceGroups)") def InternalCommConfig(self): "Return a list of paths: each patch consists of a tuple" raise ValueError("Abstract Class member (InternalCommConfig)") def HasQuorum(self, node_list): "Return TRUE if the cluster currently has quorum" # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes raise ValueError("Abstract Class member (HasQuorum)") def Components(self): raise ValueError("Abstract Class member (Components)") def RestartClusterLogging(self): self.log("WARN: Restarting logging on cluster nodes") for node in self.Env["nodes"]: cmd=self.Env["logrestartcmd"] if self.rsh.noBlock(node, cmd) != 0: self.log ("ERROR: Cannot restart logging on %s [%s failed]" % (node, cmd)) def TestLogging(self): self.TestLoggingLevel=self.TestLoggingLevel+1 ret=1 if self.TestLoggingLevel > 3: self.log("ERROR: Unable to fix remote logging. 
Stopping tests.") self.TestLoggingLevel=self.TestLoggingLevel-1 return None patterns= [] prefix="Test message from " for node in self.Env["nodes"]: patterns.append(prefix + node) watch = LogWatcher(self["LogFileName"], patterns, 30 + len(self.Env["nodes"])) watch.setwatch() logpri = self.Env["logfacility"] + ".info" for node in self.Env["nodes"]: cmd="logger -p %s %s%s" % (logpri, prefix, node) if self.rsh.noBlock(node, cmd) != 0: self.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node)) watch_result = watch.lookforall() if watch.unmatched: self.log("ERROR: Remote logging is not working.") for regex in watch.unmatched: self.log ("ERROR: Test message [%s] not found in logs." % (regex)) self.RestartClusterLogging() time.sleep(30*self.TestLoggingLevel) ret=self.TestLogging() if ret: self.log("NOTE: Cluster logging now working.") self.TestLoggingLevel=self.TestLoggingLevel-1 return ret def CheckDf(self): dfcmd="df -k /var/log | tail -1 | tr -s ' ' | cut -d' ' -f2" dfmin=500000 result=1 for node in self.Env["nodes"]: dfout=self.rsh.readaline(node, dfcmd) if not dfout: self.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node)) else: try: idfout = int(dfout) except (ValueError, TypeError): self.log("Warning: df output from %s was invalid [%s]" % (node, dfout)) else: if idfout == 0: self.log("CRIT: Completely out of log disk space on %s" % node) result=None elif idfout <= 1000: self.log("WARN: Low on log disk space (%d Mbytes) on %s" % (idfout, node)) return result class Resource: ''' This is an HA resource (not a resource group). A resource group is just an ordered list of Resource objects. ''' def __init__(self, cm, rsctype=None, instance=None): self.CM = cm self.ResourceType = rsctype self.Instance = instance self.needs_quorum = 1 def Type(self): return self.ResourceType def Instance(self, nodename): return self.Instance def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. It is analagous to the "status" operation on SystemV init scripts and heartbeat scripts. FailSafe calls it the "exclusive" operation. ''' raise ValueError("Abstract Class member (IsRunningOn)") return None def IsWorkingCorrectly(self, nodename): ''' This member function returns true if our resource is operating correctly on the given node in the cluster. Heartbeat does not require this operation, but it might be called the Monitor operation, which is what FailSafe calls it. For remotely monitorable resources (like IP addresses), they *should* be monitored remotely for testing. ''' raise ValueError("Abstract Class member (IsWorkingCorrectly)") return None def Start(self, nodename): ''' This member function starts or activates the resource. ''' raise ValueError("Abstract Class member (Start)") return None def Stop(self, nodename): ''' This member function stops or deactivates the resource. 
''' raise ValueError("Abstract Class member (Stop)") return None def __repr__(self): if (self.Instance and len(self.Instance) > 1): return "{" + self.ResourceType + "::" + self.Instance + "}" else: return "{" + self.ResourceType + "}" class Component: def kill(self, node): None class Process(Component): def __init__(self, name, dc_only, pats, dc_pats, badnews_ignore, triggersreboot, cm): self.name = str(name) self.dc_only = dc_only self.pats = pats self.dc_pats = dc_pats self.CM = cm self.badnews_ignore = badnews_ignore self.triggersreboot = triggersreboot self.KillCmd = "killall -9 " + self.name def kill(self, node): if self.CM.rsh(node, self.KillCmd) != 0: self.CM.log ("ERROR: Kill %s failed on node %s" %(self.name,node)) return None return 1 class ScenarioComponent: def __init__(self, Env): self.Env = Env def IsApplicable(self): '''Return TRUE if the current ScenarioComponent is applicable in the given LabEnvironment given to the constructor. ''' raise ValueError("Abstract Class member (IsApplicable)") def SetUp(self, CM): '''Set up the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") def TearDown(self, CM): '''Tear down (undo) the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") class Scenario: ( '''The basic idea of a scenario is that of an ordered list of ScenarioComponent objects. Each ScenarioComponent is SetUp() in turn, and then after the tests have been run, they are torn down using TearDown() (in reverse order). A Scenario is applicable to a particular cluster manager iff each ScenarioComponent is applicable. A partially set up scenario is torn down if it fails during setup. ''') def __init__(self, Components): "Initialize the Scenario from the list of ScenarioComponents" for comp in Components: if not issubclass(comp.__class__, ScenarioComponent): raise ValueError("Init value must be subclass of" " ScenarioComponent") self.Components = Components def IsApplicable(self): ( '''A Scenario IsApplicable() iff each of its ScenarioComponents IsApplicable() ''' ) for comp in self.Components: if not comp.IsApplicable(): return None return 1 def SetUp(self, CM): '''Set up the Scenario. Return TRUE on success.''' j=0 while j < len(self.Components): if not self.Components[j].SetUp(CM): # OOPS! We failed. Tear partial setups down. CM.log("Tearing down partial setup") self.TearDown(CM, j) return None j=j+1 return 1 def TearDown(self, CM, max=None): '''Tear Down the Scenario - in reverse order.''' if max == None: max = len(self.Components)-1 j=max while j >= 0: self.Components[j].TearDown(CM) j=j-1 class InitClusterManager(ScenarioComponent): ( '''InitClusterManager is the most basic of ScenarioComponents. This ScenarioComponent simply starts the cluster manager on all the nodes. It is fairly robust as it waits for all nodes to come up before starting as they might have been rebooted or crashed for some reason beforehand. ''') def __init__(self, Env): pass def IsApplicable(self): '''InitClusterManager is so generic it is always Applicable''' return 1 def SetUp(self, CM): '''Basic Cluster Manager startup. Start everything''' CM.prepare() # Clear out the cobwebs ;-) self.TearDown(CM) for node in CM.Env["nodes"]: CM.rsh(node, CM["DelFileCommCmd"]+ "; true") # Now start the Cluster Manager on all the nodes. 
CM.log("Starting Cluster Manager on all nodes.") return CM.startall() def TearDown(self, CM): '''Set up the given ScenarioComponent''' # Stop the cluster manager everywhere CM.log("Stopping Cluster Manager on all nodes") return CM.stopall() class PingFest(ScenarioComponent): ( '''PingFest does a flood ping to each node in the cluster from the test machine. If the LabEnvironment Parameter PingSize is set, it will be used as the size of ping packet requested (via the -s option). If it is not set, it defaults to 1024 bytes. According to the manual page for ping: Outputs packets as fast as they come back or one hundred times per second, whichever is more. For every ECHO_REQUEST sent a period ``.'' is printed, while for every ECHO_REPLY received a backspace is printed. This provides a rapid display of how many packets are being dropped. Only the super-user may use this option. This can be very hard on a net- work and should be used with caution. ''' ) def __init__(self, Env): self.Env = Env def IsApplicable(self): '''PingFests are always applicable ;-) ''' return 1 def SetUp(self, CM): '''Start the PingFest!''' self.PingSize=1024 if CM.Env.has_key("PingSize"): self.PingSize=CM.Env["PingSize"] CM.log("Starting %d byte flood pings" % self.PingSize) self.PingPids=[] for node in CM.Env["nodes"]: self.PingPids.append(self._pingchild(node)) CM.log("Ping PIDs: " + repr(self.PingPids)) return 1 def TearDown(self, CM): '''Stop it right now! My ears are pinging!!''' for pid in self.PingPids: if pid != None: CM.log("Stopping ping process %d" % pid) os.kill(pid, signal.SIGKILL) def _pingchild(self, node): Args = ["ping", "-qfn", "-s", str(self.PingSize), node] sys.stdin.flush() sys.stdout.flush() sys.stderr.flush() pid = os.fork() if pid < 0: self.Env.log("Cannot fork ping child") return None if pid > 0: return pid # Otherwise, we're the child process. os.execvp("ping", Args) self.Env.log("Cannot execvp ping: " + repr(Args)) sys.exit(1) class PacketLoss(ScenarioComponent): ( ''' It would be useful to do some testing of CTS with a modest amount of packet loss enabled - so we could see that everything runs like it should with a certain amount of packet loss present. ''') def IsApplicable(self): '''always Applicable''' return 1 def SetUp(self, CM): '''Reduce the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.reducecomm_node(node) CM.log("Reduce the reliability of communications") return 1 def TearDown(self, CM): '''Fix the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.unisolate_node(node) CM.log("Fix the reliability of communications") class BasicSanityCheck(ScenarioComponent): ( ''' ''') def IsApplicable(self): return self.Env["DoBSC"] def SetUp(self, CM): CM.prepare() # Clear out the cobwebs self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on BSC node(s).") return CM.startall() def TearDown(self, CM): CM.log("Stopping Cluster Manager on BSC node(s).") return CM.stopall() Heartbeat-3-0-7e3a82377fa8/cts/CTSaudits.py.in0000755000000000000000000006131311576626513020621 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: Audit module ''' __copyright__=''' Copyright (C) 2000, 2001,2005 Alan Robertson Licensed under the GNU GPL. 
''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import time, os, popen2, string, re import CTS import os import popen2 class ClusterAudit: def __init__(self, cm): self.CM = cm def __call__(self): raise ValueError("Abstract Class member (__call__)") def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def name(self): raise ValueError("Abstract Class member (name)") AllAuditClasses = [ ] class ResourceAudit(ClusterAudit): def name(self): return "ResourceAudit" def _doauditRsc(self, resource): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if resource.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def _doaudit(self): '''Check to see if all resources are running in exactly one place in the cluster. We also verify that the members of a resource group are all running on the same node in the cluster, and we monitor that they are all running "properly". ''' Fatal = 0 result = [] # Thought: use self.CM.find_partitions() and make this audit # aware of partitions. Since in a split cluster one # partition may have quorum (and permission to run resources) # and the other not. Groups = self.CM.ResourceGroups() for group in Groups: GrpServedBy = None lastResource = None for resource in group: # # _doauditRsc returns the set of nodes serving # the given resource. This is normally a single node. # ResourceNodes = self._doauditRsc(resource) # Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(ResourceNodes) != 0 and resource.needs_quorum: result.append("Resource " + repr(resource) + " active without Quorum: " + repr(ResourceNodes)) # Is the resource served at all? elif len(ResourceNodes) == 0 and self.CM.HasQuorum(None): result.append("Resource " + repr(resource) + " not served anywhere.") # Is the resource served too many times? elif len(ResourceNodes) > 1: result.append("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) Fatal = 1 elif GrpServedBy == None: GrpServedBy = ResourceNodes # Are all the members of the Rsc Grp served by the same node? elif GrpServedBy != ResourceNodes: result.append("Resource group resources" + repr(resource) + " running on different nodes: " + repr(ResourceNodes)+" vs "+repr(GrpServedBy) + "(otherRsc = " + repr(lastResource) + ")") self.CM.log("Resource group resources" + repr(resource) + " running on different nodes: " + repr(ResourceNodes)+" vs "+repr(GrpServedBy) + "(otherRsc = " + repr(lastResource) + ")") Fatal = 1 if self.CM.Env.has_key("SuppressMonitoring") and \ self.CM.Env["SuppressMonitoring"]: continue # Is the resource working correctly ? 
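            # (Only checked when the resource is running in exactly one place
            #  and nothing fatal has been seen yet: we snapshot the ARP table,
            #  run the resource's IsWorkingCorrectly() check on that node, and
            #  on failure log the before/after ARP tables to help diagnose
            #  IP-takeover problems.)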
if not Fatal and len(ResourceNodes) == 1: beforearpchild = popen2.Popen3("date;/sbin/arp -n|cut -c1-15,26-50,75-" , None) beforearpchild.tochild.close() # /dev/null if not resource.IsWorkingCorrectly(ResourceNodes[0]): afterarpchild = popen2.Popen3("/sbin/arp -n|cut -c1-15,26-50,75-" , None) afterarpchild.tochild.close() # /dev/null result.append("Resource " + repr(resource) + " not operating properly." + " Resource is running on " + ResourceNodes[0]); Fatal = 1 self.CM.log("ARP table before failure ========"); for line in beforearpchild.fromchild.readlines(): self.CM.log(line) self.CM.log("ARP table after failure ========"); for line in afterarpchild.fromchild.readlines(): self.CM.log(line) self.CM.log("End of ARP tables ========"); try: beforearpchild.wait() afterarpchild.wait() except OSError: pass afterarpchild.fromchild.close() beforearpchild.fromchild.close() lastResource = resource if (Fatal): result.insert(0, "FATAL") # Kludgy. return result def __call__(self): # # Audit the resources. Since heartbeat doesn't really # know when resource acquisition is complete, we will # poll until things get stable. # # Having a resource duplicately implemented is a Fatal Error # with no tolerance granted. # audresult = self._doaudit() # # Probably the constant below should be a CM parameter. # Then it could be 0 for FailSafe. # Of course, it really depends on what resources # you have in the test suite, and how long it takes # for them to settle. # Recently, we've changed heartbeat so we know better when # resource acquisition is done. # audcount=5; while(audcount > 0): audresult = self._doaudit() if (len(audresult) <= 0 or audresult[0] == "FATAL"): audcount=0 else: audcount = audcount - 1 if (audcount > 0): time.sleep(1) if (len(audresult) > 0): self.CM.log("Fatal Audit error: " + repr(audresult)) return (len(audresult) == 0) def is_applicable(self): if self.CM["Name"] == "heartbeat": return 1 return 0 class HAResourceAudit(ClusterAudit): def __init__(self, cm): self.CM = cm def _RscRunningNodes(self, resource): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if resource.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def __call__(self): passed = 1 NodeofRsc = {} NumofInc = {} MaxofInc = {} self.CM.debug("Do Audit HAResourceAudit") #Calculate the count of active nodes up_count = 0; for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: up_count += 1 #Make sure the resouces are running on one and only one node Resources = self.CM.Resources() for resource in Resources : RunningNodes = self._RscRunningNodes(resource) NodeofRsc[resource.rid]=RunningNodes if resource.inc_name == None: #Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum: self.CM.log("Resource " + repr(resource) + " active without Quorum: " + repr(RunningNodes)) passed = 0 #Is the resource served at all? elif len(RunningNodes) == 0 and self.CM.HasQuorum(None): self.CM.log("Resource " + repr(resource) + " not served anywhere.") passed = 0 # Is the resource served too many times? elif len(RunningNodes) > 1: self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(RunningNodes)) passed = 0 else: if not NumofInc.has_key(resource.inc_name): NumofInc[resource.inc_name]=0 MaxofInc[resource.inc_name]=resource.inc_max running = 1 #Is the resource served without quorum present? 
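                # (Clone instances follow the same rule as plain resources
                #  above: any active instance of a quorum-dependent resource
                #  in a partition without quorum fails the audit.  Instances
                #  that simply are not running are counted and compared
                #  against inc_max and the number of active nodes further
                #  down.)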
if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum == 1: self.CM.log("Resource " + repr(resource) + " active without Quorum: " + repr(RunningNodes)) passed = 0 #Is the resource served at all? elif len(RunningNodes) == 0 : running = 0 # Is the resource served too many times? elif len(RunningNodes) > 1: self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(RunningNodes)) passed = 0 if running: NumofInc[resource.inc_name] += 1 if self.CM.HasQuorum(None): for inc_name in NumofInc.keys(): if NumofInc[inc_name] != min(up_count, MaxofInc[inc_name]): passed = 0 self.CM.log("Cloned resource "+ str(inc_name) +" has "+ str(NumofInc[inc_name]) +" active instances (max: " + str(MaxofInc[inc_name]) +", active nodes: "+ str(up_count) + ")") Groups = self.CM.ResourceGroups() for group in Groups : group_printed = 0 first_rsc = group[0].rid RunningNodes = NodeofRsc[first_rsc] for rsc in group : if RunningNodes != NodeofRsc[rsc.rid]: passed = 0 if group_printed == 0: group_printed = 1 self.CM.log("Group audit failed for: %s" % repr(group)) if not NodeofRsc[first_rsc] or len(NodeofRsc[first_rsc]) == 0: self.CM.log("* %s not running" % first_rsc) else: self.CM.log("* %s running on %s" %(first_rsc, repr(NodeofRsc[first_rsc]))) if not NodeofRsc[rsc.rid] or len(NodeofRsc[rsc.rid]) == 0: self.CM.log("* %s not running" % rsc.rid) else: self.CM.log("* %s running on %s" %(rsc.rid, repr(NodeofRsc[rsc.rid]))) # Make sure the resouces with "must","placement" constraint # are running on the same node Dependancies = self.CM.Dependencies() for dependency in Dependancies: if dependency["type"] == "placement" and dependency["strength"] == "must": if NodeofRsc[dependency["from"]] != NodeofRsc[dependency["to"]]: print dependency["from"] + " and " + dependency["to"] + " should be run on same node" passed = 0 return passed def is_applicable(self): if self.CM["Name"] == "linux-ha-v2" and self.CM.Env["ResCanStop"] == 0: return 1 return 0 def name(self): return "HAResourceAudit" class CrmdStateAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 up_are_down = 0 down_are_up = 0 unstable_list = [] self.CM.debug("Do Audit %s"%self.name()) for node in self.CM.Env["nodes"]: should_be = self.CM.ShouldBeStatus[node] rc = self.CM.test_node_CM(node) if rc > 0: if should_be == self.CM["down"]: down_are_up = down_are_up + 1 if rc == 1: unstable_list.append(node) elif should_be == self.CM["up"]: up_are_down = up_are_down + 1 if len(unstable_list) > 0: passed = 0 self.CM.log("Cluster is not stable: %d (of %d): %s" %(len(unstable_list), self.CM.upcount(), repr(unstable_list))) if up_are_down > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be up were down." %(up_are_down, len(self.CM.Env["nodes"]))) if down_are_up > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be down were up." 
%(down_are_up, len(self.CM.Env["nodes"]))) return passed def name(self): return "CrmdStateAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 class CIBAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): self.CM.debug("Do Audit %s"%self.name()) passed = 1 ccm_partitions = self.CM.find_partitions() if len(ccm_partitions) == 0: self.CM.debug("\tNo partitions to audit") return 1 for partition in ccm_partitions: self.CM.debug("\tAuditing CIB consistency for: %s" %partition) partition_passed = 0 if self.audit_cib_contents(partition) == 0: passed = 0 return passed def audit_cib_contents(self, hostlist): passed = 1 first_host = None first_host_xml = "" partition_hosts = hostlist.split() for a_host in partition_hosts: if first_host == None: first_host = a_host first_host_xml = self.store_remote_cib(a_host) #self.CM.debug("Retrieved CIB: %s" % first_host_xml) else: a_host_xml = self.store_remote_cib(a_host) diff_cmd="@sbindir@/crm_diff -c -VV -f -N \'%s\' -O '%s'" % (a_host_xml, first_host_xml) infile, outfile, errfile = os.popen3(diff_cmd) diff_lines = outfile.readlines() for line in diff_lines: if not re.search("", line): passed = 0 self.CM.log("CibDiff[%s-%s]: %s" % (first_host, a_host, line)) else: self.CM.debug("CibDiff[%s-%s] Ignoring: %s" % (first_host, a_host, line)) diff_lines = errfile.readlines() for line in diff_lines: passed = 0 self.CM.log("CibDiff[%s-%s] ERROR: %s" % (first_host, a_host, line)) return passed def store_remote_cib(self, node): combined = "" first_line = 1 extra_debug = 0 #self.CM.debug("\tRetrieving CIB from: %s" % node) lines = self.CM.rsh.readlines(node, self.CM["CibQuery"]) if extra_debug: self.CM.debug("Start Cib[%s]" % node) for line in lines: combined = combined + line[:-1] if first_line: self.CM.debug("[Cib]" + line) first_line = 0 elif extra_debug: self.CM.debug("[Cib]" + line) if extra_debug: self.CM.debug("End Cib[%s]" % node) #self.CM.debug("Complete CIB: %s" % combined) return combined def name(self): return "CibAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 class PartitionAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} self.NodeEpoche={} self.NodeState={} self.NodeQuorum={} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): self.CM.debug("Do Audit %s"%self.name()) passed = 1 ccm_partitions = self.CM.find_partitions() if ccm_partitions == None or len(ccm_partitions) == 0: return 1 if len(ccm_partitions) > 1: self.CM.log("ERROR: %d cluster partitions detected:" %len(ccm_partitions)) passed = 0 for partition in ccm_partitions: self.CM.log("\t %s" %partition) for partition in ccm_partitions: partition_passed = 0 if 
self.audit_partition(partition) == 0: passed = 0 return passed def trim_string(self, avalue): if not avalue: return None if len(avalue) > 1: return avalue[:-1] def trim2int(self, avalue): if not avalue: return None if len(avalue) > 1: return int(avalue[:-1]) def audit_partition(self, partition): passed = 1 dc_found = [] dc_allowed_list = [] lowest_epoche = None node_list = partition.split() self.CM.debug("Auditing partition: %s" %(partition)) for node in node_list: if self.CM.ShouldBeStatus[node] != self.CM["up"]: self.CM.log("Warn: Node %s appeared out of nowhere" %(node)) self.CM.ShouldBeStatus[node] = self.CM["up"] # not in itself a reason to fail the audit (not what we're # checking for in this audit) self.NodeState[node] = self.CM.rsh.readaline( node, self.CM["StatusCmd"]%node) self.NodeEpoche[node] = self.CM.rsh.readaline( node, self.CM["EpocheCmd"]) self.NodeQuorum[node] = self.CM.rsh.readaline( node, self.CM["QuorumCmd"]) self.NodeState[node] = self.trim_string(self.NodeState[node]) self.NodeEpoche[node] = self.trim2int(self.NodeEpoche[node]) self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node]) if not self.NodeEpoche[node]: self.CM.log("Warn: Node %s disappeared: can't determine epoch" %(node)) self.CM.ShouldBeStatus[node] = self.CM["down"] # not in itself a reason to fail the audit (not what we're # checking for in this audit) elif lowest_epoche == None or self.NodeEpoche[node] < lowest_epoche: lowest_epoche = self.NodeEpoche[node] if not lowest_epoche: self.CM.log("Lowest epoch not determined in %s" % (partition)) passed = 0 for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if self.CM.is_node_dc(node, self.NodeState[node]): dc_found.append(node) if self.NodeEpoche[node] == lowest_epoche: self.CM.debug("%s: OK" % node) elif not self.NodeEpoche[node]: self.CM.debug("Check on %s ignored: no node epoch" % node) elif not lowest_epoche: self.CM.debug("Check on %s ignored: no lowest epoch" % node) else: self.CM.log("DC %s is not the oldest node (%d vs.
%d)" %(node, self.NodeEpoche[node], lowest_epoche)) passed = 0 if len(dc_found) == 0: self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)" %(len(dc_allowed_list), str(dc_allowed_list), str(node_list))) elif len(dc_found) > 1: self.CM.log("%d DCs (%s) found in cluster partition: %s" %(len(dc_found), str(dc_found), str(node_list))) passed = 0 elif self.CM.Env["CIBResource"] == 1 and self.NodeQuorum[dc_found[0]] == "1": self.CM.debug("%s: %s" % (dc_found[0], self.NodeQuorum[dc_found[0]])) Resources = self.CM.Resources() for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: for resource in Resources: if resource.rid == "rsc_"+node: if resource.IsRunningOn(node) == 0: self.CM.log("Node %s is not running its own resource" %(node)) passed = 0 elif self.CM.Env["CIBResource"] == 1: # no quorum means no resource management self.CM.debug("Not auditing resources - no quorum") if passed == 0: for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.log("epoche %s : %s" %(self.NodeEpoche[node], self.NodeState[node])) return passed def name(self): return "PartitionAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllAuditClasses.append(CrmdStateAudit) AllAuditClasses.append(PartitionAudit) AllAuditClasses.append(ResourceAudit) AllAuditClasses.append(HAResourceAudit) AllAuditClasses.append(CIBAudit) def AuditList(cm): result = [] for auditclass in AllAuditClasses: result.append(auditclass(cm)) return result Heartbeat-3-0-7e3a82377fa8/cts/CTSlab.py.in0000755000000000000000000006620611576626513020074 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
from UserDict import UserDict import sys, time, types, string, syslog, random, os, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM from CTStests import BSC_AddResource from socket import gethostbyname_ex tests = None cm = None old_handler = None DefaultFacility = "daemon" def sig_handler(signum, frame) : if cm != None: cm.log("Interrupted by signal %d"%signum) if signum == 10 and tests != None : tests.summarize() if signum == 15 : sys.exit(1) class ResetMechanism: def reset(self, node): raise ValueError("Abstract class member (reset)") class Stonith(ResetMechanism): def __init__(self, sttype="external/ssh", pName=None, pValue=None , path="@sbindir@/stonith"): self.pathname=path self.configName=pName self.configValue=pValue self.stonithtype=sttype def reset(self, node): if self.configValue == None : config=node else: config=self.configValue cmdstring = "%s -t '%s' -p '%s' '%s' 2>/dev/null" % (self.pathname , self.stonithtype, config, node) return (os.system(cmdstring) == 0) class Stonithd(ResetMechanism): def __init__(self, nodes, sttype = 'external/ssh'): self.sttype = sttype self.nodes = nodes self.query_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 0 %s 60000 0' self.reset_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 1 %s 60000 0' self.poweron_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 2 %s 60000 0' self.poweroff_cmd_pat= '@libdir@/heartbeat/stonithdtest/apitest 3 %s 60000 0' self.lrmd_add_pat = '@sbindir@/lrmadmin -A %s stonith ' + sttype + ' NULL hostlist=%s livedangerously=yes' self.lrmd_start_pat = '@sbindir@/lrmadmin -E %s start 0 0 EVERYTIME' self.lrmd_stop_pat = '@sbindir@/lrmadmin -E %s stop 0 0 EVERYTIME' self.lrmd_del_pat = '@sbindir@/lrmadmin -D %s' self.rsc_id = 'my_stonithd_id' self.command = "@SSH@ -l root -n -x" self.command_noblock = "@SSH@ -f -l root -n -x" self.stonithd_started_nodes = [] self.fail_reason = '' def _remote_exec(self, node, cmnd): return (os.system("%s %s %s > /dev/null" % (self.command, node, cmnd)) == 0) def _remote_readlines(self, node, cmnd): f = os.popen("%s %s %s" % (self.command, node, cmnd)) return f.readlines() def _stonithd_started(self, node): return node in self.stonithd_started_nodes def _start_stonithd(self, node, hosts): hostlist = string.join(hosts, ',') lrmd_add_cmd = self.lrmd_add_pat % (self.rsc_id, hostlist) ret = self._remote_exec(node, lrmd_add_cmd) if not ret:return ret lrmd_start_cmd = self.lrmd_start_pat % self.rsc_id ret = self._remote_exec(node, lrmd_start_cmd) if not ret:return ret self.stonithd_started_nodes.append(node) return 1 def _stop_stonithd(self, node): lrmd_stop_cmd = self.lrmd_stop_pat % self.rsc_id ret = self._remote_exec(node, lrmd_stop_cmd) if not ret:return ret lrmd_del_cmd = self.lrmd_del_pat % self.rsc_id ret = self._remote_exec(node, lrmd_del_cmd) if not ret:return ret self.stonithd_started_nodes.remove(node) return 1 def _do_stonith(self, init_node, target_node, action): stonithd_started = self._stonithd_started(init_node) if not stonithd_started: ret = self._start_stonithd(init_node, [target_node]) if not ret: self.fail_reason = "failed to start stonithd on node %s" % init_node return ret command = "" if action == "RESET": command = self.reset_cmd_pat % target_node elif action == "POWEROFF": command = self.poweroff_cmd_pat % target_node elif action == "POWERON": command = self.poweron_cmd_pat % target_node else: self.fail_reason = "unknown operation type %s" % action return 0 lines = self._remote_readlines(init_node, command) result = "".join(lines) 
if not stonithd_started: self._stop_stonithd(init_node) index = result.find("result=0") if index == -1: self.fail_reason = "unexpected stonithd status: %s" % result return 0 return 1 # Should we randomly choose a node as init_node here if init_node not specified? def reset(self, init_node, target_node): return self._do_stonith(init_node, target_node, "RESET") def poweron(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWERON") def poweroff(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWEROFF") class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") def write(self, line): return self(line.rstrip()) def writelines(self, lines): for s in lines: self.write(s) return 1 def flush(self): return 1 def isatty(self): return None class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource if labinfo.has_key("SyslogFacility"): self.facility=labinfo["SyslogFacility"] else: self.facility=DefaultFacility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"] self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab what kind of reset mechanism you use, etc. 
This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides provide a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) HAdir Base directory for HA installation The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self, nodes): self.data = {} self["nodes"] = nodes self.MinimalKeys=["nodes", "reset", "logger", "CMclass", "HAdir"] def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def SupplyDefaults(self): if not self.has_key("logger"): self["logger"] = (SysLog(self), StdErrLog(self)) if not self.has_key("reset"): self["reset"] = Stonith() if not self.has_key("CMclass"): self["CMclass"] = HeartbeatCM if not self.has_key("LogFileName"): self["LogFileName"] = "@HA_VARLOGDIR@/ha-log" if not self.has_key("logrestartcmd"): self["logrestartcmd"] = "@INITDIR@/syslog restart" if not self.has_key("logfacility"): LogFacility = DefaultFacility # # Now set up our random number generator... # self.RandomGen = random.Random() # Get a random seed for the random number generator. if self.has_key("RandSeed"): randseed = self["RandSeed"] self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) else: randseed = int(time.time()) self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn("debug: %s" % string.strip(args)) def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate. For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: self.Nodes[node] = gethostbyname_ex(node) except: print node+" not found in DNS... 
aborting" raise # # Reset Mechanism # elif key == "reset": if not issubclass(value.__class__, ResetMechanism): raise ValueError("'reset' Value must be a subclass" " of ResetMechanism") # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... # #elif key == "RandSeed": # if len(value) != 3: # raise ValueError("'Randseed' must be a 3-element list/tuple") # for elem in value: # if not isinstance(elem, types.IntType): # raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) def ResetNode(self, node): "Reset a node, (normally) using a hardware mechanism" self.__CheckNode(node) return self["reset"].reset(node) def ResetNode2(self, init_node, target_node, reasons): self.__CheckNode(target_node) stonithd = Stonithd(self["nodes"]) ret = stonithd.reset(init_node, target_node) if not ret: reasons.append(stonithd.fail_reason) return ret def usage(arg): print "Illegal argument " + arg print "usage: " + sys.argv[0] \ + " --directory config-directory" \ + " -D config-directory" \ + " --logfile system-logfile-name" \ + " --trunc (truncate logfile before starting)" \ + " -L system-logfile-name" \ + " --limit-nodes maxnumnodes" \ + " --xmit-loss lost-rate(0.0-1.0)" \ + " --recv-loss lost-rate(0.0-1.0)" \ + " --suppressmonitoring" \ + " --syslog-facility syslog-facility" \ + " --facility syslog-facility" \ + " --choose testcase-name" \ + " --test-ip-base ip" \ + " --oprofile \"whitespace separated list of nodes to oprofile\"" \ + " (-2 |"\ + " -v2 |"\ + " --crm |"\ + " --classic)"\ + " (--populate-resources | -r)" \ + " --resource-can-stop" \ + " --stonith (1 | 0 | yes | no)" \ + " --stonith-type type" \ + " --stonith-args name=value" \ + " --standby (1 | 0 | yes | no)" \ + " --fencing (1 | 0 | yes | no)" \ + " --suppress_cib_writes (1 | 0 | yes | no)" \ + " -lstests" \ + " --seed" \ + " [number-of-iterations]" sys.exit(1) # # A little test code... # if __name__ == '__main__': from CTSaudits import AuditList from CTStests import TestList,RandomTests from CTS import Scenario, InitClusterManager, PingFest, PacketLoss, BasicSanityCheck import CM_hb HAdir = "@sysconfdir@/ha.d" LogFile = "@HA_VARLOGDIR@/ha-log-"+DefaultFacility DoStonith = 1 DoStandby = 1 DoFencing = 1 NumIter = 500 SuppressMonitoring = None Version = 1 CIBfilename = None CIBResource = 0 ClobberCIB = 0 LimitNodes = 0 TestCase = None LogFacility = None TruncateLog = 0 ResCanStop = 0 XmitLoss = "0.0" RecvLoss = "0.0" IPBase = "127.0.0.10" SuppressCib = 1 DoBSC = 0 ListTests = 0 HaveSeed = 0 oprofile = None StonithType = "external/ssh" StonithParams = None StonithParams = "hostlist=dynamic".split('=') # # The values of the rest of the parameters are now properly derived from # the configuration files. 
# # Stonith is configurable because it's slow, I have a few machines which # don't reboot very reliably, and it can mild damage to your machine if # you're using a real power switch. # # Standby is configurable because the test is very heartbeat specific # and I haven't written the code to set it properly yet. Patches are # being accepted... # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-D" or args[i] == "--directory": skipthis=1 HAdir = args[i+1] elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": CIBResource = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 LogFile = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 IPBase = args[i+1] elif args[i] == "--oprofile": skipthis=1 oprofile = args[i+1].split(' ') elif args[i] == "--trunc": TruncateLog=1 elif args[i] == "-v2": Version=2 elif args[i] == "-lstests": ListTests=1 elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStonith=1 elif args[i+1] == "0" or args[i+1] == "no": DoStonith=0 else: usage(args[i+1]) elif args[i] == "--stonith-type": StonithType = args[i+1] skipthis=1 elif args[i] == "--stonith-args": StonithParams = args[i+1].split('=') skipthis=1 elif args[i] == "--suppress-cib-writes": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": SuppressCib=1 elif args[i+1] == "0" or args[i+1] == "no": SuppressCib=0 else: usage(args[i+1]) elif args[i] == "--bsc": DoBSC=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStandby=1 elif args[i+1] == "0" or args[i+1] == "no": DoStandby=0 else: usage(args[i+1]) elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoFencing=1 elif args[i+1] == "0" or args[i+1] == "no": DoFencing=0 else: usage(args[i+1]) elif args[i] == "--suppressmonitoring": SuppressMonitoring = 1 elif args[i] == "--resource-can-stop": ResCanStop = 1 elif args[i] == "-2" or args[i] == "--crm": Version = 2 elif args[i] == "-1" or args[i] == "--classic": Version = 1 elif args[i] == "--clobber-cib" or args[i] == "-c": ClobberCIB = 1 elif args[i] == "--cib-filename": skipthis=1 CIBfilename = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 XmitLoss = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 RecvLoss = args[i+1] elif args[i] == "--choose": skipthis=1 TestCase = args[i+1] elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 LogFacility = args[i+1] elif args[i] == "--seed": skipthis=1 Seed=args[i+1] HaveSeed = 1 else: NumIter=int(args[i]) if not oprofile: oprofile = [] # # This reading of HBconfig here is ugly, and I suppose ought to # be done by the Cluster manager. This would probably mean moving the # list of cluster nodes into the ClusterManager class. A good thought # for our Copious Spare Time in the future... 
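    # A hedged sketch of the smallest Environment that would satisfy
    # CtsLab.HasMinimalKeys() -- the five well-known keys described in
    # the CtsLab docstring above.  The helper name and literal values
    # are illustrative only and the function is never called; the real
    # environment is assembled from the parsed arguments just below.
    def _example_minimal_lab(nodes):
        env = CtsLab(nodes)                     # sets the "nodes" key
        env["HAdir"] = "/etc/ha.d"              # illustrative path
        env["reset"] = Stonith()                # default external/ssh reset
        env["logger"] = (SysLog(env), StdErrLog(env))
        env["CMclass"] = HeartbeatCM
        return env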
# config = CM_hb.HBConfig(HAdir) node_list = config.Parameters["node"] if DoBSC: NumIter = 2 Version = 2 while len(node_list) > 1: node_list.pop(len(node_list)-1) if LogFacility == None: if config.Parameters.has_key("logfacility"): LogFacility = config.Parameters["logfacility"][0] else: LogFacility = DefaultFacility if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) if StonithParams[0] == "hostlist": StonithParams[1] = string.join(node_list, " ") # alt_list = [] # for node in node_list: # alt_list.append(string.lower(node)) # node_list = alt_list Environment = CtsLab(node_list) Environment["HAdir"] = HAdir Environment["ClobberCIB"] = ClobberCIB Environment["CIBfilename"] = CIBfilename Environment["CIBResource"] = CIBResource Environment["LogFileName"] = LogFile Environment["DoStonith"] = DoStonith Environment["SyslogFacility"] = LogFacility Environment["DoStandby"] = DoStandby Environment["DoFencing"] = DoFencing Environment["ResCanStop"] = ResCanStop Environment["SuppressMonitoring"] = SuppressMonitoring Environment["XmitLoss"] = XmitLoss Environment["RecvLoss"] = RecvLoss Environment["IPBase"] = IPBase Environment["SuppressCib"] = SuppressCib Environment["DoBSC"] = 0 Environment["use_logd"] = 0 Environment["logfacility"] = LogFacility Environment["oprofile"] = oprofile if config.Parameters.has_key("use_logd"): Environment["use_logd"] = 1 if Version == 2: from CM_LinuxHAv2 import LinuxHAv2 Environment['CMclass']=LinuxHAv2 if HaveSeed: Environment["RandSeed"] = Seed Environment["reset"] = Stonith(sttype=StonithType, pName=StonithParams[0], pValue=StonithParams[1]) if DoBSC: Environment["DoBSC"] = 1 Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"] = (FileLog(Environment), StdErrLog(Environment)) scenario = Scenario([ BasicSanityCheck(Environment) ]) else: scenario = Scenario( [ InitClusterManager(Environment), PacketLoss(Environment)]) Environment.SupplyDefaults() # Your basic start up the world type of test scenario... 
#scenario = Scenario( #[ InitClusterManager(Environment) #, PingFest(Environment)]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: cm.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() cm.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") cm.log("HA configuration directory: " + Environment["HAdir"]) cm.log("System log files: " + Environment["LogFileName"]) cm.log("Enable Stonith: %d" % Environment["DoStonith"]) cm.log("Enable Fencing: %d" % Environment["DoFencing"]) cm.log("Enable Standby: %d" % Environment["DoStandby"]) cm.log("Enable Resources: %d" % Environment["CIBResource"]) if Environment.has_key("SuppressMonitoring") \ and Environment["SuppressMonitoring"]: cm.log("Resource Monitoring is disabled") cm.ns.WaitForAllNodesToComeUp(config.Parameters["node"]) cm.log("Cluster nodes: ") for node in config.Parameters["node"]: (rc, lines) = cm.rsh.remote_py(node, "os", "system", "@sbindir@/crm_uuid") if not lines: cm.log(" * %s: __undefined_uuid__" % node) else: out=lines[0] out = out[:-1] cm.log(" * %s: %s" % (node, out)) Audits = AuditList(cm) Tests = [] if Environment["DoBSC"]: test = BSC_AddResource(cm) Tests.append(test) elif TestCase != None: for test in TestList(cm): if test.name == TestCase: Tests.append(test) if Tests == []: usage("--choose: No applicable/valid tests chosen") else: Tests = TestList(cm) if ListTests == 1 : cm.log("Total %d tests"%len(Tests)) for test in Tests : cm.log(str(test.name)); sys.exit(0) tests = RandomTests(scenario, cm, Tests, Audits) Environment.RandomTests = tests try : overall, detailed = tests.run(NumIter) except : cm.Env.log("Exception by %s" % sys.exc_info()[0]) for logmethod in Environment["logger"]: traceback.print_exc(50, logmethod) tests.summarize() if tests.Stats["failure"] > 0: sys.exit(tests.Stats["failure"]) elif tests.Stats["success"] != NumIter: cm.Env.log("No failure count but success != requested iterations") sys.exit(1) Heartbeat-3-0-7e3a82377fa8/cts/CTSproxy.py.in0000644000000000000000000000350711576626513020507 0ustar00usergroup00000000000000#!@PYTHON@ ''' proxy on remote node for remote python call ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2005 International Business Machines ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
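# This proxy expects three arguments -- a module name, a function name,
# and the function's argument tuple pickled and base64 encoded -- and
# prints the pickled, base64 encoded return value.  The sketch below
# shows the matching caller-side packing and unpacking as a hedged
# illustration; the helper names are hypothetical and the transport
# (how the command line reaches the remote node) is not shown here.
import pickle, binascii

def pack_remote_args(args):
    # Encode an argument tuple the way this proxy expects it in argv[3].
    return binascii.b2a_base64(pickle.dumps(args)).strip()

def unpack_remote_result(output_line):
    # Decode the single base64 line the proxy prints on success.
    return pickle.loads(binascii.a2b_base64(output_line))

# Roughly:  CTSproxy.py <module> <function> <base64-pickled-args>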
# import sys, pickle, base64, binascii # check the number of arguments if len(sys.argv) != 4 : print "usage %s module function args"%sys.argv[0] sys.exit(1) # import the module try : module = __import__(sys.argv[1], globals(), locals(), []) except ImportError: print "can not find module %s"%sys.argv[1] sys.exit(1) # find the function try : func = getattr(module,sys.argv[2]) except AttributeError: print "can not find function %s"%sys.argv[2] sys.exit(1) # unpack the arguments of functions try : args = pickle.loads(binascii.a2b_base64(sys.argv[3])) except IndexError: print "can not unpickle args %s"%sys.argv[3] sys.exit(1) # call the function and return packed result try : result = apply(func,args) print binascii.b2a_base64(pickle.dumps(result)) sys.exit(0) except TypeError: print "parameter error" sys.exit(1) Heartbeat-3-0-7e3a82377fa8/cts/CTStests.py.in0000644000000000000000000025716011576626513020476 0ustar00usergroup00000000000000#!@PYTHON@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import CTS from CM_hb import HBConfig import CTSaudits import time, os, re, types, string, tempfile, sys from CTSaudits import * from stat import * # List of all class objects for tests which we ought to # consider running. class RandomTests: ''' A collection of tests which are run at random. 
''' def __init__(self, scenario, cm, tests, Audits): self.CM = cm self.Env = cm.Env self.Scenario = scenario self.Tests = [] self.Audits = [] self.ns=CTS.NodeStatus(self.Env) for test in tests: if not issubclass(test.__class__, CTSTest): raise ValueError("Init value must be a subclass of CTSTest") if test.is_applicable(): self.Tests.append(test) if not scenario.IsApplicable(): raise ValueError("Scenario not applicable in" " given Environment") self.Stats = {"success":0, "failure":0, "BadNews":0} self.IndividualStats= {} for audit in Audits: if not issubclass(audit.__class__, ClusterAudit): raise ValueError("Init value must be a subclass of ClusterAudit") if audit.is_applicable(): self.Audits.append(audit) def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def audit(self, BadNews, test): errcount=0 BadNewsDebug=0 #BadNews.debug=1 ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append("BadNews:") ignorelist.extend(self.CM.errorstoignore()) if test: ignorelist.extend(test.errorstoignore()) while errcount < 1000: if BadNewsDebug: print "Looking for BadNews" match=BadNews.look(0) if match: if BadNewsDebug: print "BadNews found: "+match add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): if BadNewsDebug: print "Ignoring based on pattern: ("+ignore+")" add_err = 0 if add_err == 1: self.CM.log("BadNews: " + match) self.incr("BadNews") errcount=errcount+1 else: break else: self.CM.log("Big problems. Shutting down.") self.CM.stopall() self.summarize() raise ValueError("Looks like we hit the jackpot! :-)") for audit in self.Audits: if not audit(): self.CM.log("Audit " + audit.name() + " FAILED.") self.incr("auditfail") if test: test.incr("auditfail") def summarize(self): self.CM.log("****************") self.CM.log("Overall Results:" + repr(self.Stats)) self.CM.log("****************") self.CM.log("Detailed Results") for test in self.Tests: self.CM.log("Test %s: \t%s" %(test.name, repr(test.Stats))) self.CM.log("<<<<<<<<<<<<<<<< TESTS COMPLETED") def run(self, max=1): ( ''' Set up the given scenario, then run the selected tests at random for the selected number of iterations. ''') BadNews=CTS.LogWatcher(self.CM["LogFileName"], self.CM["BadRegexes"] , timeout=0) BadNews.setwatch() self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.log("Enabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --init") self.CM.rsh.remote_py(node, "os", "system", "opcontrol --start") if not self.Scenario.SetUp(self.CM): return None for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.rsh.remote_py( node, "os", "system", "opcontrol --save=cts.setup") testcount=1 time.sleep(30) # This makes sure everything is stabilized before starting... self.audit(BadNews, None) while testcount <= max: test = self.Env.RandomGen.choice(self.Tests) # Some tests want a node as an argument. 
nodechoice = self.Env.RandomNode() #logsize = os.stat(self.CM["LogFileName"])[ST_SIZE] #self.CM.log("Running test %s (%s) \t[%d : %d]" # % (test.name, nodechoice, testcount, logsize)) self.CM.log("Running test %s (%s) \t[%d]" % (test.name, nodechoice, testcount)) testcount = testcount + 1 starttime=time.time() test.starttime=starttime ret=test(nodechoice) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.rsh.remote_py( node, "os", "system", "opcontrol --save=cts.%d" % (testcount-1)) if not self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]): if os.path.isfile("./RecoverFromDeadNode"): self.CM.log("Calling ./RecoverFromDeadNode in an attempt to get things going again.") os.system("./RecoverFromDeadNode") if not self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]): self.CM.log("One or more nodes will not come up - exiting") break if ret: self.incr("success") else: self.incr("failure") self.CM.log("Test %s (%s) \t[FAILED]" %(test.name,nodechoice)) # Better get the current info from the cluster... self.CM.statall() # Make sure logging is working and we have enough disk space... if not self.CM.Env["DoBSC"]: if not self.CM.TestLogging(): sys.exit(1) if not self.CM.CheckDf(): sys.exit(1) stoptime=time.time() elapsed_time = stoptime - starttime test_time = stoptime - test.starttime if not test.has_key("min_time"): test["elapsed_time"] = elapsed_time test["min_time"] = test_time test["max_time"] = test_time else: test["elapsed_time"] = test["elapsed_time"] + elapsed_time if test_time < test["min_time"]: test["min_time"] = test_time if test_time > test["max_time"]: test["max_time"] = test_time self.audit(BadNews, test) self.Scenario.TearDown(self.CM) for node in self.CM.Env["nodes"]: if node in self.CM.Env["oprofile"]: self.CM.log("Disabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --shutdown") self.audit(BadNews, None) for test in self.Tests: self.IndividualStats[test.name] = test.Stats return self.Stats, self.IndividualStats AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. 
''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.timeout=120 self.starttime=0 def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def failure(self, reason="none"): '''Increment the failure count''' self.incr("failure") self.CM.log("Test " + self.name + " failed [reason:" + reason + "]") return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def canrunnow(self): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" self.uspat = self.CM["Pat:We_stopped"] self.thempat = self.CM["Pat:They_stopped"] def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != self.CM["up"]: return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) if self.CM.Env["use_logd"]: patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == self.CM["up"] and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, node)) #self.debug("Checking %s will notice %s left"%(other, node)) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: for regex in watch.unmatched: self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() elif len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... 
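#
# A hedged sketch of a new test written against the CTSTest contract
# above: count the call, do the work, and report through success(),
# failure() or skipped().  The class and its trivial check are
# illustrative only; real tests (below) also register themselves so
# RandomTests can pick them up.
#
class NoopTest(CTSTest):
    '''Illustrative sketch of the CTSTest interface.'''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.name = "Noop"
    def __call__(self, node):
        self.incr("calls")
        # A real test would act on the cluster here; this only checks
        # the bookkeeping for the chosen node.
        if self.CM.ShouldBeStatus[node] in (self.CM["up"], self.CM["down"]):
            return self.success()
        return self.failure("unexpected status for %s" % node)
    def is_applicable(self):
        return 1
# Registration would look like the tests below:
#   AllTestClasses.append(NoopTest)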
# ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug self.uspat = self.CM["Pat:We_started"] self.thempat = self.CM["Pat:They_started"] def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != self.CM["down"]: return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) def is_applicable(self): '''StartTest is always applicable''' return 1 # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) def is_applicable(self): '''FlipTest is always applicable''' return 1 # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'restart' test. 
''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.starttime=time.time() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() def is_applicable(self): '''RestartTest is always applicable''' return 1 # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithTest(CTSTest): ################################################################### '''Reboot a node by whacking it with stonith.''' def __init__(self, cm, timeout=900): CTSTest.__init__(self,cm) self.name="Stonith" self.theystopped = self.CM["Pat:They_dead"] self.allstopped = self.CM["Pat:All_stopped"] self.usstart = self.CM["Pat:We_started"] self.themstart = self.CM["Pat:They_started"] self.timeout = timeout self.ssherror = False def _reset(self, node): StonithWorked=False for tries in 1,2,3,4,5: if self.CM.Env.ResetNode(node): StonithWorked=True break return StonithWorked def setup(self, target_node): # nothing to do return 1 def __call__(self, node): '''Perform the 'stonith' test. (whack the node)''' self.incr("calls") stopwatch = 0 rc = 0 if not self.setup(node): return self.failure("Setup failed") # Figure out what log message to look for when/if it goes down # # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions stop_patterns = [] for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == self.CM["up"] and other != node: stop_patterns.append(self.CM["Pat:They_stopped"] %(other, node)) stopwatch = 1 #self.debug("Checking %s will notice %s left"%(other, node)) if self.CM.ShouldBeStatus[node] == self.CM["down"]: # actually no-one will notice this node die since HA isnt running stopwatch = 0 # Figure out what log message to look for when it comes up if self.CM.upcount() == 1 and self.CM.ShouldBeStatus[node] == self.CM["up"]: uppat = (self.usstart % node) else: uppat = (self.themstart % node) upwatch = CTS.LogWatcher(self.CM["LogFileName"], [uppat] , timeout=self.timeout) if stopwatch == 1: watch = CTS.LogWatcher(self.CM["LogFileName"], stop_patterns , timeout=self.CM["DeadTime"]+10) watch.setwatch() # Reset (stonith) the node self.CM.debug("Resetting: "+node) StonithWorked = self._reset(node) if not StonithWorked: return self.failure("Stonith didn't work") if self.ssherror == True: self.CM.log("NOTE: Stonith command reported success but node %s did not restart (atd, reboot or ssh error)" % node) return self.success() upwatch.setwatch() # Look() and see if the machine went down if stopwatch == 0: # Allow time for the node to die time.sleep(self.CM["DeadTime"]+10) elif not watch.lookforall(): if watch.unmatched: for regex in watch.unmatched: self.CM.log("Warn: STONITH pattern not found: %s"%regex) # !!no-one!! saw this node die if len(watch.unmatched) == len(stop_patterns): return self.failure("No-one saw %s die" %node) # else: syslog* lost a message # Alas I dont think this check is plausable (beekhof) # # Check it really stopped... #self.CM.ShouldBeStatus[node] = self.CM["down"] #if self.CM.StataCM(node) == 1: # ret1=0 # Look() and see if the machine came back up rc=0 if upwatch.look(): self.CM.debug("Startup pattern found: %s" %uppat) rc=1 else: self.CM.log("Warn: Startup pattern not found: %s" %uppat) # Check it really started... 
self.CM.ShouldBeStatus[node] = self.CM["up"] if rc == 0 and self.CM.StataCM(node) == 1: rc=1 # wait for the cluster to stabilize self.CM.cluster_stable() if node in self.CM.Env["oprofile"]: self.CM.log("Enabling oprofile on %s" % node) self.CM.rsh.remote_py(node, "os", "system", "opcontrol --init") self.CM.rsh.remote_py(node, "os", "system", "opcontrol --start") # return case processing if rc == 0: return self.failure("Node %s did not restart" %node) else: return self.success() def is_applicable(self): '''StonithTest is applicable unless suppressed by CM.Env["DoStonith"] == FALSE''' # for v2, stonithd test is a better test to run. if self.CM["Name"] == "linux-ha-v2": return None if self.CM.Env.has_key("DoStonith"): return self.CM.Env["DoStonith"] return 1 # Register StonithTest as a good test to run AllTestClasses.append(StonithTest) ################################################################### class StonithdTest(StonithTest): ################################################################### def __init__(self, cm, timeout=600): StonithTest.__init__(self, cm, timeout=600) self.name="Stonithd" self.startall = SimulStartLite(cm) self.start = StartTest(cm) self.stop = StopTest(cm) self.init_node = None def _reset(self, target_node): if len(self.CM.Env["nodes"]) < 2: return self.skipped() StonithWorked = False SshNotWork = 0 for tries in range(1,5): # For some unknown reason, every now and then the ssh plugin just # can't kill the target_node - everything works fine with stonithd # and the plugin, but atd, reboot or ssh (or maybe something else) # doesn't do its job and target_node remains alive. So look for # the indicative messages and bubble-up the error via ssherror watchpats = [] watchpats.append("Initiating ssh-reset") watchpats.append("CRIT: still able to ping") watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+60) watch.setwatch() fail_reasons = [] if self.CM.Env.ResetNode2(self.init_node, target_node, fail_reasons): StonithWorked = True break if watch.lookforall(): SshNotWork = SshNotWork + 1 continue for reason in fail_reasons: self.CM.log(reason) if StonithWorked == False and SshNotWork == tries: StonithWorked = True self.ssherror = True return StonithWorked def setup(self, target_node): if len(self.CM.Env["nodes"]) < 2: return 1 self.init_node = self.CM.Env.RandomNode() while self.init_node == target_node: self.init_node = self.CM.Env.RandomNode() if not self.startall(None): return self.failure("Test setup failed") return 1 def is_applicable(self): if not self.CM["Name"] == "linux-ha-v2": return 0 if self.CM.Env.has_key("DoStonith"): return self.CM.Env["DoStonith"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class IPaddrtest(CTSTest): ################################################################### '''Find the machine supporting a particular IP address, and knock it down. [Hint: This code isn't finished yet...] ''' def __init__(self, cm, IPaddrs): CTSTest.__init__(self,cm) self.name="IPaddrtest" self.IPaddrs = IPaddrs self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, IPaddr): ''' Perform the IPaddr test... ''' self.incr("calls") node = self.CM.Env.RandomNode() self.incr("node:" + node) if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("WasStopped") self.start(node) ret1 = self.stop(node) # Give the cluster time to recognize we're gone... 
time.sleep(self.CM["StableTime"]) ret2 = self.start(node) if not ret1: return self.failure("Could not stop") if not ret2: return self.failure("Could not start") return self.success() def is_applicable(self): '''IPaddrtest is always applicable (but shouldn't be)''' return 1 ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.starttime=time.time() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''StartOnebyOne is always applicable''' return 1 # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''SimulStart is always applicable''' return 1 # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''SimulStop is always applicable''' return 1 # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. 
''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.starttime=time.time() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() def is_applicable(self): '''StopOnebyOne is always applicable''' return 1 # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) ################################################################### class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.starttime=time.time() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() def is_applicable(self): '''RestartOnebyOne is always applicable''' return 1 # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) ################################################################### class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("Starting crmd") watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats, timeout=self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stopall(None) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def is_applicable(self): '''Partial is always applicable''' if self.CM["Name"] == "linux-ha-v2": return 1 else: return 0 # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ################################################################### class StandbyTest(CTSTest): ################################################################### '''Put a node in standby mode''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="standby" self.successpat = self.CM["Pat:StandbyOK"] self.nostandbypat = self.CM["Pat:StandbyNONE"] self.transient = self.CM["Pat:StandbyTRANSIENT"] def __call__(self, node): '''Perform the 'standby' test. 
''' self.incr("calls") if self.CM.ShouldBeStatus[node] == self.CM["down"]: return self.skipped() if self.CM.upcount() < 2: self.incr("nostandby") pat = self.nostandbypat else: self.incr("standby") pat = self.successpat # # You could make a good argument that the cluster manager # ought to give us good clues on when its a bad time to # switch over to the other side, but heartbeat doesn't... # It could also queue the request. But, heartbeat # doesn't do that either :-) # retrycount=0 while (retrycount < 10): watch = CTS.LogWatcher(self.CM["LogFileName"] , [pat, self.transient] , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.CM.rsh(node, self.CM["Standby"]) match = watch.look() if match: if re.search(self.transient, match): self.incr("retries") time.sleep(2) retrycount=retrycount+1 else: return self.success() else: break # No point in retrying... return self.failure("did not find pattern " + pat) def is_applicable(self): '''StandbyTest is applicable when the CM has a Standby command''' if not self.CM.has_key("Standby"): return None else: #if self.CM.Env.has_key("DoStandby"): #flag=self.CM.Env["DoStandby"] #if type(flag) == types.IntType: #return flag #if not re.match("[yt]", flag, re.I): #return None # # We need to strip off everything after the first blank # cmd=self.CM["Standby"] cmd = cmd.split()[0] if not os.access(cmd, os.X_OK): return None cf = self.CM.cf if not cf.Parameters.has_key("auto_failback"): return None elif cf.Parameters["auto_failback"][0] == "legacy": return None return 1 # Register StandbyTest as a good test to run AllTestClasses.append(StandbyTest) ####################################################################### class StandbyTest2(CTSTest): ####################################################################### '''Standby with CRM of HA release 2''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="standby2" self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) 
def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = [] for rsc in self.CM.Resources(): if rsc.IsRunningOn(node): rsc_on_node.append(rsc) self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) time.sleep(30) # Allow time for the update to be applied and cause something self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.CM.debug("Checking resources") for rsc in self.CM.Resources(): if rsc.IsRunningOn(node): return self.failure("%s set to standby, %s is still running on it" % (node, rsc.rid)) self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) time.sleep(30) # Allow time for the update to be applied and cause something self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Checking resources") for rsc in rsc_on_node: if not rsc.IsRunningOn(node): return self.failure("%s set to active but %s is NOT back" % (node, rsc.rid)) return self.success() def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllTestClasses.append(StandbyTest2) ####################################################################### class Fastdetection(CTSTest): ####################################################################### '''Test the time which one node find out the other node is killed very quickly''' def __init__(self,cm,timeout=60): CTSTest.__init__(self, cm) self.name = "DetectionTime" self.they_stopped = self.CM["Pat:They_stopped"] self.timeout = timeout self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.standby = StandbyTest(cm) self.__setitem__("min", 0) self.__setitem__("max", 0) self.__setitem__("totaltime", 0) def __call__(self, node): '''Perform the fastfailureDetection test''' self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Test setup failed") if self.CM.upcount() < 2: return self.skipped() # Make sure they're not holding any resources ret = self.standby(node) if not ret: return ret stoppat = (self.they_stopped % ("", node)) stopwatch = CTS.LogWatcher(self.CM["LogFileName"], [stoppat], timeout=self.timeout) stopwatch.setwatch() # # This test is CM-specific - FIXME!! # if self.CM.rsh(node, "killall -9 heartbeat")==0: Starttime = os.times()[4] if stopwatch.look(): Stoptime = os.times()[4] # This test is CM-specific - FIXME!! 
self.CM.rsh(node, "killall -9 @libdir@/heartbeat/ccm @libdir@/heartbeat/ipfail >/dev/null 2>&1; true") Detectiontime = Stoptime-Starttime detectms = int(Detectiontime*1000+0.5) self.CM.log("...failure detection time: %d ms" % detectms) self.Stats["totaltime"] = self.Stats["totaltime"] + Detectiontime if self.Stats["min"] == 0: self.Stats["min"] = Detectiontime if Detectiontime > self.Stats["max"]: self.Stats["max"] = Detectiontime if Detectiontime < self.Stats["min"]: self.Stats["min"] = Detectiontime self.CM.ShouldBeStatus[node] = self.CM["down"] self.start(node) return self.success() else: # This test is CM-specific - FIXME!! self.CM.rsh(node, "killall -9 @libdir@/heartbeat/ccm @libdir@/heartbeat/ipfail >/dev/null 2>&1; true") self.CM.ShouldBeStatus[node] = self.CM["down"] ret=self.start(node) return self.failure("Didn't find the log message") else: return self.failure("Couldn't kill cluster manager") def is_applicable(self): '''This test is applicable when auto_failback != legacy''' return self.standby.is_applicable() # This test is CM-specific - FIXME!! def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "ccm.*ERROR: ccm_control_process:failure to send protoversion request" , "ccm.*ERROR: Lost connection to heartbeat service. Need to bail out" ] AllTestClasses.append(Fastdetection) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" 
% rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest is always applicable''' return 0 AllTestClasses.append(BandwidthTest) ########################################################################## class RedundantpathTest(CTSTest): ########################################################################## '''In heartbeat, it has redundant path to communicate between the cluster''' # # Tests should not be cluster-manager specific # One needs to isolate what you need from the cluster manager and then # add a (new) API to do it. # def __init__(self,cm,timeout=60): CTSTest.__init__(self,cm) self.name = "RedundantpathTest" self.timeout = timeout def PathCount(self): '''Return number of communication paths''' Path = self.CM.InternalCommConfig() cf = self.CM.cf eths = [] serials = [] num = 0 for interface in Path["interface"]: if re.search("eth",interface): eths.append(interface) num = num + 1 if re.search("/dev",interface): serials.append(interface) num = num + 1 return (num, eths, serials) def __call__(self,node): '''Perform redundant path test''' self.incr("calls") if self.CM.ShouldBeStatus[node]!=self.CM["up"]: return self.skipped() (num, eths, serials) = self.PathCount() for eth in eths: if self.CM.rsh(node,"ifconfig %s down" % eth)==0: PathDown = "OK" break if PathDown != "OK": for serial in serials: if self.CM.rsh(node,"setserial %s uart none" % serial)==0: PathDown = "OK" break if PathDown != "OK": return self.failure("Cannot break the path") time.sleep(self.timeout) for audit in CTSaudits.AuditList(self.CM): if not audit(): for eth in eths: self.CM.rsh(node,"ifconfig %s up" % eth) for serial in serials: self.CM.rsh(node,"setserial %s uart 16550" % serial) return self.failure("Redundant path fail") for eth in eths: self.CM.rsh(node,"ifconfig %s up" % eth) for serial in serials: self.CM.rsh(node,"setserial %s uart 16550" % serial) return self.success() def is_applicable(self): '''It is applicable when you have more than one connection''' return self.PathCount()[0] > 1 # FIXME!! Why is this one commented out? 
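# ----------------------------------------------------------------------
# Illustrative sketch only - not part of CTS.  BandwidthTest.countbandwidth
# above derives a bits/second estimate from a tcpdump capture by summing
# the reported UDP packet lengths and dividing by the time elapsed between
# the first and the last sampled packet.  The helper below is an assumed,
# simplified rendering of that same calculation (the function name and the
# regular expressions are ours, not the test suite's).
import re

def estimate_udp_bandwidth(tcpdump_lines):
    '''Return approximate bits/sec for capture lines of the form
    "HH:MM:SS.uuuuuu IP a.b.c.d.port > e.f.g.h.port: UDP, length 123"'''
    ts_re = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)")
    len_re = re.compile(r"length:?\s*(\d+)")
    stamps = []
    total_bytes = 0
    for line in tcpdump_lines:
        ts = ts_re.match(line)
        ln = len_re.search(line)
        if not ts or not ln:
            continue
        # Convert HH:MM:SS.usec into seconds since midnight
        hh, mm, ss, usec = [int(x) for x in ts.groups()]
        stamps.append((hh * 60 + mm) * 60 + ss + usec * 0.000001)
        total_bytes = total_bytes + int(ln.group(1))
    if len(stamps) < 2 or stamps[-1] <= stamps[0]:
        return 0
    return (total_bytes * 8) / (stamps[-1] - stamps[0])
# ----------------------------------------------------------------------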
#AllTestClasses.append(RedundantpathTest) ########################################################################## class DRBDTest(CTSTest): ########################################################################## '''In heartbeat, it provides replicated storage.''' def __init__(self,cm, timeout=10): CTSTest.__init__(self,cm) self.name = "DRBD" self.timeout = timeout def __call__(self, dummy): '''Perform the 'DRBD' test.''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: return self.skipped() # Note: All these special cases with Start/Stop/StatusDRBD # should be reworked to use resource objects instead of # being hardwired to bypass the objects here. for node in self.CM.Env["nodes"]: done=time.time()+self.timeout+1 while (time.time()done: return self.failure("Can't start drbd, please check it") device={} for node in self.CM.Env["nodes"]: device[node]=self.getdevice(node) node = self.CM.Env["nodes"][0] done=time.time()+self.timeout+1 while 1: if (time.time()>done): return self.failure("the drbd could't sync") self.CM.rsh(node,"cp /proc/drbd /var/run >/dev/null 2>&1") if self.CM.rsh.cp("%s:/var/run/drbd" % node,"/var/run"): line = open("/tmp/var/run").readlines()[2] p = line.find("Primary") s1 = line.find("Secondary") s2 = line.rfind("Secondary") if s1!=s2: if self.CM.rsh(node,"drbdsetup %s primary" % device[node]): pass if p!=-1: if p/dev/null" % (self.rid, node)) watch.lookforall() self.CM.cluster_stable() recovernode=self.CM.ResourceLocation(self.rid) if len(recovernode)==1: self.CM.debug("Recovered: %s is running on %s" %(self.rid, recovernode[0])) if not watch.unmatched: return self.success() else: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif len(recovernode)==0: return self.failure("%s was not recovered and is inactive" % self.rid) else: return self.failure("%s is now active on more than one node: %s" %(self.rid, str(recovernode))) def is_applicable(self): '''ResourceRecover is applicable only when there are resources running on our cluster and environment is linux-ha-v2''' if self.CM["Name"] == "linux-ha-v2": resourcelist=self.CM.Resources() if len(resourcelist)==0: self.CM.log("No resources on this cluster") return 0 else: return 1 return 0 def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """Unknown operation: fail""", """ERROR: sending stonithRA op to stonithd failed.""", """ERROR: process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """ERROR: process_graph_event: Action %s_%s_%d initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] def __call__(self, node): '''Perform the 'ComponentFail' test. 
''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.log("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) self.patterns.extend(chosen.pats) # Make sure the node goes down and then comes back up if it should reboot... if chosen.triggersreboot: for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, node)) self.patterns.append(self.CM["Pat:They_started"] % node) # In an ideal world, this next stuff should be in the "chosen" object as a member function if chosen.dc_only: if chosen.triggersreboot: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("ERROR: Client .* exited with return code") else: self.patterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name)) self.patterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node) self.patterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name)) else: if chosen.triggersreboot: # Sometimes this won't be in the log... self.okerrpatterns.append("%s heartbeat.*%s.*killed by signal 9" %(node, chosen.name)) self.okerrpatterns.append("%s heartbeat.*Respawning client " % (node)) self.okerrpatterns.append("ERROR: Client .* exited with return code") else: self.patterns.append("%s heartbeat.*%s.*killed by signal 9" %(node, chosen.name)) self.patterns.append("%s heartbeat.*Respawning client.*%s" %(node, chosen.name)) if node_is_dc: self.patterns.extend(chosen.dc_pats) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # set the watch for stable watch = CTS.LogWatcher( self.CM["LogFileName"], tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) # check to see Heartbeat noticed matched = watch.lookforall() if not matched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.cluster_stable(self.CM["StartTime"]) return self.failure("Didn't find all expected patterns") self.CM.debug("Found: "+ repr(matched)) # now watch it recover... for attempt in (1, 2, 3, 4, 5): self.CM.debug("Waiting for the cluster to recover...") if self.CM.cluster_stable(self.CM["StartTime"]): return self.success() return self.failure("Cluster did not become stable") def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... 
self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class Split_brainTest2(CTSTest): #################################################################### '''It is used to test split-brain. when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "Split_brain2" self.start = StartTest(cm) self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") count1 = self.CM.Env.RandomGen.randint(1,len(self.CM.Env["nodes"])-1) partition1 = [] while len(partition1) < count1: select = self.CM.Env.RandomGen.choice(self.CM.Env["nodes"]) if not select in partition1: partition1.append(select) partition2 = [] for member in self.CM.Env["nodes"]: if not member in partition1: partition2.append(member) allownodes1 = "" for member in partition1: allownodes1 += member + " " allownodes2 = "" for member in partition2: allownodes2 += member + " " self.CM.log("Partition1: " + str(partition1)) self.CM.log("Partition2: " + str(partition2)) '''isolate nodes, Look for node is dead message''' watchdeadpats = [ ] deadpat = self.CM["Pat:They_dead"] for member in self.CM.Env["nodes"]: thispat = (deadpat % member) watchdeadpats.append(thispat) watchdead = CTS.LogWatcher(self.CM["LogFileName"], watchdeadpats\ , timeout=self.CM["DeadTime"]+60) watchdead.ReturnOnlyMatch() watchdead.setwatch() for member in partition1: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.savecomm_node(node) if not self.CM.isolate_node(member,allownodes1): return self.failure("Could not isolate the nodes") for member in partition2: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.savecomm_node(node) if not self.CM.isolate_node(member,allownodes2): return self.failure("Could not isolate the nodes") if not watchdead.lookforall(): for member in self.CM.Env["nodes"]: self.CM.unisolate_node(member) self.CM.log("Patterns not found: " + repr(watchdead.unmatched)) return self.failure("Didn't find the log 'dead' message") dcnum=0 while dcnum < 2: dcnum = 0 for member in self.CM.Env["nodes"]: if self.CM.is_node_dc(member): dcnum += 1 time.sleep(1) ''' Unisolate the node, look for the return partition message and check whether they restart ''' watchpartitionpats = [self.CM["Pat:DC_IDLE"]] partitionpat = self.CM["Pat:Return_partition"] for member in self.CM.Env["nodes"]: thispat = (partitionpat % member) watchpartitionpats.append(thispat) watchpartition = CTS.LogWatcher(self.CM["LogFileName"], watchpartitionpats\ , timeout=self.CM["DeadTime"]+60) watchpartition.setwatch() for member in self.CM.Env["nodes"]: if float(self.CM.Env["XmitLoss"])!=0 or float(self.CM.Env["RecvLoss"])!=0 : self.CM.restorecomm_node(node) self.CM.unisolate_node(member) if not watchpartition.lookforall(): self.CM.log("Patterns not found: " + repr(watchpartition.unmatched)) return self.failure("Didn't find return from partition messages") return self.success() def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "ERROR:.*Both machines own.*resources" , "ERROR:.*lost a lot of packets!" 
, "ERROR: Cannot rexmit pkt .*: seqno too low" , "ERROR: Irretrievably lost packet: node" ] #AllTestClasses.append(Split_brainTest2) #################################################################### class MemoryTest(CTSTest): #################################################################### '''Check to see if anyone is leaking memory''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Memory" # self.test = ElectionMemoryTest(cm) self.test = ResourceRecover(cm) self.startall = SimulStartLite(cm) self.before = {} self.after = {} def __call__(self, node): ps_command='''ps -eo ucomm,pid,pmem,tsiz,dsiz,rss,vsize | grep -e ccm -e ha_logd -e cib -e crmd -e lrmd -e tengine -e pengine''' memory_error = [ "", "", "", "Code", "Data", "Resident", "Total" ] ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(10) for node in self.CM.Env["nodes"]: self.before[node] = {} rsh_pipe = self.CM.rsh.popen(node, ps_command) rsh_pipe.tochild.close() result = rsh_pipe.fromchild.readline() while result: tokens = result.split() self.before[node][tokens[1]] = result result = rsh_pipe.fromchild.readline() rsh_pipe.fromchild.close() self.lastrc = rsh_pipe.wait() # do something... if not self.test(node): return self.failure("Underlying test failed") time.sleep(10) for node in self.CM.Env["nodes"]: self.after[node] = {} rsh_pipe = self.CM.rsh.popen(node, ps_command) rsh_pipe.tochild.close() result = rsh_pipe.fromchild.readline() while result: tokens = result.split() self.after[node][tokens[1]] = result result = rsh_pipe.fromchild.readline() rsh_pipe.fromchild.close() self.lastrc = rsh_pipe.wait() failed_nodes = [] for node in self.CM.Env["nodes"]: failed = 0 for process in self.before[node]: messages = [] before_line = self.before[node][process] after_line = self.after[node][process] if not after_line: self.CM.log("%s %s[%s] exited during the test" %(node, before_tokens[0], before_tokens[1])) continue before_tokens = before_line.split() after_tokens = after_line.split() # 3 : Code size # 4 : Data size # 5 : Resident size # 6 : Total size for index in [ 3, 4, 6 ]: mem_before = int(before_tokens[index]) mem_after = int(after_tokens[index]) mem_diff = mem_after - mem_before mem_allow = mem_before * 0.01 # for now... 
mem_allow = 0 if mem_diff > mem_allow: failed = 1 messages.append("%s size grew by %dkB (%dkB)" %(memory_error[index], mem_diff, mem_after)) elif mem_diff < 0: messages.append("%s size shrank by %dkB (%dkB)" %(memory_error[index], mem_diff, mem_after)) if len(messages) > 0: self.CM.log("Process %s[%s] on %s: %s" %(before_tokens[0], before_tokens[1], node, repr(messages))) self.CM.debug("%s Before: %s[%s] (%s%%):\tcode=%skB, data=%skB, resident=%skB, total=%skB" %(node, before_tokens[0], before_tokens[1], before_tokens[2], before_tokens[3], before_tokens[4], before_tokens[5], before_tokens[6])) self.CM.debug("%s After: %s[%s] (%s%%):\tcode=%skB, data=%skB, resident=%skB, total=%skB" %(node, after_tokens[0], after_tokens[1], after_tokens[2], after_tokens[3], after_tokens[4], after_tokens[5], after_tokens[6])) if failed == 1: failed_nodes.append(node) if len(failed_nodes) > 0: return self.failure("Memory leaked on: " + repr(failed_nodes)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """ERROR: .* LRM operation.*monitor on .*: not running""", """pengine:.*Handling failed """] def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 #AllTestClasses.append(MemoryTest) #################################################################### class ElectionMemoryTest(CTSTest): #################################################################### '''Check to see if anyone is leaking memory''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Election" def __call__(self, node): self.rsh.readaline(node, self.CM["ElectionCmd"]%node) if self.CM.cluster_stable(): return self.success() return self.failure("Cluster not stable") def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): '''Never applicable, only for use by the memory test''' return 0 AllTestClasses.append(ElectionMemoryTest) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... ret = self.stopall(None) if not ret: return ret # Start the selected node ret = self.restart1(node) if not ret: return ret # Start all remaining nodes ret = self.startall(None) return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): return 1 AllTestClasses.append(SpecialTest1) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. 
''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. ''' self.incr("calls") startset = [] stopset = [] #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == self.CM["up"]: watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == self.CM["down"]: watchpats.append(self.CM["Pat:They_started"] % node) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) if upnodes == [] and downnodes == []: self.CM.cluster_stable() return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def errorstoignore(self): '''Return list of errors which should be ignored''' return [] def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllTestClasses.append(NearQuorumPointTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""@sbindir@/cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*complete" patterns = [] patterns.append(start_pat % r_id) watch = CTS.LogWatcher( self.CM["LogFileName"], patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, 
type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh.remote_py(node, "os", "system", self.cib_cmd % ("constraints", node_constraint)) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh.remote_py(node, "os", "system", self.cib_cmd % ("resources", rsc_xml)) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM["Name"] == "linux-ha-v2" and self.CM.Env["DoBSC"]: return 1 return None def TestList(cm): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): result.append(bound_test) return result class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.incr("WasStarted") watchpats.append(self.CM["Pat:All_stopped"] % node) if self.CM.Env["use_logd"]: watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.skipped() # Stop all the nodes - at about the same time... watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.starttime=time.time() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.incr("WasStopped") watchpats.append(self.CM["Pat:They_started"] % node) if len(watchpats) == 0: return self.skipped() # Start all the nodes - at about the same time... 
watch = CTS.LogWatcher(self.CM["LogFileName"], watchpats , timeout=self.CM["DeadTime"]+10) watch.setwatch() self.starttime=time.time() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["down"]: self.CM.StartaCMnoBlock(node) if watch.lookforall(): for attempt in (1, 2, 3, 4, 5): if self.CM.cluster_stable(): return self.success() return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) self.CM.log("ERROR: All nodes started but CTS didnt detect: " + repr(watch.unmatched)) return self.failure() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 ################################################################### class LoggingTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Logging" def __call__(self, dummy): '''Perform the 'Logging' test. ''' self.incr("calls") # Make sure logging is working and we have enough disk space... if not self.CM.TestLogging(): sys.exit(1) if not self.CM.CheckDf(): sys.exit(1) def is_applicable(self): '''ResourceRecover is applicable only when there are resources running on our cluster and environment is linux-ha-v2''' return self.CM.Env["DoBSC"] def errorstoignore(self): '''Return list of errors which should be ignored''' return [] #AllTestClasses.append(LoggingTest) Heartbeat-3-0-7e3a82377fa8/cts/LSBDummy.in0000755000000000000000000000027411576626513017762 0ustar00usergroup00000000000000#!/bin/sh # # WARNING: This script is for CTS testing only # . @HB_RA_DIR@/hto-mapfuncs OCF_TYPE=Dummy OCF_RESOURCE_INSTANCE=LSBDummy export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $1 Heartbeat-3-0-7e3a82377fa8/cts/Makefile.am0000644000000000000000000000232411576626513020025 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in ctsdir = @HA_NOARCHDATAHBDIR@/cts cts_PYTHON = \ CM_fs.py \ CM_hb.py \ CM_LinuxHAv2.py \ CTS.py \ CTSaudits.py \ CTSlab.py \ CTStests.py \ extracttests.py \ getpeinputs.sh \ OCFIPraTest.py \ CIB.py cts_DATA = README cts_SCRIPTS = \ CTSproxy.py \ getpeinputs.sh \ LSBDummy all-local: $(cts_PYTHON) clean-local: rm -f $(cts_PYTHON) $(cts_SCRIPTS) Heartbeat-3-0-7e3a82377fa8/cts/OCFIPraTest.py.in0000755000000000000000000001234211576626513020777 0ustar00usergroup00000000000000#!@PYTHON@ '''OCF IPaddr/IPaddr2 Resource Agent Test''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import string,sys,struct,os,random,time,syslog def usage(): print "usage: " + sys.argv[0] \ + " [-2]"\ + " [--ipbase|-i first-test-ip]"\ + " [--ipnum|-n test-ip-num]"\ + " [--help|-h]"\ + " [--perform|-p op]"\ + " [number-of-iterations]" sys.exit(1) def perform_op(ra, ip, op): os.environ["OCF_RA_VERSION_MAJOR"] = "1" os.environ["OCF_RA_VERSION_MINOR"] = "0" os.environ["OCF_ROOT"] = "@OCF_ROOT_DIR@" os.environ["OCF_RESOURCE_INSTANCE"] = ip os.environ["OCF_RESOURCE_TYPE"] = ra os.environ["OCF_RESKEY_ip"] = ip os.environ["HA_LOGFILE"] = "/dev/null" os.environ["HA_LOGFACILITY"] = "local7" path = "@OCF_RA_DIR@"+"/heartbeat/" + ra return os.spawnvpe(os.P_WAIT, path, [ra, op], os.environ) def audit(ra, iplist, ipstatus, summary): passed = 1 for ip in iplist: ret = perform_op(ra, ip, "monitor") if ret != ipstatus[ip]: passed = 0 log("audit: status of %s should be %d but it is %d\t [failure]"% (ip,ipstatus[ip],ret)) ipstatus[ip] = ret summary["audit"]["called"] += 1; if passed : summary["audit"]["success"] += 1 else : summary["audit"]["failure"] += 1 def log(towrite): t = time.strftime("%Y/%m/%d_%H:%M:%S\t", time.localtime(time.time())) logstr = t + " "+str(towrite) syslog.syslog(logstr) print logstr if __name__ == '__main__': ra = "IPaddr" ipbase = "127.0.0.10" ipnum = 1 itnum = 50 perform = None summary = { "start":{"called":0,"success":0,"failure":0}, "stop" :{"called":0,"success":0,"failure":0}, "audit":{"called":0,"success":0,"failure":0} } syslog.openlog(sys.argv[0], 0, syslog.LOG_LOCAL7) # Process arguments... 
skipthis = None args = sys.argv[1:] for i in range(0, len(args)) : if skipthis : skipthis = None continue elif args[i] == "-2" : ra = "IPaddr2" elif args[i] == "--ip" or args[i] == "-i" : skipthis = 1 ipbase = args[i+1] elif args[i] == "--ipnum" or args[i] == "-n" : skipthis = 1 ipnum = int(args[i+1]) elif args[i] == "--perform" or args[i] == "-p" : skipthis = 1 perform = args[i+1] elif args[i] == "--help" or args[i] == "-h" : usage() else: itnum = int(args[i]) log("Begin OCF IPaddr/IPaddr2 Test") # Generate the test ips iplist = [] ipstatus = {} fields = string.split(ipbase, '.') for i in range(0, ipnum) : ip = string.join(fields, '.') iplist.append(ip) ipstatus[ip]=perform_op(ra,ip,"monitor") fields[3] = str(int(fields[3])+1) log("Test ip:" + str(iplist)) # If use ask perform an operation if perform != None: log("Perform opeartion %s"%perform) for ip in iplist: perform_op(ra, ip, perform) log("Done") sys.exit() log("RA Type:" + ra) log("Test Count:" + str(itnum)) # Prepare Random f = open("/dev/urandom", "r") seed=struct.unpack("BBB", f.read(3)) f.close() #seed=(123,321,231) rand = random.Random() rand.seed(seed[0]) log("Test Random Seed:" + str(seed)) # # Begin Tests log(">>>>>>>>>>>>>>>>>>>>>>>>") for i in range(0, itnum): ip = rand.choice(iplist) if ipstatus[ip] == 0: op = "stop" elif ipstatus[ip] == 7: op = "start" else : op = rand.choice(["start","stop"]) ret = perform_op(ra, ip, op) # update status if op == "start" and ret == 0: ipstatus[ip] = 0 elif op == "stop" and ret == 0: ipstatus[ip] = 7 else : ipstatus[ip] = 1 result = "" if ret == 0: result = "success" else : result = "failure" summary[op]["called"] += 1 summary[op][result] += 1 log( "%d:%s %s \t[%s]"%(i, op, ip, result)) audit(ra, iplist, ipstatus, summary) log("<<<<<<<<<<<<<<<<<<<<<<<<") log("start:\t" + str(summary["start"])) log("stop: \t" + str(summary["stop"])) log("audit:\t" + str(summary["audit"])) Heartbeat-3-0-7e3a82377fa8/cts/README0000644000000000000000000005451511576626513016662 0ustar00usergroup00000000000000BASIC REQUIREMENTS BEFORE STARTING: Three machines: one test exerciser and two test cluster machines. The two test cluster machines need to be on the same subnet and they should have journalling filesystems for all of their filesystems other than /boot You also need two free IP addresses on that subnet to test mutual IP address takeover The test exerciser machine doesn't need to be on the same subnet as the test cluster machines. Minimal demands are made on the exerciser machine - it just has to stay up during the tests ;-). However, it does need to have a current copy of the cts test scripts. It is worth noting that these scripts are coordinated with particular versions of linux-ha, so that in general you have to have the same version of test scripts as the rest of linux-ha. Install heartbeat, heartbeat-pils, and heartbeat-stonith on all three machines. Set up the configuration on the cluster machines *and make a copy of it on the test exerciser machine*. These are the necessary files: /etc/ha.d/ha.cf /etc/ha.d/haresources /etc/ha.d/authkeys NOTE: Do not run heartbeat on the test exerciser machine. After installing all three packages, run the following to make sure that heartbeat isn't automatically started every time the test exerciser machine boots: rm /etc/rc.d/rc3.d/*heartbeat rm /etc/rc.d/rc5.d/*heartbeat NOTE: Wherever machine names are mentioned in these configuration files, they must match the machines' `uname -n` name. 
This may or may not match the machines' FQDN (fully qualified domain name) - it depends on how you (and your OS) have named the machines. It helps a lot in tracking problems if the three machines' clocks are closely synchronized. xntpd does this, but you can do it by hand if you want. Make sure the 'at' daemon is enabled on the test cluster machines (this is normally the 'atd' service started by /etc/init.d/atd). This doesn't mean just start it, it means enable it to start on every boot into your default init state (probably either 3 or 5). Enabling it for both state 3 and 5 is a good minimum. We don't need this in production - just for these tests. This typically means you need a symlink for /etc/rc.d/rc3.d/S*atd to /etc/init.d/atd, and one in /etc/rc.d/rc5.d/S*atd. Make sure all your filesystems are journalling filesystems (/boot can be ext2 if you want). This means filesystems like jfs, ext3, or reiserfs. Here's what you need to do to run CTS: Configure the two cluster machines with their logging of heartbeat messages redirected via syslog to the exerciser machine. The exerciser doesn't have to be the same OS as the others but it needs to be one that supports a lot of the other things (like ssh and remote syslog logging). You may want to configure the cluster machines to boot into run level 3, that is without Xdm logins - particularly if they're behind a KVM switch. Some distros refuse to boot correctly without knowing what kind of mouse is present, and the kvm switch will likely make it impossible to figure out without manual intervention. And since some of the tests cause the machine to reboot without manual intervention this would be a problem. Configure syslog on the cluster machines accordingly. (see the Mini-MOWTOs at the end for more details) The exerciser needs to be able to ssh over to the cluster nodes as root without a password challenge. Configure ssh accordingly. (see the Mini-HOWTOs at the end for more details) The exerciser needs to have the IP addresses of the test machines available to it - either by DNS or by /etc/hosts. It uses this to validate configuration information. The StonithdTest uses 'ssh' as its default plugin. To run this test, the cluster nodes need to be able to ssh over to each other without a password challenge. Configure ssh accordingly. (see the Mini-HOWTOs at the end for more details) The "heartbeat" service (init script) needs to be enabled to automatically start in the default run level on the cluster machines. This typically means you need a symlink for /etc/rc.d/rc3.d/S*heartbeat to /etc/init.d/heartbeat, and one in /etc/rc.d/rc5.d/S*heartbeat. If you don't do this, then things will look fine until you run the STONITH test - and it will always fail... The test software is called cts and is in the (surprise!) cts directory. It's in the tarball, and (for later versions) is installed in /usr/lib/heartbeat/cts. The cts system consists of the following files: CM_fs.py - ignore this - it's for failsafe CM_hb.py - interacts with heartbeat CTS.py - the core common code for testing CTSaudits.py - performs audits at the end of each test CTSlab.py - defines the "lab" (test) environment CTStests.py - contains the definitions of the tests You probably should look at the CTSlab.py file, but you should no longer need to modify it. OK. Now assuming you did all this and the stuff described below, what you need to do is run CTSlab.py. 
If you run any other file, it won't test your cluster ;-) Depending on permissions, etc., this may be either done as: ./CTSlab.py number-of-tests-to-run or as python ./CTSlab.py number-of-tests-to-run The test output goes to standard error, so you'll probably want to catch stderr with the usual 2>&1 construct like this: ./CTSlab.py > outputfile 2>&1 & followed by a tail -f outputfile Options for CTSlab: --suppressmonitoring Don't "monitor" resources as part of the audits --directory dirname Directory to find config info in. Defaults to /etc/ha.d --logfile Directory to find logging information in defaults to /var/log/ha-log-local7 --stonith (yes|no) Enable/disable STONITH tests --standby (yes|no) Enable/disable standby tests -v2 Test release 2.x (includes 1.99.x) (see the Mini-HOWTOs at the end for more details) ============== Mini-HOWTOs: ============== -------------------------------------------------------------------------------- How to redirect linux-HA logging the way CTS wants it using syslog -------------------------------------------------------------------------------- NOTE: There have been reports of messages being lost with vanilla syslog. At least one of the developers recommends using syslog-ng or indeed anything else to avoid these problems. 1) Redirect each machine to go (at least) to syslog local7: Change /etc/ha.d/ha.cf on each test machine to say this: logfacility local7 (you can also log to a dedicated local file with logfile if you want) 2) Change /etc/syslog.conf to redirect local7 on each of your cluster machines to redirect to your exerciser machine by adding this line somewhere near the top of /etc/syslog.conf local7.* @exerciser-machine 3) Change syslog on the exerciser machine to accept remote logging requests. You do this by making sure it gets invoked with the "-r" option. On SuSE Linux you need to change /etc/rc.config to put have this line for SYSLOGD_PARAMS: SYSLOGD_PARAMS="-r" If you're on a recent version of SuSE/UL, this parameter has moved into /etc/sysconfig/syslog. You'll have to restart syslog after putting these parameters into effect. 4) Change syslog on the exerciser machine to redirect messages from local7 into /var/log/ha-log-local7 by adding this line to /etc/syslog.conf local7.* -/var/log/ha-log-local7 and then (on SuSE) run this command: /etc/rc.d/syslog restart Use the corresponding function for your distro. -------------------------------------------------------------------------------- How to make OpenSSH allow you to login as root across the network without a password. -------------------------------------------------------------------------------- All our scripts run ssh -l root, so you don't have to do any of your testing logged in as root on the test machine 1) Grab your key from the exerciser machine: take the single line out of ~/.ssh/identity.pub and put it into root's authorized_keys file. 
[This has changed to: copying the line from ~/.ssh/id_dsa.pub into root's authorized_keys file ] NOTE: If you don't have an id_dsa.pub file, create it by running: ssh-keygen -t dsa 2) Run this command on each of the cluster machines as root: ssh -v -l myid ererciser-machine cat /home/myid/.ssh/identity.pub \ >> ~root/.ssh/authorized_keys [For most people, this has changed to: ssh -v -l myid exerciser-machine cat /home/myid/.ssh/id_dsa.pub \ >> ~root/.ssh/authorized_keys ] You will probably have to provide your password, and possibly say "yes" to some questions about accepting the identity of the test machines 3) You must also do the corresponding update for the exerciser machine itself as root: cat /home/myid/.ssh/identity.pub >> ~root/.ssh/authorized_keys To test this, try this command from the exerciser machine for each of your cluster machines, and for the exerciser machine itself. ssh -l root cluster-machine If this works without prompting for a password, you're in business... If not, you need to look at the ssh/openssh documentation and the output from the -v options above... -------------------------------------------------------------------------------- How to redirect linux-HA logging the way CTS wants it using syslog-ng -------------------------------------------------------------------------------- why syslog-ng: Syslog-ng can use tcp and guarantee availability of logs. It is important we get every log message or our test may fail with loss of some important messages. The following instructions apply to RedHat systems: 1) Redirect each machine to go (at least) to syslog local7: Change /etc/ha.d/ha.cf on each test machine to say this: logfacility local7 2) On all machines: download syslog-ng rpm and install it. Or you can download its source at http://www.balabit.com/products/syslog_ng/ and compile and install it. 3) On all machines: stop syslog and disable it on reboot (SEE NOTE BELOW FIRST) /etc/init.d/syslog stop chkconfig --level 2345 syslog off 4) On exerciser machine, add the source and destination for remote log in the file /etc/syslog-ng/syslog-ng.conf. You can change the port number to other number, but it must be the same with the port number in cluster machines (see 5) options { long_hostnames(off); sync(0); perm(0640); stats(3600); time_reap(300); }; source s_tcp { tcp(port(15789) max-connections(512)); }; destination d_ha { file("/var/log/ha-log-local7"); }; log { source(s_tcp); destination(d_ha); }; 5) On cluster machines, send log out to a remote machine by adding the following lines to /etc/syslog-ng/syslog-ng.conf after the definition of f_boot. Note the port number must be the same with 4). Change exerciser-machine to the exerciser machine's IP or name source s_tcp { tcp(port(15789) max-connections(512)); }; filter f_ha { facility(local7); }; filter f_ha_tcp { facility(local7); }; destination ha_local { file("/var/log/cluster.log" perm(0644)); }; destination ha_tcp { tcp(exerciser-machine port(15789));}; log { source(src); filter(f_ha_tcp); destination(ha_tcp); }; log { source(src); source(s_tcp); filter(f_ha); destination(ha_local); }; 6) Start syslog-ng and enable it upon reboot in all machines (SEE NOTE BELOW FIRST) /etc/init.d/syslog-ng start chkconfig --level 2345 syslog-ng on NOTE: Instead of disabling syslog and enabling syslog-ng, it is possible with newer versions of syslog to simply tell it to use the syslog-ng daemon instead of its own. 
If you prefer this method, then step 3 becomes: /etc/init.d/syslog stop and step 6 becomes: echo SYSLOG_DAEMON="syslog-ng" >> /etc/sysconfig/syslog echo SYSLOG_NG_CREATE_CONFIG="no" >> /etc/sysconfig/syslog /etc/init.d/syslog start -------------------------------------------------------------------------------- How to redirect linux-HA logging CTS wants with evlog -------------------------------------------------------------------------------- Related background introduction evlog is a new logging system. It's open source, and its source/binary is licensed under GPL/LGPL. its web site is as below. http://evlog.sourceforge.net/ evlog is compliance with draft POSIX Standard 1003.25. It can provide more advanced logging capacities (please refer to its web site for more details). Among its several important features, when comparing with syslog, the remote logging with tcp protocol is preferred here. Why? Because when testing linux-ha as described above, you may have to need remote logging support. Of course you can use syslog to get it via suitable setting as the steps described above. But, syslog itself only supports remote logging with udp protocol. As you know, sometimes udp protocol is not reliable enough, especially under heavy workload, may lose some udp packages, cause cts' log to become mess and difficult to analyze. Evlog is a good way to resolve this issue. Briefly, we can locally forward syslog message to evlog, then continue forwarding the log message to remote machine with evlog's tcp remote logging capacity. This don't require to rewrite related applications, such as heartbeat. It's a big advantage for us. Since by default evlog isn't configured to support tcp remote logging, so need to configure it. The following is the brief steps. Some of them are abstracted from evlog documents. 1) Get the evlog, build binary if needed. ----------------------------------------- You can download evlog binary or source from evlog project page: http://sourceforge.net/projects/evlog Some linux distributions, such as SLES, include evlog, but normally it doesn't contain remote tcp logging module named as tcp_rmtlog_be. So you may need to get additional package from there. If luck you can get the suitable binary packages for your system from there. As for rpm package, you need two packages as below. evlog -- Standard package, including most functions tcp_rmtlog_be -- Module to support remote tcp logging If you have to build binary for yourself, the simple steps is as below. Here suppose you begin from evlog-1.5.3 tarball. a. Log in as root b. Download evlog-1.5.3.tar.gz c. Untar the tarball tar -xzvf evlog-1.5.3.tar.gz d. cd to evlog-1.5.3 e. To run configuration scripts. ./autogen.sh ./configure f. Build and install. Normal way. make make install make startall Or Build rpm do the following: make rpm make rpm-tcp make rpm-udp Then you can see the evlog and tcp_rmtlog_be in top build directory. you can install them with rpm command. When install is successful, you will see messages like these... /etc/rc.d/init.d/evlog start Starting enterprise event logger: [ OK ] sleep 1 /etc/init.d/evlogrmt start Starting remote event logger: [ OK ] sleep 1 /etc/rc.d/init.d/evlnotify start Starting enterprise event log notification: [ OK ] sleep 1 /etc/rc.d/init.d/evlaction start Starting notification action daemon: [ OK ] 2) Configure remote event consolidator, which normally run CTS test scripts. 
2) Configure the remote event consolidator, which normally runs the CTS test scripts.
----------------------------------------------------------------------------
This procedure configures the evlogrmtd daemon to accept events from the two
hosts on the network that run the heartbeat tests, so that events from
multiple hosts can be consolidated into a single log file.

        a. Log in as root.

        b. Edit /etc/evlog.d/evlhosts to add an entry for each of the two
           testing hosts that run heartbeat.  Each entry must specify the
           host name -- either the simple name or the fqdn -- and a unique
           identifier for each host.  This identifier can be up to 2 bytes,
           but cannot be equal to 0 (such an entry will be ignored).  For
           example, the following are all valid entries:

                (identifier)    (hostname)
                1               hatest1
                2               hatest2

        c. There is also a configuration file, /etc/evlog.d/evlogrmtd.conf,
           which contains the following by default:

                Password=password
                TCPPort=12000

           "Password" is used only by TCP clients to authenticate remote
           hosts when they attempt to connect.  "TCPPort" must match the TCP
           port used by the two test machines for sending events to the
           event consolidator.

        d. Restart the evlogrmtd daemon:

                /etc/init.d/evlogrmt restart

           If evlogrmtd cannot resolve any of the hosts listed in evlhosts,
           or there are no entries in /etc/evlog.d/evlhosts, then evlogrmtd
           will exit.

3) Configure the two test machines on which heartbeat will run.
-------------------------------------------------------------
This procedure installs and configures the event TCP logging plugin that
forwards events to a remote event consolidator.  The local logging software
must already be installed.

        a. Log in as root.

        b. If you have already installed tcp_rmtlog_be, skip to the next
           step.  Otherwise execute the following command (shown for the
           i386 rpm):

                rpm -i tcp_rmtlog_be-1.5.3-1.i386.rpm

        c. cd to /etc/evlog.d/plugins, then edit tcp_rmtlog_be.conf.  You
           need to specify the following items:

                IP address, or hostname - of the event consolidator.
                Port number       - should match the port number used by the
                                    event consolidator.
                Disable=no        - to send events using TCP.
                Password          - must match the password expected by the
                                    event consolidator when the TCP connection
                                    is attempted.
                BufferLenInKbytes - the size of the memory buffer for events
                                    being transmitted via TCP.  This reduces
                                    the chances of losing events during a
                                    temporary loss of connection.  Default
                                    size=128.  Recommended range = 32 to 1024.

           A sample tcp_rmtlog_be.conf might look like this:

                Remote Host=172.30.1.180
                Password=password
                Port=12000
                BufferLenInKbytes=128
                Disable=no

        d. Restart the evlogd daemon to load the plugin:

                /etc/init.d/evlog restart

4) Configure syslog on the pair of HA machines.
-----------------------------------------------
To forward syslog messages to evlog on the same machine, issue this command,
which comes with the evlog package:

        /sbin/slog_fwd

This will forward syslog messages immediately, and after every subsequent
reboot.  To disable syslog forwarding:

        /sbin/slog_fwd -r

5) Test your configuration work.
--------------------------------
For example, on one of the pair of HA machines, issue this command:

        logger -p local7.info "logging hello from hatest1"

Then go to the remote event consolidator, which runs the CTS test scripts,
and issue this command, which comes with the evlog package:

        evlview -m | grep hatest1

You should see a result like:

        Apr  7 13:32:04 hadev1 logging hello from hatest1

Note: by default evlog stores event log messages in the file
/var/evlog/eventlog.  It is a file of binary structured messages, so you
should use evlview to read them.  A combined check of the whole forwarding
chain for all test nodes is sketched after this section.

Enjoy evlog ;).  The end.
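Once both test machines forward their events, the step 5 test can be run for
all nodes at once.  The sketch below assumes the OpenSSH setup described
elsewhere in this file is in place; the node names are placeholders for your
own test machines.  Run it on the event consolidator as root:

        #!/bin/sh
        # Ask every test node to log one uniquely tagged message through
        # syslog (which slog_fwd forwards into evlog), then look for each
        # tag in the consolidated event log with evlview.
        NODES="hatest1 hatest2"          # placeholder node names
        TAG="cts-evlog-check-$$"
        for n in $NODES; do
            ssh -l root "$n" logger -p local7.info "$TAG from $n"
        done
        sleep 5                          # allow time for TCP delivery
        for n in $NODES; do
            if evlview -m | grep "$TAG from $n" >/dev/null; then
                echo "OK:   events from $n are being consolidated"
            else
                echo "FAIL: no event from $n in the consolidated log"
            fi
        done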
--------------------------------------------------------------------------------
How to configure release 2.x (including 1.99.x) for cts testing
--------------------------------------------------------------------------------
To test release 2.x (including 1.99.x) you need to do the following
additional work:

1. Build the source code with the --enable-crm option and install it.

2. Configure ha.cf, and make it the same on the test exerciser and all test
   cluster machines.  Add the following lines to ha.cf:

        auto_failback legacy
        crm yes

   Also, if the following line is in ha.cf, remove it, as it does not apply
   to 2.x:

        respawn hacluster /usr/lib/heartbeat/ipfail

3. Remove all lines from haresources and save it as an empty file until
   bug 613 (http://www.osdl.org/developer_bugzilla/show_bug.cgi?id=613) is
   resolved.

4. Now we can start the cts test on the exerciser node:

        ./CTSlab.py -v2 number-of-tests-to-run

   or as

        python ./CTSlab.py -v2 number-of-tests-to-run

5. To automatically generate and install a sample CIB, add the -r and -c
   options to the above command.

--------------------------------------------------------------------------------
How to configure OpenSSH for StonithdTest
--------------------------------------------------------------------------------
This configuration enables the cluster machines to ssh over to each other
without a password challenge.

1) On each of the cluster machines, grab your key: take the single line out
   of ~/.ssh/identity.pub and put it into root's authorized_keys file.
   [This has changed to: copying the line from ~/.ssh/id_dsa.pub into root's
   authorized_keys file.]
   NOTE: If you don't have an id_dsa.pub file, create it by running:
        ssh-keygen -t dsa

2) Run this command on each of the cluster machines as root:

        ssh -v -l myid cluster_machine_1 cat /home/myid/.ssh/identity.pub \
                >> ~root/.ssh/authorized_keys
        ssh -v -l myid cluster_machine_2 cat /home/myid/.ssh/identity.pub \
                >> ~root/.ssh/authorized_keys
        ......
        ssh -v -l myid cluster_machine_n cat /home/myid/.ssh/identity.pub \
                >> ~root/.ssh/authorized_keys

   [For most people, this has changed to:

        ssh -v -l myid cluster_machine cat /home/myid/.ssh/id_dsa.pub \
                >> ~root/.ssh/authorized_keys ]

   You will probably have to provide your password, and possibly say "yes"
   to some questions about accepting the identity of the test machines.

To test this, try this command from any machine for each of the other
cluster machines, and for the machine itself:

        ssh -l root cluster-machine

This should work without prompting for a password.  If not, you need to look
at the ssh/openssh documentation and the output from the -v options above...

----------------------------------------------------------------------------------
How to set up host based authentication
----------------------------------------------------------------------------------
Client Machine

1) Set HostbasedAuthentication yes in /etc/ssh/ssh_config.

Server Machine

1) On the server, in /etc/ssh/sshd_config, set

        HostbasedAuthentication yes
        IgnoreUserKnownHosts yes

   and restart sshd.

2) Put the client's machine name into /etc/ssh/shosts.equiv (assume the
   client is posic021):

        posic021
        posic021.domain.name

3) Put the client machine's host key into /etc/ssh/ssh_known_hosts, adding
   the client machine's name/full domain name/IP in front of it, e.g.

        posic021,posic021.domain.name,141.142.xxx.xxx, ssh-rsa .....
4) For HostBasedAuthentication to work with root, In the server machine, you need have the file /root/.shosts posic021.domain.name root You also need to set IgnoreRhosts no in /etc/ssh/sshd_config and then restart sshd. Heartbeat-3-0-7e3a82377fa8/cts/extracttests.py.in0000644000000000000000000000621611576626513021511 0ustar00usergroup00000000000000#!@PYTHON@ __copyright__=''' Copyright: (C) 2005 International Business Machines, Inc. Author: Alan Robertson Support: linux-ha-dev@lists.tummy.com License: GNU General Public License (GPL) ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import sys import re #Numeric comparison sorting def sorttestnum(lhs, rhs): return int(lhs) - int(rhs) # # Compress sorted list of tests into runs (ranges) of sequential tests # So that (1,2,3,4) gets compressed down into (1,5) # We add one so we get the marker showing the beginning of the next test # # # def testruns(list): if len(list) < 1: return None, None first=int(list.pop(0)) last=first+1 while len(list) >= 1 and list[0] == last: last=list.pop(0)+1 return (int(first), int(last)) # # Scanning for particular tests... # class ExtractTests: def expandtestranges(self, testlist): outlist = [] for j in range(len(testlist)): match = re.match("([0-9]+)[-:]([0-9]+)", testlist[j]) if match: for k in range(int(match.groups()[0]),int(match.groups()[1])+1): outlist.append(k) else: outlist.append(testlist[j]) return outlist def __init__(self, filename, testlist): self.file = open(filename, "r") testlist = self.expandtestranges(testlist) testlist.sort(sorttestnum) self.testlist = testlist print "Extracting tests ", self.testlist self.regex=re.compile(" CTS: Running test .*\[([0-9]+)") self.CTSregex=re.compile(" CTS: ") def __call__(self): first, last = testruns(self.testlist) curtest=0 while 1: line = self.file.readline() lineprinted=None if line == None or line == "": break regexmatchobj=self.regex.search(line) if regexmatchobj: curtest= int(regexmatchobj.groups()[0]) if curtest == 0: if self.CTSregex.search(line): sys.stdout.write(line) lineprinted=1 if curtest >= first and not lineprinted: sys.stdout.write(line) if curtest >= last: first, last = testruns(self.testlist) if first == None: break if len(sys.argv) < 3: print "Usage:", sys.argv[0] , "logfilename testnumber ..." sys.exit(1) foo = ExtractTests (sys.argv[1], sys.argv[2:]) foo() Heartbeat-3-0-7e3a82377fa8/cts/getpeinputs.sh.in0000755000000000000000000000533211576626513021306 0ustar00usergroup00000000000000#!/bin/sh # # NB: this should be part of extracttests.py! # we expect input from extracttests.py (only 1 test per run!) # output: directory ($testnum) with pe inputs from all nodes usage() { cat<. NOTE: The work of everyone on this project is dearly appreciated. If you are not listed here but should be, please notify us! 
The following people have kindly helped with heartbeat development by providing code, documentation or testing, or fixes: Alan Robertson Author of large parts of the infrastructure. Has been the Main Architect and project maintainer for a long time. Is no longer actively involved with the project. Contributors, ordered by last name: Eric Ayers Nightware RPC100S, and WTI RPS10 support Andrew Beekhof Cluster Resource Manager Darwin Cleanups Stéphane Billiart - unicast support Gregor Binder - "meatware" STONITH support Juan Pedro Paredes Caballero - Debian support Dusan Djordjevic - documentation Kevin Dwyer - ipfail, bug fixes. Joachim Gleissner ssh STONITH support Luis Claudio R. Gonçalves nice_failback code, hb_standby code, fixes Thomas Hepper solaris and Debian support Masaki Hasegawa - send_arp research, fix Andrew Hildebrand - IRIX port. Horms LVS support, ldirectord, many fixes Gregor Howey Holger Kiehl Kirk Lawson - Audible alarm support Mike Ledoux APC master switch and WTI [WT]PS-xxx support David Lee - autoconf and solaris support Xiaoxiang Liu interprocess communication library Friedrich Lobenstock - SuSE compatibility fixes Lars Marowsky-Brée LVM, ICP and ssh STONITH support, many fixes SuSE package maintainer STABLE_1_0 / 1.0.x release maintenance Michael Moerz - automake support Ram Pai - Consensus Membership module Rudy Pawul Documentation, first production customer Andreas Piesk APC Masterswitch (snmp) and APCSmart support Jacob Rief - ldirectord Stefan Salzer Mitja Sarp Initial authentication support (first big contribution) Guochun Shi Communication/core/membership layer support Matthew Soffen FreeBSD port, Solaris fixes, and release manager Guenther Thomsen Debian compliance Mike Tilstra - VACM support Marcelo Tosatti API support, initial plugin support Iñaki Fernández Villanueva - debian support Todd Wheeling APC master switch and WTI [WT]PS-xxx support Volker Wiegand /proc file system, initial SuSE support, documentation Chris Wright Multicast support, inet_pton replacement, fixes Code for the following pieces borrowed from code by: Tom Vogt (udp code) yuri volobuev (send_arp.c) 'Network UPS Tools' by Russell Kroll homepage: http://www.exploits.org/nut/ (Andreas Piesk borrowed code for APCSmart support) David C. Teigland wrote original meatware support for GFS stomith salz@pebbles.bbn.com - replacement scandir function Internet Software Consortium (bind) - replacement inet_pton() function Heartbeat-3-0-7e3a82377fa8/doc/COPYING0000644000000000000000000004310511576626513017002 0ustar00usergroup00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. Heartbeat-3-0-7e3a82377fa8/doc/COPYING.LGPL0000644000000000000000000006364411576626513017551 0ustar00usergroup00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. 
This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. ^L Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. 
It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. ^L GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. 
You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. 
^L Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. ^L 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. ^L 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. ^L 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. ^L 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS ^L How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! Heartbeat-3-0-7e3a82377fa8/doc/Design0000644000000000000000000001272311576626513017105 0ustar00usergroup00000000000000I'm in the process of putting together a Phase I architecture as I mentioned in an earlier posting, and is mentioned on the web site. The first question that comes to mind are Phase I goals: Service/Resource failover Service Groups or dependencies Resource Diagnostics /proc (or /ha) interface for status and control 2-n node cluster size redundant NIC failover shared/mirrored filesystem takeovers {ala Poor Man's data replication} Towards this end I've drawn a few pictures, and written a few words of text. The picture is related to Tom Vogt's picture, but has several differences. It is very similar to the proposed project framework on the HA web site. Each component in this picture is replicated on each node in the cluster. manual requests | v +---------+----------------+ | | | | v v diagnostics +----------------+ (scheduled | | and manual) ------------->| Configuration +------> Application | Management | Notification monitoring/heartbeat-------->| | API +--------+-------+ | ((Re)configuration | Modules) +-------------+---------------+----------+ | | | | v v v v IP Takeover Filesystem Application etc. Takeover Start/Restart The discussion below talks about the system in terms of objects, although the current code is written in C (not C++), and I expect the new framework to be implemented in the same way. There is a presumption in this design that each node has a copy of the whole cluster configuration. Otherwise, when the cluster is reforming itself, it doesn't know what resources provided by missing nodes should be instantiated in a failover mode on a "replacement" node. The most common/important kind of object in this model is the resource - Resources are things like IP addresses, NICs, filesystems, disks, applications, etc. The following methods exist for every resource: name() The name of this resource (ASCII string) provided_by() Returns the node providing this resource type() Returns the "type" of a given resource See "resource_type" object below. service_state() returns IN_SERVICE, OOS or PENDING_OOS (REMOVED or PENDING_REMOVE?) in_service() Brings the resource into service oo_service() Takes the resource out of service, gracefully force_oos() Takes the resource out of service, immediately. 
mark_oos() Mark a failed resource as out of service; takes no action to take the resource out of service There is a (static member) function which locates a resource: find_resource() Returns the resource that corresponds to the given name There is also an object called resource-list-item. It has the following methods: next() The next resource-list-item in the list resource() The resource associated with this list item Resources also have the following member functions (which return a resource-list) dependson_list() Returns the list of resources which this resource depends directly on dependents_list() Returns the list of resources which depend directly on this resource These dependency lists can be manipulated by functions like this one: dependson(r1, r2) Mark resource r1 as dependent on resource r2 There is also a fundamental object called a resource_type. It has member functions like these: instantiate() Creates a resource of the given type typename() Returns an ASCII string naming the type ................................. ................................. Things which are not yet defined but I know I need. Diagnostics objects Application notification objects (so an application can register that it wants to be notified about cluster transitions, etc.) Message: A set of {name,value} pairs sent from a node to all nodes, or to a single node. This is what I now implement in "heartbeat" (this version is awaiting testing by SuSE users before release) Some of the things I haven't defined, I don't know I need, and some of them are just lower-level details that I haven't gotten to. ----------------------------------------------------------------------- Continuing on... ----------------------------------------------------------------------- Inside the Configuration Management Subsystem, there exists what I call a configuration strategy module. At this point I assume that this is a plug-in module, which can be replaced with any one of a number of strategy modules, according to the needs of the system or whims of the administrator. My initial thoughts on a first-cut node transition strategy module go like this: Timeouts and new transitions cause restart from step 1 Step 1: Declare a cluster transition Step 2: (once everyone has ACKed the cluster transition) Declare a "transition master" node (somehow -lowest node name?) Step 3: (once everyone has ACKed the transition master) Master Requests a config report (what resources each node has) Step 4: (once every node has reported) For each resource group, the transition master requests a cost of providing it from each node (infinite if it cannot be provided) Step 5: Transition Manager requests And then I finish this document later... Heartbeat-3-0-7e3a82377fa8/doc/DirectoryMap.txt0000644000000000000000000003137211576626513021115 0ustar00usergroup00000000000000Author: Yi Zhu. NOTE NOTE Content last updated 2003, links may be broken, NOTE information may be incorrect. NOTE linux-ha -. |-- cts : Cluster Testing System (Python) |-- include : linux-ha header files directory |-- ldirectord : Linux Director Daemon (Perl) which monitors real services like http, https and ftp |-- replace : replacement implementations for some common library functions (inet_pton, setenv, strerror, etc) \-- lib -. |-- apphb : client side application heartbeat library |-- pils : Plugin system library |-- recoverymgr : recovery manager client library |-- clplumbing : cluster plumbing library (IPC, CPU, realtime, etc) |-- plugins -.
| |-- AppHBNotification : Recovery manager client plug-in implementation | |-- HBauth : heartbeat authentication (CRC, MD5 and SHA1) | |-- HBcomm : heartbeat communication method (mcast, ucast, serial, etc) | |-- InterfaceMgr : Generic interface (implementation) manager | \-- stonith : stonith plugins |-- stonith : stonith library and utility |-- snmp_subagent : SNMP remote management for Linux-HA cluster |-- doc : linux-ha documentation |-- libltdl : GNU libltdl, a system independent dlopen wrapper for GNU libtool |-- membership -. Directory for membership implementations \-- ccm : Consensus Cluster Manager |-- heartbeat : heartbeat framework |-- contrib -. | \-- ipfail : IP Failover plugin for Linux-HA \-- telecom -. |-- apphbd : application heartbeat daemon \-- recoverymgrd : Recovery manager (apphbd error recovery only) ============================================================================================================== OBSOLETE OR UNUSED DIRECTORIES: |-- proc-ha : (at top level) Linux-HA /proc interface |-- lib/tinyxml : XML parse library |-- stonith (at top level) : OLD stonith library and utility ============================================================================= A little more complete explanation of each of these directories, in non-pictorial form: ----------------------------------------------------------------------------- /contrib: A place to hold contributed code not yet part of the main system .......................................................................... ----------------------------------------------------------------------------- /contrib/ipfail : IP Failover plugin for Linux-HA ................................................. This code is a heartbeat client which forces failovers when IP connectivity becomes compromised. It uses voting techniques to ensure that services are being moved to a better node. It could be voting on anything which we don't control and which is intrinsic to the local node. This is my vision of its possible future. This code is reasonably mature at this point, and really deserves to be moved out of the contrib directory. ----------------------------------------------------------------------------- /cts : Cluster Testing System (Python) ...................................... CTS is how the majority of the testing for Linux-HA is done. This combined with the BasicSanityCheck tool does all our automated testing. CTS generates random cluster events and sees if the system recovers from them. If you think we ought to test it, it needs to go in here or in the BasicSanityCheck tool -- or it won't likely get done. ----------------------------------------------------------------------------- /doc : linux-ha documentation ............................. This is where various interesting READMEs and documents like the GettingStarted and FAQ document live. ----------------------------------------------------------------------------- /heartbeat : heartbeat framework ................................ This is the heartbeat program. It is one of the main things which people currently use out of the Linux-HA project. It provides basic failover services, a reasonable API, and some nice communication facilities. The API is documented in the doc directory. 
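As a very rough illustration of how a client uses that API, the sketch below signs on to the local heartbeat daemon, walks the configured nodes, and prints each node's status. This is only a sketch: the entry points used here (ll_cluster_new, signon, init_nodewalk, nextnode, node_status, end_nodewalk, signoff, delete) are the ones declared in include/hb_api.h, but the exact prototypes and error handling should be verified against that header and the api_test client before reusing this code.

    #include <stdio.h>
    #include <hb_api.h>

    int
    main(void)
    {
            ll_cluster_t *  hb = ll_cluster_new("heartbeat");
            const char *    node;

            /* Sign on to the local heartbeat daemon as an API client */
            if (hb == NULL || hb->llc_ops->signon(hb, "example-client") != HA_OK) {
                    fprintf(stderr, "cannot sign on to heartbeat\n");
                    return 1;
            }

            /* Walk the node list and print the status of each node */
            hb->llc_ops->init_nodewalk(hb);
            while ((node = hb->llc_ops->nextnode(hb)) != NULL) {
                    printf("node %s: %s\n", node, hb->llc_ops->node_status(hb, node));
            }
            hb->llc_ops->end_nodewalk(hb);

            hb->llc_ops->signoff(hb, TRUE);  /* disconnect from the daemon */
            hb->llc_ops->delete(hb);         /* free the client object */
            return 0;
    }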
There is a paper on heartbeat dated Summer 2000 which can be found here: http://linux-ha.org/comm/ ----------------------------------------------------------------------------- /include : linux-ha header files directory and subdirectories ............................................................. ----------------------------------------------------------------------------- /ldirectord : Linux Director Daemon (Perl) which monitors real services like http, https and ftp ....................................................................... This code allows us to manage LVS instances for high-availability. ----------------------------------------------------------------------------- /lib/apphb : Application heartbeat client library ....................................................................... This code is what clients of the Application Heartbeat daemon in telecom/apphbd link against to get application heartbeat services. ----------------------------------------------------------------------------- /lib/clplumbing : cluster plumbing library (IPC, CPU, realtime, etc) ....................................................................... There is a TON of really useful things in here. Although they're in one directory, developers should really know about most of them. Headers for these services can be found in the /include/clplumbing/ directory. cl_log: IPC code: abstraction for fast nonblocking sockets-like communication manages queuing and other details quite nicely includes an authentication API as part of it. proctrack: child process tracking code longclock: 64-bit timing services which don't move when clock is reset GSource: functions to make high-level Glib mainloop sources cl_poll: low-latency poll(2) call replacement code realtime: manage realtime processes -- locking, priorities, etc. uids: privilege management (become nobody, return to root, etc.) cl_signal: signal wrapper code Gmain_timeout integrates longclock timers into Glib mainloop paradigm base64: conversion to/from base64 for making strings from binary cpulimits: provide limits on CPU consumption to keep realtime systems from destroying the system if they run amok mkstemp_mode: safe(r) version of mkstemp(3) timers: provides some simple millisecond resolution timer services to use instead of alarm(2) and friends. ----------------------------------------------------------------------------- /lib/pils: PILS plugin loading and interface management system .............................................................. Reasonably nice plugin management software. It is documented by slides and a paper. They can be found here: http://linux-ha.org/heartbeat/PILS.pdf -- slides from PILS talk http://linux-ha.org/heartbeat/pils.pdf -- paper on PILS ----------------------------------------------------------------------------- /lib/plugins/ : Various kinds of PILS plugins ............................................. ----------------------------------------------------------------------------- /lib/plugins/AppHBNotification : Recovery manager client plug-in implementation ............................................................................... The apphb API provides for notification when HA-aware clients stop heartbeating or exit abnormally. This notification is accomplished by providing and configuring plugins which notify anyone you want any way you want. We currently only have one such plugin -- one which notifies the recovery manager daemon. 
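To make the relationship between apphbd, the apphb client library (/lib/apphb above), and this notification plugin concrete, here is a minimal sketch of an apphb client. The function names come from include/apphb.h; the return-value conventions and exact prototypes should be checked against that header, and the work loop is obviously just a placeholder.

    #include <unistd.h>
    #include <apphb.h>

    int
    main(void)
    {
            int     j;

            /* Register with apphbd and promise a heartbeat every 2000 ms */
            if (apphb_register("example-app", "instance0") < 0) {
                    return 1;
            }
            apphb_setinterval(2000);

            for (j = 0; j < 10; ++j) {
                    sleep(1);       /* stand-in for real work */
                    apphb_hb();     /* tell apphbd we are still alive */
            }

            apphb_unregister();     /* clean exit: no failure event is raised */
            return 0;
    }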
----------------------------------------------------------------------------- /lib/plugins/HBauth : heartbeat authentication (CRC, MD5 and SHA1) .................................................................. These plugins are used to generate and validate digital signatures for heartbeat packets. ------------------------------------------------------------------------------- /lib/plugins/HBcomm : heartbeat communication method (mcast, ucast, serial, etc) ................................................................................ These plugins are used to send and receive packets for heartbeat. ----------------------------------------------------------------------------- /lib/plugins/InterfaceMgr : PILS generic interface (implementation) manager ........................................................................... These plugins are used by PILS applications to load and manage other plugins. ----------------------------------------------------------------------------- /lib/plugins/stonith : stonith implementation plugins ..................................................... STONITH stands for Shoot The Other Node In The Head. Stonith is a method of ensuring that misbehaving machines have really stopped using resources by killing (shooting) them. It takes the speculation "I think node X is dead", and makes it into a certainty. This directory contains the plugins for implementing the STONITH API for various kinds of hardware which actually carry out the dirty deed. ----------------------------------------------------------------------------- /lib/recoverymgr : recovery manager client library .................................................. This library provides services which send event notifications to the recovery manager. It is used by the recovery manager plugin in AppHBNotification. ----------------------------------------------------------------------------- /lib/stonith : stonith library and utility .......................................... This is the library which people link against who want STONITH services. There is also a "stonith" program used for testing the implementations. ----------------------------------------------------------------------------- /libltdl : GNU libltdl, a system independent dlopen wrapper for GNU libtool ........................................................................... Magic libtool stuff. We need it -- even if I don't understand it ;-) ----------------------------------------------------------------------------- /membership: Membership implementations ....................................... Implementations of membership APIs. Perhaps we should have a common API library or plugin set that folks could use to avoid duplicating and fixing that for each implementation. ----------------------------------------------------------------------------- /membership/ccm : Consensus Cluster Membership (CCM) implementation ................................................................... Ram Pai's consensus cluster membership implementation. Includes a quorum capability. Built on top of the heartbeat API. ----------------------------------------------------------------------------- /replace : replacement implementations for some common library functions (inet_pton, setenv, strerror, etc) ....................................................................... autoconf portability stuff.
----------------------------------------------------------------------------- /snmp_subagent : SNMP remote management for Linux-HA cluster ............................................................ We have an SNMP MIB reserved for Linux-HA. This code is supposed to implement various kinds of SNMP queries and traps. It was written by Yixiong Zou. ------------------------------------------------------------------------------ /telecom : telecommunication type functions required for HA-aware applications .............................................................................. Things here aren't *strictly* for telecom type applications, but I had to name it something... ;-) ----------------------------------------------------------------------------- /telecom/apphbd: application heartbeat daemon ............................................. The application heartbeat daemon. Processes register with apphbd, and then when they fail to heartbeat "often enough", or they die without unregistering, various interested parties are notified according to the configured set of plugins. Currently, we only have a plugin for the recovery manager daemon. ----------------------------------------------------------------------------- /telecom/recoverymgrd : Recovery manager (apphbd error recovery only) ..................................................................... This program takes configured actions to recover from problems that apphbd notifies it of. This typically involves killing and restarting the errant processes. Heartbeat-3-0-7e3a82377fa8/doc/FHSnotes0000644000000000000000000001556511576626513017364 0ustar00usergroup00000000000000/etc/ha.d/haresources and /etc/ha.d/ha.cf are config files which are specifically permitted by the FHS. "/etc contains configuration files and directories that are specific to the current system." /etc/ha.d/harc can be where it is by extension of the examples of /etc/rc.d/rc since it is analogous by design. Ditto for shellfuncs. The scripts in /etc/ha.d/init.d, rc.d, and resource.d are analogous by design to the scripts in /etc/rc.d/init.d. I think /etc/ha.d/bin should be moved over to /usr/lib/heartbeat. It isn't completely clear where heartbeat-fifo should go. I would suggest /var/run because it is somewhat analogous to the transient UNIX-domain socket example mentioned in the FHS. "Programs that maintain transient UNIX-domain sockets should place them in this directory". [/var/run] ------------------------------------------------------------------------------ /etc contains configuration files and directories that are specific to the current system. No binaries should be located under /etc. ------------------------------------------------------------------------------ /opt is reserved for the installation of add-on application software packages. I don't consider this an application software package. ------------------------------------------------------------------------------ /usr/lib includes object files, libraries, and internal binaries that are not intended to be executed directly by users or shell scripts. Applications may use a single subdirectory under /usr/lib. If an application uses a subdirectory, all architecture-dependent data exclusively used by the application should be placed within that subdirectory. For example, the perl5 subdirectory for Perl 5 modules and libraries. Miscellaneous architecture-independent application-specific static files and subdirectories should be placed in /usr/share.
------------------------------------------------------------------------------ 5.9 /var/run : Run-time variable files This directory contains system information files describing the system since it was booted. Files in this directory should be cleared (removed or truncated as appropriate) at the beginning of the boot process. Process identifier (PID) files, which were originally placed in /etc, should be placed in /var/run. The naming convention for PID files is .pid. For example, the crond PID file is named /var/run/crond.pid. The internal format of PID files remains unchanged. The file should consist of the process identifier in ASCII-encoded decimal, followed by a newline character. For example, if crond was process number 25, /var/run/crond.pid would contain three characters: two, five, and newline. Programs that read PID files should be somewhat flexible in what they accept; i.e., they should ignore extra whitespace, leading zeroes, absence of the trailing newline, or additional lines in the PID file. Programs that create PID files should use the simple specification located in the above paragraph. ------------------------------------------------------------------------------ set no 5.11 /var/state : Variable state information /var/state misc xdm Variable state information Editor backup files and state Miscellaneous state data X display manager variable data Packaging support files State data for packages and subsystems Tree 5.11.1 This hierarchy holds state information pertaining to an application or the system. State information is data that programs modify while they run, and that pertains to one specific host. Users should never need to modify files in /var/state to configure a package's operation. State information is generally used to preserve the condition of an application (or a group of inter-related applications) between invocations and between different instances of the same application. State information should generally remain valid after a reboot, should not be logging output, and should not be spooled data. An application (or a group of inter-related applications) should use a subdirectory of /var/state for its data. There is one required subdirectory, /var/state/misc, which is intended for state files that don't need a subdirectory; the other subdirectories should only be present if the application in question is included in the distribution. /var/state/ is the location that should be used for all distribution packaging support. Different distributions may use different names, of course. Previous releases of this standard used the name /var/lib for this hierarchy. /var/lib is deprecated, but it may be used in parallel with the required /var/state hierarchy, as a transitional measure for application-specific data. Note, however, that this allowance will be removed in a future release of the standard. Alternately, /var/lib may be made a symbolic link to /var/state. BEGIN RATIONALE /usr/lib is increasingly used solely for object files or archives of them; this is true of the current BSD UNIX variants as well as current GNU packages. Accordingly, the name /var/lib seemed inappropriate. BSD uses the name /var/db for a similar directory. This name seemed overly constricting, as it implied a directory structure intended primarily for database (.db) files. 
END RATIONALE ------------------------------------------------------------------------------ 4.5 /usr/lib : Libraries for programming and packages /usr/lib includes object files, libraries, and internal binaries that are not intended to be executed directly by users or shell scripts. Applications may use a single subdirectory under /usr/lib. If an application uses a subdirectory, all architecture-dependent data exclusively used by the application should be placed within that subdirectory. For example, the perl5 subdirectory for Perl 5 modules and libraries. Miscellaneous architecture-independent application-specific static files and subdirectories should be placed in /usr/share. Some executable commands such as makewhatis and sendmail have also been traditionally placed in /usr/lib. makewhatis is an internal binary and should be placed in a binary directory; users access only catman. Newer sendmail binaries are now placed by default in /usr/sbin; a symbolic link should remain from /usr/lib. Additionally, systems using Smail should place Smail in /usr/sbin/smail, and /usr/sbin/sendmail should be a symbolic link to it. A symbolic link /usr/lib/X11 pointing to the lib/X11 directory of the default X distribution is required if X is installed. Note: No host-specific data for the X Window System should be stored in /usr/lib/X11. Host-specific configuration files such as Xconfig or XF86Config should be stored in /etc/X11. This should include configuration data such as system.twmrc even if it is only made a symbolic link to a more global configuration file (probably in /usr/X11R6/lib/X11). Heartbeat-3-0-7e3a82377fa8/doc/Makefile.am0000644000000000000000000000422211576626513020000 0ustar00usergroup00000000000000# # doc: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in htmlfiles = heartbeat_api.html txtfiles = $(htmlfiles:.html=.txt) CLEANFILES = ChangeLog $(txtfiles) $(man_MANS) SPECSRC = $(top_builddir)/heartbeat-fedora.spec OTHER_DOCS = AUTHORS COPYING COPYING.LGPL ChangeLog README \ authkeys ha.cf haresources \ apphbd.cf doc_DATA = $(OTHER_DOCS) man_MANS = heartbeat.8 apphbd.8 cl_status.1 \ hb_standby.1 hb_takeover.1 hb_addnode.1 hb_delnode.1 \ ha.cf.5 authkeys.5 STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl XSLTPROC_OPTIONS ?= --xinclude XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) EXTRA_DIST = $(txtfiles) $(htmlfiles) $(man_MANS) $(OTHER_DOCS) ChangeLog: $(SPECSRC) rm -fr ChangeLog sed -e '1,/^%changelog/d' -e '/^%/,$$d' < $(SPECSRC) > $@ .html.txt: if [ "X$(HTML2TXT)" = "X" ]; then echo "Lynx or w3m or user-defined HTML2TXT required to convert $< to $@" >$@ ; else $(HTML2TXT) -dump $< >$@ ; fi %.5 %.8 %.1: %.xml $(XSLTPROC) \ $(XSLTPROC_MANPAGES_OPTIONS) \ $(MANPAGES_STYLESHEET) $< Heartbeat-3-0-7e3a82377fa8/doc/OBSOLETE-DOCS0000644000000000000000000000066111576626513017634 0ustar00usergroup00000000000000The following files in this directory are unmaintained, out of date, or obsolete. They still exist in the repository for reference purposes, but are no longer included in release tarballs, and are not installed on target systems. Writers who either pick up one of these orphaned documents, or remove them from the repository, should remove them from the list below. Design DirectoryMap.txt FHSnotes ipfail-diagram.pdf startstop.in Heartbeat-3-0-7e3a82377fa8/doc/README0000644000000000000000000000556711576626513016641 0ustar00usergroup00000000000000Linux-HA ======== Providing Open Source High-Availability Software for Linux and other Platforms since 1999 The Linux-HA project maintains a set of building blocks for high availability cluster systems, including * a cluster messaging layer (heartbeat, see below), * a huge number of resource agents for a variety of applications, * a plumbing library and error reporting toolkit (aka cluster-glue) http://linux-ha.org/wiki/Main_Page http://en.wikipedia.org/wiki/Computer_cluster http://linux-ha.org/wiki/Heartbeat http://linux-ha.org/wiki/Resource_Agents http://linux-ha.org/wiki/Cluster_Glue Historical note =============== Since up to release 2.1.4 the messaging layer (Heartbeat proper), the Local Resource Manager, "plumbing" infrastructure and STONITH (now known as Cluster Glue), the Resource Agents, and the Cluster Resource Manager (now Pacemaker) were all part of a single package named heartbeat, the name was often applied to the Linux-HA project as a whole. This generalization is no longer accurate, the name heartbeat should thus be used for the messaging layer exclusively. Heartbeat ========= Heartbeat is a daemon that provides cluster infrastructure (communication and membership) services to its clients. This allows clients to know about the presence (or disappearance!) of peer processes on other machines and to easily exchange messages with them. In order to be useful to users, the Heartbeat daemon needs to be combined with a cluster resource manager. A cluster resource manager (CRM) has the task of starting and stopping the services (IP addresses, web servers, etc.) 
which the cluster will make highly available. Heartbeat still comes with an integrated primitive resource manager, which basically is just a shell script. This is also referred to as "v1 style" or "haresources style" configuration. The haresources mode of operation is deprecated. If you miss it, please see http://www.planet-ha.org/#Configuring+Heartbeat+v1+Was+So+Simple Pacemaker is the preferred cluster resource manager for clusters based on the Heartbeat infrastructure layer. Reference Documentation ======================= To get you started, for further information, reference documentation and setup recommendations, refer to http://linux-ha.org/wiki/Documentation http://clusterlabs.org/wiki/Documentation Help and Support ================ For community support, mailing lists, IRC channels, and other ways to report problems and get them solved, see http://linux-ha.org/wiki/Support See Also ======== Csync2 an easy way to keep a bunch of infrequently updated files in sync over groups of nodes. http://oss.linbit.com/csync2/ DRBD a nicely integrated (with Pacemaker) replication solution, you won't need shared disks to do HA-clustering. http://www.drbd.org Heartbeat-3-0-7e3a82377fa8/doc/apphbd.cf0000644000000000000000000000352111576626513017515 0ustar00usergroup00000000000000# apphbd.cf # Default Configure file for apphbd # # Currently this file includes configure items: debug_level, debugfile # logfile, watchdog_device, watchdog_interval_ms, notify_plugin, realtime. # If one of these items is not configured, apphbd will use default configuration: # debug_level = 3 # debugfile = NULL # logfile = NULL # watchdog_device = NULL # watchdog_interval_ms = 1000 # notify_plugin : no plugin # realtime = yes # Debug level: Currently effective setting includes 0, 1, 2. # The higher the debug level, the more detailed debug information you will get. #debug_level 0 # File to write debug messages to. # Only effective when debug level is larger than 0 # Since apphbd will run as nobody, be sure to specify a file apphbd can write to. #debugfile /var/log/apphbd.debug # File to log other messages to. # Only effective when debug level is larger than 0 # Since apphbd will run as nobody, be sure to specify a file apphbd can write to. #logfile /var/log/apphbd.log # Watchdog device file. #watchdog_device /dev/watchdog # If devfs is enabled, the watchdog device might be: #watchdog_device /dev/misc/watchdog # Apphbd tickles watchdog timer at this interval (microseconds). # If watchdog_device is not set, setting watchdog_interval_ms will take no effect. #watchdog_interval_ms 2000 # Notification plug-ins. # By default apphbd searches plug-in in /usr/lib/heartbeat/plugins/AppHBNotification # You can load more than one plug-ins #recmgr plug-in will send notification to recovery manager when an apphb event #happens. See apphb_notify.h for definition of these events. #notify_plugin recmgr # Other plug-ins if available #notify_plugin other_plugins # If set 'realtime' as 'yes', apphbd will be scheduled at "realtime" priority. # Otherwise, set as 'no' will disable it. 
#realtime yes Heartbeat-3-0-7e3a82377fa8/doc/apphbd.xml.in0000644000000000000000000001273311576626513020337 0ustar00usergroup00000000000000 November 26, 2009 Heartbeat @VERSION@ Alan Robertson apphbd alanr@unix.sh Andrea Brugger man page andrea.l.brugger@intel.com Florian Haas man page florian.haas@linbit.com apphbd 8 System administration utilities apphbd Application Heartbeat Monitor for High-Availability Linux apphbd [] [ file] Description apphbd is deprecated; its use is strongly discouraged. The functionality provided by apphbd has been replaced by resource-level monitoring in Pacemaker. @HA_LIBHBDIR@/apphbd is a basic application heartbeat monitor daemon for Linux-HA. A group of Application Heartbeat APIs are defined for this heartbeat monitoring service. Applications may register with the daemon in order to be monitored. If an application fails to send a heartbeat within the specified interval, the daemon will log an event. apphbd may use a watchdog timer to monitor itself. apphbd extends its functionality by using plugins. A plugin, recmgr notifies the recovery manager daemon if certain events occur (e.g. an application fails to heartbeat). The Recovery Manager daemon (@HA_LIBHBDIR@/recoverymgrd) receives notification from the recmgr plugin, then it tries to execute recovery scripts as configured. See the recoverymgrd default configuration file, recoverymgrd.conf for details. recoverymgrd registers itself with apphbd as a client application. apphbd should be started first with the recmgr plugin loaded. Then, recoverymgrd should be configured and started Options The following options are supported: Show the status of apphbd, running or stopped. Stop (kill) the daemon. Restart the daemon. apphbd will reload its configuration file when restarted. level Set the debug level. Show a brief usage message. file Set an alternate configuration file. The default configuration file is ./apphbd.cf. For details on the file format and supported options, refer to the example apphbd.cf file found in the documentation directory. Files @HA_VARRUNDIR@/apphbd.pid – default PID file apphbd.cf – Default configuration file for apphbd. apphbd searches the file in its working directory. recoverymgrd.conf – default configuration file for recoverymgrd. recoverymgrd searches the file in its working directory. An alternative configuration file may be specified on the command line. @HA_PLUGIN_DIR@/AppHBNotification – directory containing plugins for apphbd. See also heartbeat8 Heartbeat-3-0-7e3a82377fa8/doc/authkeys0000644000000000000000000000120511576626513017522 0ustar00usergroup00000000000000# # Authentication file. Must be mode 600 # # # Must have exactly one auth directive at the front. # auth send authentication using this method-id # # Then, list the method and key that go with that method-id # # Available methods: crc sha1, md5. Crc doesn't need/want a key. # # You normally only have one authentication method-id listed in this file # # Put more than one to make a smooth transition when changing auth # methods and/or keys. # # # sha1 is believed to be the "best", md5 next best. # # crc adds no security, except from packet corruption. # Use only on physically secure networks. # #auth 1 #1 crc #2 sha1 HI! #3 md5 Hello! 
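# As a concrete (illustrative) example, an active sha1 setup would
# uncomment exactly one pair of lines like the following -- the shared
# secret shown here is only a placeholder and must be replaced with a
# long random string of your own:
#
#auth 2
#2 sha1 ReplaceThisWithALongRandomSecret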
Heartbeat-3-0-7e3a82377fa8/doc/authkeys.xml.in0000644000000000000000000000773411576626513020743 0ustar00usergroup00000000000000 24 Nov 2009 Heartbeat @VERSION@ Alan Robertson heartbeat, original Wiki page alanr@unix.sh Florian Haas man page florian.haas@linbit.com authkeys 5 Configuration Files authkeys Authentication file for the Heartbeat cluster messaging layer Description @HA_HBCONF_DIR@/authkeys is read by heartbeat 8. It enables Heartbeat to securely authenticate cluster nodes. This file must not be readable or writable by any users other than root. File format Two lines are required in the authkeys file: A line which says which key to use in signing outgoing packets One or more lines defining how incoming packets might be being signed. The file must follow the following format: num num method secret num method secret num method secret ... num is a numerical identifier, between 1 and 15 inclusive. It must be unique within the file. method is one of the available authentication signature methods (see below for supported methods). secret is an alphanumerical shared secret used to identify cluster nodes to each other. num selects the currently active authentication method and secret. Supported signature methods The following signature methods are supported in authkeys (listed here in alphabetical order): MD5 hash method. This method requires a shared secret. SHA-1 hash method. This method requires a shared secret. Cyclic Redundancy Check hash method. This method does not require a shared secret and is insecure; it's use is strongly discouraged. An absolutely up-to-date list of authentication methods supported may be retrieved by running ls @HA_PLUGIN_DIR@/HBauth/*.so. Heartbeat-3-0-7e3a82377fa8/doc/cl_status.xml.in0000644000000000000000000001445311576626513021103 0ustar00usergroup00000000000000 24 Nov 2009 Heartbeat @VERSION@ Alan Robertson cl_status alanr@unix.sh Juan Pedro Paredes Caballero man page juampe@retemail.es Simon Horman man page horms@verge.net.au Florian Haas man page florian.haas@linbit.com cl_status 1 User commands cl_status Check status of the High-Availability Linux (Linux-HA) subsystem cl_status sub-command parameters Description cl_status is used to check the status of the High-Availability Linux subsystem. Supported sub-commands hbstatus Indicate if heartbeat is running on the local system. listnodes List the nodes in the cluster. nodetype ping|normal List the nodes of the given type. Ping nodes are obsolete in Pacemaker cluster, having been replaced with the pingd resource agent. listhblinks node List the network interfaces used as heartbeat links. node should be specified as listed in the ha.cf5 file for the cluster. hblinkstatus node link Show the status of a heartbeat link. node should be specified as listed in the ha.cf5 file for the cluster. link should be as per the output of the listhblinks subcommand. clientstatus node client [timeout] Show the status of heartbeat clients. node and client should be specified as listed in the ha.cf5 file for the cluster. Timeout is in milliseconds, the default is 100ms. rscstatus Show the status of cluster resources. Status will be one of: local, foreign, all or none. This option is deprecated, it is obsolete in Pacemaker clusters. parameter parameter Retrieve the value of cluster parameters. 
The parameters may be one of the following: apiauth, auto_failback, baud, debug, debugfile, deadping, deadtime, hbversion, hopfudge, initdead, keepalive, logfacility, logfile, msgfmt, nice_failback, node, normalpoll, stonith, udpport, warntime, watchdog. Some of these options are deprecated; see ha.cf5 Options The following options are supported by heartbeat: Make the output more human readable. The default output should be easier for scripts to parse. Available with all commands. List only 'ping' nodes. Available with listnodes sub-command. Ping nodes are obsolete in Pacemaker cluster, having been replaced with the pingd resource agent. List only 'normal' nodes. Available with listnodes sub-command. See also heartbeat8, ha.cf5, authkeys5 Heartbeat-3-0-7e3a82377fa8/doc/ha.cf0000644000000000000000000002440611576626513016654 0ustar00usergroup00000000000000# # There are lots of options in this file. All you have to have is a set # of nodes listed {"node ...} one of {serial, bcast, mcast, or ucast}, # and a value for "auto_failback". # # ATTENTION: As the configuration file is read line by line, # THE ORDER OF DIRECTIVE MATTERS! # # In particular, make sure that the udpport, serial baud rate # etc. are set before the heartbeat media are defined! # debug and log file directives go into effect when they # are encountered. # # All will be fine if you keep them ordered as in this example. # # # Note on logging: # If all of debugfile, logfile and logfacility are not defined, # logging is the same as use_logd yes. In other case, they are # respectively effective. if detering the logging to syslog, # logfacility must be "none". # # File to write debug messages to #debugfile /var/log/ha-debug # # # File to write other messages to # #logfile /var/log/ha-log # # # Facility to use for syslog()/logger # logfacility local0 # # # A note on specifying "how long" times below... # # The default time unit is seconds # 10 means ten seconds # # You can also specify them in milliseconds # 1500ms means 1.5 seconds # # # keepalive: how long between heartbeats? # #keepalive 2 # # deadtime: how long-to-declare-host-dead? # # If you set this too low you will get the problematic # split-brain (or cluster partition) problem. # See the FAQ for how to use warntime to tune deadtime. # #deadtime 30 # # warntime: how long before issuing "late heartbeat" warning? # See the FAQ for how to use warntime to tune deadtime. # #warntime 10 # # # Very first dead time (initdead) # # On some machines/OSes, etc. the network takes a while to come up # and start working right after you've been rebooted. As a result # we have a separate dead time for when things first come up. # It should be at least twice the normal dead time. # #initdead 120 # # # What UDP port to use for bcast/ucast communication? # #udpport 694 # # Baud rate for serial ports... # #baud 19200 # # serial serialportname ... #serial /dev/ttyS0 # Linux #serial /dev/cuaa0 # FreeBSD #serial /dev/cuad0 # FreeBSD 6.x #serial /dev/cua/a # Solaris # # # What interfaces to broadcast heartbeats over? # #bcast eth0 # Linux #bcast eth1 eth2 # Linux #bcast le0 # Solaris #bcast le1 le2 # Solaris # # Set up a multicast heartbeat medium # mcast [dev] [mcast group] [port] [ttl] [loop] # # [dev] device to send/rcv heartbeats on # [mcast group] multicast group to join (class D multicast address # 224.0.0.0 - 239.255.255.255) # [port] udp port to sendto/rcvfrom (set this value to the # same value as "udpport" above) # [ttl] the ttl value for outbound heartbeats. 
this effects # how far the multicast packet will propagate. (0-255) # Must be greater than zero. # [loop] toggles loopback for outbound multicast heartbeats. # if enabled, an outbound packet will be looped back and # received by the interface it was sent on. (0 or 1) # Set this value to zero. # # #mcast eth0 225.0.0.1 694 1 0 # # Set up a unicast / udp heartbeat medium # ucast [dev] [peer-ip-addr] # # [dev] device to send/rcv heartbeats on # [peer-ip-addr] IP address of peer to send packets to # #ucast eth0 192.168.1.2 # # # About boolean values... # # Any of the following case-insensitive values will work for true: # true, on, yes, y, 1 # Any of the following case-insensitive values will work for false: # false, off, no, n, 0 # # # # auto_failback: determines whether a resource will # automatically fail back to its "primary" node, or remain # on whatever node is serving it until that node fails, or # an administrator intervenes. # # The possible values for auto_failback are: # on - enable automatic failbacks # off - disable automatic failbacks # legacy - enable automatic failbacks in systems # where all nodes do not yet support # the auto_failback option. # # auto_failback "on" and "off" are backwards compatible with the old # "nice_failback on" setting. # # See the FAQ for information on how to convert # from "legacy" to "on" without a flash cut. # (i.e., using a "rolling upgrade" process) # # The default value for auto_failback is "legacy", which # will issue a warning at startup. So, make sure you put # an auto_failback directive in your ha.cf file. # (note: auto_failback can be any boolean or "legacy") # auto_failback on # # # Basic STONITH support # Using this directive assumes that there is one stonith # device in the cluster. Parameters to this device are # read from a configuration file. The format of this line is: # # stonith # # NOTE: it is up to you to maintain this file on each node in the # cluster! # #stonith baytech /etc/ha.d/conf/stonith.baytech # # STONITH support # You can configure multiple stonith devices using this directive. # The format of the line is: # stonith_host # is the machine the stonith device is attached # to or * to mean it is accessible from any host. # is the type of stonith device (a list of # supported drives is in /usr/lib/stonith.) # are driver specific parameters. To see the # format for a particular device, run: # stonith -l -t # # # Note that if you put your stonith device access information in # here, and you make this file publically readable, you're asking # for a denial of service attack ;-) # # To get a list of supported stonith devices, run # stonith -L # For detailed information on which stonith devices are supported # and their detailed configuration options, run this command: # stonith -h # #stonith_host * baytech 10.0.0.3 mylogin mysecretpassword #stonith_host ken3 rps10 /dev/ttyS1 kathy 0 #stonith_host kathy rps10 /dev/ttyS1 ken3 0 # # Watchdog is the watchdog timer. If our own heart doesn't beat for # a minute, then our machine will reboot. # NOTE: If you are using the software watchdog, you very likely # wish to load the module with the parameter "nowayout=0" or # compile it without CONFIG_WATCHDOG_NOWAYOUT set. Otherwise even # an orderly shutdown of heartbeat will trigger a reboot, which is # very likely NOT what you want. # #watchdog /dev/watchdog # # Tell what machines are in the cluster # node nodename ... -- must match uname -n #node ken3 #node kathy # # Less common options... 
# # Treats 10.10.10.254 as a pseudo-cluster-member # Used together with ipfail below... # note: don't use a cluster node as ping node # #ping 10.10.10.254 # # Treats 10.10.10.254 and 10.10.10.253 as a pseudo-cluster-member # called group1. If either 10.10.10.254 or 10.10.10.253 are up # then group1 is up # Used together with ipfail below... # #ping_group group1 10.10.10.254 10.10.10.253 # # HBA ping directive for Fiber Channel # Treats fc-card-name as pseudo-cluster-member # used with ipfail below ... # # You can obtain HBAAPI from http://hbaapi.sourceforge.net. You need # to get the library specific to your HBA directly from the vendor # To install HBAAPI stuff, all you need to do is to compile the common # part you obtained from SourceForge. This will produce libHBAAPI.so # which you need to copy to /usr/lib. You also need to copy hbaapi.h to # /usr/include. # # The fc-card-name is the name obtained from the hbaapitest program # that is part of the hbaapi package. Running hbaapitest will produce # a verbose output. One of the first lines is similar to: # Apapter number 0 is named: qlogic-qla2200-0 # Here fc-card-name is qlogic-qla2200-0. # #hbaping fc-card-name # # # Processes started and stopped with heartbeat. Restarted unless # they exit with rc=100 # #respawn userid /path/name/to/run #respawn hacluster /usr/lib/heartbeat/ipfail # # Access control for client api # default is no access # #apiauth client-name gid=gidlist uid=uidlist #apiauth ipfail gid=haclient uid=hacluster ########################### # # Unusual options. # ########################### # # hopfudge maximum hop count minus number of nodes in config #hopfudge 1 # # deadping - dead time for ping nodes #deadping 30 # # hbgenmethod - Heartbeat generation number creation method # Normally these are stored on disk and incremented as needed. #hbgenmethod time # # realtime - enable/disable realtime execution (high priority, etc.) # defaults to on #realtime off # # debug - set debug level # defaults to zero #debug 1 # # API Authentication - replaces the fifo-permissions-based system of the past # # # You can put a uid list and/or a gid list. # If you put both, then a process is authorized if it qualifies under either # the uid list, or under the gid list. # # The groupname "default" has special meaning. If it is specified, then # this will be used for authorizing groupless clients, and any client groups # not otherwise specified. # # There is a subtle exception to this. "default" will never be used in the # following cases (actual default auth directives noted in brackets) # ipfail (uid=HA_CCMUSER) # ccm (uid=HA_CCMUSER) # ping (gid=HA_APIGROUP) # cl_status (gid=HA_APIGROUP) # # This is done to avoid creating a gaping security hole and matches the most # likely desired configuration. # #apiauth ipfail uid=hacluster #apiauth ccm uid=hacluster #apiauth cms uid=hacluster #apiauth ping gid=haclient uid=alanr,root #apiauth default gid=haclient # message format on the wire, it can be classic or netstring, # default: classic #msgfmt classic/netstring # Do we use logging daemon? # If logging daemon is used, logfile/debugfile/logfacility in this file # are not meaningful any longer. You should check the config file for logging # daemon (the default is /etc/logd.cf) # more information can be found in the man page.
# Setting use_logd to "yes" is recommended # # use_logd yes/no # # the interval we reconnect to the logging daemon if the previous connection failed # default: 60 seconds #conn_logd_time 60 # # # Configure compression module # It could be zlib or bz2, depending on whether you have the corresponding # library in the system. #compression bz2 # # Configure compression threshold # This value determines the threshold to compress a message, # e.g. if the threshold is 1, then any message with size greater than 1 KB # will be compressed; the default is 2 (KB) #compression_threshold 2 Heartbeat-3-0-7e3a82377fa8/doc/ha.cf.xml.in0000644000000000000000000010635211576626513020061 0ustar00usergroup00000000000000 24 Nov 2009 Heartbeat @VERSION@ Alan Robertson heartbeat, original Wiki page alanr@unix.sh Florian Haas man page florian.haas@linbit.com ha.cf 5 Configuration Files ha.cf Configuration file for the Heartbeat cluster messaging layer Description @HA_HBCONF_DIR@/ha.cf is read by heartbeat 8 upon node start-up. It lists the communication facilities enabled between nodes, enables or disables certain features, and optionally lists the cluster nodes by host name. This file can safely be made world readable, but should be writable only by root. Global directives Some directives in ha.cf are global in nature. The order of these global options is important in configuring the ha.cf file, since each directive is interpreted as it is encountered in ha.cf. These directives are and . It is recommended that these be placed first in the ha.cf file when they are entered. Other directives in this category are , , , and , but those directives are deprecated and should no longer be used. Supported directives The following directives are supported in ha.cf (listed here in alphabetical order): This directive specifies what users and/or groups are allowed to connect to a specific API group name. The syntax is simple: apiauth apigroupname [uid=uid1,uid2 ...] [gid=gid1,gid2 ...] You can specify either a uid list, or a gid list, or both. However you must specify either a uid list or a gid list. If you include both a uid list and a gid list, then a process is authorized to connect to that API group if it is either in the uid-list or it is in the gid-list. The API group name default has special meaning. If it is specified, it will be used for authorizing clients without any API group name, and all client groups not identified by any other apiauth directive. Unless you specify otherwise in the ha.cf file, certain services will be provided default authorizations as follows:

Default service authorizations
    Service          Default apiauth
    ipfail           uid=hacluster
    ccm              gid=haclient
    ping             gid=haclient
    cl_status        gid=haclient
    lha-snmpagent    uid=root
    crmd             uid=hacluster
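For example, to let the user hacluster connect as an ipfail client, and to restrict all otherwise unspecified client groups to members of the group haclient, one might write (the user and group names shown here are simply the conventional defaults):

apiauth ipfail uid=hacluster
apiauth default gid=haclient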
The autojoin directive enables nodes to join automatically just by communicating with the cluster, hence not requiring node directives in the ha.cf file. Since our communication is normally strongly authenticated, only nodes which know the cluster key can join (automatically or otherwise). The values you can give for the autojoin directive have the following meanings: none: disables automatic joining. other: allows nodes other than ourself who are not listed in ha.cf to join automatically. In other words, our node has to be listed in ha.cf, but other nodes do not. any: allows any node to join automatically without being listed in ha.cf, even the current node. Note that the set of nodes currently considered part of the cluster is kept in the hostcache file. With autojoin enabled, the node directive is no longer authoritative - the hostcache file is. The bcast directive is used to configure which interfaces Heartbeat sends UDP broadcast traffic on. More than one interface can be specified on the line. The udpport directive is used to configure which port is used for these broadcast communications if the udpport directive is specified before the bcast directive, otherwise the default port will be used. A couple of sample bcast lines are shown below. bcast eth0 eth1 # on Linux systems bcast le0 # for Solaris systems Broadcast links are not supported in Pacemaker clusters on BSD systems. The compression directive sets which compression method will be used when a message is big and compression is needed. It could be either zlib or bz2, depending on whether you have the corresponding library in the system. You can check @HA_PLUGIN_DIR@/HBcompress to see what compression module is available. If this directive is not set, there will be no compression. The compression_threshold directive sets the threshold to compress a message, e.g. if the threshold is 1, then any message with size greater than 1 KB will be compressed. The default is 2 (KB). This directive only makes sense if you have set the compression directive. The conn_logd_time directive specifies the time Heartbeat will reconnect to the logging daemon if the connection between Heartbeat and the logging daemon is broken. The conn_logd_time is specified according to the Heartbeat time syntax, for example: conn_logd_time 60 #60 seconds The default is 60 seconds. Heartbeat will not automatically reconnect to the logging daemon. It only tries to reconnect when it needs to log a message and conn_logd_time have passed since the last attempt to connect. The coredumps directive tells Heartbeat to do things to enable making core dumps - should it need to dump core. The allowed values are and . historical, for Cluster Resource Manager, now an alias to Enables the Pacemaker cluster manager. For historical reasons, the default for this option is ; however, it should always be set to . When set to , the directive automatically implies: apiauth stonithd uid=root apiauth stonithd-ng uid=root apiauth attrd uid=hacluster apiauth crmd uid=hacluster apiauth cib uid=hacluster respawn hacluster ccm respawn hacluster cib respawn hacluster attrd respawn root stonithd respawn root lrmd respawn hacluster crmd The deadtime directive is used to specify how quickly Heartbeat should decide that a node in a cluster is dead. Setting this value too low will cause the system to falsely declare itself dead. Setting it too high will delay takeover after the failure of a node in the cluster. The debug directive is used to set the level of debugging in effect in the system. 
Production systems should have their debug level set to zero (i.e., turned off). This is the default. Legal values of the debug option are between 0-255. The most useful values are between 0 (off) and 3. Setting the debug level greater than 1 can have an adverse effect on the size of your log files, and on the system's ability to send heartbeats at rapid rates, thus affecting the cluster reliability. The debug level of the system can also be specified on the command line using the -d option. Additionally, the debug level of the system can be dynamically changed by sending the heartbeat process SIGUSR1 and SIGUSR2 signals. SIGUSR1 raises the debug level, and SIGUSR2 lowers it. time|file The hbgenmethod directive specifies how Heartbeat should compute its current generation number for communications. This is a specialized and obscure directive, used mainly in firewalls which have no local disk, and other devices which do not have a method of storing data persistently across reboots. It defaults to storing the Heartbeat generations in a file. Generation numbers are used by Heartbeat for replay attack protection. If one specifies the time method, there are certain possible cases where troubles can arise. If a machine restarts Heartbeat and its local time of day clock is less than or equal to than the value of the time of day clock when Heartbeat last started, then that node will be unable to join the cluster. The initdead parameter is used to set the time that it takes to declare a cluster node dead when Heartbeat is first started. This parameter generally needs to be set to a higher value, because experience suggests that it sometimes takes operating systems many seconds for their communication systems before they operate correctly. initdead is specified according to the Heartbeat time syntax. A sample initdead value is shown below: initdead 30 In some switched network environments, switches engage in a spanning tree algorithm whenever a NIC connects to a port. This can take a long time to complete, and it is only necessary if the NIC being connected is another switch. If this is the case, you may be able to configure certain NICs as not being switches and shrink the connection delay significantly. If not, you'll need to raise initdead to make this problem go away. If this is set too low, you'll see one node declare the other as dead. The keepalive directive sets the interval between heartbeat packets. It is specified according to the Heartbeat time syntax. The logfacility is used to tell Heartbeat which syslog logging facility it should use for logging its messages. The possible values for logfacility vary by operating system, but some of the most common ones are {auth, authpriv, daemon, syslog, user, local0, local1, local2, local3, local4, local5, local6, local7}. A sample logfacility directive is shown below: logfacility local7 If you want to disable logging to syslog: logfacility none The mcast directive is used to configure a multicast communication path. The syntax of an mcast directive is: mcast dev mcast-group udp-port ttl 0 dev - IP device to send/rcv heartbeats on mcast-group - multicast group to join (class D multicast address 224.0.0.0 - 239.255.255.255). For most Heartbeat uses, the first byte should be 239. port - UDP port to sendto/rcvfrom (set this to the same value as udpport) ttl - the ttl value for outbound heartbeats. This affects how far the multicast packet will propagate. (0-255). Set to 1 for the current subnet. Must be greater than zero. 
A sample mcast directive is shown below: mcast eth0 239.0.0.1 694 1 0 The mcast6 directive is to configure an IPv6 multicast communication path. The syntax of an mcast directive is: mcast6 [device] [mcast6 group] [port] [mcast6 hops] [mcast6 loop] For example, using link-local scope with some "transient" group: mcast6 eth0 ff12::1:2:3:4 694 1 0 device - IP device to send/rcv heartbeats on mcast6 group - multicast group to join. Refer to http://tools.ietf.org/html/rfc3513#section-2.7 for valid and reserved IPv6 multicast addresses. For most heartbeat uses, addresses should be taken from: ff12::/16 Plausibility checking code during config file parsing will reject some, but will probably not be able to catch all unsuitable addresses. Please understand the IPv6 multicast addressing scheme first. Do not use reserved or well known multicast addresses. You likely would seriously confuse a lot of network devices. port - UDP port to sendto/rcvfrom mcast6 hops - affects how far the multicast packet will propagate (sockopt: IPV6_MULTICAST_HOPS). (0-4). Set to 1 for link-local. loop - sockopt IPV6_MULTICAST_LOOP; always set to 0 classic|netstring The msgfmt directive specifies the format Heartbeat uses in wire. classic - Heartbeat will convert a message into a string and transmit in wire. Binary values are converted with a base64 library. netstring - Binary messages will be transmitted directly. This is more efficient since it avoids conversion between string and binary values. When in doubt, leave the default (classic). The node directive tells what machines are in the cluster. The syntax of the node directive is simple: node nodename1 nodename2 ... Node names in the directive must match the "uname -n" of that machine. You can declare multiple node names in one directive. You can also use the directive multiple times. Normally every node in the cluster must be listed in the ha.cf file, including the current node, unless the autojoin directive is enabled. The node directive is not completely authoritative with regard to nodes heartbeat will communicate with. If a node has ever been added in the past, it will tend to remain in the hostcache file more until it's manually removed. on|off The realtime directive specifies whether or not Heartbeat should try and take advantage of the operating system's realtime scheduling features. When enabled, Heartbeat will lock itself into memory, and raise its priority to a realtime priority (as set by the rtprio directive). This feature is mainly used for debugging various kinds of loops which might otherwise cripple the system and impair debugging them. The default is on. The rtprio directive is used to specify the priority at which Heartbeat runs. It does not need to be specified unless other realtime priority programs are also running on the system. The minimum and maximum values for this field can be determined from the sched_get_priority_min(SCHED_FIFO) and sched_get_priority_max(SCHED_FIFO) calls respectively. The default value for rtprio is halfway between the minimum and maximum values. A sample rtprio directive is shown below: rtprio 5 The ucast directive configures Heartbeat to communicate over a UDP unicast communications link. The udpport directive is used to configure which port is used for these unicast communications if the udpport directive is specified before the ucast directive, otherwise the default port will be used. 
The general syntax of a ucast directive is:

ucast dev peer-ip-address

where dev is the device to use when talking to the peer, and peer-ip-address is the IP address we will send packets to. A sample ucast directive is shown below:

ucast eth0 10.10.10.133

This directive will cause us to send packets to 10.10.10.133 over interface eth0. Note that ucast directives which point at the local machine are effectively ignored. This allows the ha.cf directives on all machines to be identical.

The udpport directive specifies which port Heartbeat will use for its UDP intra-cluster communication. There are two common reasons for overriding this value: there are multiple bcast clusters on the same subnet, or the default port is already in use in accordance with some locally established policy. The default value for this parameter is the port ha-cluster in /etc/services (if present), or 694 if ha-cluster is not in /etc/services. 694 is the IANA-registered port number for Heartbeat (a.k.a. ha-cluster). A sample udpport directive is shown below:

udpport 694

You have to configure udpport (in ha.cf) before you configure ucast or bcast; if you do not, Heartbeat will use the default port (694). Due to a specification error in the syntax of the mcast directive, this directive does not apply to mcast communications.

The use_logd directive (on|off) specifies whether Heartbeat logs its messages through the logging daemon. If the logging daemon is used, all log messages are sent through IPC to the logging daemon, which then writes them into log files. If the logging daemon dies (for whatever reason), a warning message will be logged and all messages will be written to log files directly. If the logging daemon is used, the logfile/debugfile/logfacility directives in this file are no longer meaningful; check the configuration file for the logging daemon instead (the default is /etc/logd.cf). If use_logd is not enabled, all log messages are written to log files directly. The logging daemon is started and stopped by the heartbeat init script. Setting use_logd to "on" is recommended.

In the normal case, Heartbeat generates a UUID for each node in the system as a way of uniquely identifying a node - even if it should change nodenames. This UUID is typically stored in the file /var/lib/heartbeat/hb_uuid. For certain kinds of installations (those booting from CDs or other read-only media), it is impossible for Heartbeat to save a generated UUID to disk as it normally does. In these cases, one can use the uuidfrom directive to instruct Heartbeat to use the nodename as though it were a UUID, by specifying uuidfrom nodename. All possible legal uuidfrom directives are shown below:

uuidfrom file
uuidfrom nodename

The warntime directive is used to specify how quickly Heartbeat should issue a "late heartbeat" warning. The warntime value is specified according to the Heartbeat time syntax. A sample warntime specification is shown below:

warntime 10 # 10 seconds

The warntime directive is important for tuning deadtime.
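
As a purely illustrative fragment, the timing and port directives discussed above can be combined as shown below. The values are examples rather than recommendations, and deadtime is the companion directive referenced under warntime:

keepalive 2        # send a heartbeat every 2 seconds
warntime 10        # warn about late heartbeats after 10 seconds
deadtime 30        # declare a node dead after 30 seconds
initdead 60        # allow extra time at startup
udpport 694        # must appear before ucast/bcast to take effect
ucast eth0 10.10.10.133
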
Deprecated directives

The following directives are interpreted by the configuration file parser for historical reasons, but should be considered deprecated and should no longer be used.

In legacy Heartbeat clusters, the auto_failback option determined whether a resource would automatically fail back to its "primary" node, or remain on whatever node is serving it until that node fails or an administrator intervenes. The possible values for auto_failback were:

on - enable automatic failbacks
off - disable automatic failback
legacy - enable automatic failbacks in systems where all nodes in the cluster do not yet support the auto_failback option

This option has been replaced by the configurable failback policies in Pacemaker, and should no longer be used.

The baud directive is used to set the speed for serial communications. Any of the following speeds can be specified, provided they are supported by your operating system: 9600, 19200, 38400, 57600, 115200, 230400, 460800. The default speed is 19200. This option is obsolete, as serial links should not be used in Pacemaker clusters.

The deadping directive is used to specify how quickly Heartbeat should decide that a ping node in a cluster is dead. Setting this value too low will cause the system to falsely declare the ping node dead; setting it too high will delay detection of communication failure. This feature has been replaced by the more flexible pingd resource agent in Pacemaker, and should no longer be used.

The debugfile directive specifies the file Heartbeat will write debug messages to. This directive is ignored when use_logd is specified. Enabling use_logd is the recommended approach.

Hbaping directives are given to declare fibre channel devices as ping nodes. This directive was never fully supported in Heartbeat (it required manual modifications to the code base) and should not be used.

The hopfudge directive controls, in the worst case, how many nodes a packet can be forwarded through before it is thrown away; the hopfudge value is added to the number of nodes in the system to obtain that limit. It defaults to 1. This option applies to serial links only, which are deprecated.

The logfile directive configures a log file; all non-debug messages from Heartbeat will go into this file. This directive is ignored when use_logd is specified. Enabling use_logd is the recommended approach.

Ping directives are given to declare ping nodes to Heartbeat. The syntax of the ping directive is simple:

ping ip-address ...

Each IP address listed in a ping directive is considered to be independent; that is, connectivity to each node is considered to be equally important. In order to declare that a group of nodes are equally qualified for a particular function, and that the presence of any of them indicates successful communication, use the ping_group directive. This feature has been replaced by the more flexible pingd resource agent in Pacemaker, and should no longer be used.

Ping group directives are given to declare a group ping node to Heartbeat. The syntax of the ping_group directive is as follows:

ping_group group-name ip-address ...

Each IP address listed in a ping_group directive is considered to be related, and connectivity to any one node is considered to be connectivity to the group. A ping group is considered by Heartbeat to be a single cluster node (group-name). The ability to communicate with any of the group members means that the group-name member is reachable.
This is useful when (for example) two different routers may be used to contact the internet, depending on which is up, or when finding an appropriate reliable single ping node is difficult. This feature has been replaced by the more flexible pingd resource agent in Pacemaker, and should no longer be used.

The respawn directive is used to specify a program to run and monitor while it runs. If this program exits with anything other than exit code 100, it will be automatically restarted. The first parameter is the user id to run the program under, and the second parameter is the program to run; subsequent parameters are given to the program as arguments. This functionality was primarily designed for the legacy ipfail program, which has been replaced by the more flexible pingd resource agent in Pacemaker. Thus, this directive should no longer be used, except when it is implicitly generated by the pacemaker directive.

The serial directive tells Heartbeat to use the specified serial port(s) for its communication. The parameters to the serial directive are the names of tty devices suitable for opening without waiting for carrier first. On Linux, those ports are typically named /dev/ttySX. A few sample serial directives are shown below:

serial /dev/ttyS0 /dev/ttyS1 # Linux
serial /dev/cuaa0 # FreeBSD
serial /dev/cua/a # Solaris

The baud directive is used to configure the baud rate for the port(s), provided it is specified before the serial directive; otherwise the default baud rate will be used. Using this option is strongly discouraged in Pacemaker clusters, as Pacemaker's CIB updates can easily hit the practical message size limits of serial links, with undefined results.

The stonith directive is used to configure Heartbeat's legacy STONITH configuration. It assumes you're going to put a STONITH configuration file on each machine in the cluster to configure the (single) STONITH device that this node will use to reset the other node in the cluster. This functionality has been replaced by STONITH agents in Pacemaker.

The stonith_host directive is used to configure Heartbeat's release 1-only STONITH configuration. With this directive, you put all the STONITH configuration information for the devices in your cluster in the ha.cf file, rather than in a separate file. This functionality has been replaced by STONITH agents in Pacemaker.

The traditional_compression directive (on|off) enables traditional compression. It is highly recommended that this be set to off (the default); otherwise heartbeat performance can be significantly impaired.

The watchdog directive configures Heartbeat to use a watchdog device. In some circumstances, a watchdog device can be used in place of a STONITH device, and it is a reasonable thing to configure if you don't have a STONITH device or, if you wish, in addition to your STONITH device. The purpose of a watchdog device is to shut the machine down if Heartbeat does not hear its own heartbeats as often as it thinks it should. This keeps problems such as scheduler bugs from turning into split-brain situations. The general syntax of a watchdog directive is:

watchdog watchdog-device-name

A sample watchdog directive is shown below:

watchdog /dev/watchdog

The most common watchdog device currently used with general Linux systems is the softdog device. The softdog device is a software-based watchdog and is usually referred to as /dev/watchdog - although, like most UNIX devices, this is a convention, not a rule.
This functionality has been replaced by cluster self-monitoring and STONITH resource agents in Pacemaker. This directive should no longer be used.

Required directives

The following directives must always be present in ha.cf:

At least one communication topology directive (e.g., bcast, mcast, mcast6, or ucast);
Either one or more node directives, or the autojoin directive.

Example

Below is an example ha.cf for a 2-node Pacemaker cluster with redundant network communication paths:

use_logd on
mcast eth0 239.0.0.42 694 1 0
bcast eth1
node alice
node bob
pacemaker respawn
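
Where multicast is unavailable, an equivalent (purely illustrative) configuration can use unicast links instead; because ucast directives pointing at the local machine are ignored, the same file can still be used on both nodes. The IP addresses below are placeholders:

use_logd on
ucast eth0 10.0.0.1
ucast eth0 10.0.0.2
ucast eth1 192.168.1.1
ucast eth1 192.168.1.2
node alice
node bob
pacemaker respawn
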
Heartbeat-3-0-7e3a82377fa8/doc/haresources0000644000000000000000000001342111576626513020213 0ustar00usergroup00000000000000# # This is a list of resources that move from machine to machine as # nodes go down and come up in the cluster. Do not include # "administrative" or fixed IP addresses in this file. # # # The haresources files MUST BE IDENTICAL on all nodes of the cluster. # # The node name listed in front of the resource group information # is the name of the preferred node to run the service. It is # not necessarily the name of the current machine. If you are running # auto_failback ON (or legacy), then these services will be started # up on the preferred nodes - any time they're up. # # If you are running with auto_failback OFF, then the node information # will be used in the case of a simultaneous start-up, or when using # the hb_standby {foreign,local} command. # # BUT FOR ALL OF THESE CASES, the haresources files MUST BE IDENTICAL. # If your files are different then almost certainly something # won't work right. # # # # We refer to this file when we're coming up, and when a machine is being # taken over after going down. # # You need to make this right for your installation, then install it in # /etc/ha.d # # Each logical line in the file constitutes a "resource group". # A resource group is a list of resources which move together from # one node to another - in the order listed. It is assumed that there # is no relationship between different resource groups. The # resources in a resource group are started left-to-right, and stopped # right-to-left. Long lists of resources can be continued from line # to line by ending the lines with backslashes ("\"). # # The resources in this file are either IP addresses, or the names # of scripts to run to "start" or "stop" the given resource. # # The format is like this: # #node-name resource1 resource2 ... resourceN # # # If the resource name contains an :: in the middle of it, the # part after the :: is passed to the resource script as an argument. # Multiple arguments are separated by the :: delimiter # # In the case of IP addresses, the resource script name IPaddr is # implied. # # For example, the IP address 135.9.8.7 could also be represented # as IPaddr::135.9.8.7 # # THIS IS IMPORTANT!! vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv # # The given IP address is directed to an interface which has a route # to the given address. This means you have to have a net route # set up outside of the High-Availability structure. We don't set it # up here -- we key off of it. # # The broadcast address for the IP alias that is created to support # an IP address defaults to the highest address on the subnet. # # The netmask for the IP alias that is created defaults to the same # netmask as the route that it selected in the step above. # # The base interface for the IP alias that is created defaults to the # same interface as the route that it selected in the step above. # # If you want to specify that this IP address is to be brought up # on a subnet with a netmask of 255.255.255.0, you would specify # this as IPaddr::135.9.8.7/24 . 
# # If you wished to tell it that the broadcast address for this subnet # was 135.9.8.210, then you would specify that this way: # IPaddr::135.9.8.7/24/135.9.8.210 # # If you wished to tell it that the interface to add the address to # is eth0, then you would need to specify it this way: # IPaddr::135.9.8.7/24/eth0 # # And this way to specify both the broadcast address and the # interface: # IPaddr::135.9.8.7/24/eth0/135.9.8.210 # # The IP addresses you list in this file are called "service" addresses, # since they're they're the publicly advertised addresses that clients # use to get at highly available services. # # For a hot/standby (non load-sharing) 2-node system with only # a single service address, # you will probably only put one system name and one IP address in here. # The name you give the address to is the name of the default "hot" # system. # # Where the nodename is the name of the node which "normally" owns the # resource. If this machine is up, it will always have the resource # it is shown as owning. # # The string you put in for nodename must match the uname -n name # of your machine. Depending on how you have it administered, it could # be a short name or a FQDN. # #------------------------------------------------------------------- # # Simple case: One service address, default subnet and netmask # No servers that go up and down with the IP address # #just.linux-ha.org 135.9.216.110 # #------------------------------------------------------------------- # # Assuming the adminstrative addresses are on the same subnet... # A little more complex case: One service address, default subnet # and netmask, and you want to start and stop http when you get # the IP address... # #just.linux-ha.org 135.9.216.110 http #------------------------------------------------------------------- # # A little more complex case: Three service addresses, default subnet # and netmask, and you want to start and stop http when you get # the IP address... # #just.linux-ha.org 135.9.216.110 135.9.215.111 135.9.216.112 httpd #------------------------------------------------------------------- # # One service address, with the subnet, interface and bcast addr # explicitly defined. # #just.linux-ha.org 135.9.216.3/28/eth0/135.9.216.12 httpd # #------------------------------------------------------------------- # # An example where a shared filesystem is to be used. # Note that multiple aguments are passed to this script using # the delimiter '::' to separate each argument. # #node1 10.0.0.170 Filesystem::/dev/sda1::/data1::ext2 # # Regarding the node-names in this file: # # They must match the names of the nodes listed in ha.cf, which in turn # must match the `uname -n` of some node in the cluster. So they aren't # virtual in any sense of the word. # Heartbeat-3-0-7e3a82377fa8/doc/hb_addnode.xml.in0000644000000000000000000000455611576626513021154 0ustar00usergroup00000000000000 November 26, 2009 Heartbeat @VERSION@ Alan Robertson hb_delnode alanr@unix.sh Shi Guochun man page gshi@ncsa.uiuc.edu Florian Haas man page florian.haas@linbit.com hb_addnode 1 General commands hb_addnode sends a message to a Heartbeat cluster to add new nodes hb_addnode node [node] [node] Description @HA_NOARCHDATAHBDIR@/hb_addnode adds a new node, or multiple nodes, to the cluster configuration. If there is any node in the arguments that is already a cluster member, the command fails and no nodes are added. Options The following options are supported: Issues a brief usage message. 
See also hb_delnode1, heartbeat8, cl_status1 Heartbeat-3-0-7e3a82377fa8/doc/hb_delnode.xml.in0000644000000000000000000000460211576626513021160 0ustar00usergroup00000000000000 November 26, 2009 Heartbeat @VERSION@ Alan Robertson hb_delnode alanr@unix.sh Shi Guochun man page gshi@ncsa.uiuc.edu Florian Haas man page florian.haas@linbit.com hb_delnode 1 General commands hb_delnode sends a message to a Heartbeat cluster to remove one or more nodes hb_delnode node [node] [node] Description @HA_NOARCHDATAHBDIR@/hb_delnode removes a node, or multiple nodes, from the cluster configuration. If there is any node in the arguments that is currently not a cluster member, the command fails and no nodes are removed. Options The following options are supported: Issues a brief usage message. See also hb_addnode1, heartbeat8, cl_status1 Heartbeat-3-0-7e3a82377fa8/doc/hb_standby.xml.in0000644000000000000000000001220611576626513021211 0ustar00usergroup00000000000000 November 26, 2009 Heartbeat @VERSION@ Alan Robertson hb_standby alanr@unix.sh Shi Guochun man page gshi@ncsa.uiuc.edu Florian Haas man page florian.haas@linbit.com hb_standby 1 General commands hb_standby issues a failover request to the cluster manager hb_standby [all|foreign|local|failback] Description This command is deprecated. It is only suitable for legacy Heartbeat clusters without Pacemaker enabled. In Pacemaker-enabled clusters, the crm8 shell supports switching individual nodes into standby mode, and replaces hb_standby. @HA_NOARCHDATAHBDIR@/hb_standby issues a request to the cluster to move resources from the node where it is invoked, to the other node (if it is currently available). The meaning of the options is relative. This manual assumes the following configuration to be present in @HA_HBCONF_DIR@/haresources: alice drbddisk::r0 Filesystem::/dev/drbd0::/local/groups::ext3 10.0.0.1 smb bob drbddisk::r1 Filesystem::/dev/drbd1::/local/ldap::ext3 10.0.0.2 ldap Options The following options are supported: local Migrates any resources that the local node is the preferred node for. When invoked on alice, Samba would be shut down, the IP address 10.0.0.1 would be released, /local/groups would be unmounted, /dev/drbd0 would be placed into the secondary role and bob would take all these services over. When run on bob, OpenLDAP would shut down, 10.0.0.2 would be released, /local/ldap would be unmounted, /dev/drbd1 would be placed into the Secondary role and alice would take over all these services. foreign|failback Migrates any resources that the local node is not the preferred node for. When run on alice, OpenLDAP would shut down, 10.0.0.2 would be released, /local/ldap would be unmounted, /dev/drbd1 would be placed into the Secondary role and bob would take over all these services. When invoked on bob, Samba would be shut down, the IP address 10.0.0.1 would be released, /local/groups would be unmounted, /dev/drbd0 would be placed into the secondary role and alice would take all these services over. all Migrates all resources to the other node. Invoking hb_standby without any options is identical to hb_standby all. Caveats hb_standby is only usable in R1-style configurations (i.e., those configured using the haresources file). 
See also hb_takeover1, heartbeat8, cl_status1 Heartbeat-3-0-7e3a82377fa8/doc/hb_takeover.xml.in0000644000000000000000000000543211576626513021370 0ustar00usergroup00000000000000 November 26, 2009 Heartbeat @VERSION@ Alan Robertson hb_takeover alanr@unix.sh Shi Guochun man page gshi@ncsa.uiuc.edu Florian Haas man page florian.haas@linbit.com hb_takeover 1 General commands hb_takeover issues a failover request to the cluster manager hb_takeover [all|foreign|local|failback] Description This command is deprecated. It is only suitable for legacy Heartbeat clusters without Pacemaker enabled. In Pacemaker-enabled clusters, the crm8 shell supports switching individual nodes into standby mode, and replaces hb_takeover. @HA_NOARCHDATAHBDIR@/hb_takeover issues a request to the cluster to move resources to the node where it is invoked, from the other node. Issuing hb_takeover on the current node is equivalent to performing hb_standby on the other node. Caveats hb_takeover is only usable in R1-style configurations (i.e., those configured using the haresources file). See also hb_standby1, heartbeat8, cl_status1 Heartbeat-3-0-7e3a82377fa8/doc/heartbeat.xml.in0000644000000000000000000001220011576626513021025 0ustar00usergroup00000000000000 24 Nov 2009 Heartbeat @VERSION@ Alan Robertson heartbeat alanr@unix.sh Juan Pedro Paredes Caballero man page juampe@retemail.es Simon Horman man page horms@verge.net.au Florian Haas man page florian.haas@linbit.com heartbeat 8 System administration utilities heartbeat Heartbeat subsystem for High-Availability Linux Description heartbeat is a basic heartbeat subsystem for Linux-HA. It will run scripts at initialisation, and when machines go up or down. This version will also perform IP address takeover using gratuitous ARPs. It works correctly for a 2-node configuration, and is extensible to larger configurations. It implements the following kinds of heartbeats: UDP/IP broadcast; UDP/IP multicast; UDP/IP unicast; Bidirectional Serial Rings ("raw" serial ports) — this type is deprecated and should no longer be used; special "ping" heartbeats for routers, etc. — this type has been superseded by functionality in pacemaker and should no longer be used. Comprehensive documentation on heartbeat is available in the Heartbeat User's Guide. If this documentation is not installed on your system, it can be found at http://linux-ha.org/. Options The following options are supported by heartbeat: Increment debugging level. Higher levels are more verbose. Reload heartbeat. This option is functionally identical to sending a running heartbeat process a HUP signal. If the configuration has not changed, then this option is essentially a no-op. If ha.cf5 or authkeys5 has changed, then heartbeat will re-read these files and update its configuration. This option may not be used together with . Kill (stop) heartbeat. Report heartbeat status. Heartbeat restart exec flag (internal use only). May not be used with . Heartbeat current resource state for restart (internal use only). Only valid with . Print out heartbeat version. Note that most of these options are used for supporting the heartbeat init script, which provides the conventional start, stop, status and restart options (among others). It is recommended to use this rather than invoking the heartbeat command directly. See also ha.cf5, authkeys5 Heartbeat-3-0-7e3a82377fa8/doc/heartbeat_api.html0000644000000000000000000010242311576626513021424 0ustar00usergroup00000000000000 HEARTBEAT API

HEARTBEAT API

Introduction:

This document describes the application programming interface (API) for heartbeat. The following interfaces are available:
  1. ll_cluster_new()
  2.   signon()
  3. signoff()
  4. delete()
  5. set_msg_callback()
  6. set_nstatus_callback()
  7.   set_ifstatus_callback()
  8. init_nodewalk()
  9. nextnode()
  10. end_nodewalk()
  11. init_ifwalk()
  12. nextif()
  13. end_ifwalk()
  14. if_status()
  15. sendclustermsg()
  16. sendnodemsg()
  17. inputfd()
  18. msgready()
  19. setmsgsignal()
  20. rcvmsg()
  21. readmsg()
  22. setfmode()
  23. get_parameter()
  24. get_deadtime()
  25. get_keepalive()
  26. get_mynodeid()
  27. get_logfacility()
  28. get_resources()
  29. errmsg()

Description:

The details of each of the API functions are as follows:

ll_cluster_new()

ll_cluster_t *ll_cluster_new(const char *type)
Parameter description:
'type' identifies the service provider. Its value must currently be "heartbeat".
Description:
Register with the heartbeat library and create a client instance of type "heartbeat"
Return Value:
Returns a handle to the heartbeat library instance. This handle is an opaque object with a set of member functions. The member functions provide the necessary mechanism to interact with the heartbeat daemon. Returns NULL on failure.
Observation:
Why is the 'type' parameter required when we know for sure that we are only interacting with the heartbeat daemon? Or is it that the library wants to be generic and able to interact with any arbitrary daemon? The latter.

signon()

int signon(struct ll_cluster *cinfo, const char *service)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new()
'service'
is the type of service which the calling process registers with the heartbeat daemon.
Description:
This function sets up a communication link with the local heartbeat daemon and registers the client for the 'service' service. In the case of a casual client the clientid is NULL; in the case of a non-casual client the clientid is non-NULL and indicates the service this client wants to sign on for. The communication link set up with the heartbeat daemon is a FIFO. Casual clients interact using a FIFO whose name is derived from the pid of the client process, while non-casual clients interact with the heartbeat daemon using a FIFO whose name is derived from the 'service' string.

The heartbeat daemon has no notion of 'service'. Only the clients registered with the heartbeat daemon for the same service have a notion of that service. 'service' is mostly a mechanism for providing primitive group services to a set of non-casual clients registered for that service.

Return value:
On success returns HA_OK
On failure returns HA_FAIL
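
As a purely illustrative sketch, the fragment below strings together ll_cluster_new(), signon(), signoff() and delete() using the prototypes exactly as documented in this file. It assumes the declarations (including HA_OK/HA_FAIL) come from <hb_api.h>, mentioned under get_parameter() below; in the shipped library these entry points are reached through the client handle's member-function table, so the exact call syntax may differ.

#include <stdio.h>
#include <hb_api.h>                     /* assumed header for the declarations used here */

int main(void)
{
        ll_cluster_t *hb;

        /* Register with the heartbeat library. */
        hb = ll_cluster_new("heartbeat");
        if (hb == NULL) {
                fprintf(stderr, "ll_cluster_new failed\n");
                return 1;
        }

        /* Sign on as a non-casual client named "example". */
        if (signon(hb, "example") != HA_OK) {
                fprintf(stderr, "cannot sign on to heartbeat\n");
                delete(hb);
                return 1;
        }

        /* ... interact with the cluster here ... */

        signoff(hb);    /* drop the communication link */
        delete(hb);     /* release the library instance */
        return 0;
}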

signoff()

int signoff(struct ll_cluster *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new()
Description:
This function deletes the communication link with the local heartbeat daemon and unsubscribes the client from being a heartbeat client.
Return Value:
On success it returns HA_OK.
On failure it returns HA_FAIL.

delete()


int delete(struct ll_cluster *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new()
Description:
This operation is the reverse of ll_cluster_new(). It unsubscribes the client from the heartbeat library and cleans up the corresponding data structures.
Return Value:
On success returns HA_OK
On failure returns HA_FAIL

set_msg_callback()

int set_msg_callback(struct ll_cluster *cinfo, const char *msgtype, llc_msg_callback_t callback, void *p)
Parameter Description:

'cinfo'
is the handle to the library instance got through ll_cluster_new()
'msgtype'
is the type of message on receipt of which the 'callback' is called.
'callback'
is the function to be called on receipt of a message of type 'msgtype'.
'p'
is the parameter to be passed to the 'callback' function.
Description:
This function registers a callback function along with its parameter. The callback function is called by the heartbeat library on receipt of a message of type 'msgtype' from the heartbeat daemon.
NOTE:
Neither the heartbeat library nor heartbeat daemon interpret 'msgtype'. The 'msgtype' is interpreted only by the client.  Clients can coin their own msgtype as long as all the clients signed up for the same service can interpret it.
Return Value:
On success HA_OK is returned.
On failure HA_FAIL is returned.
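
A small sketch of registering a message callback. The parameter list of llc_msg_callback_t is not reproduced in this document, so the (struct ha_msg *, void *) shape below is an assumption, as is the <hb_api.h> include; "app-msg" is an arbitrary client-defined msgtype.

#include <stdio.h>
#include <hb_api.h>     /* assumed header */

/* Assumed callback shape: the received message plus the private pointer
 * supplied at registration time. */
static void on_app_msg(struct ha_msg *msg, void *private)
{
        (void)msg;
        (void)private;
        printf("received an app-msg message\n");
}

static int register_msg_callback(ll_cluster_t *hb)
{
        /* Deliver every "app-msg" message to on_app_msg(). */
        return set_msg_callback(hb, "app-msg", on_app_msg, NULL);
}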

set_nstatus_callback()

int set_nstatus_callback(struct ll_cluster *cinfo, llc_nstatus_callback_t cbf, void *p)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
'cbf'
is the function pointer to be called on change of status of any node in the cluster.
'p'
is the parameter to be passed to the callback function.
Description:
This function registers a callback function along with its parameter. The callback function is called by the heartbeat library on change of status of any node in the cluster.
Return Value:
On success HA_OK is returned.
Observation:
There is a bug in this function. It does not validate the 'cinfo' parameter. It should return HA_FAIL on failure.

set_ifstatus_callback()

int set_ifstatus_callback(struct ll_cluster *cinfo, llc_ifstatus_callback_t cbf, void *p)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new()
'cbf'
is the function pointer to be called on change of status of any interface in the cluster.
'p'
is the parameter to be passed for the callback function.
Description:
This function registers a callback function along with its parameter. The callback function is called by the heartbeat library on change of status of any network interface in the cluster in the view of the current cluster node.
Return Value:
On success HA_OK is returned.

Also, there is a bug in this function. It does not validate the 'cinfo' parameter. It should return HA_FAIL on failure.
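
A sketch of registering node and interface status callbacks. The callback parameter lists shown (node, status, private pointer; and node, interface, status, private pointer) are assumptions, since the typedefs are not reproduced above.

#include <stdio.h>
#include <hb_api.h>     /* assumed header */

/* Assumed shape of llc_nstatus_callback_t. */
static void on_node_status(const char *node, const char *status, void *private)
{
        (void)private;
        printf("node %s is now %s\n", node, status);
}

/* Assumed shape of llc_ifstatus_callback_t. */
static void on_if_status(const char *node, const char *iface,
                         const char *status, void *private)
{
        (void)private;
        printf("link %s/%s is now %s\n", node, iface, status);
}

static void register_status_callbacks(ll_cluster_t *hb)
{
        set_nstatus_callback(hb, on_node_status, NULL);
        set_ifstatus_callback(hb, on_if_status, NULL);
}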

init_nodewalk()


int init_nodewalk(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
Description:
This function talks to the heartbeat daemon and retrieves the set of nodes that are part of the cluster. If the retrieval is successful HA_OK is returned.
Return Value:
On success HA_OK is returned
On failure HA_FAIL is returned

nextnode()

char *nextnode(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance gotten through ll_cluster_new().
Description/Return Value:
This function returns the next node in the cluster, which is cached in the heartbeat library. If no more nodes are available, it returns NULL.

end_nodewalk()


int end_nodewalk(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
Description:
This clears the cluster node information cached in the heartbeat library.
Return Value:
Returns HA_OK on success and HA_FAIL on failure.
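
The three calls above combine into the usual walk pattern. A minimal sketch using only the prototypes documented in this file (header assumed):

#include <stdio.h>
#include <hb_api.h>     /* assumed header */

static void list_nodes(ll_cluster_t *hb)
{
        const char *node;

        if (init_nodewalk(hb) != HA_OK) {
                fprintf(stderr, "init_nodewalk failed\n");
                return;
        }
        /* nextnode() returns NULL once the cached node list is exhausted. */
        while ((node = nextnode(hb)) != NULL)
                printf("cluster node: %s\n", node);
        end_nodewalk(hb);       /* release the cached node information */
}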

init_ifwalk()

int init_ifwalk(ll_cluster_t *cinfo, char *host)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
'host'
is the node in the cluster whose network interfaces are of interest.
Description:
This function talks to the heartbeat daemon and caches all the network interface information associated with 'host' node. This caching is done in the heartbeat library.
Return Value:
If the network interface information is successfully retrieved, HA_OK is returned; on failure, HA_FAIL is returned.

nextif()

  char * nextif(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
Description:
This function returns the next network interface of the 'host' node in the cluster, which is cached in the heartbeat library.
NOTE:
The 'host' is the node of the cluster specified during the call to init_ifwalk()
Return Value:
If no more interfaces are available it returns NULL.

end_ifwalk()


int end_ifwalk(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
Description:
This clears the network interface information cached in the heartbeat library.
Return Value:
Returns HA_OK on success and HA_FAIL on failure.

if_status()

char *if_status(ll_cluster_t *cinfo, const char *host, const char *ifname)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'host'
is the node name whose interface's status is queried.
'ifname'
is the name of the interface whose status is queried.
Description:
This function returns a string that specifies the state of the interface 'ifname' on cluster node 'host'.
Return Value:
It returns "up" if the link is active and "dead" if the link is down. NULL if there was a error.

sendclustermsg()

int sendclustermsg(ll_cluster_t *cinfo, struct ha_msg* msg)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'msg'
is the message to be sent to all non-casual clients signed up for the same service as that of the calling client.
Description:
This function broadcasts the message to all members of the group.
Return Value:
This function returns HA_FAIL if called by a casual client or if there was some other failure.
On success it returns HA_OK.

sendnodemsg()

int sendnodemsg(ll_cluster_t *cinfo, struct ha_msg *msg, const char *nodename)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'msg'
is the message to be sent to a non-casual client signed on to the same service on node 'nodename'.
Description:
This function sends the message to a non-casual client residing on node 'nodename'. The client belongs to the same group as that of the calling client.
Return Value:
This function returns HA_FAIL if called by a casual client or if there was some other failure.
On success it returns HA_OK.
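
A sketch of building and sending a small client-defined message. Only ha_msg_del() is mentioned in this document (under readmsg()); the ha_msg_new()/ha_msg_add() helpers and the <ha_msg.h> header are assumed to come from heartbeat's message library, and the field names used are illustrative.

#include <stdio.h>
#include <hb_api.h>     /* assumed header */
#include <ha_msg.h>     /* assumed header for the message-building helpers */

static int send_examples(ll_cluster_t *hb, const char *peer)
{
        struct ha_msg *msg = ha_msg_new(0);     /* assumed constructor */

        if (msg == NULL)
                return HA_FAIL;
        ha_msg_add(msg, "t", "app-msg");        /* client-defined msgtype */
        ha_msg_add(msg, "info", "hello");

        if (sendclustermsg(hb, msg) != HA_OK)   /* broadcast to the group */
                fprintf(stderr, "broadcast failed\n");
        if (sendnodemsg(hb, msg, peer) != HA_OK)        /* send to one node */
                fprintf(stderr, "send to %s failed\n", peer);

        ha_msg_del(msg);
        return HA_OK;
}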

inputfd()

int inputfd(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().

Description/Return Value:
Returns the file descriptor of the communication link set up with the local heartbeat daemon.
On failure it returns -1.
Observation:
This function is needed in order to be able to perform a select() call that waits on incoming API messages. It should not be used for any other purpose.

msgready()

int msgready(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
Description:
Checks whether there is any message to be read.
Return Value:
If a message is ready to be read, a non-zero value is returned. Otherwise 0 is returned.

setmsgsignal()

int setmsgsignal(ll_cluster_t *cinfo, int nsig)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'nsig'
is the signal to be used by the heartbeat daemon to signal the client process.
Description:
This function asks the heartbeat daemon to send signal number 'nsig' to the client whenever a message arrives for it.
Return Value:
returns HA_OK on success
and HA_FAIL on failure

rcvmsg()

int rcvmsg(ll_cluster_t *cinfo,  int blocking)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'blocking'
indicates whether to block for the receipt of a message or not.
Description/Return Value:
This function returns true if a message for which a callback function was not registered is received.

If blocking is set, then it waits till a message arrives. But it returns true only if that message does not have a callback registered.

NOTE:
In all cases, received messages are deleted by this call.
Observation:
This interface is provided as a convenience to allow for clients which only process certain message types through registered callback functions to never have to worry about the possibility of failing to destroy their messages. If you process all messages you handle with callbacks, then this function is a convenient choice. Otherwise, you likely need to use readmsg().

readmsg()


struct ha_msg * readmsg(ll_cluster_t *cinfo, int blocking)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'blocking'
indicates whether to block for the receipt of a message or not.
Description/Return Value:
This function is the same as rcvmsg() except that it returns messages without registered callbacks to the caller (and does not delete them). It returns the next message for which a callback is not registered.

If 'blocking' is set to FALSE, it will not wait for a message to arrive; it returns NULL if no message that is not intercepted by a callback function is currently ready to read.

NOTE:
The message returned must be disposed of by the caller by calling ha_msg_del().
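
Putting inputfd(), msgready(), readmsg() and ha_msg_del() together gives the usual event loop. A sketch under the same assumptions as the earlier fragments:

#include <stdio.h>
#include <sys/select.h>
#include <hb_api.h>     /* assumed header */

static void message_loop(ll_cluster_t *hb)
{
        int fd = inputfd(hb);
        fd_set rfds;

        if (fd < 0)
                return;
        for (;;) {
                FD_ZERO(&rfds);
                FD_SET(fd, &rfds);
                /* Wait until heartbeat has something for us. */
                if (select(fd + 1, &rfds, NULL, NULL, NULL) < 0)
                        break;
                /* Drain everything that is ready without blocking. */
                while (msgready(hb)) {
                        struct ha_msg *msg = readmsg(hb, 0 /* non-blocking */);

                        if (msg == NULL)
                                break;  /* whatever was ready was handled by a callback */
                        /* ... examine msg here ... */
                        ha_msg_del(msg);        /* caller must dispose of it */
                }
        }
}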

setfmode()


int setfmode(ll_cluster_t *cinfo, int mode)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().
'mode'
indicates which types of messages are to be delivered to the client; all others are filtered out.

The modes have the following meanings:

LLC_FILTER_DEFAULT
All messages destined to this client are received, along with all that are addressed to all clients.
LLC_FILTER_PMODE
All messages, but filter out heartbeats that don't tell us anything new.
LLC_FILTER_ALLHB
All heartbeats including those that don't change status.
LLC_FILTER_RAW
All packets from all interfaces, even duplicates. Packets with authentication errors are still ignored.
Description:
This function sets the types of messages that the client is interested in receiving from the local heartbeat daemon. It is mainly used for debugging.
Return Value:
returns HA_OK on success and HA_FAIL on failure

get_parameter()
char *get_parameter(ll_cluster_t *cinfo, char *pname)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
'pname'
is the name of the parameter to retrieve
Description:
This function retrieves the value of the named parameter from heartbeat. The parameters supported are listed in <hb_api.h>.
Return Value:
If the parameter value can be successfully retrieved, a pointer to a malloced string is returned; NULL is returned on failure. The returned value is malloced, and the caller must free it when it is no longer needed.
get_deadtime()
long get_deadtime(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
Description:
This function retrieves the deadtime value from heartbeat.  The value returned is in milliseconds.
Return Value:
The return value is the deadtime, measured in milliseconds.
get_keepalive()
long get_keepalive(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().

Description:
This function retrieves the keepalive value from heartbeat.  The value returned is in milliseconds.
Return Value:
The return value is the keepalive interval, measured in milliseconds.
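
A short sketch querying the cluster timing values and one named parameter. The parameter name "deadtime" is purely illustrative - the accepted names are listed in <hb_api.h> (see get_parameter() above) - and the header include is an assumption:

#include <stdio.h>
#include <stdlib.h>
#include <hb_api.h>     /* assumed header */

static void show_timing(ll_cluster_t *hb)
{
        long deadtime_ms = get_deadtime(hb);    /* milliseconds */
        long keepalive_ms = get_keepalive(hb);  /* milliseconds */
        char *value = get_parameter(hb, (char *)"deadtime");    /* illustrative name */

        printf("deadtime  = %ld ms\n", deadtime_ms);
        printf("keepalive = %ld ms\n", keepalive_ms);
        if (value != NULL) {
                printf("parameter = %s\n", value);
                free(value);    /* get_parameter() returns malloced memory */
        }
}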

get_mynodeid()

const char * get_mynodeid(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
Description/Return Value:
This function returns the node id of the current cluster node.  It is generally (though not always) the same as the uname -n name of the current server.  The return value is a static value.

get_logfacility()
long get_logfacility(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new().

Description:
This function retrieves the logfacility value from heartbeat.  
Return Value:
The return value is the logfacility currently being used by heartbeat.  If there is no log facility being used by heartbeat, then it will return -1.

get_resources()

const char * get_resources(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
Description/Return Value:
This function returns the current resource allocation as supported by the current resource management function. The return value is a static string containing one of "local", "foreign", "all", "none" or "transition". "local" means the local node is supporting only local resources. "foreign" means the local node is supporting only foreign resources (those belonging to the other node). "all" means the local node is supporting all cluster resources. "none" means the local node is not providing any cluster resources. "transition" means that resources are currently in transition. If heartbeat is not providing resource management, the return value will be NULL.

errmsg()

const char * errmsg(ll_cluster_t *cinfo)
Parameter Description:
'cinfo'
is the handle to the library instance got through ll_cluster_new
Description/Return Value:
This function returns the last set of error messages from the previous API call. This is not a very wonderful interface (according to alanr).

Heartbeat-3-0-7e3a82377fa8/doc/ipfail-diagram.pdf0000644000000000000000000022314411576626513021313 0ustar00usergroup00000000000000[binary PDF data (ipfail diagram) omitted]
w=OZ>”ÝþÔß7Ú¥.£½:7D=­AR@»§O®iQ‰uô#±‘@Ç$V~4V•jQÓð§SiG•N –§³ KŒõ^*óô©­¿ãáåSBq:¼*HN&O¨«ôQEjV•QEQEQEQEQEQEQEQEQEQEQEQEQEQEÙ$X£.Ý!!A$à¦$ক™QK1ÀMeÜ^<¹UùSõ?ZŽyÚy77téQv¬»«Ö””*ž½ÍfÜÞ4¹HÉTõîh=h£§áIЦMT£ùQEîi´Pp5 ;ˆ4¬Ùéқژ͞*'lœ”Uû]E„˜–OOJtS½»ï°{ŽÆ–¤÷!Áî;ݾ-‹²°ù†¾úÕ…ßšÌæ$¶P6@ôÿ9¨ÉÚ2i×·_j‘\ .1ïR^\ý¦E|mqŠ üDÌXñÒ‘›q¤ã<Õ2ÙéÒ©3çÒŽÔQ×ëIL'ÊZJ(ÇjLÑEZ>´RQEPHù ÑGÒ” œ’Oi…Ö¥µ!îã\¹ü¿ýT©‡‘TI„Ã:®y$ ÔŠ1 J‡~´ú?•´ªBŽ€`VÀ@p(¢Š8––ÿõé(ÿ&­0œšOz)àf—Üþu ž”Ö8ô×m¢“©ç“G¥ƒQ“PÒhëEÃÍ:Ô%‹éNvϧze1›<•“GZ=è>ô”ÂqL¢ŽübƒÿUFd'À¦3c­!`½iÌÛ~µbÝzzR}hè)…³íQ3“ì=(úÑüé;Qži¥©´}QI“EÕ‹d;‹úp>µ_ð«ñ"Lc ©m“t¹?ÃÍKn»¤ÉíÍXS¹Fi{ÔHpG¥KøÖŠœzÑFܾôQGÿ®ŒRÓ©ÙÏ×¥Mmÿ øÔ¯Ö¬Zf$Ž@ÍMoóLƒý¡RCÌÉõzŠ(­ZÒ¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š dœ Ê»¸3I´‘Oþuföã`òÔóüU›õþuq“ä©à}êϾ¸Éò”ñüT ;QG¨UCGùQM'4w¢£öýiìp=MCÞ˜çµ2Fã(éGoŠeEGn)(úÐ}é¤ÑA jrsJç&›Q³géQ;dã°¢ŒñI×ÚL&™ßÚŠ(¢›E~~TÖ}§Íâ:Ó©¥Àuö¨ÉÝɤ¦ôÃ' üiÅÉ>ŸJoZ(4Ü“L$žI¢®éŸñðßî…R«ºg þçõ©­?ãî?­KiÿqýkRŠ(íøVÝmÑH{zÒšm5ºQÒÿ]ç4QÒ›ILö¨èêsëG¿õ¨Éɨî9 ÷4žô¤ÒcŠa4”t¦9ì?:sPÓö¦HØ¢çGj 0œ Š“¯ò£=ûQÇ\qQ3EFÍ­#6ÑC6ìzv¦þRw¨ÉîjrsÞŠ:QE39¢Š?É¢‚@ÑESCØ~tÝì{ÓKŠi‘Aîj_z¿ú¤ú É=süë]bªç8U›#–sŽ1SÙ¶æ~0¥þu,m•Õoœ§*Ꜻ‡ RúQE%MGáW,†K7p1TÿÏ5~Íq 8êzã­X³¹_lššÔfqíÍX¢Š+N´(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¦K Š2ç·J}Q½“2켚Šâ_*Ýú­G<žTE»ôNf.üõÏ'ZSɤëøÖ);‰5ŽçsëE!¥=3IÚšÔÚ(£óÿ F;TÓOƒÀÍFç-ì8¦ö qEEÖ ''>´w¤ëJy¤¤cÚõ£Þ™#ëNcš‡?çÖ£cÚ™#`bqGáG­ ¨É¨¨QGåM¢ŽÿäÑEDížJBqHÍ´R´žŸ3õ¢Ž¾ôÂIëP–,rh£½¿ ZJNÔcÚƒÖ“>ô™ŽÔ½½ªÖžán€þòŸôª)ÈÅ$W%X|Rùs#ö4øŸË•Ђk þt‘HŒ)È# ÒûÖø9+x€GJß”õ¤¦“IE2CÆ=iõŸ¥1ŽÖšç õ¦Ð}hïAüê3À¨i;ÑïE#{ÓO‚qQ¹Ë{S~¢Œ *#P“š &(ëMcÇSÅ1ziÀ¦ÈÙã?Zf>”{Š>•õ %ŽM'Z(£üõ¦RQAâMDÄž¼RÇ֚ʹ{Ó™ÉéùÓ¿Z1íE0’zÔD–äÒô¤£·ùô¢’’î‘WžOnµ¬+>ÍwMŸAÚ¯Ž¢®ÙŒF[ÔÕû%Äe½M/J(¢­Õš•T{qN¨£?0Ïz–¤S‘S!ÊÑþsOI¤Œa_vê2Šr±S•$cO©È$cW#»9ǵhà ‚=EeÖ¬[Ïå§î“ùUÛ{£²ƒßÒ¬ÃrwmäzúUê(¢®ÕÊ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¤f ¥@3Xò±$“ÔžkFñöÂ8,JÌ“¨Ò³µ 2áaT/äÉÚ;S;QEªBõ£ü(ïš)§­TnyÕ!8ô¨?1Ïjd‡ŒzÑÒJ(¦TTŸÌQš)®p¾¹¦ÔÐN1Î[éMè(£·¡¨‰ïP’M%QL¤£ÚŠ)¬ØéÔÐNh'4Ù<~f™ß¥¨95;ŽM¨£·ùô£€sIIG™ôÍÈ£ÖšM~T±[M(܈Hõè?úõcû2l}øÿ3þ"[Í ÊÄÄzâ¤Kyr±±¸ª_•jK „' Žªj±I`Ž ö¦¼RFpèËõ×ã8t+õ¡§Ý(Qœsò“ü«Gð52L؉ ŸÒµíc’+p²Onø•¥§Ï#G岪8éZVÈÈ•‡íô©½è¢j·W)áI5 I!~u1ºÔR›”…%)¤¨Ú™G¿ëQÈr@ôéO'·­CþM1Ïjd‡ŒzÑô ÑIQ“ÅEGÖ¡c“š’C…ÅGƒØ8Î3ŠaQÈrqéIüé8¢ŠˆšŽŠ;dÑøTr7ji8ŒÛFi·þsMíAïEGP“’I¥ïIÚŽE'SëHN)3Kž8£?ϱ¤ÿ±kw àíîiQZG ;Ó£F‘ÂŽ¦¬ÚÆc‡,0Ìrxüªz(­4@ˆtµ QÐ ^Ô½)£®)jAÒÞ€qƒÚ§Ö ©På=QRDy"ÚŠ(§Ô”ªqô¥éMéÍ;õ§-(«Ö²oiêµ=fÂÛ%Víž~•¥ZV²o‹ªñWí¤ßUâŠ(¢§©¨¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š*…ã›oeª'ï­O#‘˜w$ÕqX—¾VoRkå÷ÈO©&ŽÔ~tö ñQ• %…~Ê)’*?j|‡‘L¨Û–¨_—4v¤=E/jO¥4ô¦ÑøÔrŸ\T™Ç>•äôëQ¹ãÉ=hüé:ÒÒvTTQEÚ(þu Ç5#Ÿ—ùÓÒÊáÀ;õ'ýzM!ÚŠ[×4ÆW‘¶¢–Ç\ ÕåE]þÌ›®èÿ3þ‡M˜†BG`M;ì“ÿÏ&¥û,ÿóɪŸJOÂ¥–ÞhGÏ޽ª*…Õíe*}ÅDÊÊpÊTúŠ9üjõ˜“K‚¿Â¾¿_j©fYQñ=q[à` 8•kN¶Y\Èã*½©«vë+™eW õ4€1ÒŠ=èþ•¯Z´~…C=¬S•2GqÜz}*n)2)®¨êUÀ#ÐÓ]×k(+èi5 £ ;S±Ÿ­7&Œ}hÈqÒ”` À¢Š(ÿm‡æ¦ÿ*¯š¿½FAÏ=êv}h<ýi(4SSM¦HxÇ­Fiòu3ñ¨˜üÕ ýóFi(4iM¨œå³éS*n±~¹VȪüþu¡nmÀlr9¢óé´Ò[¯™+猩¬ê)Ò!ÊŸÿ]6«A õ R1ÀÍDc€ 'Ò!ç¯J–Ò=Ó=ŸÇµ5TÉ(AÜâ£Á’Pƒ×Ñ4RmaŽ:Ôu­W% àÓ50¶|u^}éÑBrûv«4Ø`Ü2àóÐQ¸#sƒÏJ‚;`\çžÕ8ࣧJ1EYHÖ1…÷©Ñ}ÐN1Š)¨rƒéN©úÕ‘ÈŠZLcšQNZZZ|g’)”ä8aèzÓ”àҩíKE¨©*z)Ã¥7­8t¥^´ +N6ß¶s‘Ífúªý«n€qÐâ®Y6%+ê*Í›bB¾¢¦¢Š*õ]¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š)²±» Nª×R§–цË™3ˆâf'sL‘ÂFÄœqÅPs…5 I'Aõ¨ÿÏéX÷«CóQíAê(=h4ÃÒ™ëIG¿½Sh¨ŸïS{~­÷Ö’¢=M@ßxÑÞRõ¤¦µ%#/½CÚ¤“ úÔuýêŠCóP%µu¦Qßè}jŽ«Móµ={š-müæ%²zûûV•Zµ´ó÷û½‡­Z¶µó˜ÿw°õ¨ã‚8†yèOz’Z=zV‚¨Q…AW•BŒ(ÂŽëE--^{(¦ÉÆ×þòÿQV=éô¦IH»dPG½5ãI×PG½e[Àö÷ñ FÞ¸nÇÿUjñéLt(t ‚;Þ×¼BÝYåIÈÍGo€2/*NFzÒ“è)2hÆ:ÑRäžõ-QE%QþMQEQŸ¥4ºôÎh'i2R)ßΡ#œdç i$òiŒAéQ»úŠm¸¤é•Ÿ˜ÓM)äýi;T=êrI¤4Ù>ï¹§wÍ2Oº8¦7CL¸i€d€I8«À €b«[¦é ÆqüêÕIl¸RÞ´ûe–õ¨å‰e‡±ª¯©ÃaïW¸4S¥d9è}iòB¯ÏCëY©o,‚¥FyÈéZ Æ›T}}éÝèâ’rǹ¤Š‹$±îh=j)`?q×éRæàŒcƒR8 ¤TŽ»ÔÊ«ÿ:QÖ“Ó# VïPRö¢ŠÉ4ú*XÏËô4êlq¸É*@§GPGáR€BŒ‚*t`È"’—®)){RŽ´´R¯ßž´S¨žŠ-X£ëJ)(iGÞSªí§ú£þñªUvÏýQÿz­Z¯ü Xµÿ\>•bŠ(­ ½EQEQEQEQEQEQE€2NNê|-ûÄ*d²¬1–oÀzÓ%D…áMžè¾Q8_^æ«zQF;zÖT’4­¹ÎMg;´¹ŽM2N‚£©$íQöÿ>•}ãUßïš(ïGzJkt¦ÑG4úºT«vü¨T,p*©c€*ª«9;FOµL¶¤}æÇn*È 
ôŠ‘m”}ï˜Ó–ÙGÞùD-âÇ+ŸrOô§£l^8äSÿÏZ*AŠ¿•H#AÑåLhc`sút¨šÎ&<ØÕŠ=¿#EuE?…#E}äSøU ,]NèØ0àõª®meÁ÷³üéUñ‘œñªòX#d¡*1PIdʧÐò)°Æ"‰Puïõ§ÑEZU ¡G@1V@ p(¢ŒM>($ Zq#œõ¤Ï< CGáHXÑš?h£üõ¢›IGáEQEQÒ˜d§4! u4ÿÂ:šŒ¹>›îi¥Çja“ÐT†ON´Òì{â›íš:ÓKM.ǽûÑEô”Ú(¢Š2(¢‚?:Ný¨Ï½!"Šƒ}hÇ>µ3Ýi†?C횈©íQÈéȨÅ4©v >µ(ˆ÷?¥H(àLò‹uàR‹xˆ¡Ný(¢¦ÆJ˜ AEzÑþy¢Š(£ q‘Büí…9&ާ­N3ÍÊçô©¼•Ç$šaŒ‚¸ä{v§˜œ âœQ‡jªßxýiZ¸¶È9bXŸ|Sü˜öµqL®y$cLÎy$j¢âçß…éÅJÓGJJaµ?j(~5-XRŽ´”£¨¥xQKWí€9$æ¨V¢¨ õrÉs#7 ÅZ³¼cè)h¢Š½W(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š*)åò£ÏsÀ¬ßƬ\K¾SƒÀàU_Ö³.äó$ãîdó%àð8QGùúQUêŽNÔÊ’NƒëQŽŸ…Fßz¡¾h£Û½Iå@RäH ³bœ‰³–ëôéOíE:¨QÒ§Gùæ­RÒÑE¨ëÖŠ(ÿ=h¢Š(¢ŽôœÒgÒ‚qE.qÍ7>œQÇsE0¶i(¢Š)(£ùQGáEQE!`9&˜ÎOAŠBÀR3ïÏ¥<£'ŠaçŠg^h¦–'§žœ Ï^´´vÿ>”´ÚeQIøÑ(¥£<úšJ;Òn¢Š(íGznI¢Š(£ÓúÑESK¨ç?•!“®ëI¸zÒ½?ð£µDY¶i§žM&ÿjiv1`:‘×Ö›¼zf£¢“yö¦™Òžd>Ô›Û±¦Õ¸"Ú¡ÏÞ#éK´€qêicW•° ¦£Xd`76ÑïJm›ß“ïVh«>BcœŸrjǘç$ú“T^&¨ã=A«6ê< {·ZàðF~´Õ]ªtíH±åÜF:zR$!%Èäcò§œLŠJ*BÆ¥ “ëE…€êi¤úÒSŠ^(¦aL.OÈSKŠi‘G½MI‘ê*;Roö¤ó}ª]ê?úÔžg¢þµSEÍ4ÈßJy“=¦w£úÑœÓKg©¤,Iæ§£ÒŠ*ZžŠQÁ¤ö¥ŸéJ>𢧶MÓŽš¿QA”œýãÖ¥­Khü¸€#“É­xü¸€#“É¢Š(©ªZ(¢Š(¢Š(¢Š(¢Š(¢™+”‰˜uú­xÃb¯|æ™3ì…›¾8¦JÛ"fïŽ*4õ4êCÁýk)ºVi¤¢ò(úS))®>\ÔU9éøT¦8æ¢r (ŽSVð¨£Sœž*_Ƥˆ`dõ4ø†Oz(£ëE>ŸEQEQÚŠ(¢Š Ç4™=¿JJBØ¢—>Ô¨¢™IEQEQGÔñQ™:có¤$ BÁzÓËäšasž8¦{ŸÆJab}ª3!>ÔsÍ£ÒzJePM'éHN(¥ÏJN£¥Rh£ñ£üâóõ¢’Š?ÏJ?Ȥ,˜d=°) HXµ'ãM2(÷¨É'©¤¦—ô¨Ì‡°§ öúSO¿Z:ÐzSI&šI=M­”R¨,ÁGSGSIÔûš ¶Ð?´–è¸ÝÉî;SãŒF  ûš}[ŠQ–·ò«q@ª2ÀþTUz(¼b¢6IPIîE;éô¢¥ÀÆ01éRíc•[.àW¦yÔôzRgŠ@ª™ c4ŠŠ„àš^‡Þ›ž(ïE!bih£üóFqÔôõ¨ËÿtfšHi êjLÍ0È;~f£'=MÒþ•žœR’OzJ3IøÓ ¦f—¥&F(£¥&ê)zž))¥”wý)¦OaH[ÞšYGz’Š‹Ìoj ·÷©»Å'˜¾†¥ëJ¼°÷¨ $u'êiU¶¶@€ã#Ž(ŒŒŽ*ïÒŠŽ9VN1ƒéëRUC ƒ‘V•ƒ ƒ‘EZ³ˆ12688ÞªÕë/õ'ýê±h¡®#8æ§µPÓ ŽœÕš(¢µ+BŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š£vs7û£š½Y÷ îGn*½áÄ zš¯vq¦¢ïHzƒúÒÒÏ=*‘¤¢ŠP3L¤  óš…è?Rö¢žip=(¢Š)h¢Š(¢Š:QEôQFqÍ7”ÖoJJ(¢ŠmQëEQE!!zþB‘›:ûÔG®i¥±ÀëLgÛJÇqöô¤£ÑÚ˜Nj#ך?•w¢Ž”QÛÚ“4~4SI¢Š?•ÊçIE¥Rdâ˜\žJBÀSY€§–Q×Âäô¦ 1L,OµF\ŸaGõ u¢ŠJmüâŽÔ~=)3I‘E/½â“õ¢µ¿J±j\õè*8 i~èõ«h‹í*{h˜¸vvÍOD°b83N¢ŽÔ\«TQœ`ÐN)½i Å&hÎhïŠ(íL&ŠÆN}óHÍžÓ)…½*7~ËK’NIɤâ‚i3ÿÖ¦“Qçš\ûþ4˜üè¤.Žþ‚˜O­!8äÒûóH]FyϪ6bGµ6š_Ò˜dôó!=)¹'­!ç­ÂÞ¦˜I'š:õ£¥!çšBÊ;àÓKb›Ÿ¥;>™¤Í3ÌqšO1±Ú“xõ¤ó¤ïÒ©¨··ù››®ãšo™õ¤ó¡©óµ‘Í^S•w¬šÕŒê«ŒX·¥X´rÌÃSªýŸúƨUÛ.øV•‘ÅÀ÷´mN'àÕª(¢´ëBŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢ŠÍ—ýsýMiVl¿ëŸýãUo~âýjµçÜ_­2Š(ªG¥S¦Ó€ãÞ›ßáÈÕÒ (£ÿ×E:–Š>´QE}h¢Š(£üóMÏzRsÅ'¿çM-IGáGj(¦ÑEÑGáE‘LvÇ­ßÂ*:k7aQ»öúÑÛüúRô¤íL¨è£¥”„âŠ4QøP:Šnh¢ÎHH9 ÑïKÇZc?eüé…‹RS zTM&zqA÷ïE¿ )´Ê(ü¨éÍ&}?âŠZCúQGzi4QG½4¸_síL,[Ø{SKMg îiåÀã©ô§B¦y‚ôIqPUËœ÷üþ–æLªzD9–uSО•w(ëE©Ztu£ß­ÓŒÒEç$QGó£óâ™IF{š‰›v?•rqÚ›ýiŒÙàt¨²qÚŠ>i4S ¦QúÑ9Î)…æ¡''&˜Í­5ßoÖžÏéÅ2Š3ϽFO­DI'&ŒûÒgÚŒõçñ4Æ“Ÿ—ó¦3cÚšX/SN,©¦yœð)™ïIšŒ½Fd'§âIêx¤â’ŽÿáM-šfsÖŒþtsëE™4QE!`9ÝH]GsøRdzÒn¸«–>a<)àZ½íUlN`b:n?ÈU®+JÕT@¥G^MiÚªˆ×’iW“W¬Àò‰ÇSTG¯Z¿h€Üæ´,‡ï¿W­ï¾€ÔôQEhÕê(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š+>à;€?ÏZЪ7cgÔf«Þ ¡¨.ÆaÐÔ (ªFôÍ(<jCÓõ ):7ÖŽô¿ÒóúÑGOÃüih QþJ=¨¢Š) )=ûÓG¾)¬xÅ%è¥"’›GJ(¢Ž´QEFíÛó4®ØàTtÖn£wì(¢Š;Ó*: v¤÷¤'QïÖŠ(¦ÑGùéEúSY±Ó­!8Ò“JÌzŸJ‹$ä“HhïL'56ãEh£½%6ŠN(ëÒjB}(¢Ž(ïÏãMgÛÇSM'šB@4¥¶õ'ð¨Ùóì) ÉÉüóIL-šœž(íGt¤ÏZa8¦PMZ±|LTž ñŽãüš«@;H àŽx§G!ŽUCN‰Ìr«úÚǵâ¡·¸YTtWî3×éSt­dutܧ Öª²º†R4‡Ú“ñ óGëHNM/z=ê7=…=Ž¿ó¨zŸZc™#`bŠó¢“ß5Âíî_ÄzÒ¤æܼúZÝä{QÇ5• ü‘€_~¿X’m9·vÁÈ5y/aaËm>„Uä¾—–Ú}«2˜{S?Ï5Q/IÐ`*“ƒÜæ­ö¥IVl•9â‘fI‹2ŒÒ?( žiÔÉ·­GJÜ“IÖ¢'&¡c–4)(ëLãZza8\Ó‚Ùü©¡ƒA œ ã'ó¥ŠIàžÝª¹›gÖ ù™øîi( ñÁÏò¤'hÉÞhÔR_zO|ÒEÖÿU…#¢šNh'Ó]»wõ¨ûQEFNM@ıɣ¸¢ûÑL&’­QIEN´QEQMfÜÐÍ·ëQ}i¬ØàSñÀ äõ9&” ž:ÒTˆ˜ç½4 šAf¨Ç4v§º dS( Ž´ àÒ‚AÈÏ­\Šð` ÏLÖ©g½'n”䙡'iëÔS¢™¢o”ýG­^{ålç’}=ªÆðÈNA’HÇ5nhÔî'·jš‰Îá};Uˆnew;†AôíSv£¿J? ÏJ›µKIQ9ËTئ*`äòi…K` k‚@"`dŽ•H'@\ö©Bà T¨œ{SãOAÅ!`®j3k`NqØSQôÅ9£Fê Ô¦4n¨§ð¦¤i¨§ã§sF(§ª€8JpFv£ü(Ç4S•r}©Ýi@ÉÅ9¿­>ŽÔw©@ÀÅL«´bŠ;ÑJOJ)h´v©!O2P;u5*)$(êx§*–`£©5rÝ6@£¹äÔ´QZª¡(è+MT*…ÅQE--QEQEQEQEQEQEQEQU®`'2.2O½Y¢™$bT*ÔÙ#!SYTU¹­rwG×ûµW8ôõ¬é"h› >‡Ö³ä‰¢l0úZJ£¥ö¨ÈȦS{QøÑÒŠe'j)¬;Ž :ŠÈ ŒÔ[ÛüŠ ·®? 
W^sùÓ?FIf¡%”ã&ƒÍ'CKÖŽ´†š}hëE^´Qž(¤¢Š?ýtÊ(¢ŠFàý(íA¨Üå¿JoJ?ª*€œœÑGj?ÆŽ”RPi?:J)‡­TNAn;TŒp ¨iŽ{TrÔPJ:ÑïM¨é9ŠZ>´›h¤¢—üô¢“m”Œp ïÛÞœGçLdÜyaHAÇ‘³Ž5ã@°æ¥Žæœ^€ hŒ÷⣞üSV¿ÊŒÒRQF0(¦ÑGÒšÿw§Zu!Ž”‡i"¡ïGoóéOd=©½ G‚:Ô$pi(£­¥¥%%zÑÜÓ(¦HpFjWRÄqJ¨}ñM*Ktâ£d,ç°¦óÖ”ÇèJ}Rí§ykéQ#°£cg¥KE&ÁIåz‹aô£a©hüèØ=èòÇ©¨¶îѰýKþMlycÞ£{šQúâŸõ¢—hô¥£¶i¡íNÇ>ô¥ç z PíŠ})qG|Òí¥þT´QGzp¢E¡rzqGZ)çÍ=cèOLt§*€)ÜSÂúÔ‹sùQoÊŠ(§T”qKZ õ¥½w£µZ=»Ó¨£Û½J‹ŽOZ1ÏSNÅ=Wžµ*&MQþMçëN§ÑÛüóJM&}ýiÀœRš;е:428P:õ>‚„¤8QžÄö­âXÆÉõïV`€ÊÙ< êjh 2ž~´¨‹íQÇó§QEh Z0QE-QEQEQEQEQEQEQEQEQEQEQEQEÄ^jq÷‡J r ÏÔª·0g÷ˆ9ïþ5Vê ÃÌQÈëïU®aÜ<Å޾õS¥ÒŠ*•S¤#½%:Œr5‡zLRzQGò£ëøÓh£þU&9Ô”~„f‘”0÷¨?;~÷Nâ™ß¥0‚*84õèõÅw¤#4””Rÿ…&;õ¦‘ERç–Â’Œ{Tm¥0ðzTø¤ Ȧ”¥1£ôâ¡ÅJ >Ýé¥ê)»HíL(õ7½áG#¨ÇáG­%6Šüè õ¢ŽÔw¤À¥êh4`QF(ÆizRŸÒŒJ(ǵ¢ƒÖŒJ(Å£ü(ïE¢Š)zQE'ùÍÏjxŒõéK‚z P¤ôξô Ò¤òÀíšv)BzÓ„g½0F^M>Š?ÏÖž P: (¢Œ*Í-¥Å:QŽ‚œ(£ÒŠ?”x–ŠOóŠ•o'“B¦Þiاªã“Ö¥DÇ&Š( ƒïWŤf5ÜlrA©¢æÎÌqëSÇK¸ãÖ¨}hêj÷Ø£þóþbœ¶‘)9ÉúÔ¢ÆlÿçO’ç°üjˆ^@O¥XŽÑ›—ùG§z¶¨¨>UV"²UÁsŸaÒ¦KE¹Ï°éMDT\(À§QEZ€0*È EQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEVkRX´`»UH*Jž¿JÔ¦¼k"á†j´¶ŠÙd;O§j¯-ª±,‡Óµf}húŠ´ÖÜný FmeA÷ªµ¼«Õ úsUš Wªôæ #šLU‘i!áO½J¶h,K{P-%sÂãëÅ m#^*šFò‘sMþU¬ªª0 ì+$‚ ¨íIqoä*s’sšYáòBó’sš?šk oñ§QÞ #5ŒP•#¯JJŸëL)èqL+éQ´dt¨ûQEÚe¤Å-„E'áE.9úRcüⓊ(ÿ?J:QÅ%cüšnÅö§Qü¨ ´…AêFcôýhòϨ©(ü?JnÑIå­E´úRm>Ÿ¥MÅQ°S|¡ëPàŽÇò£Óô©¨£`õ£Ê÷¨vŸOÒŒCùTÔqFÁëG”=j ‡4¾Yã¥IGáFÁK寽0GϽ8(¥þTcÚ—v§ ¢òhÿ&óÖ––Š(ëéKŽzÒâŠJ1ëKŠ)BÑEQßñ¥¢”)*N2_AIZV1jKr$È?N•Yàò©çÐúŠí™!ŽNÌ9ö©ÚÕ–$“³ Ÿj…PžM<£Š^”S‘T/ÖŠ(èiiÕ%ºo™Glç§jÓ¨m¡1Gó}ãÖ¦­KHŒPüßy¹5¡mŽ.zžMQEOSQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQY³®ÙŸ=sŸÎ´ª¥â «ã=V½|9ÂsP]¦è²?„Õ:?•`úŠSÁ¬â1T)(¢ŠJ)Ö˜c#¥IE!Pi¬¡ºÔΊ˜€zŒÓ xèi¥H¦ÈéÍ2ƒÖ‚1Ô~4SižÔQEQGô÷£Š)6Š)(¥ïïEh¤úfŠ^¦Ž½©6š)3E.9õ£Q´ÑIÞŠZ£i¢“­´q×m¢’ŒRôh¥Ú(£çQEQGoÂŽ´}ih¢—¥IíOT©¥ M8)=©€RÅ™"¯súRŠ¿k w0ùò©í­üÙ@=$ÔööÂI<É©À  €6HÖUà û÷ú+\¨+´JÔ* àJÎ’ÖHϲúЇ“ÖµéU†Aâª=‚’J>ßb3UžÉOÜl{ÊUgl($ŸJ½¨ ü¸éè*À E>4ˆîc¹¾œ |V«ÜÇqúQEUšžŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¦ºB§¡§QH@ ƒÐÐ@ ƒÐÖc¡Šž£Ò›ý+BhaÏ ê*“Ã"¹xîk:{v§cYó@ѱÀÊö>•0))ꥎ’}¹«ZrAï·üj4äl(ü})‰HÀ(üMWh™#W8ÃtÅ3V¤Ñ‰b+ß·Ö³1ŽÁ§\Áä0Ç*E>x|¦}Ò:ÒzÑÅT ¦”ÿ­N¢‚3Ö“õÃõ晵½:ÔÔSv iŒQPQSjnÁÛ?…&ÃM1žÆ¢£ÒŸåûþ”ýÿJM§Ò“czS(§l>”?Ý£Ò“kzm)Û~í§ëF¡£iô4Óõ£ô§loO‡Óõ£ÐÑ´úoéGoóéOò϶hòñÐÑ´úRìoJe'—ëŸLSÒ,° ¹'¦)BqJ"bqŠ€ öÍ8#jãhøu"›Þ”Ç´á²íNò¶œ6r;S<±ëšpU€)h¥Ôà tv£šP ($žÕnNCIÿ|Ô±Bó(ã¹ì*Hây}M6ÞØ±H8ôõ«´QZp°¦ÕüO­hEĸ_ÄúÑET”ú(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢ª]Óæ¡«tu4ÉbÆTþÆ™,bD* ÉÇ4•<ðùM‘÷J‹«-â(ÅOVs!F*G4Ú)qéš0zÓ"›IE¥”QEäQEQEQE}*«; ’h'­dàuö¥HÚF àg­!6AiAŠ08ÝÜŠy†zº¶Œø~ãU¥³Ê[ zŒVOj+I­âc’ƒðâ“ì°ÿsõ4Ãa.xdýi>Ç&xe¬êUVc…RO°­·…NBÇš”À§.žßÄà}*Ù7ñ8J£›±Ì‡hôš·I g­>е¼pò£ŸSÖ¬GE÷G>¦€Ã= BÖ‘1Î ý OE=ãI>úƒõæE¼ ýj£XŒå\î3NQ€2Xþ5fŠŒZÂ|±Lñ‚š‘¤u@§QEJQ€‚¤(Àj(¢ŠZZ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¤e ¥Xd¡4&#ê¾µ¡H@#=ê)¡Y—чCQÍ•}t5—ÞJµ-§ÇŸ÷j±N ú¡$OÃǵQxÚ3†÷¤£Š?Îi˜Í2ξ´´“I´zQŠL¿&?Î)Õ4vï'8Ú¾¤R¬EÎiUÛ 5 £±Ú “è+B+êÇ©§GÆ0£¾ôú½oj!ù›—þUv aÜÜ·ò¢Š(«5=QEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQMtW` :ŠB##Þ‚#"«5š“ò±Ýi†ÍóÃ)rЉ­boáÇÐÔMm>•Kìrz¯çO[1üOõÀ«TP-bî“õ4‚Ú!ü9úš BóêjJ(©UUF=ªUP£ ÔQE´´QEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQ_ÿÙ endstream endobj 15 0 obj <>stream ÿØÿîAdobedÿÛC    $.' 
",#(7),01444'9=82<.342ÿÀgR"GBÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÚ RGB?÷Ê(¢½ú½òŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š­5Ð^# žçÒ™$‹åŽ)¯"ƹcаÌeˆ¢k¨—¹?ATI,rı¤ª¯zäüª÷檵㺠z¾·Q1ÆH9Ç"¥0È QYtä‘£åXŠõùÔê(KÂ>úƒô­:*nVCµ¸lþ§«i"È»”äU¤u‘r§"Š(¢N¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢™+ùq–ü© ¤ž‚ “ÐT7SmZõ=MS¥'q$óžM%fK!•Ë…gK!•˽¨¢çE2™EQE‚ëWíæóS ~aÖ¨S£s»ƒRÁ)ŠLÿ ê*He1IŸá<ZtR+Pᥭ r2:Vˆ9QEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEt4Ï6?ùèŸ÷Ф,R! u S覉ŽÔŸ@iÔB ƒÐ报w.dòùÂÿ:ºH$àVS¶çfõ$õª×ÒmŒ(êÇôª÷¶0 ýãúQÆhéMú3Š Ö©æÞŽß…&E/õ§ (úQEQE1Š(¢Š·fùR™éÈ«Um™IýkF´-t8=Tâ¯Z¾èpz¯QE=OEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQE€2NQQ¼ñG÷ÜéÞ¨Ü_3’‘«ýîõOßùŠ£6¢íˆÇsÒ©M¨;bPØîzV¸¼€}L¬eH ÷…Ú¤Šw…²‡ê;dz“nýâ z­2=E·~ñ=V¶¨¨ gMËÁG¡¦\]¬Qó>:vZ¼fbó ]3F±ù…†ÏZ–YR%Ü烹úV|·ò>B€÷ïùÕgv‘™˜’ÄóIÒ³'¾’C„%Û©¬é¯dá Eöêiæfc–;¸îiCqQv¢«ïlòsõªâFÎsŸ­N¡æ¤IdXàq‚j¥81ÿSNYJœ‚Aõž“9ä{ŠÐk°Ð2‘‡#ƪcüâ— Î9¥ÇjšGyp\䊙äiH,r@Å7Z\}?P1Ïz)}i”›iGQÅ4š8Z^”î:œRg·ZOÆŠ7ÔŸ….Oÿ^–›E cÞŠwþµi,ªcWbFzÖfqëKÖ§‚àÅ»9õ©a˜Å»9­´ÅýÿÐÓ–hØd8üx¬Ú>•(½“<ªÔ‚ñóÊ®+VŠÎŽgX;E[ŠádàðÞžµb+”àü­èjxîRCº} METÕ5QEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQT5ºB¿VÇéþ~•rYQ4‡ ˆÌY‹7$œš¥¨Ï²?)~óõöNþ}‘ˆÇÞn¿JOÆŠ(ý­dÖmu4Ÿ­ÊŠ*Hfhr`’1Í5ݤrìrO$šmíí°&ã´ã4»›`\ ç¿…õ¤ü?J=©2i)h©!Ì? — •c,2x§ˆ‰äñUúRÆ2ãŒÔ<Ž 6/¾}@£aWéš6ê J>´QœSsžÜT¤â¦Í)?'åGÒŠibi(¢Š?Ò’Š(£Šo˜×éFqÖ:)Ôu¨Ì™R9ä÷¦ïÓ ÷5-”f Ït HAéIæú ³Kþ4Õ9–¦©{QEQE\·¸Ý„½Øú՚ʫöóy©Î7µvÖ}ß»sÏcëW-§Ýò1ç±õ©¨¢ŠµVh¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢šò$k¹Ø(÷ªrê #Lû·øTRÜEùÜéÞ£’xá;};Õê+¯.`È@>€ bßMvÈ[ëÍV:¤ ýÇÇáUŽ¥<«cð­º+!5YT€ê¬;ö?çð­.á¸û†þëpjho`œíWÃz7¦†ò ÎðÞÁ©è¢Š±SÕ-Jo.N~súò++Ìö«š³æhÓŒ*îüÏÿZ³ûb°µ K^¸…À‰+Çð¸¤2³úÐdÿgõ¨é{Õ]ÍëU··¯éOó§ëG˜;ƒQÒ€Ogè)w5Û×?…H$JPAE(¶c÷˜Ö†µaÐäúSöKŒì©ËŒ”Íø“sdôÕb ’Áj䈆s“Í:¾LÓ­:¾LéRQÞëEZ«4~tÜ|ù˜âƒÍ¦±¥!çµhâŠ)´Q×ñ¢‚@äÔm&G! 
R3ëO,©üª3&zb›Ôç4SKíQ´„ô⎴QGÚe f“"Š(¤Î?ýtžhÝEOù=…>¢„ã8õÍKÛùUˆÎPTéÊ =袊u:Š|OåÈgþ”ÊjPJGQJ ê+P@#¡¥ªöo˜ög•þUb´ã25oZÒüÈÃzÑESéÔQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQETW­¼{'²ç­:i’ËÈpëX3Ü<Ò'žÞÕRöð[&ÕæFéíïUo/ºmÈz{{Ô²Ü4­ºFÏ=; €Éè1L¥éX­#;$’z“Xí+;IÉïHNzšÒŽÔS*:(© è}èâŒÑœQZÖ7Þf!˜üÿÂÇø¿úõ¡\ÏN{Ö¾Ÿyç/•#~ðtÏqþ5«§ßï" OÍü,{ûV­…ñr!”üßÂÇ¿µSÔ˜µëxPöã5S½[ÔЭã1þ þ_Ò©þµvOÚæÏ÷ÏóªYûT¹ë¼ÒäP½'­Ö¡ÝíPÓ” I«‘D±öZe´`.óßùTõrÞ 9žžÕj¶®ò9==¨ÿ (£µOSÓ$dn=¥9FÕUôRÐi6Û±Í&Ð lsëE4œÒ“šJBsA¢ÿUgÿ×M¢ŽôÆ|p:Ò3ôÇôÊioJß° œòy£ëEãµ2££ëGçš3Å%!>”QøÑGùSsF(âŠB@äÒ^ÔZBÀw:ŠŒÈM&æ'ü)»Å4È¿ZµùOéý*( ”wÌzŒÔµb3ò*ÌdŸü©)£ŠuJE?­QøÑKEIì•Nx'kF²«J&ß·<ŽõrÉþò~"­Ù¿ Ÿˆ§ÑEn­QEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQU¯®<‹sƒó· ÏëM’EŠ6‘º(Í6I(ÙÛ¢ŒÖn¡r'› ~DÈþõS×­Z+œ–FšV‘¾ó×=,,­#I¢ŽôIïQ“Še.})?J;ÑI’h¢ƒMfÜúS ‡·ÒÀSYÀ©*TÄêèØaÐÔ'ÔÒzÒ 9ÐÒyÄ@ÁkJþhæÜ÷R¾˜?ýz¢dïÅ'˜|£·o§ùô¦SçœÍ)“Œ°úÒÏpe¿`3õ©<Ïnþ´±±wT©Ö¢­Y²\ÌN ù‘M‹/*®zšH‹<ª¹àšÐAÒ§áGOŠÕéZ”QÞŠ:ÑE©¹É¥>Ô©¬{RQô¢Š ÷¦ÑA8¨™²}¨vÜ}©´ÆlôéQ;dàt£ÚŠ;RfšN)”QEÒsE•©‚õ9ö¨™‹SKMg îjFp¹Ç4ÂìÀ mÂÄÔeÉöúQþzÑGj %6Žô~Tgš2i2( {u«p¹‘pqŸj§RÛœJ;äOŠM®cÅI•ǃVéG”~¥[®S¨£§^´T€äRÐsøÕÛC˜HÎpzzU*µfß3ŽüšÕ±p¾üTÖÇ~*ÝQZ5~Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢«Ïy<½éõªRÞË!8mŠ{ñªó^C#;˜v·qDH'sz Õ¢°ÚC–bÇÜÓGµV:¨ˆsÿÿëUs©sÄGïVý†³JªÈàÁ¸«)©HƒçPÃëƒRG©Âß|÷ëOMB#÷Á_Ö´è¨ »†à|†þéàÔõmdPÈÁ”÷­#¬Š0=ʼn¨Ì%º*B|¸ÉëßüûVÓ0E,Ç “\Û1'$äŸ^j†¯.Ø’0~ñÉü*Ž«)X’1üG'ð¤¤ÍZ+šÉ¢Šk8w¨É,y¦ÞšÎÜÔ…Àíšc1oþ°¦ûÑM,MFÎO~( õ¤&Ž´ÒE6ƒŠ\úR}(¤Üh¥ü?:LûÑE4RäúÕÝ=×n9 ~U^Þ<˜ä(ûǸ­£X”*8<úÕ»(XÈ%#åǽZ³…‹‰HùFqOü}( QZ~ŽôÞ¼ÒEŠ(¦RQQ»dãµ,‡°ïQŠc£vì(âŽÔwÍ'_𦊎ŒþTQG¾i´PxëQ³ú~tŒÛ¸)¼ú©ŒÞ•¿aÓÖŽ¹æŠ;ûÑMÍGG ¤Íw¦–¢ŒÑüé¬àzar}…4°úÓKïRž™Å! w=OZ>”ÝþÔß7Ú¥.£½:7D=­AR@»§O®iQ‰uô#±‘@Ç$V~4V•jQÓð§SiG•N –§³ KŒõ^*óô©­¿ãáåSBq:¼*HN&O¨«ôQEjV•QEQEQEQEQEQEQEQEQEQEQEQEQEQEÙ$X£.Ý!!A$à¦$ক™QK1ÀMeÜ^<¹UùSõ?ZŽyÚy77téQv¬»«Ö””*ž½ÍfÜÞ4¹HÉTõîh=h£§áIЦMT£ùQEîi´Pp5 ;ˆ4¬Ùéқژ͞*'lœ”Uû]E„˜–OOJtS½»ï°{ŽÆ–¤÷!Áî;ݾ-‹²°ù†¾úÕ…ßšÌæ$¶P6@ôÿ9¨ÉÚ2i×·_j‘\ .1ïR^\ý¦E|mqŠ üDÌXñÒ‘›q¤ã<Õ2ÙéÒ©3çÒŽÔQ×ëIL'ÊZJ(ÇjLÑEZ>´RQEPHù ÑGÒ” œ’Oi…Ö¥µ!îã\¹ü¿ýT©‡‘TI„Ã:®y$ ÔŠ1 J‡~´ú?•´ªBŽ€`VÀ@p(¢Š8––ÿõé(ÿ&­0œšOz)àf—Üþu ž”Ö8ô×m¢“©ç“G¥ƒQ“PÒhëEÃÍ:Ô%‹éNvϧze1›<•“GZ=è>ô”ÂqL¢ŽübƒÿUFd'À¦3c­!`½iÌÛ~µbÝzzR}hè)…³íQ3“ì=(úÑüé;Qži¥©´}QI“EÕ‹d;‹úp>µ_ð«ñ"Lc ©m“t¹?ÃÍKn»¤ÉíÍXS¹Fi{ÔHpG¥KøÖŠœzÑFܾôQGÿ®ŒRÓ©ÙÏ×¥Mmÿ øÔ¯Ö¬Zf$Ž@ÍMoóLƒý¡RCÌÉõzŠ(­ZÒ¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š dœ Ê»¸3I´‘Oþuföã`òÔóüU›õþuq“ä©à}êϾ¸Éò”ñüT ;QG¨UCGùQM'4w¢£öýiìp=MCÞ˜çµ2Fã(éGoŠeEGn)(úÐ}é¤ÑA jrsJç&›Q³géQ;dã°¢ŒñI×ÚL&™ßÚŠ(¢›E~~TÖ}§Íâ:Ó©¥Àuö¨ÉÝɤ¦ôÃ' üiÅÉ>ŸJoZ(4Ü“L$žI¢®éŸñðßî…R«ºg þçõ©­?ãî?­KiÿqýkRŠ(íøVÝmÑH{zÒšm5ºQÒÿ]ç4QÒ›ILö¨èêsëG¿õ¨Éɨî9 ÷4žô¤ÒcŠa4”t¦9ì?:sPÓö¦HØ¢çGj 0œ Š“¯ò£=ûQÇ\qQ3EFÍ­#6ÑC6ìzv¦þRw¨ÉîjrsÞŠ:QE39¢Š?É¢‚@ÑESCØ~tÝì{ÓKŠi‘Aîj_z¿ú¤ú É=süë]bªç8U›#–sŽ1SÙ¶æ~0¥þu,m•Õoœ§*Ꜻ‡ RúQE%MGáW,†K7p1TÿÏ5~Íq 8êzã­X³¹_lššÔfqíÍX¢Š+N´(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¦K Š2ç·J}Q½“2켚Šâ_*Ýú­G<žTE»ôNf.üõÏ'ZSɤëøÖ);‰5ŽçsëE!¥=3IÚšÔÚ(£óÿ F;TÓOƒÀÍFç-ì8¦ö qEEÖ ''>´w¤ëJy¤¤cÚõ£Þ™#ëNcš‡?çÖ£cÚ™#`bqGáG­ ¨É¨¨QGåM¢ŽÿäÑEDížJBqHÍ´R´žŸ3õ¢Ž¾ôÂIëP–,rh£½¿ ZJNÔcÚƒÖ“>ô™ŽÔ½½ªÖžán€þòŸôª)ÈÅ$W%X|Rùs#ö4øŸË•Ђk þt‘HŒ)È# ÒûÖø9+x€GJß”õ¤¦“IE2CÆ=iõŸ¥1ŽÖšç õ¦Ð}hïAüê3À¨i;ÑïE#{ÓO‚qQ¹Ë{S~¢Œ *#P“š &(ëMcÇSÅ1ziÀ¦ÈÙã?Zf>”{Š>•õ %ŽM'Z(£üõ¦RQAâMDÄž¼RÇ֚ʹ{Ó™ÉéùÓ¿Z1íE0’zÔD–äÒô¤£·ùô¢’’î‘WžOnµ¬+>ÍwMŸAÚ¯Ž¢®ÙŒF[ÔÕû%Äe½M/J(¢­Õš•T{qN¨£?0Ïz–¤S‘S!ÊÑþsOI¤Œa_vê2Šr±S•$cO©È$cW#»9ǵhà ‚=EeÖ¬[Ïå§î“ùUÛ{£²ƒßÒ¬ÃrwmäzúUê(¢®ÕÊ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¤f ¥@3Xò±$“ÔžkFñöÂ8,JÌ“¨Ò³µ 2áaT/äÉÚ;S;QEªBõ£ü(ïš)§­TnyÕ!8ô¨?1Ïjd‡ŒzÑÒJ(¦TTŸÌQš)®p¾¹¦ÔÐN1Î[éMè(£·¡¨‰ïP’M%QL¤£ÚŠ)¬ØéÔÐNh'4Ù<~f™ß¥¨95;ŽM¨£·ùô£€sIIG™ôÍÈ£ÖšM~T±[M(܈Hõè?úõcû2l}øÿ3þ"[Í ÊÄÄzâ¤Kyr±±¸ª_•jK „' Žªj±I`Ž ö¦¼RFpèËõ×ã8t+õ¡§Ý(Qœsò“ü«Gð52L؉ ŸÒµíc’+p²Onø•¥§Ï#G岪8éZVÈÈ•‡íô©½è¢j·W)áI5 I!~u1ºÔR›”…%)¤¨Ú™G¿ëQÈr@ôéO'·­CþM1Ïjd‡ŒzÑô 
ÑIQ“ÅEGÖ¡c“š’C…ÅGƒØ8Î3ŠaQÈrqéIüé8¢ŠˆšŽŠ;dÑøTr7ji8ŒÛFi·þsMíAïEGP“’I¥ïIÚŽE'SëHN)3Kž8£?ϱ¤ÿ±kw àíîiQZG ;Ó£F‘ÂŽ¦¬ÚÆc‡,0Ìrxüªz(­4@ˆtµ QÐ ^Ô½)£®)jAÒÞ€qƒÚ§Ö ©På=QRDy"ÚŠ(§Ô”ªqô¥éMéÍ;õ§-(«Ö²oiêµ=fÂÛ%Víž~•¥ZV²o‹ªñWí¤ßUâŠ(¢§©¨¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š*…ã›oeª'ï­O#‘˜w$ÕqX—¾VoRkå÷ÈO©&ŽÔ~tö ñQ• %…~Ê)’*?j|‡‘L¨Û–¨_—4v¤=E/jO¥4ô¦ÑøÔrŸ\T™Ç>•äôëQ¹ãÉ=hüé:ÒÒvTTQEÚ(þu Ç5#Ÿ—ùÓÒÊáÀ;õ'ýzM!ÚŠ[×4ÆW‘¶¢–Ç\ ÕåE]þÌ›®èÿ3þ‡M˜†BG`M;ì“ÿÏ&¥û,ÿóɪŸJOÂ¥–ÞhGÏ޽ª*…Õíe*}ÅDÊÊpÊTúŠ9üjõ˜“K‚¿Â¾¿_j©fYQñ=q[à` 8•kN¶Y\Èã*½©«vë+™eW õ4€1ÒŠ=èþ•¯Z´~…C=¬S•2GqÜz}*n)2)®¨êUÀ#ÐÓ]×k(+èi5 £ ;S±Ÿ­7&Œ}hÈqÒ”` À¢Š(ÿm‡æ¦ÿ*¯š¿½FAÏ=êv}h<ýi(4SSM¦HxÇ­Fiòu3ñ¨˜üÕ ýóFi(4iM¨œå³éS*n±~¹VȪüþu¡nmÀlr9¢óé´Ò[¯™+猩¬ê)Ò!ÊŸÿ]6«A õ R1ÀÍDc€ 'Ò!ç¯J–Ò=Ó=ŸÇµ5TÉ(AÜâ£Á’Pƒ×Ñ4RmaŽ:Ôu­W% àÓ50¶|u^}éÑBrûv«4Ø`Ü2àóÐQ¸#sƒÏJ‚;`\çžÕ8ࣧJ1EYHÖ1…÷©Ñ}ÐN1Š)¨rƒéN©úÕ‘ÈŠZLcšQNZZZ|g’)”ä8aèzÓ”àҩíKE¨©*z)Ã¥7­8t¥^´ +N6ß¶s‘Ífúªý«n€qÐâ®Y6%+ê*Í›bB¾¢¦¢Š*õ]¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š)²±» Nª×R§–цË™3ˆâf'sL‘ÂFÄœqÅPs…5 I'Aõ¨ÿÏéX÷«CóQíAê(=h4ÃÒ™ëIG¿½Sh¨ŸïS{~­÷Ö’¢=M@ßxÑÞRõ¤¦µ%#/½CÚ¤“ úÔuýêŠCóP%µu¦Qßè}jŽ«Móµ={š-müæ%²zûûV•Zµ´ó÷û½‡­Z¶µó˜ÿw°õ¨ã‚8†yèOz’Z=zV‚¨Q…AW•BŒ(ÂŽëE--^{(¦ÉÆ×þòÿQV=éô¦IH»dPG½5ãI×PG½e[Àö÷ñ FÞ¸nÇÿUjñéLt(t ‚;Þ×¼BÝYåIÈÍGo€2/*NFzÒ“è)2hÆ:ÑRäžõ-QE%QþMQEQŸ¥4ºôÎh'i2R)ßΡ#œdç i$òiŒAéQ»úŠm¸¤é•Ÿ˜ÓM)äýi;T=êrI¤4Ù>ï¹§wÍ2Oº8¦7CL¸i€d€I8«À €b«[¦é ÆqüêÕIl¸RÞ´ûe–õ¨å‰e‡±ª¯©ÃaïW¸4S¥d9è}iòB¯ÏCëY©o,‚¥FyÈéZ Æ›T}}éÝèâ’rǹ¤Š‹$±îh=j)`?q×éRæàŒcƒR8 ¤TŽ»ÔÊ«ÿ:QÖ“Ó# VïPRö¢ŠÉ4ú*XÏËô4êlq¸É*@§GPGáR€BŒ‚*t`È"’—®)){RŽ´´R¯ßž´S¨žŠ-X£ëJ)(iGÞSªí§ú£þñªUvÏýQÿz­Z¯ü Xµÿ\>•bŠ(­ ½EQEQEQEQEQEQE€2NNê|-ûÄ*d²¬1–oÀzÓ%D…áMžè¾Q8_^æ«zQF;zÖT’4­¹ÎMg;´¹ŽM2N‚£©$íQöÿ>•}ãUßïš(ïGzJkt¦ÑG4úºT«vü¨T,p*©c€*ª«9;FOµL¶¤}æÇn*È ôŠ‘m”}ï˜Ó–ÙGÞùD-âÇ+ŸrOô§£l^8äSÿÏZ*AŠ¿•H#AÑåLhc`sút¨šÎ&<ØÕŠ=¿#EuE?…#E}äSøU ,]NèØ0àõª®meÁ÷³üéUñ‘œñªòX#d¡*1PIdʧÐò)°Æ"‰Puïõ§ÑEZU ¡G@1V@ p(¢ŒM>($ Zq#œõ¤Ï< CGáHXÑš?h£üõ¢›IGáEQEQÒ˜d§4! u4ÿÂ:šŒ¹>›îi¥Çja“ÐT†ON´Òì{â›íš:ÓKM.ǽûÑEô”Ú(¢Š2(¢‚?:Ný¨Ï½!"Šƒ}hÇ>µ3Ýi†?C횈©íQÈéȨÅ4©v >µ(ˆ÷?¥H(àLò‹uàR‹xˆ¡Ný(¢¦ÆJ˜ AEzÑþy¢Š(£ q‘Büí…9&ާ­N3ÍÊçô©¼•Ç$šaŒ‚¸ä{v§˜œ âœQ‡jªßxýiZ¸¶È9bXŸ|Sü˜öµqL®y$cLÎy$j¢âçß…éÅJÓGJJaµ?j(~5-XRŽ´”£¨¥xQKWí€9$æ¨V¢¨ õrÉs#7 ÅZ³¼cè)h¢Š½W(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š*)åò£ÏsÀ¬ßƬ\K¾SƒÀàU_Ö³.äó$ãîdó%àð8QGùúQUêŽNÔÊ’NƒëQŽŸ…Fßz¡¾h£Û½Iå@RäH ³bœ‰³–ëôéOíE:¨QÒ§Gùæ­RÒÑE¨ëÖŠ(ÿ=h¢Š(¢ŽôœÒgÒ‚qE.qÍ7>œQÇsE0¶i(¢Š)(£ùQGáEQE!`9&˜ÎOAŠBÀR3ïÏ¥<£'ŠaçŠg^h¦–'§žœ Ï^´´vÿ>”´ÚeQIøÑ(¥£<úšJ;Òn¢Š(íGznI¢Š(£ÓúÑESK¨ç?•!“®ëI¸zÒ½?ð£µDY¶i§žM&ÿjiv1`:‘×Ö›¼zf£¢“yö¦™Òžd>Ô›Û±¦Õ¸"Ú¡ÏÞ#éK´€qêicW•° ¦£Xd`76ÑïJm›ß“ïVh«>BcœŸrjǘç$ú“T^&¨ã=A«6ê< {·ZàðF~´Õ]ªtíH±åÜF:zR$!%Èäcò§œLŠJ*BÆ¥ “ëE…€êi¤úÒSŠ^(¦aL.OÈSKŠi‘G½MI‘ê*;Roö¤ó}ª]ê?úÔžg¢þµSEÍ4ÈßJy“=¦w£úÑœÓKg©¤,Iæ§£ÒŠ*ZžŠQÁ¤ö¥ŸéJ>𢧶MÓŽš¿QA”œýãÖ¥­Khü¸€#“É­xü¸€#“É¢Š(©ªZ(¢Š(¢Š(¢Š(¢Š(¢™+”‰˜uú­xÃb¯|æ™3ì…›¾8¦JÛ"fïŽ*4õ4êCÁýk)ºVi¤¢ò(úS))®>\ÔU9éøT¦8æ¢r (ŽSVð¨£Sœž*_Ƥˆ`dõ4ø†Oz(£ëE>ŸEQEQÚŠ(¢Š Ç4™=¿JJBØ¢—>Ô¨¢™IEQEQGÔñQ™:có¤$ BÁzÓËäšasž8¦{ŸÆJab}ª3!>ÔsÍ£ÒzJePM'éHN(¥ÏJN£¥Rh£ñ£üâóõ¢’Š?ÏJ?Ȥ,˜d=°) HXµ'ãM2(÷¨É'©¤¦—ô¨Ì‡°§ öúSO¿Z:ÐzSI&šI=M­”R¨,ÁGSGSIÔûš ¶Ð?´–è¸ÝÉî;SãŒF  ûš}[ŠQ–·ò«q@ª2ÀþTUz(¼b¢6IPIîE;éô¢¥ÀÆ01éRíc•[.àW¦yÔôzRgŠ@ª™ c4ŠŠ„àš^‡Þ›ž(ïE!bih£üóFqÔôõ¨ËÿtfšHi êjLÍ0È;~f£'=MÒþ•žœR’OzJ3IøÓ ¦f—¥&F(£¥&ê)zž))¥”wý)¦OaH[ÞšYGz’Š‹Ìoj ·÷©»Å'˜¾†¥ëJ¼°÷¨ $u'êiU¶¶@€ã#Ž(ŒŒŽ*ïÒŠŽ9VN1ƒéëRUC ƒ‘V•ƒ ƒ‘EZ³ˆ12688ÞªÕë/õ'ýê±h¡®#8æ§µPÓ ŽœÕš(¢µ+BŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š£vs7û£š½Y÷ îGn*½áÄ zš¯vq¦¢ïHzƒúÒÒÏ=*‘¤¢ŠP3L¤  óš…è?Rö¢žip=(¢Š)h¢Š(¢Š:QEôQFqÍ7”ÖoJJ(¢ŠmQëEQE!!zþB‘›:ûÔG®i¥±ÀëLgÛJÇqöô¤£ÑÚ˜Nj#ך?•w¢Ž”QÛÚ“4~4SI¢Š?•ÊçIE¥Rdâ˜\žJBÀSY€§–Q×Âäô¦ 1L,OµF\ŸaGõ u¢ŠJmüâŽÔ~=)3I‘E/½â“õ¢µ¿J±j\õè*8 i~èõ«h‹í*{h˜¸vvÍOD°b83N¢ŽÔ\«TQœ`ÐN)½i 
Å&hÎhïŠ(íL&ŠÆN}óHÍžÓ)…½*7~ËK’NIɤâ‚i3ÿÖ¦“Qçš\ûþ4˜üè¤.Žþ‚˜O­!8äÒûóH]FyϪ6bGµ6š_Ò˜dôó!=)¹'­!ç­ÂÞ¦˜I'š:õ£¥!çšBÊ;àÓKb›Ÿ¥;>™¤Í3ÌqšO1±Ú“xõ¤ó¤ïÒ©¨··ù››®ãšo™õ¤ó¡©óµ‘Í^S•w¬šÕŒê«ŒX·¥X´rÌÃSªýŸúƨUÛ.øV•‘ÅÀ÷´mN'àÕª(¢´ëBŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢ŠÍ—ýsýMiVl¿ëŸýãUo~âýjµçÜ_­2Š(ªG¥S¦Ó€ãÞ›ßáÈÕÒ (£ÿ×E:–Š>´QE}h¢Š(£üóMÏzRsÅ'¿çM-IGáGj(¦ÑEÑGáE‘LvÇ­ßÂ*:k7aQ»öúÑÛüúRô¤íL¨è£¥”„âŠ4QøP:Šnh¢ÎHH9 ÑïKÇZc?eüé…‹RS zTM&zqA÷ïE¿ )´Ê(ü¨éÍ&}?âŠZCúQGzi4QG½4¸_síL,[Ø{SKMg îiåÀã©ô§B¦y‚ôIqPUËœ÷üþ–æLªzD9–uSО•w(ëE©Ztu£ß­ÓŒÒEç$QGó£óâ™IF{š‰›v?•rqÚ›ýiŒÙàt¨²qÚŠ>i4S ¦QúÑ9Î)…æ¡''&˜Í­5ßoÖžÏéÅ2Š3ϽFO­DI'&ŒûÒgÚŒõçñ4Æ“Ÿ—ó¦3cÚšX/SN,©¦yœð)™ïIšŒ½Fd'§âIêx¤â’ŽÿáM-šfsÖŒþtsëE™4QE!`9ÝH]GsøRdzÒn¸«–>a<)àZ½íUlN`b:n?ÈU®+JÕT@¥G^MiÚªˆ×’iW“W¬Àò‰ÇSTG¯Z¿h€Üæ´,‡ï¿W­ï¾€ÔôQEhÕê(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š+>à;€?ÏZЪ7cgÔf«Þ ¡¨.ÆaÐÔ (ªFôÍ(<jCÓõ ):7ÖŽô¿ÒóúÑGOÃüih QþJ=¨¢Š) )=ûÓG¾)¬xÅ%è¥"’›GJ(¢Ž´QEFíÛó4®ØàTtÖn£wì(¢Š;Ó*: v¤÷¤'QïÖŠ(¦ÑGùéEúSY±Ó­!8Ò“JÌzŸJ‹$ä“HhïL'56ãEh£½%6ŠN(ëÒjB}(¢Ž(ïÏãMgÛÇSM'šB@4¥¶õ'ð¨Ùóì) ÉÉüóIL-šœž(íGt¤ÏZa8¦PMZ±|LTž ñŽãüš«@;H àŽx§G!ŽUCN‰Ìr«úÚǵâ¡·¸YTtWî3×éSt­dutܧ Öª²º†R4‡Ú“ñ óGëHNM/z=ê7=…=Ž¿ó¨zŸZc™#`bŠó¢“ß5Âíî_ÄzÒ¤æܼúZÝä{QÇ5• ü‘€_~¿X’m9·vÁÈ5y/aaËm>„Uä¾—–Ú}«2˜{S?Ï5Q/IÐ`*“ƒÜæ­ö¥IVl•9â‘fI‹2ŒÒ?( žiÔÉ·­GJÜ“IÖ¢'&¡c–4)(ëLãZza8\Ó‚Ùü©¡ƒA œ ã'ó¥ŠIàžÝª¹›gÖ ù™øîi( ñÁÏò¤'hÉÞhÔR_zO|ÒEÖÿU…#¢šNh'Ó]»wõ¨ûQEFNM@ıɣ¸¢ûÑL&’­QIEN´QEQMfÜÐÍ·ëQ}i¬ØàSñÀ äõ9&” ž:ÒTˆ˜ç½4 šAf¨Ç4v§º dS( Ž´ àÒ‚AÈÏ­\Šð` ÏLÖ©g½'n”䙡'iëÔS¢™¢o”ýG­^{ålç’}=ªÆðÈNA’HÇ5nhÔî'·jš‰Îá};Uˆnew;†AôíSv£¿J? ÏJ›µKIQ9ËTئ*`äòi…K` k‚@"`dŽ•H'@\ö©Bà T¨œ{SãOAÅ!`®j3k`NqØSQôÅ9£Fê Ô¦4n¨§ð¦¤i¨§ã§sF(§ª€8JpFv£ü(Ç4S•r}©Ýi@ÉÅ9¿­>ŽÔw©@ÀÅL«´bŠ;ÑJOJ)h´v©!O2P;u5*)$(êx§*–`£©5rÝ6@£¹äÔ´QZª¡(è+MT*…ÅQE--QEQEQEQEQEQEQEQU®`'2.2O½Y¢™$bT*ÔÙ#!SYTU¹­rwG×ûµW8ôõ¬é"h› >‡Ö³ä‰¢l0úZJ£¥ö¨ÈȦS{QøÑÒŠe'j)¬;Ž :ŠÈ ŒÔ[ÛüŠ ·®? W^sùÓ?FIf¡%”ã&ƒÍ'CKÖŽ´†š}hëE^´Qž(¤¢Š?ýtÊ(¢ŠFàý(íA¨Üå¿JoJ?ª*€œœÑGj?ÆŽ”RPi?:J)‡­TNAn;TŒp ¨iŽ{TrÔPJ:ÑïM¨é9ŠZ>´›h¤¢—üô¢“m”Œp ïÛÞœGçLdÜyaHAÇ‘³Ž5ã@°æ¥Žæœ^€ hŒ÷⣞üSV¿ÊŒÒRQF0(¦ÑGÒšÿw§Zu!Ž”‡i"¡ïGoóéOd=©½ G‚:Ô$pi(£­¥¥%%zÑÜÓ(¦HpFjWRÄqJ¨}ñM*Ktâ£d,ç°¦óÖ”ÇèJ}Rí§ykéQ#°£cg¥KE&ÁIåz‹aô£a©hüèØ=èòÇ©¨¶îѰýKþMlycÞ£{šQúâŸõ¢—hô¥£¶i¡íNÇ>ô¥ç z PíŠ})qG|Òí¥þT´QGzp¢E¡rzqGZ)çÍ=cèOLt§*€)ÜSÂúÔ‹sùQoÊŠ(§T”qKZ õ¥½w£µZ=»Ó¨£Û½J‹ŽOZ1ÏSNÅ=Wžµ*&MQþMçëN§ÑÛüóJM&}ýiÀœRš;е:428P:õ>‚„¤8QžÄö­âXÆÉõïV`€ÊÙ< êjh 2ž~´¨‹íQÇó§QEh Z0QE-QEQEQEQEQEQEQEQEQEQEQEQEÄ^jq÷‡J r ÏÔª·0g÷ˆ9ïþ5Vê ÃÌQÈëïU®aÜ<Å޾õS¥ÒŠ*•S¤#½%:Œr5‡zLRzQGò£ëøÓh£þU&9Ô”~„f‘”0÷¨?;~÷Nâ™ß¥0‚*84õèõÅw¤#4””Rÿ…&;õ¦‘ERç–Â’Œ{Tm¥0ðzTø¤ Ȧ”¥1£ôâ¡ÅJ >Ýé¥ê)»HíL(õ7½áG#¨ÇáG­%6Šüè õ¢ŽÔw¤À¥êh4`QF(ÆizRŸÒŒJ(ǵ¢ƒÖŒJ(Å£ü(ïE¢Š)zQE'ùÍÏjxŒõéK‚z P¤ôξô Ò¤òÀíšv)BzÓ„g½0F^M>Š?ÏÖž P: (¢Œ*Í-¥Å:QŽ‚œ(£ÒŠ?”x–ŠOóŠ•o'“B¦Þiاªã“Ö¥DÇ&Š( ƒïWŤf5ÜlrA©¢æÎÌqëSÇK¸ãÖ¨}hêj÷Ø£þóþbœ¶‘)9ÉúÔ¢ÆlÿçO’ç°üjˆ^@O¥XŽÑ›—ùG§z¶¨¨>UV"²UÁsŸaÒ¦KE¹Ï°éMDT\(À§QEZ€0*È EQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEVkRX´`»UH*Jž¿JÔ¦¼k"á†j´¶ŠÙd;O§j¯-ª±,‡Óµf}húŠ´ÖÜný FmeA÷ªµ¼«Õ úsUš Wªôæ #šLU‘i!áO½J¶h,K{P-%sÂãëÅ m#^*šFò‘sMþU¬ªª0 ì+$‚ ¨íIqoä*s’sšYáòBó’sš?šk oñ§QÞ #5ŒP•#¯JJŸëL)èqL+éQ´dt¨ûQEÚe¤Å-„E'áE.9úRcüⓊ(ÿ?J:QÅ%cüšnÅö§Qü¨ ´…AêFcôýhòϨ©(ü?JnÑIå­E´úRm>Ÿ¥MÅQ°S|¡ëPàŽÇò£Óô©¨£`õ£Ê÷¨vŸOÒŒCùTÔqFÁëG”=j ‡4¾Yã¥IGáFÁK寽0GϽ8(¥þTcÚ—v§ ¢òhÿ&óÖ––Š(ëéKŽzÒâŠJ1ëKŠ)BÑEQßñ¥¢”)*N2_AIZV1jKr$È?N•Yàò©çÐúŠí™!ŽNÌ9ö©ÚÕ–$“³ Ÿj…PžM<£Š^”S‘T/ÖŠ(èiiÕ%ºo™Glç§jÓ¨m¡1Gó}ãÖ¦­KHŒPüßy¹5¡mŽ.zžMQEOSQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQEQY³®ÙŸ=sŸÎ´ª¥â «ã=V½|9ÂsP]¦è²?„Õ:?•`úŠSÁ¬â1T)(¢ŠJ)Ö˜c#¥IE!Pi¬¡ºÔΊ˜€zŒÓ xèi¥H¦ÈéÍ2ƒÖ‚1Ô~4SižÔQEQGô÷£Š)6Š)(¥ïïEh¤úfŠ^¦Ž½©6š)3E.9õ£Q´ÑIÞŠZ£i¢“­´q×m¢’ŒRôh¥Ú(£çQEQGoÂŽ´}ih¢—¥IíOT©¥ M8)=©€RÅ™"¯súRŠ¿k w0ùò©í­üÙ@=$ÔööÂI<É©À  €6HÖUà û÷ú+\¨+´JÔ* àJÎ’ÖHϲúЇ“ÖµéU†Aâª=‚’J>ßb3UžÉOÜl{ÊUgl($ŸJ½¨ ü¸éè*À E>4ˆîc¹¾œ |V«ÜÇqúQEUšžŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¦ºB§¡§QH@ ƒÐÐ@ ƒÐÖc¡Šž£Ò›ý+BhaÏ 
Heartbeat-3-0-7e3a82377fa8/doc/ldirectord0000644000000000000000000001462011576626513020025 0ustar00usergroup00000000000000
The ldirectord (Linux Director Daemon) is a stand-alone daemon to monitor
services of real servers, currently the http, https and ftp services. It is
simple to install and can be started on its own or be started from the
heartbeat daemon. It is very useful to manage real servers through ipvsadm.

ipvsadm is part of the Linux Virtual Server (http://www.linuxvirtualserver.org/)
Project and is used to redirect TCP- and UDP/IP traffic from a virtual server
to many real servers. Thus LVS, heartbeat and ldirectord can be used to build
up a redundant, fault-tolerant server farm of http-, https- and ftp-servers.
Other types of services may be added in future or upon request.

Installation and configuration guide:

ldirectord is written in Perl, thus no compilation is necessary. The following
Perl modules and the openssl library must be installed so that ldirectord can
monitor http-, https- and ftp-servers:

libwww-perl from www.CPAN.org to check http service
libnet-perl from www.CPAN.org to check ftp service
Crypt-SSLeay from www.CPAN.org
openssl from www.openssl.org to check https service
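Whether the needed modules are already present can be verified with a small
shell snippet like the one below. This is only a suggested check, not part of
ldirectord itself; the module names LWP::UserAgent, Net::FTP and Crypt::SSLeay
are the ones provided by the packages listed above:
-------------------------------------------------
#!/bin/sh
# Suggested check only: report Perl modules that ldirectord would need.
# LWP::UserAgent comes with libwww-perl, Net::FTP with libnet-perl.
for module in LWP::UserAgent Net::FTP Crypt::SSLeay
do
    perl -e "use $module;" 2>/dev/null || echo "Perl module $module is missing"
done
-------------------------------------------------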
If for any reason You don't need to check certain services, simply comment
out the full function check_http, check_https or check_ftp in ldirectord.

ldirectord is located in /etc/ha.d/resource.d/ldirectord
If ldirectord must often be started manually it is useful to create a
softlink. As root type
'ln -s /etc/ha.d/resource.d/ldirectord /usr/sbin/ldirectord'

Create a file with any name and place it into /etc/ha.d/conf.
This is a sample configuration to monitor two virtual servers with two real
servers each:

/etc/ha.d/conf/www
-------------------------------------------------
# config file for www.abc.com

# the number of seconds until a real server is declared dead
timeout = 30

# the number of seconds between server checks
checkinterval = 10

# reload the configuration automatically after a file modification
autoreload = yes

# virtual = x.y.z.w:p
# protocol = tcp|udp
# scheduler = rr|wrr|lc|wlc
# real = x.y.z.w:p gate|masq|ipip [weight]
# ...
#
virtual = 204.202.136.32:80
        real = 192.168.0.1:80 gate 5
        real = 192.168.0.2:80 gate 10
        service = http
        scheduler = wrr
        request = "/.testpage"
        receive = "test page"

virtual = 204.202.136.33:443
        real = 192.168.0.3:443 masq 1
        real = 192.168.0.4:443 masq 1
        service = https
        scheduler = wlc
        request = "/.testpage"
        receive = "test page"
        persistent = 1800
-------------------------------------------------

You need to create the .testpage file at the DocumentRoot directory of each
web server:

echo "test page" > .testpage

Now You may test the configuration. Start ldirectord as root with:

ldirectord www start

You may check if everything works fine. Type 'ipvsadm -l' and see if the
network address translation shows the above configuration. Look at the
logfile /var/log/ldirectord.log for details.
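The same check that ldirectord performs can also be done by hand, which helps
to tell a broken real server from a broken ldirectord configuration. The
following sketch uses the first real server and the request/receive values
from the sample above and assumes wget is installed; any similar HTTP client
will do:
-------------------------------------------------
#!/bin/sh
# Fetch the test page directly from a real server (address taken from the
# sample configuration above) and compare it with the expected answer.
page=`wget -q -O - http://192.168.0.1/.testpage`
if [ "$page" = "test page" ]
then
    echo "real server answers correctly"
else
    echo "real server check failed"
fi
-------------------------------------------------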
If that works fine You may add the following entry to Your
/etc/ha.d/haresources file:
-------------------------------------------------
node1 IPaddr::204.202.136.32 IPaddr::204.202.136.33 ldirectord::www
-------------------------------------------------

Restart heartbeat with /etc/rc.d/init.d/heartbeat restart and look if Your
host takes the IP-addresses 204.202.136.32 and 204.202.136.33 and if
ipvsadm -l maps the virtual servers onto the real ones.

Now install the files /etc/ha.d/conf/www and /etc/ha.d/haresources on Your
second heartbeated host. Shut down the first host and look if everything
still works.

----- Automatic reload and automatic transfer -----

This feature has been added since I often have to adapt my configuration
files and I did not want to restart ldirectord and transfer the configuration
files manually onto the second host.

In order to automatically reload the configuration put the line
-------------------------------------------------
autoreload = yes
-------------------------------------------------
into the configuration file. After each checkinterval a checksum is built
over the file, and if it has changed ldirectord automatically reloads the
configuration.

To automatically transfer the configuration onto the second machine You must
create a new user on both heartbeated hosts with their home directories
located at /etc/ha.d/conf. Do this with

useradd -u 123 -g daemon -c 'High Availability Daemon' -d /etc/ha.d/conf had
chown -R had /etc/ha.d/conf

The user 'had' must be able to remote-copy files from one host to the other
without the need of a password. I prefer to use scp (secure copy) but You may
also use rcp (remote copy) if You feel safe with that or if Your laws forbid
the use of ssh.

If You use ssh put the name of the other host into the file /etc/shosts.equiv
(/etc/hosts.equiv for rsh users), create keys with ssh-keygen on each host and
exchange the public part of the keys by doing a ssh connection once from one
host to the other and vice versa (rsh users can skip this step). Refer to the
ssh or rsh manual pages for details.
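As an illustration of the last two steps only (the entry in /etc/shosts.equiv
and the one-time connection that lets the hosts learn each other's keys),
something like the following could be run on each host. The host name
name-of-other-host is a placeholder, just like in the putcfg script further
below; key creation itself is left to the ssh-keygen manual page as described
above:
-------------------------------------------------
#!/bin/sh
# Illustration only; see the ssh manual pages for the full key setup.
# Trust the peer host for host-based authentication:
echo "name-of-other-host" >> /etc/shosts.equiv
# Connect once as user 'had' so that the host keys get exchanged:
su - had -c "ssh name-of-other-host true"
-------------------------------------------------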
These lines must be set in the /etc/sshd_config files on both hosts:
-------------------------------------------------
RhostsRSAAuthentication yes
RSAAuthentication yes
-------------------------------------------------

Now the user 'had' should be able to 'scp' any file into the directory
/etc/ha.d/conf of the other host. Remember: For safety reasons the user root
may not copy files without a password.

If copying files onto the remote host works You may put the line:
-------------------------------------------------
callback = "/etc/ha.d/resource.d/putcfg"
-------------------------------------------------
into Your configuration file and create a shellscript named putcfg located in
/etc/ha.d/resource.d. Remember to make the shellscript executable.

/etc/ha.d/resource.d/putcfg:
-------------------------------------------------
#!/bin/sh
chown had.daemon /etc/ha.d/conf/$1
su - had -c "scp -p -q /etc/ha.d/conf/$1 name-of-other-host:/etc/ha.d/conf"
exit $?
-------------------------------------------------

Now You may safely modify Your configuration files without having to manually
update Your configurations by ftp or worry that they will not be up-to-date
after a server crash of the main loadbalancing host.

There is more documentation about ldirectord. Just type ldirectord -h to read
the online manual.

For questions and suggestions refer to Jacob Rief
PGP-fingerprint: 2F 8E 63 FC 6C 35 64 4F 20 CA 68 F7 28 0A 18 2F

Heartbeat-3-0-7e3a82377fa8/doc/startstop.in0000644000000000000000000000144511576626513020343 0ustar00usergroup00000000000000
#!/bin/sh
#
# High-Availability Pre-Startup/Shutdown Script
#
# Description: Runs on Startup or shutdown of heartbeat (not resource based).
#              Also runs before start, after start or before stop,
#              and after stop.
#
# Author:      Matthew Soffen
#
# Support:     linux-ha@lists.linux-ha.org
#
# License:     GNU Lesser General Public License (LGPL)
#
# Copyright:   (C) 2002 Matthew Soffen
#
#
unset LC_ALL; export LC_ALL
unset LANGUAGE; export LANGUAGE

prefix=@prefix@
exec_prefix=@exec_prefix@
. @sysconfdir@/ha.d/shellfuncs

case "$1" in
'start')
    ;;
'pre-start')
    ;;
'post-start')
    ;;
'stop')
    ;;
'pre-stop')
    ;;
'post-stop')
    ;;
*)
    echo "Usage: $0 { start | pre-start | post-start | stop | pre-stop | post-stop }"
    ;;
esac
exit 0

Heartbeat-3-0-7e3a82377fa8/heartbeat-fedora.spec0000644000000000000000000017705711576626513021261 0ustar00usergroup00000000000000
# When downloading directly from Mercurial, it will automatically add this prefix
# Invoking 'hg archive' wont but you can add one with:
# hg archive -t tgz -p "Linux-HA-Dev-" -r $upstreamversion $upstreamversion.tar.gz
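# For illustration only, not upstream build notes: with the defaults used in
# this spec (Source0 heartbeat.tar.bz2, upstreamprefix "heartbeat" and no
# upstreamversion set), a matching source archive and a local test build
# could be produced along these lines:
#   hg archive -t tbz2 -p heartbeat heartbeat.tar.bz2
#   rpmbuild -ba heartbeat-fedora.spec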
%global specversion 1
%global upstreamprefix heartbeat
#global upstreamversion 0daab7da36a8
#global alphatag %{upstreamversion}.hg

%global gname haclient
%global uname hacluster

%global heartbeat_docdir %{_defaultdocdir}/%{name}-%{version}

Summary: Messaging and membership subsystem for High-Availability Linux
Name: heartbeat
Version: 3.0.5
#Release: %{?alphatag:0.}%{specversion}%{?alphatag:.%{alphatag}}%{?dist}
Release: 1%{?dist}
License: GPLv2 and LGPLv2+
URL: http://linux-ha.org/
Group: System Environment/Daemons
Source0: heartbeat.tar.bz2
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildRequires: glib2-devel
BuildRequires: iputils
%if 0%{?fedora} || 0%{?centos} > 4 || 0%{?rhel} > 4
BuildRequires: libtool-ltdl-devel
%endif
BuildRequires: bzip2-devel
BuildRequires: ncurses-devel
BuildRequires: openssl-devel
BuildRequires: libtool
BuildRequires: gettext
BuildRequires: bison
BuildRequires: flex
BuildRequires: zlib-devel
BuildRequires: mailx
BuildRequires: which
BuildRequires: cluster-glue-libs-devel
BuildRequires: libxslt docbook-dtds docbook-style-xsl
Requires: heartbeat-libs = %{version}-%{release}
Requires: PyXML
Requires: resource-agents
Requires: cluster-glue-libs
Requires(pre): shadow-utils
Requires(pre): cluster-glue
Requires(post): /sbin/chkconfig
Requires(preun): /sbin/chkconfig
Obsoletes: heartbeat-gui < %{version}-%{release}

%description
Heartbeat is a daemon that provides cluster infrastructure (communication and
membership) services to its clients. This allows clients to know about the
presence (or disappearance!) of peer processes on other machines and to
easily exchange messages with them.

Reference documentation is available online: http://www.linux-ha.org/doc/
Extensive manual pages for system administration commands and configuration
files are included.

In order to be useful to users, the Heartbeat daemon needs to be combined
with a cluster resource manager (CRM) which has the task of starting and
stopping the services (IP addresses, web servers, etc.) that the cluster will
make highly available.

Pacemaker is the preferred cluster resource manager for clusters based on
Heartbeat, supporting "n-node" clusters with significant capabilities for
managing resources and dependencies.

In addition Heartbeat continues to support the legacy release 1 style of
2-node clustering.

It implements the following kinds of heartbeats:
    - Serial ports
    - UDP/IP multicast (ethernet, etc)
    - UDP/IP broadcast (ethernet, etc)
    - UDP/IP unicast heartbeats
    - "ping" heartbeats (for routers, switches, etc.)

%package libs
Summary: Heartbeat libraries
Group: System Environment/Daemons

%description libs
Heartbeat library package

%package devel
Summary: Heartbeat development package
Group: System Environment/Daemons
Requires: heartbeat-libs = %{version}-%{release}

%description devel
Headers and shared libraries for writing programs for Heartbeat

%prep
%setup -q -n %{upstreamprefix}%{?upstreamversion}

%build
./bootstrap

# disable-fatal-warnings flag used to disable gcc4.x warnings of 'difference in signedness'
%if 0%{?fedora} < 11 || 0%{?centos_version} <= 5 || 0%{?rhel} <= 5
export docdir=%{heartbeat_docdir}
%endif
CFLAGS=${RPM_OPT_FLAGS} %configure \
    --disable-fatal-warnings \
    --disable-static \
%if 0%{?fedora} >= 11 || 0%{?centos_version} > 5 || 0%{?rhel} > 5
    --docdir=%{heartbeat_docdir}
%endif

# get rid of rpath
sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool

make %{?_smp_mflags} docdir=%{heartbeat_docdir}

%install
rm -rf $RPM_BUILD_ROOT
mkdir -p $RPM_BUILD_ROOT
make DESTDIR=$RPM_BUILD_ROOT docdir=%{heartbeat_docdir} install

# cleanup
[ -d $RPM_BUILD_ROOT/usr/man ] && rm -rf $RPM_BUILD_ROOT/usr/man
[ -d $RPM_BUILD_ROOT/usr/share/libtool ] && rm -rf $RPM_BUILD_ROOT/usr/share/libtool
find $RPM_BUILD_ROOT -type f -name *.la -exec rm -f {} ';'
rm -rf $RPM_BUILD_ROOT/%{_datadir}/heartbeat/cts

%clean
rm -rf $RPM_BUILD_ROOT

%post
/sbin/ldconfig
/sbin/chkconfig --add heartbeat

%postun -p /sbin/ldconfig

%preun
if [ $1 = 0 ] ; then
    /sbin/service heartbeat stop
    /sbin/chkconfig --del heartbeat
fi
/sbin/ldconfig

%files
%defattr(-,root,root,-)
%doc %{_datadir}/doc/%{name}-%{version}
%dir %{_sysconfdir}/ha.d
%{_sysconfdir}/ha.d/harc
%{_sysconfdir}/ha.d/rc.d
%config(noreplace) %{_sysconfdir}/ha.d/README.config
%{_datadir}/heartbeat/
%{_sysconfdir}/ha.d/resource.d/
%{_sysconfdir}/init.d/heartbeat
%config(noreplace) %{_sysconfdir}/logrotate.d/heartbeat
%dir %{_var}/lib/heartbeat
%dir %{_var}/run/heartbeat
%dir %attr (0750, %{uname}, %{gname}) %{_var}/run/heartbeat/dopd
%attr (2755, %{uname}, %{gname}) %{_bindir}/cl_status
%{_bindir}/cl_respawn
%dir %attr (755, %{uname}, %{gname}) %{_var}/run/heartbeat/ccm
%{_mandir}/man1/cl_status.1*
%{_mandir}/man1/hb_standby.1*
%{_mandir}/man1/hb_takeover.1*
%{_mandir}/man1/hb_addnode.1*
%{_mandir}/man1/hb_delnode.1*
%{_mandir}/man5/ha.cf.5*
%{_mandir}/man5/authkeys.5*
%{_mandir}/man8/heartbeat.8*
%{_mandir}/man8/apphbd.8*

%files libs
%defattr(-,root,root,-)
%{_libdir}/heartbeat
%{_libdir}/libapphb.so.*
%{_libdir}/libccmclient.so.*
%{_libdir}/libclm.so.*
%{_libdir}/libhbclient.so.*

%files devel
%defattr(-,root,root,-)
%doc %{_datadir}/doc/%{name}-%{version}
%{_includedir}/heartbeat/
%{_includedir}/saf/
%{_includedir}/ocf/
%{_libdir}/*.so

%changelog
* Thu Jun 16 2011 Lars Ellenberg - 3.0.5-1
- do not request retransmission of lost messages from dead members
- fix segfault due to recursion in api_remove_client_pid
- properly cleanup pending delayed rexmit requests before reset of seqtrack
- create HA_RSCTMP on start, if necessary
- improve detection of pacemaker clusters in init script

* Tue Nov 30 2010 Lars Ellenberg - 3.0.4-1
- better support for Pacemaker >= 1.1
- say Pacemaker support, not "v2", favor "pacemaker on" in ha.cf
- fix message rexmit request logic, it could cause rexmit packet storms
- increase ccm ipc message queue length
- new mcast6 UDP IPv6 communication plugin
- improve some log messages
- drop headers which are now in glue
- fixed/dropped some package dependencies
- fixed/dropped some build dependencies
- new proof-of-concept-only known-to-be-broken RDS communication plugin

* Wed Apr 14 2010 Lars Ellenberg - 3.0.3-1
- added /var/run/* directory permission paranoia to init script
- added SBD and lrmadmin configuration support to init script
- drop libnet dependency

* Thu Feb 04 2010 Lars Ellenberg - 3.0.2-2
- changed dopd socket location again to its own subdirectory,
  made sure the init script will create that directory with
  appropriate permissions

* Mon Feb 01 2010 Lars Ellenberg - 3.0.2-1
- New upstream release

* Sat Dec 19 2009 Florian Haas - 3.0.2-0rc2
- New upstream RC

* Fri Dec 11 2009 Florian Haas - 3.0.2-0rc1
- New upstream RC
- Fix docdir for legacy distributions

* Thu Oct 15 2009 Andrew Beekhof - 3.0.0-0.5.0daab7da36a8.hg
- Resolve file conflict, shellfuncs is provided by resource-agents

* Fri Aug 21 2009 Tomas Mraz - 3.0.0-0.4.0daab7da36a8.hg.1
- rebuilt with new openssl

* Mon Aug 17 2009 Andrew Beekhof - 3.0.0-0.4.0daab7da36a8.hg
- Make use of the specversion variable
- Add explicit dependancy on cluster-glue-libs to prevent yum from trying
  to use the deprecated heartbeat-{pils|stonith} packages
- Update to upstream version 0daab7da36a8
  + Clean up configure. Source most variables from cluster-glue to ensure
    build consistency

* Mon Aug 17 2009 Andrew Beekhof - 3.0.0-0.3.b37cbb1b036c.hg
- Make use of the uname/gname variables
- Use global instead of define for variables
- Remove user/group creation. This is handled in cluster-glue
- Add obsoletes directive for gui subpackage which is no longer supplied
- Move ldirectord subpackage to resource-agents
- Use the full configure macro
- Update to upstream version b37cbb1b036c
  + LVSSyncDaemonSwap syncid
  + remove the remaining OCF RA which live in the agents repository
  + High: RA: IPv6addr: support for new nic and cidr_netmask parameters in the OCF RA
  + Low: Build: findif moved to agents.
  + Low: Build: move ldirectord to agents.
  + Low: Build: remove a few hb_report artifacts.

* Thu Aug 13 2009 Andrew Beekhof - 3.0.0-0.2.11f858f3bc4c.hg
- Create a libs subpackage to support multi-arch

* Tue Aug 4 2009 Andrew Beekhof - 3.0.0-0.1.11f858f3bc4c.hg
- Update to 3.0.0-beta and build against cluster-glue

* Fri Jul 24 2009 Fedora Release Engineering - 2.1.4-12
- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild

* Thu Jun 25 2009 Jochen Schmitt - 2.1.4-11
- Revert changes of 2-1.4-11

* Thu Jun 25 2009 Jochen Schmitt 2.1.4-10
- Add separate filesystem subpackage (#501518)

* Sun Jun 14 2009 Kevin Fenzi - 2.1.4-9
- Remove perl(Net::IMAP::Simple::SSL) for now as it's not in Fedora (yet).

* Fri Jun 11 2009 Kevin Fenzi - 2.1.4-8
- Add perl(Net::IMAP::Simple::SSL) to ldirector subpackage
- Fix MAILCMD (#502443)
- Add patch to fix duplicate install of OCF drbd

* Fri Apr 24 2009 Kevin Fenzi - 2.1.4-7
- Move ldirector dep to subpackage (#493625)
- Add zlib-devel to BuildRequires (#497079)
- Add pygtk2-libglade (#497079)

* Tue Feb 24 2009 Kevin Fenzi - 2.1.4-6
- Remove symlink thats no longer needed.

* Mon Feb 23 2009 Kevin Fenzi - 2.1.4-5
- Remove fedora-usermgmt
- Change subpackage names to match all the other heartbeat packages out there.
* Sat Jan 17 2009 Kevin Fenzi - 2.1.4-4 - Main package shouldn't require pygtk2 (#480157) * Fri Jan 16 2009 Tomas Mraz - 2.1.4-3 - rebuild with new openssl * Thu Dec 04 2008 Ignacio Vazquez-Abrams - 2.1.4-2 - Rebuild for Python 2.6 * Mon Dec 01 2008 Kevin Fenzi - 2.1.4-1 - Update to 2.1.4 - Drop upstreamed patch - Add patch to disable init script by default (#441286) * Mon Dec 01 2008 Ignacio Vazquez-Abrams - 2.1.3-4 - Rebuild for Python 2.6 * Tue Oct 21 2008 Lon Hohberger - 2.1.3-3 - Fix requires line to include PyXML (#467807) * Wed Jun 25 2008 Tomas Mraz - 2.1.3-2 - rebuild with new gnutls * Mon Feb 25 2008 Kevin Fenzi - 2.1.3-1 - Update to 2.1.3 - Add management GUI - Drop upstreamed patches - Add patch for IPAddr (bz #434653) * Tue Feb 19 2008 Fedora Release Engineering - 2.1.2-4 - Autorebuild for GCC 4.3 * Fri Dec 07 2007 Release Engineering - 2.1.2-3 - Rebuild for deps * Tue Aug 29 2007 Kevin Fenzi - 2.1.2-2 - Update sources * Tue Aug 29 2007 Kevin Fenzi - 2.1.2-1 - Upgrade to 2.1.2 - Update license tag for new guidelines. - Patch open function issues. * Wed Aug 29 2007 Fedora Release Engineering - 2.0.8-4 - Rebuild for selinux ppc32 issue. * Tue Jun 26 2007 Kevin Fenzi - 2.0.8-3 - Add openssl-devel BuildRequires - Remove restart from postun (bz #223949) - Fix up Requires (bz #245704) - Remove duplicate libraries in subpackages (bz #245704) - Add smp_mflags - Fix typo in stonith subpackage description - Simplify clean section. - Use find_lang macro - Fix some multilib issues with ocf dir (bz #228165) - Kill rpath - Add ldconfig to postun * Fri Feb 9 2007 Joost Soeterbroek - 2.0.8-2 - change condrestart -> restart (bz #223949) * Sun Jan 21 2007 Joost Soeterbroek - 2.0.8-1 - upstream version 2.0.8 - fix cl_status commands fail (bz #219765) * Thu Nov 30 2006 Joost Soeterbroek - 2.0.7-5 - add Requires net-snmp-libs to stonith , add BuildReqs net-snmp-devel >= 5.4 * Tue Nov 28 2006 Joost Soeterbroek - 2.0.7-4 - rebuild for updated net-snmp, soname change * Sun Oct 29 2006 Joost Soeterbroek - 2.0.7-3 - fix preun, postun to check for upgrade (#212133) * Wed Aug 30 2006 Joost Soeterbroek - 2.0.7-2 - rebuild for Fedora Extras 6 * Wed Aug 16 2006 Joost Soeterbroek - 2.0.7-1 - upstream version 2.0.7 * Sat Jul 15 2006 Joost Soeterbroek - 2.0.6-2 - added BuildReqs: ncurses-devel * Fri Jul 14 2006 Joost Soeterbroek - 2.0.6-1 - upstream version 2.0.6 * Fri Jun 16 2006 Joost Soeterbroek - 2.0.5-2 - bump for gnutls change in devel * Thu Apr 27 2006 Joost Soeterbroek - 2.0.5-1 - upstream version 2.0.5 - removed patch2 - ownership of /heartbeat/crm/cib.xml is no longer set in cts/CM_LinuxHAv2.py.in * Wed Mar 29 2006 Joost Soeterbroek - 2.0.4-2 - Version 2.0.4 * Wed Mar 1 2006 Joost Soeterbroek - 2.0.3-9 - changed user creation - added patch2 heartbeat-2.0.3-fedora-ccmuser.patch * Wed Mar 1 2006 Joost Soeterbroek - 2.0.3-8 - specifically excluded ldirectord symlink from heartbeat package - removed user and group deletion in postun - renamed subpackages ldirectord, pils and stonith to lose prefix heartbeat by using -n * Tue Feb 28 2006 Joost Soeterbroek - 2.0.3-7 - fixed more rpmlint errors and warnings * Sat Feb 25 2006 Joost Soeterbroek - 2.0.3-6 - fixed number of rpmlint warnings and errors (still ignores some..) 
- generate 'predictable' uid and gid with fedora-usermgmt to use with configure flag -with-ccmuser-id and groupadd, useradd - added Buildreq's: libtool-ltdl-devel, fedora-usermgmt-setup net-snmp-devel, bzip2-devel - removed *.so duplication in heartbeat and heartbeat-devel - changed file sections * Fri Feb 24 2006 Joost Soeterbroek - 2.0.3-5 - useradd with fedora-usermgmt - added *.so file to -devel sub-package * Sat Feb 18 2006 Joost Soeterbroek - 2.0.3-4 - removed all perl requires; should be picked up by rpmbuild automagically - changed system user creation hacluster part to use baseid: (http://fedoraproject.org/wiki/Packaging/UserCreation) * Thu Feb 16 2006 Joost Soeterbroek - 2.0.3-3 - removed Requires: python and gnutls - changed _libdir/ocf -> _prefix/lib/ocf - reversed subpackages depend on basepackage - removed Req swig (kept BuildReq) - added Req pygtk2 * Wed Feb 15 2006 Joost Soeterbroek - 2.0.3-2 - fixes for various rpmlint errors and warnings - fixed setup -q - make subpackages depend on basepackage, not reverse - clean buildroot at beginning of install - replaced a number of hardcoded paths with RPM macros - Changed Group from Networking/Daemons to System Environment/Daemons - enable mgmt option * Sun Feb 12 2006 Joost Soeterbroek - 2.0.3-1 - rebuilt for Fedora Extras * Fri Feb 10 2006 Alan Robertson (see doc/AUTHORS file) + Version 2.0.3 - Bug fixes and significant new features. + Management Daemon/Library and GUI client + provide a management library for manamgement daemon and CIM provider + provide a management daemon and a basic GUI management tool + CIM enablement + CIM (Common Information Model) enablement - works with sblim-sfcb, OpenWBEM, and Pegasus CIMOMs - not yet compiled into our binary RPMs because of dependencies + CRM (Cluster Resource Manager) General + All shutdowns go via the PE/TE - preserves inter-resource ordering + Support for future changes to the CIB (depreciation of cib_fragment) + Overhaul of IPC and HA channel callback logic + Many improvments to the quality and quantity (reduced) of logging + CRMd + Timerless elections - when everyone has voted we're done + Use the replace notification from the CIB to re-update our copy with our view of our peers. + Reliably detect if the LRM connection is still active. + Elections + newer versions defer to older ones in DC elections (opposite of current behavior) + this means that only once the complete cluster has been upgraded will we start acting like the new version and accept new config options + it also means newer PE's and TE's (the most complex pieces) don't need "act like the old version" options and can rely on all slaves being at least as up-to-date as they are + people can run mixed clusters as long as they want (until they want the new PE features) + new DCs only update the version number in the CIB if they have a higher value + nodes that start and have a lower version than that stored in the CIB shut themselves down (the CRM part anyway) + this prevents an admin from introducing old nodes back into an upgraded cluster. It probably doesn't fully understand the config and may not support the actions the PE/TE requires. + CIB (Common Information Base daemon) + Make sure "query only" connections cant modify the CIB + Periodically dump some stats about what the CIB has been doing. 
+ Verify there are no memory leaks + Performance enhancements + Prevent a single CIB client from blocking everyone else + Clients Can be notified of full CIB replacements + record_config_changes option in ha.cf for those worried about the amount of logging. Defaults to "on". + suppress_cib_writes CIB option replaced with in enable_config_writes ha.cf (enable_config_writes to be removed in 2.0.4) + Never write the status section to disk + Check permissions for the on-disk CIB at startup + Dont trash unreadable on-disk CIBs + Fix for updates made against the whole CIB (not just one section) + PEngine (Policy Engine) + Many improvements to the handling of resource groups + Support "anonymous" clones + Fix stonith ordering + Order DC shutdowns after everyone else's + Support short resource names (for group and clone resources) + The ordering and colocation of grouped resources is now optional + Support probing new nodes for active resources. + All "probe" actions are controlled by the PE. + No resource may be started until the probing is complete. + Do not probe for resources we know to be active on unprobed nodes + When looking for monitor ops, only mark it optional if it was already active on the node we're interested in. + Detect changes to class/type/provider/parameters and force a restart of the resource + New record_pengine_inputs option in ha.cf for those worried about the amount of logging. Defaults to "on". + Differentiate between config and processing errors + reduces the frequency that we need to log the complete CIB + Make notify for master/slave work + New CIB option: stop_orphan_actions (boolean) If a resource is no longer defined, we can optionally stop it + New CIB option: stop_orphan_actions (boolean) If a monitor op for a given interval is no longer defined, we can optionally stop it + Add support for time and phase-of-the-moon based constraints + Improved failure handling: avoiding false positives + Always create orphaned resources - so they show up in crm_mon + Do not require sequential clone numbers starting at 0 + TEngine (transition engine) + Detect old stonith ops + CLIs (Command Line interfaces) + Create a --one-shot option for crm_mon + Switch a number of CLI tools to use the new syncronous connections + Log errors to stderr where they will be seen and therefore useful + Support migration and un-migration of resources and resource groups + Create crm_verify for checking configuration validity + Simplify the passing of XML to cibadmin + Known open bugs worth mentioning: + 1075, 1080, 1081, 1084, 1085, 1064, 1069, 756, 984 + 1050, 1082, 1037, 1079 * Thu Sep 22 2005 Alan Robertson (see doc/AUTHORS file) + Version 2.0.2 - small bug fix only release + Fixed a bug in ping directive - it works again + Added a check to BasicSanityCheck to check ping and ping_group directives + fixed cl_status nodestatus to return 0 if a node has status "ping" + fixed a memory leak in the CRM's LRM interface code + fixed code which deterimines which version of the CRM becomes the DC when basic CIB schema versions differ. It now prefers the older version to be DC instead of the newer version. 
* Wed Sep 14 2005 Alan Robertson (see doc/AUTHORS file) + Version 2.0.1 - + Communication Layer + netstring encoding format is changed to be more efficient + add compression capability for big messages + Add man pages for hb_standby/hb_takeover + The assert triggered by 2.0.0 has been fixed + CIB can now contain XML comments and/or be in DOS format + Includes implementation of the ISO8601 date format + New CLI tools for changing cluster preferences, node attributes and node standby + Improved recovery and placement of group resources + Detection of failed nodes by the Policy Engine is fixed + New Policy Engine features http://www.linux-ha.org/ClusterResourceManager/DTD1.0/Annotated : sections 1.5.[8,9,10,12] + Constraints and instance attributes can now be active conditionally + Rules can now contain other rules + Date/Time based expressions are supported + Cloned resources can now optionally be notified before and after any of their peers are stopped or started. + The cluster can re-evaluate the configuration automatically after a defined interval of idleness + Removed a flow control message which was very annoying when operating in a mixed 1.x/2.x environment -- Known Bugs :-( -- - Bug 859 - FSA took too long to complete action - fully recovered from - Bug 882 - IPC channel not connected during shutdown - harmless - Bug 879 - Failed actions cause extra election - harmless Each of these occurs about once or twice in 5000 test iterations - This is probably > 10K failovers - rsc_location constraints cannot have rules that contain other rules (fixed in CVS after release) * Fri Jul 29 2005 Alan Robertson (see doc/AUTHORS file) + Version 2.0.0 - First stable release of the next generation of the Linux-HA project + Basic Characteristics described here: http://linux-ha.org/FactSheetv2 + Core infrastructure improvements: + Messaging (message acknowledging and flow control) + Logging (logging daemon) + Release 1.x style (2-node) clusters fully supported + Multi-node support (so far up to 16-node clusters tested) See http://linux-ha.org/GettingStartedV2 for more information + New components: + Cluster Information Base (replicated resource configuration) + Cluster Resource Manager (supporting 1->N nodes) + Modular Policy Engine (controlling resource placement) + Local Resource Manager (policy free, not cluster aware) + Stonith Daemon (stand-alone fencing subsystem) + Support for OCF and LSB resource agents + Support for composite resource types (groups, clones) + Support for a rich set of resource location and ordering constraints + Conversion tool for existing haresources + Resources monitored by request + Resource "maintenance" mode + Several failback, failure and "No Quorum" behaviours to choose from (global defaults and per action or resource) + Sample cluster state and configuration monitoring tools Known issues in 2.0.0: - Under some rare circumstances the cluster manager will time out while stabilizing a new cluster state. This appears to be otherwise harmless - the cluster is actually fine. http://www.osdl.org/developer_bugzilla/show_bug.cgi?id=770 - Under some rare circumstances, a dev assert will be triggered in unpack.c. This results in the pengine getting restarted. This is annoying, but not a disaster. http://www.osdl.org/developer_bugzilla/show_bug.cgi?id=797 * Tue May 23 2005 Alan Robertson (see doc/AUTHORS file) + Version 1.99.5 - Near-final beta of 2.0.0 release + many bug fixes - code looks very stable at this point -- well tested on 4 and 8 node clusters. 
* Thu Apr 07 2005 Alan Robertson (see doc/AUTHORS file) + Version 1.99.4 - Near-final beta of 2.0.0 release + many bug fixes since 1.99.1 + new external STONITH model - fully supports scripting interface + tested through 12 node clusters successfully + No serious defects found in testing + Easier-to-understand locational constraints model + Many bug fixes of many kinds + Important bug fixes to OCF IPaddr resource agent + Resources are monitored only on request + See http://wiki.linux-ha.org/ClusterResourceManager/Setup for basic ideas about getting started. + Release 1 style (2-node) clusters still fully supported + Release 2 style clusters support 1-N node clusters (where N is probably something like 8-32) * Tue Mar 20 2005 Alan Robertson (see doc/AUTHORS file) + Version 1.99.3 - Near-final beta "technology preview" of 2.0.0 release + many bug fixes since 1.99.1 + tested through 12 node clusters with reasonable success + new STONITH API * Sun Feb 20 2005 Alan Robertson (see doc/AUTHORS file) + Version 1.99.2 - Near-final beta "technology preview" of 2.0.0 release + Many many many changes. Far too many to describe here. + See http://wiki.linux-ha.org/ClusterResourceManager/Setup for certain basic ideas about getting started. * Mon Oct 11 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.99.1 - *early* beta series - preparing for 2.0.0 + Andrew provided a number of fixes to the CRM and 2.0 features + Fixed a problem with retrying failed STONITH operations * Mon Oct 11 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.99.0 - *early* beta series - preparing for 2.0.0 + All STABLE changes noted below have been ported to this branch + Included in this release is a beta of the next generation of Heartbeat resource manager developed by Andrew Beekhof. http://linuxha.trick.ca/NewHeartbeatDesign is a good place to learn more about this effort. Please examine crm/README, crm/test/README and crm/crm-1.0.dtd for example usage and configuration. + Also included is the L(ocal) R(esource) M(anager) developed by IBM China which is an integral part of the NewHeartbeatDesign. + Known caveats: - STONITH as a whole has seen a code cleanup and should be tested carefully. - The external STONITH plug-in has undergone major surgery and probably doesn't work yet. - the new CRM is not perfectly stable with 3 nodes yet. + PLEASE see http://osdl.org/developer_bugzilla/enter_bug.cgi?product=Linux-HA and use it to report quirks and issues you find! * Sat Sep 18 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.2.3 (stable) + fixed a serious error which causes heartbeat to misbehave after about 10 months of continuous operation + Made our ARP packets more RFC compliant + Extended apcmastersnmp code to deal with new devices + fixed a bug concerning simultaneous stops of both machines causing one of them to not shut down. 
+ added an option to suppress reporting on packet corruption + fixed it so that the FIFO is no longer created by the RPM + made cl_status setgid so anyone can run it, and fixed exit codes + eliminated a serious memory leak associated with client code + packaged doc files which had been missed before + fixed many many small bugs and memory leaks detected by BEAM + added several new test cases + fixed longstanding bug in plugin unloading + fixed a shutdown hang problem + several fixes for Solaris, FreeBSD + Solaris packaging now included in base + fixed a bug related to the apache resource agent not handling quoted parameters + added use_apphbd parameter to have heartbeat register with apphbd instead of watchdog device when desired + changed apphbd to default its config file to /etc + added snmp subagent code + added hbaping communications plugin + added external STONITH plugin + ldirectord: fixed a bug where real servers that are present in multiple virtual services would only be added to one virtual service. * Mon May 11 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.2.2 (stable) + Fixed several format string errors in communication plugins + Fixed a bug which kept us from diagnosing errors in non-aliased interfaces + Fixed a bug in ipaddr which caused an infinite loop when auto_failback is on + Updated Debian things... + Added IPv6addr resource agent + Added ibmhmc STONITH plugin + Added cl_status command + Fixed a bug regarding restarts when auto_failback is on... + Fixed a couple of bugs in sha1 authentication method for very long keys + Fixed a bug in the portblock resource agent so that it no longer blocks ports on the loopback interface + Increased the time allowed for split brain test before it declares failure + Version 1.2.1 (stable) + Netstrings can now be used for our on-the-wire data format + Perl/SWIG bindings added for some heartbeat libraries + Significant improvements to SAF data checkpointing API + Implemented unbuffered ipcsocket code for SAF APIs + Many Solaris fixes -- except for ipfail, Solaris works + Significant library restructuring + Watchdog device NOWAYOUT is now overridden if defaulted + Watchdog device now kills machine instantly after deadtime instead of after one minute + Hostnames should now be treated case-independently... + Added new client status APIs - client_status() and cstatus_callback() + Fixed bug with auto_failback and quick full restarts + We now automatically reboot when resources fail to stop correctly... + We now check the status of the configured STONITH device hourly... + STONITH operations repeat after a 5 second delay, not immediately... + Added hb_takeover command - complement to hb_standby + Added documentation on how to use evlog/TCP to enable testing to take place without losing messages due to UDP message forwarding + Several new tests from Mi, Jun - split brain, bandwidth, failure detection time. 
+ Fix to LVM resource from Harald Milz + Fixed FreeBSD authentication problems breaking ipfail + Fixed .so loading on Debian + Fixed false complaints about resource scripts (from Jens Schmalzing) + Fixed false stop failure from LinuxSCSI (from Jens Schmalzing ) * Thu Apr 15 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.3.0 - beta series + Netstrings can now be used for our on-the-wire data format + Perl/SWIG bindings added for some heartbeat libraries + Significant improvements to SAF data checkpointing API + Implemented unbuffered ipcsocket code for SAF APIs + Many Solaris fixes -- except for ipfail, Solaris works + Significant library restructuring + Watchdog device NOWAYOUT is now overridded if defaulted + Watchdog device now kills machine instantly after deadtime instead of after one minute + Hostnames should now be treated case-independently... + Added new client status APIs - client_status() and cstatus_callback() + Fixed bug with auto_failback and quick full restarts + We now automatically reboot when resources fail to stop correctly... + We now check the status of the configured STONITH device hourly... + STONITH operations repeat after a 5 second delay, not immediately... + Added hb_takeover command - complement to hb_standby + Added documentation on how to use evlog/TCP to enable testing to take place without losing messages due to UDP message forwarding + Several new tests from Mi, Jun - split brain, bandwidth, failure detection time. + Fix to LVM resource from Harald Milz * Tue Feb 16 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.2.0 + Replaced the nice_failback option with the auto_failback option. THIS OBSOLETES THE NICE_FAILBACK OPTION. READ THE DOCS FOR HOW TO UPGRADE SMOOTHLY. + Added a new feature to hb_standby which allows you to give up any specific category of resources: local, foreign, or all. The old behavior is "all" which is the default. This allows you to put a auto_failback no cluster into an active/active configuration on demand. + ipfail now works properly with auto_failback on (active/active) + ipfail now has "hysteresis" so that it doesn't respond immediately to a network failure, but waits a little while so that the damage can be properly assessed and extraneous takeovers avoided + Added new ping node timeout directive "deadping" + Made sure heartbeat preallocated stack and heap, and printed a message if we allocate heap once we're started up... + IPMILan STONITH plugin added to CVS + Added IPaddr2 resource script + Made the APC smart UPS ups code compatible with more UPSes + Added a (preliminary?) ordered messaging facility from Yi Zhu + Changed IPaddr's method of doing ARPs in background so that certain timing windows were closed. + Added OCF (wrapper) resource script + Allow respawn programs to take arguments + Added pinggroups (where any node being up is OK) + SIGNIFICANT amount of internal rearchitecture. + Many bug fixes. + Several documentation updates. 
* Tue Feb 10 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.1.5 + ipfail now has "hysteresis" so that it doesn't respond immediately to a network failure, but waits a little while so that the damage can be properly assessed and extraneous takeovers avoided + Several fixes to cl_poll() + More fixes to the IPC code - especially handling data reception after EOF + removed some unclean code from GSource for treating EOF conditions + Several bugs concerning hanging when shutting down early during startup + A few BasicSanityCheck bug fixes + CTS now allows a single machine to be able to monitor several clusters + Most former CTS options are now either unneeded or on the command line + Increased number of ARPs and how long they're being sent out + Fixed uncommon (authorization) memory leak + Some Solaris portability fixes. + Made init script handle standby correctly for new config files + Improved the fast failure detection test + Added some backwards compatibility for nice_failback and some default authentication directives + Corrected the 1.1.4 change log * Fri Jan 22 2004 Alan Robertson (see doc/AUTHORS file) + Version 1.1.4 + ipfail now works properly with auto_failback on (active/active) + Changed the API to use sockets (IPC library) instead of FIFOs. + Added new apiauth directives to provide authorization information formerly provided by the FIFO permissions. + Added Intel's implementation of the SAF data checkpointing API and daemon + Added a cleanup suggested by Emily Ratliff. + IPMILan STONITH plugin added to CVS + Added IPaddr2 resource script + Various cleanups due to horms. + Fixed authentication to work on 64-bit platforms(!) + Fixed the cl_poll() code to handle corner cases better + Made heartbeat close watchdog device before re-execing itself + New CTS improvements from Mi, Jun + Various minor bug fixes. . Several shutdown bugs addressed . fixed sendarp to make a pid file, so we can shut it down when we shut everything else down in case it's still running. . Lots of minor bug fixes to IPC code . Lots of minor bug fixes to ipctest program . made BasicSanityCheck more tolerant of delays . Fixed IPC code to authenticate based on ints, not int*s. . Check properly for strnlen instead of strlen... . Several signed/unsigned fixes . A few uninitialized vars now are inited . Switched to compiling lex/yacc sources the automake way . Lots of minor CTS fixes... + ldirectord bug fixes: . When new real servers are added on initialisation or when the configuration file is reread they are marked with status of -1 (uninitialised) so they will be checked and inserted into the virtual service as required . All checks use the checkport if set, otherwise the port set for the individual real server. This was the case for http and connect checks, but others had variations on this theme. . When the configuration file is reread because it changed on disk and autoreload is set, check the real servers immediately rather than waiting for checkinterval to expire . Already running message sent to stderr instead of stdout . Support alternate server in real-server specific URL . Treat the same real server with different weights as a different real server. Fixes bug reported by Philip Hayward whereby the same real-server would always have the same weight, regardless of the ldirectord.cf * Fri Sep 26 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.1.3 + Bugfix for heartbeat starting resources twice concurrently if auto_failback was set to "legacy". 
+ Bugfix for messages getting lost if messages were sent in quick succession. (Kurosawa Takahiro) + Bugfix for Filesystem resource checking for presence of filesystem support before loading the module. + BasicSanityCheck extended to cover more basic tests. + Bugfix for findif not working correctly for CIDR netmasks. + Minor bugfix for ldirectord recognizing new schedulers correctly and timeout settings are now being honoured. + Enhanced the message giving a better explanation of how to set up node names properly when current node not found in the ha.cf file + Send a message to the cluster whenever we have a node which doesn't need STONITHing - even though it's gone down. This fix needed by CCM, which is in turn needed by EVMS. + Enhanced the messages for missing ha.cf and missing haresources files explaining that sample config files are found in the documentation. + Fix for memory leak from Forrest Zhao + Added a (preliminary?) ordered messaging facility from Yi Zhu + FAQ updates + Added Xinetd resource script + Added OCF (wrapper) resource script + Allow respawn programs to take arguments + Added pinggroups (where any node being up is OK) + fixed ldirectord negotiatetimeout for HTTP + fixed a bug which caused -d flag to be ignored + failing resource scripts are now ERRORs not WARNings + now shuts down correctly when auto_failback == legacy * Mon Jul 13 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.1.2 + Replaced the nice_failback option with the auto_failback option. THIS OBSOLETES THE NICE_FAILBACK OPTION. READ THE DOCS FOR HOW TO UPGRADE SMOOTHLY. + Changed IPaddr to not do ARPs in background, and shortened time between ARPs. Also made these things tunable... + changed our comm ttys to not become our controlling TTYs + Enhanced the ServeRAID script to fix a critical bug by using a new feature + Added a new DirectoryMap to CVS - tells where everything is... + significantly enhanced the BasicSanityCheck script, and the tests it calls. + added a new option to use a replacement poll function for improved real-time performance. + added the ability to have a cluster node's name be different from it's uname -n + Moved where CTS gets installed to /usr/lib/heartbeat/cts + Big improvements to the CTS README from IBM test labs in Austin. + bug fixes to the WTI NPS power switch + new client API calls: return arbitrary configuration parameters return current resource status + Added a new clplumbing function: mssleep() + added new capabilities for supporting pseudo-resources + added new messages which come out after initial takeover is done (improves CTS results) + LOTS of documentation updates. + fixed a security vulnerability + fixed a bug where heartbeat would shut down while in the middle of processing resource movement requests. + changed compilation flags to eliminate similar future security issues + went to even-more-strict gcc flags + fixed several "reload" bugs. Now reload works ;-) + fixed STONITH bug when other node never heard from. + Minor bug fixes (cleaned up corrupted message) + Two different client API bugs fixed. + changed the configure script to test which warning flags are supported by the current gcc. + enhanced the API test program to test new capabilities... * Wed May 21 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.1.1 + Significant restructuring of the processes in heartbeat + Added a new feature to hb_standby which allows you to give up any specific category of resources: local, foreign, or all. The old behavior is "all" which is the default. 
This allows you to put a nice_failback cluster into an active/active configuration + Enhancements to the ServeRAID code to make it work with the new (supported) version of IPSSEND from the ServeRAID folks... + Added STONITH code for the Dell remote access controller + Fixed a major bug which kept it from taking over correctly after 246 days or so + Fixed a major bug where heartbeat didn't lock itself into memory properly + Added new ping node timeout directive "deadping" + Made sure heartbeat preallocated stack and heap, and printed a message if we allocate heap once we're started up... + Minor heartbeat API bug fixes + Minor documentation fixes + Minor fix to allow IP addresses with /32 masks... + Fixed a timing window for !nice_failback resource acquisition + Added several CCM bug fixes + Made the APC smart UPS ups code compatible with more UPSes + Fixed a bug in respawn + Enhanced internal checking for malloc errors... + Added IP alias search optimization from Sean Reifscheneider * Wed Mar 19 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.0.2: + Fixed comment errors in heartbeat init script to allow it to run on RH 8.0 + Changed apphbd to use poll(2) instead of sigtimedwait(2) + Put missing files into tarball + Documentation improvements for IPaddr and other things + Fixed an error in hb_standby which kept it from working if releasing resources takes more than 10 seconds + Added a fix to allow heartbeat to run on systems without writable disk (like routers booting from CD-ROM) + Added configuration file for apphbd + Added fix from Adam Li to keep recoverymgr stop looping at high priority + Added fix to ServeRAID resource to make it work with (new) supported hardware + Added Delay resource script + Added fix to Filesystem to allow it to support NFS mounts and allow user to specify mount options + Added fix to IPaddr to make tmp directory for restoring loopback device + Added fix to ipcsocket code to deal correctly with EAGAIN when sending message body * Mon Feb 17 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.0.1: + Fixed some compile errors on different platforms, and library versions + Disable ccm from running on 'ping' nodes + Put in Steve Snodgrass' fix to send_arp to make it work on non-primary interfaces. * Thu Feb 13 2003 Alan Robertson (see doc/AUTHORS file) + Version 1.0.1 beta series 0.4.9g: + Changed default deadtime, warntime, and heartbeat interval + Auto* tool updates + VIP loopback fixes for IP address takeover + Various Solaris and FreeBSD fixes + added SNMP agent + Several CCM bug fixes + two new heartbeat API calls + various documentation fixes, including documentation for ipfail + Numerous minor cleanups. + Fixed a few bugs in the IPC code. + Fixed the (IPC) bug which caused apphbd to hang the whole machine. + Added a new IPC call (waitout) + Wrote a simple IPC test program. + Clarified several log messages. + Cleaned up the ucast communications plugin + Cleaned up for new C compilers + Fixed permissions bug in IPC which caused apphbd to not be usable by all + Added a new rtprio option to the heartbeat config file + updated apphbtest program + Changed ipfail to log things at same level heartbeat does * Sat Nov 30 2002 Alan Robertson (see doc/AUTHORS file) + Version 0.5 beta series (now renamed to 1.0.1 beta series). 
0.4.9f: + Added pre-start, pre-stop, post-stop and pre-stop constructs in init script + various IPC fixes + Fix to STONITH behavior: STONITH unresponsive node right after we reboot + Fixed extreme latency in IPC code + various configure.in cleanups + Fixed memory leak in IPC socket code + Added streamlined mainloop/IPC integration code + Moved more heartbeat internal communication to IPC library + Added further support for ipfail + Added supplementary groups to the respawn-ed clients + Added standby to init script actions + Lots of minor CCM fixes + Split (most) resource management code into a separate file. + Fixes to accommodate different versions of libraries + Heartbeat API client headers fixup + Added new API calls + Simplified (and fixed) handling of local status. This would sometimes cause obscure failures on startup. + Added new IPsrcaddr resource script KNOWN BUGS: + apphbd goes into an infinite loop on some platforms * Wed Oct 9 2002 Alan Robertson (see doc/AUTHORS file) 0.4.9e: + Changed client code to keep write file descriptor open at all times (realtime improvement) + Added a "poll replacement" function based on sigtimedwait(2), which should be faster for those cases that can use it. + Added a hb_warntime() call to the application heartbeat API. + Changed all times in the configuration file to be in milliseconds if specified with "ms" at the end. (seconds is still the default). + Fixes to serious security issue due to Nathan Wallwork + Changed read/write child processes to run as nobody. + Fixed a bug where ping packets are printed incorrectly when debugging. + Changed heartbeat code to preallocate a some heap space. + CCM daemon API restructuring + Added ipc_channel_pair() function to the IPC library. + Changed everything to use longclock_t instead of clock_t + Fixed a bug concerning the ifwalk() call on ping nodes in the API + Made apphbd run at high priority and locked into memory + Made a library for setting priority up. + Made ucast comm module at least be configurable and loadable. + Fixed a startup/shutdown timing problem. 0.4.9d: + removed an "open" call for /proc/loadavg (improve realtime behavior) + changed API code to not 1-char reads from clients + Ignored certain error conditions from API clients + fixed an obscure error message about trying to retransmit a packet which we haven't sent yet. This happens after restarts. + made the PILS libraries available in a separate package + moved the stonith headers to stonith/... when installed + improved debugging for NV failure cases... + updated AUTHORS file and simplified the changelog authorship (look in AUTHORS for the real story) + Added Ram Pai's CCM membership code + Added the application heartbeat code + Added the Kevin Dwyer's ipfail client code to the distribution + Many fixes for various tool versions and OS combinations. + Fixed a few bugs related to clients disconnecting. + Fixed some bugs in the CTS test code. + Added BasicSanityCheck script to tell if built objects look good. + Added PATH-like capabilities to PILS + Changed STONITH to use the new plugin system. + *Significantly* improved STONITH usage message (from Lorn Kay) + Fixed some bugs related to restarting. + Made exit codes more LSB-compliant. + Fixed various things so that ping nodes don't break takeovers. 0.4.9c and before: + Cluster partitioning now handled correctly (really!) + Complete rearchitecture of plugin system + Complete restructure of build system to use automake and port things to AIX, FreeBSD and solaris. 
+ Added Lclaudio's "standby" capability to put a node into standby mode on demand. + Added code to send out gratuitous ARP requests as well as gratuitous arp replies during IP address takeover. + Suppress stonith operations for nodes which went down gracefully. + Significantly improved real-time performance + Added new unicast heartbeat type. + Added code to make serial ports flush stale data on new connections. + The Famous CLK_TCK compile time fixes (really!) + Added a document which describes the heartbeat API + Changed the code which makes FIFOs to not try and make the FIFOs for named clients, and several other minor API client changes. + Fixed a fairly rare client API bug where it would shut down the client for no apparent reason. + Added stonith plugins for: apcmaster, apcmastersnmp switches, and ssh module (for test environments only) + Integrated support for the Baytech RPC-3 switch into baytech module + Fixes to APC UPS plugin + Got rid of "control_process: NULL message" message + Got rid of the "controlfifo2msg: cannot create message" message + Added -h option to give usage message for stonith command... + Wait for successful STONITH completion, and retry if its configured. + Sped up takeover code. + Several potential timing problems eliminated. + Cleaned up the shutdown (exit) code considerably. + Detect the death of our core child processes. + Changed where usage messages go depending on exit status from usage(). + Made some more functions static. + Real-time performance improvement changes + Updated the faqntips document + Added a feature to heartbeat.h so that log messages get checked as printf-style messages on GNU C compilers + Changed several log messages to have the right parameters (discovered as a result of the change above) + Numerous FreeBSD, Solaris and OpenBSD fixes. + Added backwards compatibility kludge for udp (versus bcast) + Queued messages to API clients instead of throwing them away. + Added code to send out messages when clients join, leave. + Added support for spawning and monitoring child clients. + Cleaned up error messages. + Added support for DB2, ServeRAID and WAS, LVM, and Apache (IBMhttp too), also ICP Vortex controller. + Added locking when creating new IP aliases. + Added a "unicast" media option. + Added a new SimulStart and standby test case. + Diddled init levels around... + Added an application-level heartbeat API. + Added several new "plumbing" subsystems (IPC, longclock_t, proctrack, etc.) + Added a new "contrib" directory. + Fixed serious (but trivial) bug in the process tracking code which caused it to exit heartbeat - this occured repeatably for STONITH operations. + Write a 'v' to the watchdog device to tell it not to reboot us when we close the device. + Various ldirectord fixes due to Horms + Minor patch from Lorn Kay to deal with loopback interfaces which might have been put in by LVS direct routing + Updated AUTHORS file and moved list of authors over * Fri Mar 16 2001 Alan Robertson + Version 0.4.9 + Split into 3 rpms - heartbeat, heartbeat-stonith heartbeat-ldirectord + Made media modules and authentication modules and stonith modules dynamically loadable. + Added Multicast media support + Added ping node/membership/link type for tiebreaking. This will be useful when implementing quorum on 2-node systems. 
(not yet compatible with nice_failback(?)) + Removed ppp support + Heartbeat client API support + Added STONITH API library + support for the Baytech RPC-3A power switch + support for the APCsmart UPS + support for the VACM cluster management tool + support for WTI RPS10 + support for Night/Ware RPC100S + support for "Meatware" (human intervention) module + support for "null" (testing only) module + Fixed startup timing bugs + Fixed shutdown sequence bugs: takeover occured before resources were released by other system + Fixed various logging bugs + Closed holes in protection against replay attacks + Added checks that complain if all resources aren't idle on startup. + IP address takeover fixes + Endian fixes + Removed the 8-alias limitation + Takeovers now occur faster (ARPs occur asynchronously) + Port number changes + Use our IANA port number (694) by default + Recognize our IANA port number ("ha-cluster") if it's in /etc/services + Moved several files, etc. from /var/run to /var/lib/heartbeat + Incorporated new ldirectord version + Added late heartbeat warning for late-arriving heartbeats + Added detection of and partial recovery from cluster partitions + Accept multiple arguments for resource scripts + Added Raid1 and Filesystem resource scripts + Added man pages + Added debian package support * Fri Jun 30 2000 Alan Robertson + Version 0.4.8 + Incorporated ldirectord version 1.9 (fixes memory leak) + Made the order of resource takeover more rational: Takeover is now left-to-right, and giveup is right-to-left + Changed the default port number to our official IANA port number (694) + Regularized more messages, eliminated some redundant ones. + Print the version of heartbeat when starting. + Print exhaustive version info when starting with debug on. + Hosts now have 3 statuses {down, up, active} active means that it knows that all its links are operational, and it's safe to send cluster messages + Significant revisions to nice_failback (mainly due to lclaudio) + More SuSE-compatibility. Thanks to Friedrich Lobenstock + Tidied up logging so it can be to files, to syslog or both (Horms) + Tidied up build process (Horms) + Updated ldirectord to produce and install a man page and be compatible with the fwmark options to The Linux Virtual Server (Horms) + Added log rotation for ldirectord and heartbeat using logrotate if it is installed + Added Audible Alarm resource by Kirk Lawson and myself (Horms) + Added init script for ldirectord so it can be run independently of heartbeat (Horms) + Added sample config file for ldirectord (Horms) + An empty /etc/ha.d/conf/ is now part of the rpm distribution as this is where ldirectord's configuration belongs (Horms) + Minor startup script tweaks. Hopefully, we should be able to make core files should we crash in the future. Thanks to Holger Kiehl for diagnosing the problem! + Fixed a bug which kept the "logfile" option from ever working. + Added a TestCluster test utility. Pretty primitive so far... + Fixed the serial locking code so that it unlocks when it shuts down. + Lock heartbeat into memory, and raise our priority + Minor, but important fix from lclaudio to init uninited variable. * Sat Dec 25 1999 Alan Robertson + Version 0.4.7 + Added the nice_failback feature. If the cluster is running when the primary starts it acts as a secondary. 
(Luis Claudio Goncalves) + Put in lots of code to make lost packet retransmission happen + Stopped trying to use the /proc/ha interface + Finished the error recovery in the heartbeat protocol (and got it to work) + Added test code for the heartbeat protocol + Raised the maximum length of a node name + Added Jacob Rief's ldirectord resource type + Added Stefan Salzer's fix for a 'grep' in IPaddr which wasn't specific enough and would sometimes get IPaddr confused on IP addresses that prefix-matched. + Added Lars Marowsky-Bree's suggestion to make the code almost completely robust with respect to jumping the clock backwards and forwards + Added code from Michael Moerz to keep findif from core dumping if /proc/route can't be read. * Mon Nov 22 1999 Alan Robertson + Version 0.4.6 + Fixed timing problem in "heartbeat restart" so it's reliable now + Made start/stop status compatible with SuSE expectations + Made resource status detection compatible with SuSE start/stop expectations + Fixed a bug relating to serial and ppp-udp authentication (it never worked) + added a little more substance to the error recovery for the HB protocol. + Fixed a bug for logging from shell scripts + Added a little logging for initial resource acquisition + Added #!/bin/sh to the front of shell scripts + Fixed Makefile, so that the build root wasn't compiled into pathnames + Turned on CTSRTS, enabling flow control for serial ports. + Fixed a bug which kept it from working in non-English environments * Wed Oct 13 1999 Alan Robertson + Version 0.4.5 + Mitja Sarp added a new feature to authenticate heartbeat packets using a variety of strong authentication techniques + Changed resource acquisition and relinquishment to occur in heartbeat, instead of in the start/stop script. This means you don't *really* have to use the start/stop script if you don't want to. + Added -k option to gracefully shut down current heartbeat instance + Added -r option to cause currently running heartbeat to reread config files + Added -s option to report on operational status of "heartbeat" + Sped up resource acquisition on master restart. + Added validation of ipresources file at startup time. + Added code to allow the IPaddr takeover script to be given the interface to take over, instead of inferring it. This was requested by Lars Marowsky-Bree + Incorporated patch from Guenther Thomsen to implement locking for serial ports used for heartbeats + Incorporated patch from Guenther Thomsen to clean up logging. (you can now use syslog and/or file logs) + Improved FreeBSD compatibility. + Fixed a bug where the FIFO doesn't get created correctly. + Fixed a couple of uninitialized variables in heartbeat and /proc/ha code + Fixed longstanding crash bug related to getting a SIGALRM while in malloc or free. + Implemented new memory management scheme, including memory stats * Thu Sep 16 1999 Alan Robertson + Version 0.4.4 + Fixed a stupid error in handling CIDR addresses in IPaddr. + Updated the documentation with the latest from Rudy. * Wed Sep 15 1999 Alan Robertson + Version 0.4.3 + Changed startup scripts to create /dev/watchdog if needed + Turned off loading of /proc/ha module by default. + Incorporated bug fix from Thomas Hepper to IPaddr for PPP configurations + Put in a fix from Gregor Howey where Gregor found that I had stripped off the ::resourceid part of the string in ResourceManager resulting in some bad calls later on. 
+ Made it compliant with the FHS (filesystem hierarchy standard) + Fixed IP address takeover so we can take over on non-eth0 interface + Fixed IP takeover code so we can specify netmasks and broadcast addrs, or default them at the user's option. + Added code to report on message buffer usage on SIGUSR[12] + Made SIGUSR1 increment debug level, and SIGUSR2 decrement it. + Incorporated Rudy's latest "Getting Started" document + Made it largely Debian-compliant. Thanks to Guenther Thomsen, Thomas Hepper, Iñaki Fernández Villanueva and others. + Made changes to work better with Red Hat 6.1, and SMP code. + Sometimes it seems that the Master Control Process dies :-( * Sat Aug 14 1999 Alan Robertson + Version 0.4.2 + Implemented simple resource groups + Implemented application notification for groups starting/stopping + Eliminated restriction on floating IPs only being associated with eth0 + Added a uniform resource model, with IP resources being only one kind. (Thanks to Lars Marowsky-Bree for a good suggestion) + Largely rewrote the IP address takeover code, making it clearer, fit into the uniform resource model, and removing some restrictions. + Preliminary "Getting Started" document by Rudy Pawul + Improved the /proc/ha code + Fixed memory leak associated with serial ports, and problem with return of control to the "master" node. (Thanks to Holger Kiehl for reporting them, and testing fixes!) * Tue Jul 6 1999 Alan Robertson + Version 0.4.1 + Fixed major memory leak in 0.4.0 (oops!) + Added code to eliminate duplicate packets and log lost ones + Tightened up PPP/UDP startup/shutdown code + Made PPP/UDP peacefully coexist with "normal" udp + Made logs more uniform and neater + Fixed several other minor bugs + Added very preliminary kernel code for monitoring and controlling heartbeat via /proc/ha. Very cool, but not really done yet. * Wed Jun 30 1999 Alan Robertson + Version 0.4.0 + Changed packet format from single line positional parameter style to a collection of {name,value} pairs. A vital change for the future. + Fixed some bugs with regard to forwarding data around rings + We now modify /etc/ppp/ip-up.local, so PPP-udp works out of the box (at least for Red Hat) + Includes the first version of Volker Wiegand's Hardware Installation Guide (it's pretty good for a first version!) * Wed Jun 09 1999 Alan Robertson + Version 0.3.2 + Added UDP/PPP bidirectional serial ring heartbeat (PPP ensures data integrity on the serial links) + fixed a stupid bug which caused shutdown to give unpredictable results + added timestamps to /var/log/ha-log messages + fixed a couple of other minor oversights. * Sun May 10 1999 Alan Robertson + Version 0.3.1 + Make ChangeLog file from RPM specfile + Made ipresources only install in the DOC directory as a sample * Sun May 09 1999 Alan Robertson + Version 0.3.0 + Added UDP broadcast heartbeat (courtesy of Tom Vogt) + Significantly restructured code making it easier to add heartbeat media + added new directives to config file: + udp interface-name + udpport port-number + baud serial-baud-rate + made manual daemon shutdown easier (only need to kill one) + moved the sample ha.cf file to the Doc directory * Sat Mar 27 1999 Alan Robertson + Version 0.2.0 + Make an RPM out of it + Integrated IP address takeover gotten from Horms + Added support to tickle a watchdog timer whenever our heart beats + Integrated enough basic code to allow a 2-node demo to occur + Integrated patches from Andrew Hildebrand to allow it to run under IRIX. 
- Known Bugs - Only supports 2-node clusters - Only supports a single IP interface per node in the cluster - Doesn't yet include Tom Vogt's ethernet heartbeat code - No documentation - Not very useful yet :-) ########################################################### Heartbeat-3-0-7e3a82377fa8/heartbeat-suse.spec0000644000000000000000000002744611576626513021004 0ustar00usergroup00000000000000# # spec file for package heartbeat (Version 2.99.3) # # Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed # upon. The license for this file, and modifications and additions to the # file, is the same license as for the pristine package itself (unless the # license for the pristine package is not an Open Source License, in which # case the license is the MIT License). An "Open Source License" is a # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. # Please submit bugfixes or comments via http://bugs.opensuse.org/ # # norootforbuild %define with_extra_warnings 0 %define without_fatal_warnings 1 %define start_at_boot 0 %define stop_start_script 0 %define SSLeay perl-Net-SSLeay %define gname haclient %define uname hacluster %global heartbeat_docdir %{_defaultdocdir}/%{name} %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} %define pkg_group System Environment/Daemons BuildRequires: cluster-glue-libs-devel %else %define pkg_group Productivity/Clustering/HA BuildRequires: libglue-devel %endif Name: heartbeat Summary: Messaging and membership subsystem for High-Availability Linux Version: 3.0.5 Release: 1%{?dist} License: GPL v2 only; LGPL v2.1 or later Url: http://linux-ha.org/ Group: Productivity/Clustering/HA Source: heartbeat.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-build Requires: /bin/ping perl-TimeDate resource-agents BuildRequires: e2fsprogs-devel glib2-devel iputils lynx python BuildRequires: libxslt docbook_4 docbook-xsl-stylesheets AutoReqProv: on Requires(pre): cluster-glue %if 0%{?fedora} Requires(post): /sbin/chkconfig Requires(preun):/sbin/chkconfig %endif %if 0%{?suse_version} BuildRequires: bison flex PreReq: %insserv_prereq %fillup_prereq Requires: logrotate %define SSLeay perl-Net_SSLeay %if 0%{?suse_version} >= 1100 BuildRequires: fdupes %endif %if 0%{?suse_version} == 930 BuildRequires: rpm-devel %endif %if 0%{?suse_version} == 1000 BuildRequires: lzo lzo-devel %endif %if 0%{?suse_version} > 1000 Suggests: pacemaker %endif %if 0%{?suse_version} < 1020 BuildRequires: tcpd-devel %endif %if 0%{?sles_version} == 9 BuildRequires: pkgconfig %endif %endif %if 0%{?rhel} == 406 BuildRequires: gcc-c++ %endif %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} Requires: which BuildRequires: which #Requires: pygtk2 >= 2.4 %if 0%{?fedora} > 7 BuildRequires: openssl-devel %endif %endif %if 0%{?mandriva_version} BuildRequires: libbzip2-devel %else # Hack to avoid needing to install a kernel on Mandriva just to build pacemaker Requires: iptables %endif #!BuildIgnore: -iptables %description Heartbeat is a daemon that provides cluster infrastructure (communication and membership) services to its clients. This allows clients to know about the presence (or disappearance!) of peer processes on other machines and to easily exchange messages with them. 
Reference documentation is available online: http://www.linux-ha.org/doc/ Extensive manual pages for system administration commands and configuration files are included. In order to be useful to users, the Heartbeat daemon needs to be combined with a cluster resource manager (CRM) which has the task of starting and stopping the services (IP addresses, web servers, etc.) that cluster will make highly available. Pacemaker is the preferred cluster resource manager for clusters based on Heartbeat, supporting "n-node" clusters with significant capabilities for managing resources and dependencies. In addition Heartbeat continues to support the legacy realease 1 style of 2-node clustering. It implements the following kinds of heartbeats: - Serial ports - UDP/IP multicast (ethernet, etc) - UDP/IP broadcast (ethernet, etc) - UDP/IP unicast heartbeats - "ping" heartbeats (for routers, switches, etc.) %package devel License: GPL v2 or later; LGPL v2.1 or later Summary: Heartbeat development package Group: Productivity/Clustering/HA Requires: %{name} = %{version}-%{release} Requires: libglue-devel %description devel Headers and shared libraries for writing programs for Heartbeat %prep ########################################################### %setup -n heartbeat %{?suse_update_config:%{suse_update_config -f}} ########################################################### %build # TODO: revisit -all CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}" # Feature-dependent CFLAGS: %if %with_extra_warnings # CFLAGS="${CFLAGS} -Wshadow -Wfloat-equal -Waggregate-return -Wnested-externs -Wunreachable-code -Wendif-labels -Winline" CFLAGS="${CFLAGS} -Wfloat-equal -Wendif-labels -Winline" %endif # Distribution specific settings: %if 0%{?suse_version} > 1001 CFLAGS="${CFLAGS} -fstack-protector-all" %endif %if 0%{?suse_version} > 1020 CFLAGS="$CFLAGS -fgnu89-inline" %endif %if 0%{?fedora} > 6 CFLAGS="$CFLAGS -fgnu89-inline" %endif export CFLAGS %if 0%{?suse_version} < 1020 export docdir=%{heartbeat_docdir} %endif ./ConfigureMe configure \ --libexecdir=%{_var} \ --libdir=%{_libdir} \ --sysconfdir=%{_sysconfdir} \ --mandir=%{_mandir} \ %if 0%{?suse_version} >= 1020 --docdir=%{heartbeat_docdir} \ %endif --with-group-name=%{gname} \ --with-ccmuser-name=%{uname} make %{?_smp_mflags} ########################################################### %install ########################################################### #make DESTDIR=$RPM_BUILD_ROOT install-strip make DESTDIR=$RPM_BUILD_ROOT install test -d $RPM_BUILD_ROOT/sbin || mkdir $RPM_BUILD_ROOT/sbin ( cd $RPM_BUILD_ROOT/sbin ln -s /etc/init.d/heartbeat rcheartbeat ) || true # Cleanup [ -d $RPM_BUILD_ROOT/usr/man ] && rm -rf $RPM_BUILD_ROOT/usr/man [ -d $RPM_BUILD_ROOT/usr/share/libtool ] && rm -rf $RPM_BUILD_ROOT/usr/share/libtool find $RPM_BUILD_ROOT -name '*.a' -type f -print0 | xargs -0 rm -f find $RPM_BUILD_ROOT -name '*.la' -type f -print0 | xargs -0 rm -f # EVMS2 is finally gone from 11.1 and later, no need to include # confusing RAs. 
%if 0%{?suse_version} > 1100 rm -f $RPM_BUILD_ROOT/usr/lib/ocf/resource.d/heartbeat/Evms* %endif ########################################################### %clean ########################################################### if [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] then rm -rf $RPM_BUILD_ROOT fi rm -rf $RPM_BUILD_DIR/heartbeat-%{version} ########################################################### %post /sbin/ldconfig %if %{start_at_boot} %if 0%{?suse_version} %{fillup_and_insserv -n heartbeat} %endif %if 0%{?fedora} /sbin/chkconfig --add heartbeat %endif %endif ########################################################### %if 0%{?suse_version} %preun %stop_on_removal heartbeat %endif %if 0%{?fedora} %preun %if %{stop_start_script} /sbin/chkconfig --del heartbeat %endif %endif ########################################################### %postun /sbin/ldconfig %if 0%{?suse_version} %if %{stop_start_script} %restart_on_update heartbeat %endif %{insserv_cleanup} %endif %files ########################################################### %defattr(-,root,root) %{_bindir}/cl_respawn %attr (2555, root, haclient) %{_bindir}/cl_status /sbin/rcheartbeat %{_libdir}/heartbeat/mlock %{_libdir}/heartbeat/plugins/HBauth %{_libdir}/heartbeat/plugins/HBcomm %{_libdir}/heartbeat/plugins/HBcompress %{_libdir}/heartbeat/plugins/quorum %{_libdir}/heartbeat/plugins/tiebreaker %{_libdir}/heartbeat/heartbeat %{_libdir}/heartbeat/ipfail %{_libdir}/heartbeat/ccm %{_libdir}/heartbeat/apphbd %{_libdir}/heartbeat/dopd %{_libdir}/heartbeat/drbd-peer-outdater %{_libdir}/libclm.so.* %{_libdir}/libhbclient.so.* %{_libdir}/libccmclient.so.* %{_libdir}/libapphb.so.* %{_datadir}/heartbeat/ResourceManager %{_datadir}/heartbeat/ha_config %{_datadir}/heartbeat/ha_propagate %{_datadir}/heartbeat/hb_addnode %{_datadir}/heartbeat/hb_delnode %{_datadir}/heartbeat/hb_setsite %{_datadir}/heartbeat/hb_setweight %{_datadir}/heartbeat/hb_standby %{_datadir}/heartbeat/hb_takeover %{_datadir}/heartbeat/mach_down %{_datadir}/heartbeat/req_resource %{_datadir}/doc/packages/heartbeat/apphbd.cf %{_sysconfdir}/ha.d %{_sysconfdir}/init.d/heartbeat %config(noreplace) %{_sysconfdir}/logrotate.d/heartbeat %dir %{_var}/run/heartbeat %dir %attr (0750, %{uname}, %{gname}) %{_var}/run/heartbeat/dopd %dir %{_var}/lib/heartbeat %dir %attr (0755, %{uname}, %{gname}) %{_var}/run/heartbeat/ccm %dir %{_libdir}/heartbeat %dir %{_libdir}/heartbeat/plugins %dir %{_datadir}/heartbeat %dir %{_datadir}/doc/packages/heartbeat %doc %{_datadir}/doc/packages/heartbeat/AUTHORS %doc %{_datadir}/doc/packages/heartbeat/COPYING %doc %{_datadir}/doc/packages/heartbeat/COPYING.LGPL %doc %{_mandir}/man1/cl_status.1* %doc %{_mandir}/man1/hb_addnode.1* %doc %{_mandir}/man1/hb_delnode.1* %doc %{_mandir}/man1/hb_standby.1* %doc %{_mandir}/man1/hb_takeover.1* %doc %{_mandir}/man5/ha.cf.5* %doc %{_mandir}/man5/authkeys.5* %doc %{_mandir}/man8/heartbeat.8* %doc %{_mandir}/man8/apphbd.8* %doc %{_datadir}/doc/packages/heartbeat/README %doc %{_datadir}/doc/packages/heartbeat/authkeys %doc %{_datadir}/doc/packages/heartbeat/haresources %doc %{_datadir}/doc/packages/heartbeat/ChangeLog %doc %{_datadir}/doc/packages/heartbeat/ha.cf ########################################################### %files devel %defattr(-,root,root) #%doc %{_datadir}/doc/%{name}-%{version} %{_includedir}/saf/ %{_includedir}/ocf/ %{_includedir}/heartbeat/hb_api.h %dir %{_includedir}/heartbeat %{_includedir}/heartbeat/apphb.h %{_includedir}/heartbeat/apphb_notify.h %{_includedir}/heartbeat/HBauth.h 
%{_includedir}/heartbeat/HBcomm.h %{_includedir}/heartbeat/hb_config.h %{_includedir}/heartbeat/heartbeat.h %{_libdir}/libclm*.so %{_libdir}/libapphb*.so %{_libdir}/libhbclient*.so %{_libdir}/libccmclient*.so %{_libdir}/heartbeat/clmtest %{_libdir}/heartbeat/api_test %{_libdir}/heartbeat/apphbtest %{_libdir}/heartbeat/ccm_testclient %{_datadir}/heartbeat/BasicSanityCheck %{_datadir}/heartbeat/TestHeartbeatComm %exclude %{_datadir}/heartbeat/cts %changelog * Thu Jun 16 2011 Lars Ellenberg - 3.0.5-1 - do not request retransmission of lost messages from dead members - fix segfault due to recursion in api_remove_client_pid - properly cleanup pending delayed rexmit requests before reset of seqtrack - create HA_RSCTMP on start, if necessary - improve detection of pacemaker clusters in init script * Tue Nov 30 2010 Lars Ellenberg - 3.0.4-1 - better support for Pacemaker >= 1.1 - say Pacemaker support, not "v2", favor "pacemaker on" in ha.cf - fix message rexmit request logic, it could cause rexmit packet storms - increase ccm ipc message queue length - new mcast6 UDP IPv6 communication plugin - improve some log messages - drop headers which are now in glue - fixed/dropped some package dependencies - fixed/dropped some build dependencies - new proof-of-concept-only known-to-be-broken RDS communication plugin * Wed Apr 14 2010 Lars Ellenberg - 3.0.3-1 - added /var/run/* directory permission paranoia to init script - added SBD and lrmadmin configuration support to init script - drop libnet dependency * Thu Feb 04 2010 Lars Ellenberg - 3.0.2-2 - changed dopd socket location again to its own subdirectory, made sure the init script will create that directory with appropriate permissions * Mon Feb 01 2010 Lars Ellenberg - 3.0.2-1 - New upstream release * Sat Dec 19 2009 Florian Haas - 3.0.2-0rc2 - New upstream RC * Fri Dec 11 2009 Florian Haas - 3.0.2-0rc1 - New upstream RC - Fix docdir for legacy distributions - Use _smp_mflags macro Heartbeat-3-0-7e3a82377fa8/heartbeat/Makefile.am0000644000000000000000000000506411576626513021177 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl hadir = $(sysconfdir)/ha.d habindir = @bindir@ halibdir = $(libdir)/@HB_PKG@ commmoddir = $(halibdir)/modules/comm havarlibdir = $(localstatedir)/lib/@HB_PKG@ havarrundir = $(localstatedir)/run havarrunhbdir = $(localstatedir)/run/@HB_PKG@ # fifos with path hafifo = $(havarlibdir)/fifo apigid = @HA_APIGID@ gliblib = @GLIBLIB@ LIBRT = @LIBRT@ AM_CFLAGS = @CFLAGS@ ## script subdirs SUBDIRS = init.d lib logrotate.d rc.d noinst_HEADERS = hb_config.h \ hb_module.h \ hb_proc.h \ hb_resource.h \ hb_signal.h \ heartbeat_private.h \ test.h ## binary progs halib_PROGRAMS = heartbeat ## SOURCES heartbeat_SOURCES = heartbeat.c auth.c \ config.c \ ha_msg_internal.c hb_api.c hb_resource.c \ hb_signal.c module.c hb_uuid.c hb_rexmit.c heartbeat_LDADD = -lstonith \ -lpils \ -lplumb \ -lplumbgpl \ $(top_builddir)/lib/apphb/libapphb.la \ $(top_builddir)/replace/libreplace.la \ $(gliblib) $(LIBRT) heartbeat_LDFLAGS = @LIBADD_DL@ @LIBLTDL@ -export-dynamic @DLOPEN_FORCE_FLAGS@ heartbeat_CFLAGS = $(AM_CFLAGS) ## SCRIPTS/DATA ha_DATA = README.config ha_SCRIPTS = harc EXTRA_DIST = $(ha_DATA) $(ha_SCRIPTS) ## additional Makefile targets # additional installations not covered normally install-exec-local: $(mkinstalldirs) $(DESTDIR)$(havarrundir) $(mkinstalldirs) $(DESTDIR)$(havarlibdir) $(mkinstalldirs) $(DESTDIR)$(havarrunhbdir) -chmod 755 $(DESTDIR)$(havarrunhbdir) uninstall-local: -test -p $(DESTDIR)$(hafifo) && rm $(DESTDIR)$(hafifo) rm -rf $(DESTDIR)$(havarrunhbdir) Heartbeat-3-0-7e3a82377fa8/heartbeat/README.config0000644000000000000000000000126411576626513021265 0ustar00usergroup00000000000000You need three configuration files to make heartbeat happy, and they all go in this directory. They are: ha.cf Main configuration file haresources Resource configuration file authkeys Authentication information These first two may be readable by everyone, but the authkeys file must not be. The good news is that sample versions of these files may be found in the documentation directory (providing you installed the documentation). If you installed heartbeat using rpm packages then this command will show you where they are on your system: rpm -q heartbeat -d If you installed heartbeat using Debian packages then the documentation should be located in /usr/share/doc/heartbeat Heartbeat-3-0-7e3a82377fa8/heartbeat/auth.c0000644000000000000000000001551111576626513020246 0ustar00usergroup00000000000000/* * auth.c: Authentication code for heartbeat * * Copyright (C) 1999,2000 Mitja Sarp * Somewhat mangled by Alan Robertson * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
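 *
 * The file parsed by parse_authfile() below is the cluster's authkeys
 * file (KEYFILE).  A minimal sketch of its layout -- assuming the usual
 * sha1 authentication plugin is available; the key shown is only a
 * placeholder -- looks like:
 *
 *	auth 1
 *	1 sha1 SomeSharedSecret
 *
 * "auth N" selects which numbered method signs outgoing packets; each
 * numbered line names an HBauth plugin and, for plugins whose needskey()
 * returns true, its shared key.  As enforced below, the file must not be
 * group- or world-readable (mode 600 is recommended).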
*/ #include #define time FOOtime #include #undef time #include #include #include #include #include #include #include #include #include #include struct HBAuthOps * findauth(const char * type, char ** tptr); unsigned char result[MAXLINE]; extern GHashTable* AuthFunctions; extern PILPluginUniv* PluginLoadingSystem; struct HBAuthOps * findauth(const char * type, char ** tptr) { struct HBAuthOps* ret; /* Look and see if we already have the module loaded in memory */ if (!g_hash_table_lookup_extended(AuthFunctions, type , (gpointer*) tptr, (gpointer) &ret)) { PIL_rc rc; /* Nope. Load it now. */ rc = PILLoadPlugin(PluginLoadingSystem, HB_AUTH_TYPE_S , type, NULL); if (rc != PIL_OK) { ha_log(LOG_ERR, "LoadPlugin on %s returned %d: %s" , type, rc, PIL_strerror(rc)); } if (!g_hash_table_lookup_extended(AuthFunctions, type , (gpointer) tptr, (gpointer)&ret)) { ha_log(LOG_ERR, "Lookup extended#2 returned FALSE for %s" , type); ha_log(LOG_ERR, "Table size: %d" , g_hash_table_size(AuthFunctions)); ret = NULL; } } return ret; } /* * Set authentication method and key. * Open and parse the keyfile. */ int parse_authfile(void) { FILE * f; char buf[MAXLINE]; char method[MAXLINE]; char key[MAXLINE]; int i; int src; int rc = HA_OK; int authnum = -1; struct stat keyfilestat; static int ParsedYet = 0; int j; if (ANYDEBUG) { ha_log(LOG_DEBUG , "Beginning authentication parsing"); } if (ANYDEBUG) { ha_log(LOG_DEBUG , "%d max authentication methods", MAXAUTH); } if ((f = fopen(KEYFILE, "r")) == NULL) { ha_log(LOG_ERR, "Cannot open keyfile [%s]. Stop." , KEYFILE); return(HA_FAIL); } if (ANYDEBUG) { ha_log(LOG_DEBUG, "Keyfile opened"); } if (fstat(fileno(f), &keyfilestat) < 0 || keyfilestat.st_mode & (S_IROTH | S_IRGRP)) { ha_log(LOG_ERR, "Bad permissions on keyfile" " [%s], 600 recommended.", KEYFILE); fclose(f); return(HA_FAIL); } if (ANYDEBUG) { ha_log(LOG_DEBUG, "Keyfile perms OK"); } config->auth_time = keyfilestat.st_mtime; config->rereadauth = 0; /* Allow for us to reread the file without restarting... */ config->authmethod = NULL; config->authnum = -1; if (ANYDEBUG) { ha_log(LOG_DEBUG , "%d max authentication methods", MAXAUTH); } /* * We reload modules more than necessary. * * In an ideal world, we wouldn't unload something unless * it became unreferenced. But this is kind of a pain. * We could make a list of the current modules * cross compare it against the new set, but it's kind * of a pain. 
* * At least we don't load every auth module, then unload those * we find out we don't need - like the old code did ;-) * * (clean) patches are being accepted ;-) */ for (j=0; j < MAXAUTH; ++j) { if (ParsedYet) { if (config->auth_config[j].auth) { /* Unload this auth module */ PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S , config->auth_config[j].authname, -1); } if (config->auth_config[j].key) { free(config->auth_config[j].key); } } config->auth_config[j].auth = NULL; config->auth_config[j].authname = NULL; config->auth_config[j].key=NULL; } ParsedYet=1; while(fgets(buf, MAXLINE, f) != NULL) { char * bp = buf; struct HBAuthOps * at; bp += strspn(bp, WHITESPACE); if (*bp == COMMENTCHAR || *bp == EOS) { continue; } if (*bp == 'a') { if ((src=sscanf(bp, "auth %d", &authnum)) != 1) { ha_log(LOG_ERR , "Invalid auth line [%s] in " KEYFILE , buf); rc = HA_FAIL; } /* Parsing of this line now complete */ continue; } key[0] = EOS; if ((src=sscanf(bp, "%d%s%s", &i, method, key)) >= 2) { char * cpkey; char * permname; if (ANYDEBUG) { ha_log(LOG_DEBUG , "Found authentication method [%s]" , method); } if ((i < 0) || (i >= MAXAUTH)) { ha_log(LOG_ERR, "Invalid authnum [%d] in " KEYFILE, i); PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S, method, -1); rc = HA_FAIL; continue; } if ((at = findauth(method, &permname)) == NULL) { ha_log(LOG_ERR, "Invalid authtype [%s]" , method); PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S, method, -1); rc = HA_FAIL; continue; } if (strlen(key) > 0 && !at->needskey()) { ha_log(LOG_INFO , "Auth method [%s] doesn't use a key" , method); PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S, method, -1); rc = HA_FAIL; continue; } if (strlen(key) == 0 && at->needskey()) { ha_log(LOG_ERR , "Auth method [%s] requires a key" , method); PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S, method, -1); rc = HA_FAIL; continue; } cpkey = strdup(key); if (cpkey == NULL) { ha_log(LOG_ERR, "Out of memory for authkey"); fclose(f); PILIncrIFRefCount(PluginLoadingSystem , HB_AUTH_TYPE_S, method, -1); return(HA_FAIL); } config->auth_config[i].key = cpkey; config->auth_config[i].auth = at; config->auth_config[i].authname = permname; if (ANYDEBUG) { ha_log(LOG_INFO , "AUTH: i=%d: key = 0x%0lx" ", auth=0x%0lx, authname=%s", i , (unsigned long)cpkey , (unsigned long)at , permname); } if (i == authnum) { config->authnum = i; config->authmethod = config->auth_config+i; if (ANYDEBUG) { ha_log(LOG_DEBUG , "Outbound signing method is %d" , i); } } }else if (*bp != EOS) { ha_log(LOG_ERR, "Auth line [%s] is invalid." , buf); rc = HA_FAIL; } } fclose(f); if (!config->authmethod) { if (authnum < 0) { ha_log(LOG_ERR , "Missing auth directive in keyfile [%s]" , KEYFILE); }else{ ha_log(LOG_ERR , "Auth Key [%d] not found in keyfile [%s]" , authnum, KEYFILE); } rc = HA_FAIL; } if (ANYDEBUG) { ha_log(LOG_DEBUG , "Authentication parsing complete [%d]", rc); } return(rc); } Heartbeat-3-0-7e3a82377fa8/heartbeat/config.c0000644000000000000000000020541311576626513020554 0ustar00usergroup00000000000000/* * Parse various heartbeat configuration files... * * Copyright (C) 2000 Alan Robertson * portions (c) 1999,2000 Mitja Sarp * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DIRTYALIASKLUDGE void dellist_destroy(void); int dellist_add(const char* nodename); const char *get_hg_version(void); static int set_cluster_name(const char * value); static int add_normal_node(const char *); static int set_hopfudge(const char *); static int set_keepalive_ms(const char *); static int set_deadtime_ms(const char *); static int set_deadping_ms(const char *); static int set_initial_deadtime_ms(const char *); static int set_watchdogdev(const char *); static int set_baudrate(const char *); static int set_udpport(const char *); static int set_facility(const char *); static int set_logfile(const char *); static int set_dbgfile(const char *); static int set_nice_failback(const char *); static int set_auto_failback(const char *); static int set_warntime_ms(const char *); static int set_stonith_info(const char *); static int set_stonith_host_info(const char *); static int set_realtime_prio(const char *); static int add_client_child(const char *); static int add_failfast_child(const char *); static int set_compression(const char *); static int set_compression_threshold(const char *); static int set_traditional_compression(const char *); static int set_env(const char *); static int set_max_rexmit_delay(const char *); static int set_generation_method(const char *); static int set_realtime(const char *); static int set_debuglevel(const char *); static int set_api_authorization(const char *); static int set_msgfmt(const char*); static int set_logdaemon(const char*); static int set_logdconntime(const char *); static int set_register_to_apphbd(const char *); static int set_badpack_warn(const char*); static int set_coredump(const char*); static int set_corerootdir(const char*); static int set_release2mode(const char*); static int set_pcmk_support(const char*); static int set_autojoin(const char*); static int set_uuidfrom(const char*); static int ha_config_check_boolean(const char *); static int set_memreserve(const char *); static int set_quorum_server(const char * value); static int set_syslog_logfilefmt(const char * value); #ifdef ALLOWPOLLCHOICE static int set_normalpoll(const char *); #endif void hb_set_max_rexmit_delay(int); /* * Each of these parameters is is automatically recorded by * SetParameterValue(). They are then passed to the plugins * for their use later. This avoids coupling through global * variables. 
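 *
 * A purely illustrative example of that flow (the directive and value
 * below are examples, not required settings): an ha.cf line such as
 *
 *	keepalive 2000ms
 *
 * is matched against Directives[] by add_option(), handled by its
 * add_func (set_keepalive_ms), and, because record_value is TRUE,
 * also recorded via SetParameterValue(KEY_KEEPALIVE, "2000ms") so the
 * plugins can fetch it later with GetParameterValue().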
*/ struct directive { const char * name; int (*add_func) (const char *); int record_value; const char * defaultvalue; const char * explanation; }Directives[] = { {KEY_CLUSTER, set_cluster_name, TRUE, "linux-ha", "the name of cluster"} , {KEY_HOST, add_normal_node, FALSE, NULL, NULL} , {KEY_HOPS, set_hopfudge, TRUE, "1", "# of hops above cluster size"} , {KEY_KEEPALIVE, set_keepalive_ms, TRUE, "1000ms", "keepalive time"} , {KEY_DEADTIME, set_deadtime_ms, TRUE, "30000ms", "node deadtime"} , {KEY_DEADPING, set_deadping_ms, TRUE, NULL, "ping deadtime"} , {KEY_INITDEAD, set_initial_deadtime_ms, TRUE, NULL, "initial deadtime"} , {KEY_WARNTIME, set_warntime_ms, TRUE, NULL, "warning time"} , {KEY_WATCHDOG, set_watchdogdev, TRUE, NULL, "watchdog device"} , {KEY_BAUDRATE, set_baudrate, TRUE, "19200", "baud rate"} , {KEY_UDPPORT, set_udpport, TRUE, NULL, "UDP port number"} , {KEY_FACILITY, set_facility, TRUE, NULL, "syslog log facility"} , {KEY_LOGFILE, set_logfile, TRUE, NULL, "log file"} , {KEY_DBGFILE, set_dbgfile, TRUE, NULL, "debug file"} , {KEY_FAILBACK, set_nice_failback, FALSE, NULL, NULL} , {KEY_AUTOFAIL, set_auto_failback, TRUE, "legacy","auto failback"} , {KEY_RT_PRIO, set_realtime_prio, TRUE, NULL, "realtime priority"} , {KEY_GEN_METH, set_generation_method, TRUE, "file", "protocol generation computation method"} , {KEY_REALTIME, set_realtime, TRUE, "true", "enable realtime behavior?"} , {KEY_DEBUGLEVEL,set_debuglevel, TRUE, NULL, "debug level"} #ifdef ALLOWPOLLCHOICE , {KEY_NORMALPOLL,set_normalpoll, TRUE, "true", "Use system poll(2) function?"} #endif , {KEY_MSGFMT, set_msgfmt, TRUE, "classic", "message format in the wire"} , {KEY_LOGDAEMON, set_logdaemon, TRUE, NULL, "use logging daemon"} , {KEY_CONNINTVAL,set_logdconntime, TRUE, "60", "the interval to reconnect to logd"} , {KEY_REGAPPHBD, set_register_to_apphbd, FALSE, NULL, "register with apphbd"} , {KEY_BADPACK, set_badpack_warn, TRUE, "true", "warn about bad packets"} , {KEY_SYSLOGFMT, set_syslog_logfilefmt, TRUE, "true", "log to files in syslog format"} , {KEY_COREDUMP, set_coredump, TRUE, "true", "enable Linux-HA core dumps"} , {KEY_COREROOTDIR,set_corerootdir, TRUE, NULL, "set root directory of core dump area"} , {KEY_REL2, set_release2mode, FALSE, NULL, "historical alias for '"KEY_PACEMAKER"'"} , {KEY_PACEMAKER, set_pcmk_support, TRUE, "false", "enable Pacemaker resource management"} , {KEY_AUTOJOIN, set_autojoin, TRUE, "none" , "set automatic join mode/style"} , {KEY_UUIDFROM, set_uuidfrom, TRUE, "file" , "set the source for uuid"} ,{KEY_COMPRESSION, set_compression, TRUE ,"zlib", "set compression module"} ,{KEY_COMPRESSION_THRESHOLD, set_compression_threshold, TRUE, "2", "set compression threshold"} ,{KEY_TRADITIONAL_COMPRESSION, set_traditional_compression, TRUE, "no", "set traditional_compression"} ,{KEY_ENV, set_env, FALSE, NULL, "set environment variable for respawn clients"} ,{KEY_MAX_REXMIT_DELAY, set_max_rexmit_delay, TRUE,"250", "set the maximum rexmit delay time"} ,{KEY_LOG_CONFIG_CHANGES, ha_config_check_boolean, TRUE,"on", "record changes to the cib (valid only with: "KEY_PACEMAKER" on)"} ,{KEY_LOG_PENGINE_INPUTS, ha_config_check_boolean, TRUE,"on", "record the input used by the policy engine (valid only with: "KEY_PACEMAKER" on)"} ,{KEY_CONFIG_WRITES_ENABLED, ha_config_check_boolean, TRUE,"on", "write configuration changes to disk (valid only with: "KEY_PACEMAKER" on)"} ,{KEY_MEMRESERVE, set_memreserve, TRUE, "6500", "number of kbytes to preallocate in heartbeat"} ,{KEY_QSERVER,set_quorum_server, TRUE, NULL, "the 
name or ip of quorum server"} }; static const struct WholeLineDirective { const char * type; int (*parse) (const char *line); }WLdirectives[] = { {KEY_STONITH, set_stonith_info} , {KEY_STONITHHOST, set_stonith_host_info} , {KEY_APIPERM, set_api_authorization} , {KEY_CLIENT_CHILD, add_client_child} , {KEY_FAILFAST, add_failfast_child} }; extern const char * cmdname; extern int parse_only; extern struct hb_media* sysmedia[MAXMEDIA]; extern struct sys_config * config; extern struct sys_config config_init_value; extern volatile struct pstat_shm * procinfo; extern volatile struct process_info * curproc; extern char * watchdogdev; extern int nummedia; extern int nice_failback; extern int auto_failback; extern int DoManageResources; extern int hb_realtime_prio; extern PILPluginUniv* PluginLoadingSystem; extern GHashTable* CommFunctions; extern GHashTable* CompressFuncs; GHashTable* APIAuthorization = NULL; extern struct node_info * curnode; extern int timebasedgenno; int enable_realtime = TRUE; extern int debug_level; int netstring_format = FALSE; extern int UseApphbd; GSList* del_node_list; static int islegaldirective(const char *directive); static int parse_config(const char * cfgfile, char *nodename); static int add_option(const char * option, const char * value); int num_hb_media_types; static gboolean any_media_statements_yet = FALSE; struct hb_media_fns** hbmedia_types; #ifdef IRIX void setenv(const char *name, const char * value, int); #endif const char *get_hg_version(void) { /* limit this #define's use to a single file to avoid * rebuilding more than necessary */ return HA_HG_VERSION; } static void check_logd_usage(int* errcount) { const char* value; int truefalse = FALSE; /*we set uselogd to TRUE here so the next message can be logged*/ value = GetParameterValue(KEY_LOGDAEMON); if (value != NULL){ if(cl_str_to_boolean(value, &truefalse) == HA_FAIL){ cl_log(LOG_ERR, "cl_str_to_boolean failed[%s]", value); (*errcount)++; return; } } if (*(config->logfile) == EOS && *(config->dbgfile) == EOS && config->log_facility <= 0){ cl_log_set_uselogd(TRUE); if (value == NULL){ cl_log(LOG_INFO, "No log entry found in ha.cf -- use logd"); add_option(KEY_LOGDAEMON,"yes"); return; } if (truefalse == FALSE){ (*errcount)++; cl_log(LOG_ERR, "No log entry found in ha.cf " "and use_logd is set to off"); return; } }else if (value == NULL || truefalse == FALSE){ cl_log(LOG_WARNING, "Logging daemon is disabled --" "enabling logging daemon is recommended"); }else{ cl_log(LOG_WARNING, "logd is enabled but %s%s%s is still" " configured in ha.cf", config->logfile?"logfile":"", config->dbgfile?"/debugfile":"", config->log_facility > 0?"/logfacility":"" ); } } static gboolean r1_style_valid(void) { /* we cannot set autojoin to HB_JOIN_ANY or HB_JOIN_OTHER * in R1 style */ if (!DoManageResources){ return TRUE; } if (config->rtjoinconfig == HB_JOIN_NONE){ return TRUE; } cl_log(LOG_ERR, "R1 style resource management conflicts with " " autojoin set"); cl_log(LOG_ERR, "You need either unset autojoin or enable crm"); return FALSE; } /* * Read in and validate the configuration file. * Act accordingly. 
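 *
 * A brief worked example of the derived defaults computed below
 * (numbers are illustrative and follow the checks in this function):
 * with keepalive 1000ms and deadtime 30000ms, warntime defaults to
 * deadtime/2 (15000ms), deadping defaults to deadtime (30000ms), and
 * initdead defaults to deadtime (30000ms) since deadtime exceeds 10s.
 * deadtime itself must be more than twice keepalive, or the
 * configuration is rejected.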
*/ int init_config(const char * cfgfile) { int errcount = 0; int j; int err; /* * 'Twould be good to move this to a shared memory segment * Then we could share this information with others */ /* config = (struct sys_config *)calloc(1 , sizeof(struct sys_config)); */ memset(&config_init_value, 0, sizeof(config_init_value)); config = &config_init_value; if (config == NULL) { ha_log(LOG_ERR, "Heartbeat not started: " "Out of memory during configuration"); return(HA_FAIL); } config->format_vers = 100; config->heartbeat_ms = 1000; config->deadtime_ms = 30000; config->initial_deadtime_ms = -1; config->deadping_ms = -1; config->hopfudge = 1; config->log_facility = -1; config->client_list = NULL; config->last_client = NULL; config->uuidfromname = FALSE; curnode = NULL; if (!parse_config(cfgfile, localnodename)) { err = errno; ha_log(LOG_ERR, "Heartbeat not started: configuration error."); errno=err; return(HA_FAIL); } if (parse_authfile() != HA_OK) { err = errno; ha_log(LOG_ERR, "Authentication configuration error."); errno=err; return(HA_FAIL); } if (config->log_facility >= 0) { cl_log_set_entity(cmdname); cl_log_set_facility(config->log_facility); } /* Set any "fixed" defaults */ for (j=0; j < DIMOF(Directives); ++j) { if (!Directives[j].defaultvalue || GetParameterValue(Directives[j].name)) { continue; } add_option(Directives[j].name, Directives[j].defaultvalue); } if (GetParameterValue(KEY_DEBUGLEVEL) == NULL) { char debugstr[10]; snprintf(debugstr, sizeof(debugstr), "%d", debug_level); add_option(KEY_DEBUGLEVEL, debugstr); } if (nummedia < 1) { ha_log(LOG_ERR, "No heartbeat media defined"); ++errcount; } if (config->warntime_ms <= 0) { char tmp[32]; config->warntime_ms = config->deadtime_ms/2; snprintf(tmp, sizeof(tmp), "%ldms", config->warntime_ms); SetParameterValue(KEY_WARNTIME, tmp); } /* We should probably complain if there aren't at least two... */ if (config->nodecount < 1 && config->rtjoinconfig != HB_JOIN_ANY) { ha_log(LOG_ERR, "no nodes defined"); ++errcount; } if (config->authmethod == NULL) { ha_log(LOG_ERR, "No authentication specified."); ++errcount; } if (access(HOSTUUIDCACHEFILE, R_OK) >= 0) { if (read_cache_file(config) != HA_OK) { cl_log(LOG_ERR , "Invalid host/uuid map file [%s] - removed." , HOSTUUIDCACHEFILE); if (unlink(HOSTUUIDCACHEFILE) < 0) { cl_perror("unlink(%s) failed" , HOSTUUIDCACHEFILE); } } write_cache_file(config); } if ((curnode = lookup_node(localnodename)) == NULL) { if (config->rtjoinconfig == HB_JOIN_ANY) { add_normal_node(localnodename); curnode = lookup_node(localnodename); ha_log(LOG_NOTICE, "Current node [%s] added to configuration." , localnodename); write_cache_file(config); }else{ ha_log(LOG_ERR, "Current node [%s] not in configuration!" 
, localnodename); ha_log(LOG_INFO, "By default, cluster nodes are named" " by `uname -n` and must be declared with a 'node'" " directive in the ha.cf file."); ha_log(LOG_INFO, "See also: " HAURL("Ha.cf#node_directive")); ++errcount; } } setenv(CURHOSTENV, localnodename, 1); if (config->deadtime_ms <= 2 * config->heartbeat_ms) { ha_log(LOG_ERR , "Dead time [%ld] is too small compared to keeplive [%ld]" , config->deadtime_ms, config->heartbeat_ms); ++errcount; } if (config->initial_deadtime_ms < 0) { char tmp[32]; if (config->deadtime_ms > 10000) { config->initial_deadtime_ms = config->deadtime_ms; }else{ if (config->deadtime_ms < 6000) { config->initial_deadtime_ms = 12000; }else{ config->initial_deadtime_ms = 2 * config->deadtime_ms; } } snprintf(tmp, sizeof(tmp), "%ldms" , config->initial_deadtime_ms); SetParameterValue(KEY_INITDEAD, tmp); } /* Check deadtime parameters */ if (config->initial_deadtime_ms < config->deadtime_ms) { ha_log(LOG_ERR , "Initial dead time [%ld] is smaller than" " deadtime [%ld]" , config->initial_deadtime_ms, config->deadtime_ms); ++errcount; }else if (config->initial_deadtime_ms < 10000) { ha_log(LOG_WARNING, "Initial dead time [%ld ms] may be too small!" , config->initial_deadtime_ms); ha_log(LOG_INFO , "Initial dead time accounts for slow network startup time"); ha_log(LOG_INFO , "It should be >= deadtime and >= 10 seconds"); } if (config->deadping_ms < 0 ){ char tmp[32]; config->deadping_ms = config->deadtime_ms; snprintf(tmp, sizeof(tmp), "%ldms", config->deadping_ms); SetParameterValue(KEY_DEADPING, tmp); }else if (config->deadping_ms <= 2 * config->heartbeat_ms) { ha_log(LOG_ERR , "Ping dead time [%ld] is too small" " compared to keeplive [%ld]" , config->deadping_ms, config->heartbeat_ms); ++errcount; } if (GetParameterValue(KEY_UDPPORT) == NULL) { struct servent* service; int tmpudpport; char tmp[32]; /* If our service name is in /etc/services, then use it */ if ((service=getservbyname(HA_SERVICENAME, "udp")) != NULL){ tmpudpport = ntohs(service->s_port); }else{ tmpudpport = UDPPORT; } snprintf(tmp, (sizeof(tmp)-1), "%d", tmpudpport); SetParameterValue(KEY_UDPPORT, tmp); } if (!nice_failback && DoManageResources) { ha_log(LOG_WARNING , "Deprecated 'legacy' auto_failback option selected."); ha_log(LOG_WARNING , "Please convert to 'auto_failback on'."); ha_log(LOG_WARNING , "See documentation for conversion details."); } if (*(config->logfile) == EOS) { if (config->log_facility > 0) { /* * Set to DEVNULL in case a stray script outputs logs */ strncpy(config->logfile, DEVNULL , sizeof(config->logfile)); config->use_logfile=0; } } if (*(config->dbgfile) == EOS) { if (config->log_facility > 0) { /* * Set to DEVNULL in case a stray script outputs errors */ strncpy(config->dbgfile, DEVNULL , sizeof(config->dbgfile)); config->use_dbgfile=0; } } check_logd_usage(&errcount); if ( !r1_style_valid()){ errcount++; } if (!RestartRequested && errcount == 0 && !parse_only) { ha_log(LOG_INFO, "**************************"); ha_log(LOG_INFO, "Configuration validated." " Starting heartbeat %s", VERSION); } for (j=0; j < config->nodecount; ++j) { config->nodes[j].has_resources = DoManageResources; if (config->nodes[j].nodetype == PINGNODE_I) { config->nodes[j].dead_ticks = msto_longclock(config->deadping_ms); }else{ config->nodes[j].dead_ticks = msto_longclock(config->deadtime_ms); } } if (errcount == 0 && DoManageResources) { init_resource_module(); } return(errcount ? 
HA_FAIL : HA_OK); } static void init_node_link_info(struct node_info * node) { longclock_t cticks = time_longclock(); int j; if (node->nodetype == PINGNODE_I) { node->nlinks = 1; for (j=0; j < nummedia; j++) { struct link *lnk = &node->links[0]; if (!sysmedia[j]->vf->isping() || strcmp(node->nodename , sysmedia[j]->name) != 0) { continue; } lnk->name = node->nodename; lnk->lastupdate = cticks; strncpy(lnk->status, DEADSTATUS , sizeof(lnk->status)); lnk[1].name = NULL; break; } return; } node->nlinks = 0; for (j=0; j < nummedia; j++) { int nc = node->nlinks; struct link *lnk = &node->links[nc]; if (sysmedia[j]->vf->isping()) { continue; } lnk->name = sysmedia[j]->name; lnk->lastupdate = cticks; strncpy(lnk->status, DEADSTATUS, sizeof(lnk->status)); lnk[1].name = NULL; ++node->nlinks; } } #if 0 /* * This code does _not_ (permanently) affect the value of nummedia * This can be seen as an advantage, or a disadvantage ;-) * This */ static int create_medium(const char * directive, const char * optionstring, int mediaslot) { struct hb_media* mp = NULL; int retval=1; char* type; char* descr; struct hb_media_fns* funs; /* Load the medium plugin if its not already loaded... */ if ((funs=g_hash_table_lookup(CommFunctions, directive)) == NULL) { if (PILPluginExists(PluginLoadingSystem , HB_COMM_TYPE_S, directive) == PIL_OK) { PIL_rc rc; if ((rc = PILLoadPlugin(PluginLoadingSystem , HB_COMM_TYPE_S, directive, NULL)) != PIL_OK) { ha_log(LOG_ERR, "Cannot load comm" " plugin %s [%s]", directive , PIL_strerror(rc)); } funs=g_hash_table_lookup(CommFunctions , directive); } }else{ PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, directive, +1); } if ((funs=g_hash_table_lookup(CommFunctions, directive)) == NULL) { return -1; } if (funs->new != NULL) { mp = funs->new(optionstring); if (mp) { sysmedia[mediaslot]=mp; } }else if (funs->parse) { int savenummedia = nummedia; nummedia=mediaslot; if (funs->parse(optionstring) == HA_OK) { mp=NULL; }else{ mp=sysmedia[mediaslot]; nummedia=savenummedia; retval = (nummedia > mediaslot)? 1 : -1; } } funs->descr(&descr); funs->mtype(&type); if (mp == NULL) { ha_log(LOG_ERR, "Illegal %s [%s] in config file [%s]" , type, descr, optionstring); PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, directive, -1); /* By default, PILS modules use g_malloc and g_free */ g_free(descr); descr = NULL; g_free(type); type = NULL; return -1; } mp->vf = funs; mp->type = type; mp->description = descr; g_assert(mp->type); g_assert(mp->description); g_assert(mp->type[0] != '('); g_assert(mp->description[0] != '('); if (!mp->name) { mp->name = strdup(directive); } PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, directive, +1); } #endif /* * Parse the configuration file and stash away the data */ static int parse_config(const char * cfgfile, char *nodename) { FILE * f; char buf[MAXLINE]; char * cp; char directive[MAXLINE]; size_t dirlength; char option[MAXLINE]; size_t optionlength; int errcount = 0; int j; int i; struct stat sbuf; struct DefServices { const char * name; const char * authspec; } defserv[] = { {"ipfail", "uid=" HA_CCMUSER} , {"ccm", "uid=" HA_CCMUSER} , {"ping", "gid=" HA_APIGROUP} , {"lha-snmpagent","uid=root"} , {"anon", "gid=" HA_APIGROUP} }; if ((f = fopen(cfgfile, "r")) == NULL) { ha_log(LOG_ERR, "Cannot open config file [%s]", cfgfile); ha_log(LOG_INFO , "An annotated sample %s file is provided in" " the documentation." , cfgfile); ha_log(LOG_INFO , "Please copy it to %s, read it, customize it" ", and try again." 
, cfgfile); return(HA_FAIL); } APIAuthorization = g_hash_table_new(g_str_hash, g_str_equal); fstat(fileno(f), &sbuf); config->cfg_time = sbuf.st_mtime; /* It's ugly, but effective */ while (fgets(buf, MAXLINE, f) != NULL) { char * bp = buf; int IsOptionDirective=1; struct hb_media_fns* funs = NULL; /* Skip over white space */ bp += strspn(bp, WHITESPACE); /* Zap comments on the line */ if ((cp = strchr(bp, COMMENTCHAR)) != NULL) { *cp = EOS; } /* Strip '\n' and '\r' chars */ if ((cp = strpbrk(bp, CRLF)) != NULL) { *cp = EOS; } /* Ignore blank (and comment) lines */ if (*bp == EOS) { continue; } /* Now we expect a directive name */ dirlength = strcspn(bp, WHITESPACE); strncpy(directive, bp, dirlength); directive[dirlength] = EOS; #ifdef DIRTYALIASKLUDGE if (strcmp(directive, "udp") == 0) { ha_log(LOG_WARNING , "WARNING: directive 'udp' replaced by 'bcast'"); strncpy(directive, "bcast", sizeof("bcast")); } #endif if (!islegaldirective(directive)) { ha_log(LOG_ERR, "Illegal directive [%s] in %s" , directive, cfgfile); ++errcount; continue; } bp += dirlength; /* Skip over Delimiters */ bp += strspn(bp, DELIMS); /* Load the medium plugin if its not already loaded... */ if ((funs=g_hash_table_lookup(CommFunctions, directive)) == NULL) { if (PILPluginExists(PluginLoadingSystem , HB_COMM_TYPE_S, directive) == PIL_OK) { PIL_rc rc; if ((rc = PILLoadPlugin(PluginLoadingSystem , HB_COMM_TYPE_S, directive, NULL)) != PIL_OK) { ha_log(LOG_ERR, "Cannot load comm" " plugin %s [%s]", directive , PIL_strerror(rc)); continue; } funs=g_hash_table_lookup(CommFunctions , directive); } }else{ PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, directive, +1); } /* Check first for whole line media-type directives */ if (funs && funs->parse) { int num_save = nummedia; IsOptionDirective=0; if (funs->parse(bp) != HA_OK) { PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, directive, -1); errcount++; *bp = EOS; /* Stop parsing now */ continue; } sysmedia[num_save]->vf = funs; if(!sysmedia[num_save]->name) { char * pname = strdup(bp); sysmedia[num_save]->name = pname; } funs->mtype(&sysmedia[num_save]->type); funs->descr(&sysmedia[num_save]->description); g_assert(sysmedia[num_save]->type); g_assert(sysmedia[num_save]->description); any_media_statements_yet = TRUE; *bp = EOS; } /* Check for "parse" type (whole line) directives */ for (j=0; j < DIMOF(WLdirectives); ++j) { if (WLdirectives[j].parse == NULL) { continue; } if (strcmp(directive, WLdirectives[j].type) == 0) { IsOptionDirective=0; if (WLdirectives[j].parse(bp) != HA_OK) { errcount++; } *bp = EOS; any_media_statements_yet = TRUE; } } /* Now Check for the options-list stuff */ while (IsOptionDirective && *bp != EOS) { optionlength = strcspn(bp, DELIMS); strncpy(option, bp, optionlength); option[optionlength] = EOS; bp += optionlength; if (add_option(directive, option) != HA_OK) { errcount++; } /* Skip over Delimiters */ bp += strspn(bp, DELIMS); } } /* Provide default authorization information for well-known services */ for (i=0; i < DIMOF(defserv); ++i) { char buf[100]; /* Allow users to override our defaults... */ if (g_hash_table_lookup(APIAuthorization, defserv[i].name) == NULL) { snprintf(buf, sizeof(buf), "%s %s" , defserv[i].name , defserv[i].authspec); set_api_authorization(buf); } } for (i=0; i < config->nodecount; ++i) { /* * We need to re-do this now, after all the * media directives were parsed. */ init_node_link_info(&config->nodes[i]); } fclose(f); return(errcount ? 
HA_FAIL : HA_OK); } /* * Dump the configuration file - as a configuration file :-) * * This does not include every directive at this point. */ void dump_config(void) { int j; struct node_info * hip; printf("#\n# Linux-HA heartbeat configuration (on %s)\n#\n" , localnodename); printf("\n#---------------------------------------------------\n"); printf("#\n# HA configuration and status\n#\n"); for (j=0; j < DIMOF(Directives); ++j) { const char * v; if (!Directives[j].record_value || (v = GetParameterValue(Directives[j].name)) == NULL) { continue; } printf("%s\t%s", Directives[j].name, v); if (Directives[j].explanation) { printf("\t#\t%s", Directives[j].explanation); } printf("\n"); } printf("#\n"); printf("#\tHA Cluster nodes:\n"); printf("#\n"); for (j=0; j < config->nodecount; ++j) { hip = &config->nodes[j]; printf("%s %s\t#\t current status: %s\n" , KEY_HOST , hip->nodename , hip->status); } printf("#\n"); printf("#\tCommunications media:\n"); for(j=0; j < nummedia; ++j) { g_assert(sysmedia[j]->type); g_assert(sysmedia[j]->description); puts("#"); printf("# %s heartbeat channel -------------\n" , sysmedia[j]->description); printf("%s %s\n", sysmedia[j]->type , sysmedia[j]->name); } printf("#---------------------------------------------------\n"); } /* * Dump the default configuration file values for those directives that * have them * * This does not include every directive at this point. */ void dump_default_config(int wikiout) { int j, k, lmaxlen = 0, cmaxlen = 0, rmaxlen = 0; const char * dashes = "----------------------------------------" "----------------------------------------"; const char * lcolhdr = "Directive"; const char * ccolhdr = "Default"; const char * rcolhdr = "Description"; /* First determine max name lens to help make things look nice */ for (j=0; j < DIMOF(Directives); ++j) { struct directive * pdir = &Directives[j]; if (pdir->defaultvalue != NULL) { if ((k = strlen(pdir->name)) > lmaxlen) { lmaxlen = k; } if ((k = strlen(pdir->defaultvalue)) > cmaxlen) { cmaxlen = k; } if ((pdir->explanation != NULL) && ((k = strlen(pdir->explanation)) > rmaxlen)) { rmaxlen = k; } } } /* Don't do anything if there are no default values */ if (!lmaxlen) { printf("There are no default values for ha.cf directives\n"); return; } if (wikiout) { printf("##Put this output in the ha.cf/DefaultValues" " page\n"); printf("The [wiki:ha.cf ha.cf] directives with default" " values are shown below - along with a brief description.\n"); printf("This was produced by {{{heartbeat -DW}}}" " ''# (version %s)''\n\n" , VERSION); printf("||\'\'%s\'\'||\'\'%s\'\'||\'\'%s\'\'||\n" , lcolhdr, ccolhdr, rcolhdr); for (j=0; j < DIMOF(Directives); ++j) { char WikiName[lmaxlen+1]; char * pch; if (Directives[j].defaultvalue) { strcpy(WikiName, Directives[j].name); WikiName[0] = toupper(WikiName[0]); /* wiki convention is to remove underscores, slide chars to left, and capitalize */ while ((pch = strchr(WikiName, '_')) != NULL) { char *pchplus1 = pch + 1; *pch = toupper(*pchplus1); while (*pchplus1) { *++pch = *++pchplus1; } } printf("||[wiki:ha.cf/%sDirective" " %s]||%s||%s||\n" , WikiName , Directives[j].name , Directives[j].defaultvalue , Directives[j].explanation ? 
Directives[j].explanation : ""); } } } else { if ((k = strlen(lcolhdr)) > lmaxlen) { lmaxlen = k; } if ((k = strlen(ccolhdr)) > cmaxlen) { cmaxlen = k; } if ((k = strlen(rcolhdr)) > rmaxlen) { rmaxlen = k; } printf("%-*.*s %-*.*s %s\n", lmaxlen, lmaxlen, lcolhdr , cmaxlen, cmaxlen, ccolhdr, rcolhdr); /* this 4 comes from the pair of 2 blanks between columns */ printf("%-*.*s\n", (int)sizeof(dashes) , lmaxlen + cmaxlen + rmaxlen + 4, dashes); for (j=0; j < DIMOF(Directives); ++j) { if (Directives[j].defaultvalue) { printf("%-*.*s %-*.*s %s\n" , lmaxlen, lmaxlen , Directives[j].name , cmaxlen, cmaxlen , Directives[j].defaultvalue , Directives[j].explanation ? Directives[j].explanation : ""); } } } } /* * Check the /etc/ha.d/haresources file * * All we check for now is the set of node names. * * It would be good to check the resource names, too... * * And for that matter, to compute an md5 checksum of the haresources * file so we can complain if they're different. */ int parse_ha_resources(const char * cfgfile) { char buf[MAXLINE]; struct stat sbuf; int rc = HA_OK; FILE * f; if ((f = fopen(cfgfile, "r")) == NULL) { ha_log(LOG_ERR, "Cannot open resources file [%s]", cfgfile); ha_log(LOG_INFO , "An annotated sample %s file is provided in the" " documentation.", cfgfile); ha_log(LOG_INFO , "Please copy it to %s, read it, customize it" ", and try again." , cfgfile); return(HA_FAIL); } fstat(fileno(f), &sbuf); config->rsc_time = sbuf.st_mtime; while (fgets(buf, MAXLINE-1, f) != NULL) { char * bp = buf; char * endp; char token[MAXLINE]; /* Skip over white space */ bp += strspn(bp, WHITESPACE); if (*bp == COMMENTCHAR) { continue; } if (*bp == EOS) { continue; } endp = bp + strcspn(bp, WHITESPACE); strncpy(token, bp, endp - bp); token[endp-bp] = EOS; if (lookup_node(token) == NULL) { ha_log(LOG_ERR, "Bad nodename in %s [%s]", cfgfile , token); rc = HA_FAIL; break; } /* * FIXME: Really ought to deal with very long lines * correctly. */ while (buf[strlen(buf)-2]=='\\') { if (fgets(buf, MAXLINE-1, f)==NULL) break; } } fclose(f); return(rc); } /* * Is this a legal directive name? */ static int islegaldirective(const char *directive) { int j; /* * We have four kinds of directives to deal with: * * 1) Builtin directives which are keyword value value value... * "Directives[]" * 2) Builtin directives which are one per line... * WLdirectives[] * 3) media declarations which are media value value value * These are dynamically loaded plugins... * of type HBcomm * 4) media declarations which are media rest-of-line * These are dynamically loaded plugins... 
* of type HBcomm * */ for (j=0; j < DIMOF(Directives); ++j) { if (DEBUGDETAILS) { ha_log(LOG_DEBUG , "Comparing directive [%s] against [%s]" , directive, Directives[j].name); } if (strcmp(directive, Directives[j].name) == 0) { return(HA_OK); } } for (j=0; j < DIMOF(WLdirectives); ++j) { if (DEBUGDETAILS) { ha_log(LOG_DEBUG , "Comparing directive [%s] against WLdirective[%s]" , directive, WLdirectives[j].type); } if (strcmp(directive, WLdirectives[j].type) == 0) { return(HA_OK); } } if (PILPluginExists(PluginLoadingSystem, HB_COMM_TYPE_S, directive) == PIL_OK){ return HA_OK; } return(HA_FAIL); } /* * Add the given option/value pair to the configuration */ static int add_option(const char * option, const char * value) { int j; struct hb_media_fns* funs = NULL; if (ANYDEBUG) { ha_log(LOG_DEBUG, "add_option(%s,%s)", option, value); } for (j=0; j < DIMOF(Directives); ++j) { if (strcmp(option, Directives[j].name) == 0) { int rc; rc = ((*Directives[j].add_func)(value)); if (rc == HA_OK && Directives[j].record_value) { SetParameterValue(option, value); } return rc; } } if ((funs=g_hash_table_lookup(CommFunctions, option)) != NULL && funs->new != NULL) { struct hb_media* mp = funs->new(value); char* type; char* descr; funs->descr(&descr); funs->mtype(&type); if (nummedia >= MAXMEDIA) { cl_log(LOG_ERR, "Too many media specified (> %d)" , MAXMEDIA); cl_log(LOG_INFO, "Offending command: %s %s" , option, value); return HA_FAIL; } sysmedia[nummedia] = mp; if (mp == NULL) { ha_log(LOG_ERR, "Illegal %s [%s] in config file [%s]" , type, descr, value); PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, option, -1); /* Does this come from malloc? FIXME!! */ g_free(descr); descr = NULL; g_free(type); type = NULL; return(HA_FAIL); }else{ mp->type = type; mp->description = descr; g_assert(mp->type); g_assert(mp->description); g_assert(mp->type[0] != '('); g_assert(mp->description[0] != '('); mp->vf = funs; if (!mp->name) mp->name = strdup(value); ++nummedia; PILIncrIFRefCount(PluginLoadingSystem , HB_COMM_TYPE_S, option, +1); } g_assert(sysmedia[nummedia-1]->type); g_assert(sysmedia[nummedia-1]->description); return(HA_OK); } ha_log(LOG_ERR, "Illegal configuration directive [%s]", option); return(HA_FAIL); } void dellist_destroy(void){ GSList* list = del_node_list; while (list != NULL){ free(list->data); list->data=NULL; list= list->next; } g_slist_free(del_node_list); del_node_list = NULL; return; } static void dellist_append(struct node_info* hip) { struct node_info* dup_hip; dup_hip = malloc(sizeof(struct node_info)); if (dup_hip == NULL){ cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); return; } memcpy(dup_hip, hip, sizeof(struct node_info)); del_node_list = g_slist_append(del_node_list, dup_hip); } int dellist_add(const char* nodename){ struct node_info node; int i; for (i=0; i < config->nodecount; i++){ if (strncmp(nodename, config->nodes[i].nodename,HOSTLENG) == 0){ dellist_append(&config->nodes[i]); return HA_OK; } } memset(&node, 0, sizeof(struct node_info)); strncpy(node.nodename, nodename, HOSTLENG); dellist_append(&node); return HA_OK; } static gint dellist_match(gconstpointer data, gconstpointer nodename) { const struct node_info* node = (const struct node_info*) data; if (data == NULL){ /* the list is empty,i.e. 
not found*/ return 1; } return strncasecmp(node->nodename,nodename, HOSTLENG); } void remove_from_dellist( const char* nodename) { GSList* listitem; listitem = g_slist_find_custom(del_node_list, nodename, dellist_match); if (listitem!= NULL){ if (listitem->data){ free(listitem->data); } del_node_list = g_slist_delete_link(del_node_list, listitem); } return; } /* * For reliability reasons, we should probably require nodename * to be in /etc/hosts, so we don't lose our mind if (when) DNS goes out... * This would also give us an additional sanity check for the config file. * * This is only the administrative interface, whose IP address never moves * around. */ /* Process a node declaration */ int add_node(const char * value, int nodetype) { struct node_info * hip; if (config->nodecount >= MAXNODE) { return(HA_FAIL); } remove_from_dellist(value); hip = &config->nodes[config->nodecount]; memset(hip, 0, sizeof(*hip)); ++config->nodecount; strncpy(hip->status, INITSTATUS, sizeof(hip->status)); strncpy(hip->nodename, value, sizeof(hip->nodename)); g_strdown(hip->nodename); cl_uuid_clear(&hip->uuid); hip->rmt_lastupdate = 0L; hip->has_resources = TRUE; hip->anypacketsyet = 0; hip->local_lastupdate = time_longclock(); hip->track.nmissing = 0; hip->track.last_seq = NOSEQUENCE; hip->track.ackseq = 0; hip->weight = 100; /* srand() done in init_config() already, * and probably still too many places throughout the code */ hip->track.ack_trigger = rand()%ACK_MSG_DIV; hip->nodetype = nodetype; add_nametable(hip->nodename, hip); init_node_link_info(hip); if (nodetype == PINGNODE_I) { hip->dead_ticks = msto_longclock(config->deadping_ms); }else{ hip->dead_ticks = msto_longclock(config->deadtime_ms); } return(HA_OK); } int set_node_weight(const char* value, int weight) { int i; struct node_info * hip = NULL; if (value == NULL){ cl_log(LOG_ERR, "%s: invalid nodename", __FUNCTION__); return HA_FAIL; } for (i = 0; i < config->nodecount; i++){ hip = &config->nodes[i]; if (strncasecmp(hip->nodename, value, sizeof(hip->nodename)) ==0){ break; } } if (i == config->nodecount){ cl_log(LOG_DEBUG,"set weight to non-existing node %s", value); return HA_FAIL; } hip->weight = weight; return HA_OK; } int set_node_site(const char* value, const char* site) { int i; struct node_info * hip = NULL; if (value == NULL){ cl_log(LOG_ERR, "%s: invalid nodename", __FUNCTION__); return HA_FAIL; } for (i = 0; i < config->nodecount; i++){ hip = &config->nodes[i]; if (strncasecmp(hip->nodename, value, sizeof(hip->nodename)) ==0){ break; } } if (i == config->nodecount){ cl_log(LOG_DEBUG,"set site to non-existing node %s", value); return HA_FAIL; } strncpy(hip->site, site, sizeof(hip->site)); return HA_OK; } int remove_node(const char* value, int deletion) { int i; struct node_info * hip = NULL; int j; if (value == NULL){ cl_log(LOG_ERR, "%s: invalid nodename", __FUNCTION__); return HA_FAIL; } for (i = 0; i < config->nodecount; i++){ hip = &config->nodes[i]; if (strncasecmp(hip->nodename, value, sizeof(hip->nodename)) ==0){ break; } } if (i == config->nodecount){ if (deletion){ cl_log(LOG_DEBUG,"Adding node(%s) to deletion list", value); dellist_add(value); } return HA_OK; } if (STRNCMP_CONST(hip->status, DEADSTATUS) != 0 && STRNCMP_CONST(hip->status, INITSTATUS) != 0){ cl_log(LOG_ERR, "%s: node %s is %s. 
Cannot remove alive node", __FUNCTION__, value, hip->status); return HA_FAIL; } if (deletion){ cl_log(LOG_DEBUG,"Adding this node to deletion list"); dellist_append(hip); } for (j = i; j < config->nodecount; j++){ memcpy(&config->nodes[j], &config->nodes[j + 1], sizeof(config->nodes[0])); } config->nodecount -- ; tables_remove(hip->nodename, &hip->uuid); curnode = lookup_node(localnodename); if (!curnode){ cl_log(LOG_ERR, "localnode not found"); } return(HA_OK); } /* Set the name of cluster */ static int set_cluster_name(const char * value) { strncpy(config->cluster, value, PATH_MAX); return(HA_OK); } /* Set the quorum server */ static int set_quorum_server(const char * value) { strncpy(config->cluster, value, PATH_MAX); strncpy(config->quorum_server, value, PATH_MAX); return(HA_OK); } /* Process a node declaration */ static int add_normal_node(const char * value) { return add_node(value, NORMALNODE_I); } /* Set the hopfudge variable */ static int set_hopfudge(const char * value) { config->hopfudge = atoi(value); if (config->hopfudge >= 0 && config->hopfudge < 256) { return(HA_OK); } return(HA_FAIL); } /* Set the keepalive time */ static int set_keepalive_ms(const char * value) { config->heartbeat_ms = cl_get_msec(value); if (config->heartbeat_ms > 0) { return(HA_OK); } return(HA_FAIL); } /* Set the dead timeout */ static int set_deadtime_ms(const char * value) { config->deadtime_ms = cl_get_msec(value); if (config->deadtime_ms >= 0) { return(HA_OK); } return(HA_FAIL); } /* Set the dead ping timeout */ static int set_deadping_ms(const char * value) { config->deadping_ms = cl_get_msec(value); if (config->deadping_ms >= 0) { return(HA_OK); } return(HA_FAIL); } /* Set the initial dead timeout */ static int set_initial_deadtime_ms(const char * value) { config->initial_deadtime_ms = cl_get_msec(value); if (config->initial_deadtime_ms >= 0) { return(HA_OK); } return(HA_FAIL); } /* Set the watchdog device */ static int set_watchdogdev(const char * value) { if (watchdogdev != NULL) { fprintf(stderr, "%s: Watchdog device multiply specified.\n" , cmdname); return(HA_FAIL); } if ((watchdogdev = strdup(value)) == NULL) { fprintf(stderr, "%s: Out of memory for watchdog device\n" , cmdname); return(HA_FAIL); } return(HA_OK); } int StringToBaud(const char * baudstr) { int baud; baud = atoi(baudstr); switch(baud) { case 9600: return B9600; case 19200: return B19200; #ifdef B38400 case 38400: return B38400; #endif #ifdef B57600 case 57600: return B57600; #endif #ifdef B115200 case 115200: return B115200; #endif #ifdef B230400 case 230400: return B230400; #endif #ifdef B460800 case 460800: return B460800; #endif default: return 0; } } /* * All we do here is *validate* the baudrate. * This parameter is automatically recorded by SetParameterValue() * for later use by the plugins. */ static int set_baudrate(const char * value) { static int baudset = 0; int serial_baud = 0; if (baudset) { fprintf(stderr, "%s: Baudrate multiply specified.\n" , cmdname); return(HA_FAIL); } ++baudset; serial_baud = StringToBaud(value); if (serial_baud <= 0) { fprintf(stderr, "%s: invalid baudrate [%s] specified.\n" , cmdname, value); return(HA_FAIL); } if (any_media_statements_yet) { fprintf(stderr , "%s: baudrate setting must precede media statements" , cmdname); } return(HA_OK); } /* * All we do here is *validate* the udpport number. * This parameter is automatically recorded by SetParameterValue() * for later use by the plugins. 
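 *
 * For illustration only: an ha.cf line such as
 *	udpport 694
 * is validated here (694/udp is the conventional ha-cluster port).
 * When no udpport directive is present, init_config() falls back to
 * the HA_SERVICENAME entry in /etc/services, or else to the
 * compiled-in UDPPORT default.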
*/ static int set_udpport(const char * value) { int port = atoi(value); struct servent* service; if (port <= 0) { fprintf(stderr, "%s: invalid port [%s] specified.\n" , cmdname, value); return(HA_FAIL); } /* Make sure this port isn't reserved for something else */ if ((service=getservbyport(htons(port), "udp")) != NULL) { if (strcmp(service->s_name, HA_SERVICENAME) != 0) { ha_log(LOG_WARNING , "%s: udp port %s reserved for service \"%s\"." , cmdname, value, service->s_name); } } endservent(); fprintf(stderr , "%s: udpport setting must precede media statements" , cmdname); return(HA_OK); } /* set syslog facility config variable */ static int set_facility(const char * value) { int i; i = cl_syslogfac_str2int(value); if (i >= 0) { config->log_facility = i; strncpy(config->facilityname, value, sizeof(config->facilityname)-1); config->facilityname[sizeof(config->facilityname)-1] = EOS; cl_log_set_facility(config->log_facility); return(HA_OK); }else { ha_log(LOG_ERR, "Log facility(%s) not valid", value); return(HA_FAIL); } } /* set syslog facility config variable */ static int set_dbgfile(const char * value) { strncpy(config->dbgfile, value, PATH_MAX); cl_log_set_debugfile(config->dbgfile); config->use_dbgfile=1; return(HA_OK); } /* set syslog facility config variable */ static int set_logfile(const char * value) { strncpy(config->logfile, value, PATH_MAX); cl_log_set_logfile(config->logfile); config->use_logfile=1; return(HA_OK); } /* sets nice_failback behavior on/off */ static int set_nice_failback(const char * value) { int rc; int failback = 0; rc = cl_str_to_boolean(value, &failback); cl_log(LOG_ERR, "nice_failback flag is obsolete." ". Use auto_failback {on, off, legacy} instead."); if (rc) { if (nice_failback) { cl_log(LOG_ERR , "'%s %s' has been changed to '%s off'" , KEY_FAILBACK, value, KEY_AUTOFAIL); set_auto_failback("off"); }else{ cl_log(LOG_ERR , "%s %s has been strictly interpreted as" " '%s legacy'" , KEY_FAILBACK, value, KEY_AUTOFAIL); cl_log(LOG_ERR , "Consider converting to '%s on'." 
, KEY_AUTOFAIL); cl_log(LOG_ERR , "When you do, then you can use ipfail" ", and hb_standby"); set_auto_failback("legacy"); } } cl_log(LOG_ERR, "See documentation for details."); return rc; } /* sets auto_failback behavior on/off */ static int set_auto_failback(const char * value) { int rc; rc = cl_str_to_boolean(value, &auto_failback); if (rc == HA_FAIL) { if (strcasecmp(value, "legacy") == 0) { nice_failback = FALSE; auto_failback = FALSE; rc = HA_OK; } }else{ nice_failback = TRUE; } return rc; } static int set_register_to_apphbd(const char * value) { return cl_str_to_boolean(value, &UseApphbd); } /* Set warntime interval */ static int set_warntime_ms(const char * value) { long warntime; warntime = cl_get_msec(value); if (warntime <= 0) { fprintf(stderr, "Warn time [%s] is invalid.\n", value); return(HA_FAIL); } config->warntime_ms = warntime; return(HA_OK); } /* * Set Stonith information * * Expect a line that looks like: * stonith * */ static int set_stonith_info(const char * value) { const char * vp = value; const char * evp; Stonith * s; char StonithType [MAXLINE]; char StonithFile [MAXLINE]; size_t tlen; int rc; vp += strspn(vp, WHITESPACE); tlen = strcspn(vp, WHITESPACE); evp = vp + tlen; if (tlen < 1) { ha_log(LOG_ERR, "No Stonith type given"); return(HA_FAIL); } if (tlen >= sizeof(StonithType)) { ha_log(LOG_ERR, "Stonith type too long"); return(HA_FAIL); } strncpy(StonithType, vp, tlen); StonithType[tlen] = EOS; if ((s = stonith_new(StonithType)) == NULL) { ha_log(LOG_ERR, "Invalid Stonith type [%s]", StonithType); return(HA_FAIL); } vp = evp + strspn(evp, WHITESPACE); if (sscanf(vp, "%[^\r\n]", StonithFile) <= 0) { }; switch ((rc=stonith_set_config_file(s, StonithFile))) { case S_OK: /* This will have to change to a list !!! */ config->stonith = s; stonith_get_status(s); return(HA_OK); case S_BADCONFIG: ha_log(LOG_ERR, "Invalid Stonith config file [%s]" , StonithFile); break; default: ha_log(LOG_ERR, "Unknown Stonith config error [%s] [%d]" , StonithFile, rc); break; } return(HA_FAIL); } /* * Set Stonith information * * Expect a line that looks like: * stonith_host * */ static int set_stonith_host_info(const char * value) { const char * vp = value; /* points to the current token */ const char * evp; /* points to the next token */ Stonith * s; char StonithType [MAXLINE]; char StonithHost [HOSTLENG]; size_t tlen; int rc; vp += strspn(vp, WHITESPACE); tlen = strcspn(vp, WHITESPACE); /* Save the pointer just past the hostname field */ evp = vp + tlen; /* Grab the hostname */ if (tlen < 1) { ha_log(LOG_ERR, "No Stonith hostname argument given"); return(HA_FAIL); } if (tlen >= sizeof(StonithHost)) { ha_log(LOG_ERR, "Stonith hostname too long"); return(HA_FAIL); } strncpy(StonithHost, vp, tlen); StonithHost[tlen] = EOS; g_strdown(StonithHost); /* Verify that this host is valid to create this stonith * object. 
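 * (Hypothetical sample, the plugin name and parameters are illustrative
 * and plugin specific: a line such as
 *	stonith_host * baytech 10.0.0.1 mylogin mysecretpass
 * would be accepted on every node because of the '*' wildcard.)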
Expect the hostname listed to match this host or '*' */ if (strcmp ("*", StonithHost) != 0 && strcmp (localnodename, StonithHost)) { /* This directive is not valid for this host */ return HA_OK; } /* Grab the next field */ vp = evp + strspn(evp, WHITESPACE); tlen = strcspn(vp, WHITESPACE); /* Save the pointer just past the stonith type field */ evp = vp + tlen; /* Grab the stonith type */ if (tlen < 1) { ha_log(LOG_ERR, "No Stonith type given"); return(HA_FAIL); } if (tlen >= sizeof(StonithType)) { ha_log(LOG_ERR, "Stonith type too long"); return(HA_FAIL); } strncpy(StonithType, vp, tlen); StonithType[tlen] = EOS; if ((s = stonith_new(StonithType)) == NULL) { ha_log(LOG_ERR, "Invalid Stonith type [%s]", StonithType); return(HA_FAIL); } /* Grab the parameters list */ vp = evp; vp += strspn(vp, WHITESPACE); switch ((rc=stonith_set_config_info(s, vp))) { case S_OK: /* This will have to change to a list !!! */ config->stonith = s; stonith_get_status(s); return(HA_OK); case S_BADCONFIG: ha_log(LOG_ERR , "Invalid Stonith configuration parameter [%s]" , evp); break; default: ha_log(LOG_ERR , "Unknown Stonith config error parsing [%s] [%d]" , evp, rc); break; } return(HA_FAIL); } static int set_realtime_prio(const char * value) { #ifdef _POSIX_PRIORITY_SCHEDULING int foo; foo = atoi(value); if ( foo < sched_get_priority_min(SCHED_FIFO) || foo > sched_get_priority_max(SCHED_FIFO)) { ha_log(LOG_ERR, "Illegal realtime priority [%s]", value); return HA_FAIL; } hb_realtime_prio = foo; #else ha_log(LOG_WARNING , "Realtime scheduling not supported on this platform."); #endif return HA_OK; } static int set_generation_method(const char * value) { if (strcmp(value, "file") == 0) { timebasedgenno = FALSE; return HA_OK; } if (strcmp(value, "time") != 0) { ha_log(LOG_ERR, "Illegal hb generation method [%s]", value); return HA_FAIL; } timebasedgenno = TRUE; return HA_OK; } static int set_realtime(const char * value) { int ret = cl_str_to_boolean(value, &enable_realtime); if (ret == HA_OK) { if (enable_realtime) { cl_enable_realtime(); #ifndef _POSIX_PRIORITY_SCHEDULING ha_log(LOG_WARNING , "Realtime scheduling not supported on this platform."); #endif }else{ cl_disable_realtime(); } } return ret; } static int set_debuglevel(const char * value) { debug_level = atoi(value); if (debug_level >= 0 && debug_level < 256) { if (debug_level > 0) { static char cdebug[8]; snprintf(cdebug, sizeof(debug_level), "%d", debug_level); setenv(HADEBUGVAL, cdebug, TRUE); } return(HA_OK); } return(HA_FAIL); } #ifdef ALLOWPOLLCHOICE static int set_normalpoll(const char * value) { int normalpoll=TRUE; int ret = cl_str_to_boolean(value, &normalpoll); if (ret == HA_OK) { extern int UseOurOwnPoll; UseOurOwnPoll = !normalpoll; } return ret; } #endif static int set_msgfmt(const char* value) { if( strcmp(value, "classic") ==0 ){ netstring_format = FALSE; cl_set_msg_format(MSGFMT_NVPAIR); return HA_OK; } if( strcmp(value,"netstring") == 0){ netstring_format = TRUE; cl_set_msg_format(MSGFMT_NETSTRING); return HA_OK; } return HA_FAIL; } static int set_logdaemon(const char * value) { int rc; int uselogd; rc = cl_str_to_boolean(value, &uselogd); cl_log_set_uselogd(uselogd); if (!uselogd){ cl_log(LOG_WARNING, "Logging daemon is disabled --" "enabling logging daemon is recommended"); }else{ cl_log(LOG_INFO, "Enabling logging daemon "); cl_log(LOG_INFO, "logfile and debug file are those specified " "in logd config file (default /etc/logd.cf)"); } return rc; } static int set_logdconntime(const char * value) { int logdtime; logdtime = 
cl_get_msec(value); cl_log_set_logdtime(logdtime); if (logdtime > 0) { return(HA_OK); } return(HA_FAIL); } static int set_badpack_warn(const char* value) { int warnme = TRUE; int rc; rc = cl_str_to_boolean(value, &warnme); if (HA_OK == rc) { cl_msg_quiet_fmterr = !warnme; } return rc; } static int add_client_child_base(const char * directive, gboolean failfast) { struct client_child* child; const char * uidp; const char * cmdp; char chuid[64]; size_t uidlen; size_t cmdlen; size_t pathlen; char* command; char* path; struct passwd* pw; if (ANYDEBUG) { ha_log(LOG_INFO, "respawn directive: %s", directive); } /* Skip over initial white space, so we can get the uid */ uidp = directive; uidp += strspn(uidp, WHITESPACE); uidlen = strcspn(uidp, WHITESPACE); cmdp = uidp + uidlen+1; /* Skip over white space, find the command */ cmdp += strspn(cmdp, WHITESPACE); cmdlen = strcspn(cmdp, CRLF); pathlen = strcspn(cmdp, WHITESPACE); if (uidlen >= sizeof(chuid)) { ha_log(LOG_ERR , "UID specified for client child is too long"); return HA_FAIL; } memcpy(chuid, uidp, uidlen); chuid[uidlen] = EOS; if ((pw = getpwnam(chuid)) == NULL) { ha_log(LOG_ERR , "Invalid uid [%s] specified for client child" , chuid); return HA_FAIL; } if (*cmdp != '/') { ha_log(LOG_ERR , "Client child command [%s] is not full pathname" , cmdp); return HA_FAIL; } command = malloc(cmdlen+1); if (command == NULL) { ha_log(LOG_ERR, "Out of memory in add_client_child (command)"); return HA_FAIL; } memcpy(command, cmdp, cmdlen); command[cmdlen] = EOS; path = malloc(pathlen+1); if (path == NULL) { ha_log(LOG_ERR, "Out of memory in add_client_child " "(path)"); free(command); command=NULL; return HA_FAIL; } memcpy(path, cmdp, pathlen); path[pathlen] = EOS; if (access(path, X_OK|F_OK) < 0) { ha_log(LOG_ERR , "Client child command [%s] is not executable" , path); free(command); command=NULL; free(path); path=NULL; return HA_FAIL; } child = MALLOCT(struct client_child); if (child == NULL) { ha_log(LOG_ERR, "Out of memory in add_client_child (child)"); free(command); command=NULL; free(path); path=NULL; return HA_FAIL; } memset(child, 0, sizeof(*child)); child->respawn = 1; child->rebootifitdies = failfast; child->u_runas = pw->pw_uid; child->g_runas = pw->pw_gid; child->command = command; child->path = path; config->client_list = g_list_append(config->client_list, child); config->last_client = g_list_last(config->client_list); return HA_OK; } static int add_client_child(const char * directive) { return add_client_child_base(directive, FALSE); } static int add_failfast_child(const char * directive) { return add_client_child_base(directive, TRUE); } static int set_compression(const char * directive) { return cl_set_compress_fns(directive); } static int set_compression_threshold(const char * value) { int threshold = atoi(value); if (threshold <=0){ cl_log(LOG_ERR, "%s: compress_threshhold(%s)" " invalid", __FUNCTION__, value); return HA_FAIL; } cl_set_compression_threshold(threshold *1024); return HA_OK; } static int set_traditional_compression(const char * value) { int result; if (value == NULL){ cl_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (cl_str_to_boolean(value, &result)!= HA_OK){ cl_log(LOG_ERR, "%s:Invalid directive value %s", __FUNCTION__,value); return HA_FAIL; } cl_set_traditional_compression(result); return HA_OK; } static int set_env(const char * nvpair) { int nlen; int vlen; char* env_name; char* value; nlen = strcspn(nvpair, "="); if (nlen >= MAXLINE || nlen <=0){ cl_log(LOG_ERR, "%s: invalid nvpair(%s)", 
__FUNCTION__, nvpair); return HA_FAIL; } env_name = malloc(nlen + 4); if (env_name == NULL){ cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); return HA_FAIL; } memcpy(env_name, "HA_", 3); memcpy(env_name + 3, nvpair, nlen); env_name[nlen + 3] = 0; vlen = strlen(nvpair + nlen + 1); if (vlen >= MAXLINE || nlen <=0){ cl_log(LOG_ERR, "%s: invalid value(%s)", __FUNCTION__, nvpair); return HA_FAIL; } value = malloc(vlen + 1); if (value == NULL){ cl_log(LOG_ERR, "%s: malloc failed for value", __FUNCTION__); return HA_FAIL; } memcpy(value, nvpair+nlen +1 , vlen); value[vlen] = 0; /* * It is unclear whether any given version of setenv * makes a copy of the name or value, or both. * Therefore it is UNSAFE to free either one. * Fortunately the size of the resulting potential memory leak * is small for this particular situation. */ setenv(env_name, value, 1); if (ANYDEBUG){ cl_log(LOG_DEBUG, "setting env(%s=%s), nvpair(%s)", env_name, value,nvpair); } return HA_OK; } static int set_max_rexmit_delay(const char * value) { int foo; foo = atoi(value); if (foo <= 0){ cl_log(LOG_ERR, "Invalid max_rexmit_delay time(%s)", value); return HA_FAIL; } hb_set_max_rexmit_delay(foo); return HA_OK; } #if 0 static void id_table_dump(gpointer key, gpointer value, gpointer user_data) { unsigned int ikey = GPOINTER_TO_UINT(key); cl_log(LOG_DEBUG, "%s %u" , (const char *)user_data, ikey); if (value == NULL) { cl_log(LOG_ERR, "Key %u has NULL data!!", ikey); } } static void dump_auth_tables(struct IPC_AUTH* auth, const char * clientname) { char uid [] = "uid = "; char gid [] = "gid = "; if (auth->uid ) { cl_log(LOG_DEBUG, "Dumping uid authorization info for client %s" , clientname); g_hash_table_foreach(auth->uid, id_table_dump, uid); } if (auth->gid) { cl_log(LOG_DEBUG, "Dumping gid authorization info for client %s" , clientname); g_hash_table_foreach(auth->gid, id_table_dump, gid); } } #endif /* * apiauth client-name gid=gidlist uid=uidlist * * Record API permissions for use in API client authorization */ static int set_api_authorization(const char * directive) { const char * bp; const char * client; int clientlen; const char * gidlist = NULL; int gidlen = 0; const char * uidlist = NULL; int uidlen = 0; struct IPC_AUTH* auth = NULL; char* clname = NULL; client_proc_t dummy; /* String processing in 'C' is *so* ugly... 
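 *
 * Illustrative example only (the user and group names depend on the
 * local build and installation): directives such as
 *	apiauth ping gid=haclient
 *	apiauth ipfail uid=hacluster
 * are parsed here into an IPC_AUTH entry stored in APIAuthorization,
 * keyed by the client name.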
*/ /* Skip over any initial white space -- to the client name */ bp = directive; bp += strspn(bp, WHITESPACE); if (*bp == EOS) { goto baddirective; } client = bp; clientlen = strcspn(bp, WHITESPACE); if (clientlen <= 0) { goto baddirective; } if (clientlen >= (int)sizeof(dummy.client_id)) { cl_log(LOG_ERR, "client name [%*s] too long" , clientlen, client); goto baddirective; } clname = malloc(clientlen+1); if (clname == NULL) { cl_log(LOG_ERR, "out of memory for client name"); goto baddirective; } strncpy(clname, client, clientlen); clname[clientlen] = EOS; bp += clientlen; bp += strspn(bp, WHITESPACE); while (*bp != EOS) { bp += strspn(bp, WHITESPACE); if (strncmp(bp, "uid=", 4) == 0) { if (uidlist != NULL) { cl_log(LOG_ERR , "Duplicate uid list in " KEY_APIPERM); goto baddirective; } bp += 4; uidlist=bp; uidlen = strcspn(bp, WHITESPACE); bp += uidlen; }else if (strncmp(bp, "gid=", 4) == 0) { if (gidlist != NULL) { cl_log(LOG_ERR , "Duplicate gid list in " KEY_APIPERM); goto baddirective; } bp += 4; gidlist=bp; gidlen = strcspn(bp, WHITESPACE); bp += gidlen; }else if (*bp != EOS) { cl_log(LOG_ERR , "Missing uid or gid in " KEY_APIPERM); goto baddirective; } } if (uidlist == NULL && gidlist == NULL) { goto baddirective; } if (ANYDEBUG) { cl_log(LOG_DEBUG, "uid=%s, gid=%s" , (uidlist == NULL ? "" : uidlist) , (gidlist == NULL ? "" : gidlist)); } auth = ipc_str_to_auth(uidlist, uidlen, gidlist, gidlen); if (auth == NULL){ goto baddirective; } if (g_hash_table_lookup(APIAuthorization, clname) != NULL) { cl_log(LOG_ERR , "Duplicate %s directive for API client %s: [%s]" , KEY_APIPERM, clname, directive); goto baddirective; } g_hash_table_insert(APIAuthorization, clname, auth); if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "Creating authentication: uidptr=0x%lx gidptr=0x%lx" , (unsigned long)auth->uid, (unsigned long)auth->gid); } return HA_OK; baddirective: cl_log(LOG_ERR, "Invalid %s directive [%s]", KEY_APIPERM, directive); cl_log(LOG_INFO, "Syntax: %s client [uid=uidlist] [gid=gidlist]" , KEY_APIPERM); cl_log(LOG_INFO, "Where uidlist is a comma-separated list of uids,"); cl_log(LOG_INFO, "and gidlist is a comma-separated list of gids"); cl_log(LOG_INFO, "One or the other must be specified."); if (auth != NULL) { if (auth->uid) { /* Ought to destroy the strings too */ g_hash_table_destroy(auth->uid); auth->uid = NULL; } if (auth->gid) { /* Ought destroy the strings too */ g_hash_table_destroy(auth->gid); auth->gid = NULL; } memset(auth, 0, sizeof(*auth)); free(auth); auth = NULL; } if (clname) { free(clname); clname = NULL; } return HA_FAIL; } static int set_coredump(const char* value) { gboolean docore; int rc; if ((rc = cl_str_to_boolean(value, &docore)) == HA_OK) { if (cl_enable_coredumps(docore) < 0 ) { rc = HA_FAIL; } } return rc; } static int set_syslog_logfilefmt(const char * value) { gboolean dosyslogfmt = HA_OK; int rc; if ((rc = cl_str_to_boolean(value, &dosyslogfmt)) == HA_OK) { cl_log_enable_syslog_filefmt(dosyslogfmt); } return rc; } static int set_corerootdir(const char* value) { if (cl_set_corerootdir(value) < 0) { cl_perror("Invalid core directory [%s]", value); return HA_FAIL; } return HA_OK; } /* * Enable all these flags when KEY_PACEMAKER is enabled... 
* apiauth lrmd uid=root * apiauth stonithd uid=root * apiauth stonith-ng uid=root * apiauth crmd uid=hacluster * apiauth cib uid=hacluster * respawn root /usr/lib/heartbeat/lrmd * respawn root /usr/lib/heartbeat/stonithd * respawn hacluster /usr/lib/heartbeat/ccm * respawn hacluster /usr/lib/heartbeat/cib * respawn hacluster /usr/lib/heartbeat/crmd */ static int set_release2mode(const char* value) { /* alias KEY_REL2 to KEY_PACEMAKER */ return add_option(KEY_PACEMAKER, value); } static int set_pcmk_support(const char* value) { struct do_directive { const char * dname; const char * dval; }; struct do_directive *r2dirs; struct do_directive r2auto_dirs[] = /* * To whom it may concern: Please keep the apiauth and respawn * lines in the same order to make auditing the two against each * other easier. * Thank you. */ { /* CCM apiauth already implicit elsewhere */ {"apiauth", "cib uid=" HA_CCMUSER} /* LRMd is not a heartbeat API client */ , {"apiauth", "stonithd uid=root" } /* "NG" registers as stonith-ng, but the name of the binary * is still the same: stonithd */ , {"apiauth", "stonith-ng uid=root" } , {"apiauth", "attrd uid=" HA_CCMUSER} , {"apiauth", "crmd uid=" HA_CCMUSER} , {"apiauth", "pingd uid=root"} , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/ccm"} , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/cib" } , {"respawn", "root " HA_LIBHBDIR "/lrmd -r"} , {"respawn", "root " HA_LIBHBDIR "/stonithd"} , {"respawn", " "HA_CCMUSER " " HA_LIBHBDIR "/attrd" } , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/crmd" } /* Don't 'respawn' pingd - it's a resource agent */ }; struct do_directive r2respawn_dirs[] = /* * To whom it may concern: Please keep the apiauth and respawn * lines in the same order to make auditing the two against each * other easier. * Thank you. 
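 *
 * (For orientation, and assuming the usual directive spelling: this
 * table is selected by an ha.cf line such as "pacemaker respawn".
 * r2auto_dirs above is used when the value is a plain boolean such as
 * "on", r2minimal_dirs for "minimal" or "manual", and r2valgrind_dirs
 * for "valgrind"; see the strcasecmp() dispatch further down in
 * set_pcmk_support().  A "crm" directive is accepted as an alias via
 * set_release2mode().)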
*/ { /* CCM apiauth already implicit elsewhere */ {"apiauth", "cib uid=" HA_CCMUSER} /* LRMd is not a heartbeat API client */ , {"apiauth", "stonithd uid=root" } , {"apiauth", "stonith-ng uid=root" } , {"apiauth", "attrd uid=" HA_CCMUSER} , {"apiauth", "crmd uid=" HA_CCMUSER} , {"apiauth", "pingd uid=root"} , {"respawn", " "HA_CCMUSER " " HA_LIBHBDIR "/ccm"} , {"respawn", " "HA_CCMUSER " " HA_LIBHBDIR "/cib" } , {"respawn", "root " HA_LIBHBDIR "/lrmd -r"} , {"respawn", "root " HA_LIBHBDIR "/stonithd"} , {"respawn", " "HA_CCMUSER " " HA_LIBHBDIR "/attrd" } , {"respawn", " "HA_CCMUSER " " HA_LIBHBDIR "/crmd" } /* Don't 'respawn' pingd - it's a resource agent */ }; struct do_directive r2minimal_dirs[] = { /* CCM apiauth already implicit elsewhere */ {"apiauth", "cib uid=" HA_CCMUSER} , {"apiauth", "crmd uid=" HA_CCMUSER} , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/ccm"} , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/cib"} , {"respawn", "root " HA_LIBHBDIR "/lrmd"} , {"failfast"," "HA_CCMUSER " " HA_LIBHBDIR "/crmd"} /* Don't 'respawn' pingd - it's a resource agent */ }; struct do_directive r2valgrind_dirs[] = { /* CCM apiauth already implicit elsewhere */ {"apiauth", "cib uid=" HA_CCMUSER} , {"apiauth", "stonithd uid=root" } , {"apiauth", "stonith-ng uid=root" } , {"apiauth", "attrd uid=" HA_CCMUSER} , {"apiauth", "crmd uid=" HA_CCMUSER} , {"respawn"," "HA_CCMUSER " "HA_LIBHBDIR"/ccm"} , {"respawn"," "HA_CCMUSER " "VALGRIND_BIN" "HA_LIBHBDIR"/cib"} , {"respawn", "root " HA_LIBHBDIR"/lrmd -r"} , {"respawn", "root " HA_LIBHBDIR"/stonithd"} , {"respawn", " "HA_CCMUSER " "VALGRIND_BIN" "HA_LIBHBDIR"/attrd" } , {"respawn"," "HA_CCMUSER " "VALGRIND_BIN" "HA_LIBHBDIR"/crmd"} /* Don't 'respawn' pingd - it's a resource agent */ }; gboolean dorel2; int rc; int j, r2size; int rc2 = HA_OK; r2dirs = &r2auto_dirs[0]; r2size = DIMOF(r2auto_dirs); cl_log(LOG_INFO, "Pacemaker support: %s", value); if (0 == strcasecmp("minimal", value) || 0 == strcasecmp("manual", value)) { r2dirs = &r2minimal_dirs[0]; r2size = DIMOF(r2minimal_dirs); } else if (0 == strcasecmp("respawn", value)) { r2dirs = &r2respawn_dirs[0]; r2size = DIMOF(r2respawn_dirs); } else if (0 == strcasecmp("valgrind", value)) { r2dirs = &r2valgrind_dirs[0]; r2size = DIMOF(r2valgrind_dirs); setenv("HA_VALGRIND_ENABLED", "1", 1); cl_log(LOG_INFO, "Enabling Valgrind on selected components"); } else if ((rc = cl_str_to_boolean(value, &dorel2)) == HA_OK) { if (!dorel2) { return HA_OK; } } else { return rc; } DoManageResources = FALSE; if (cl_file_exists(RESOURCE_CFG)){ cl_log(LOG_WARNING, "File %s exists.", RESOURCE_CFG); cl_log(LOG_WARNING, "This file is not used because "KEY_PACEMAKER" is enabled"); } /* Enable Pacemaker cluster management */ for (j=0; j < r2size ; ++j) { int k; for (k=0; k < DIMOF(WLdirectives); ++k) { if (0 != strcmp(r2dirs->dname, WLdirectives[k].type)) { continue; } if (ANYDEBUG) { cl_log(LOG_DEBUG, "Implicit directive: %s %s" , r2dirs->dname , r2dirs->dval); } if (HA_OK != (rc2 = WLdirectives[k].parse(r2dirs->dval))) { cl_log(LOG_ERR, "Directive %s %s failed" , r2dirs->dname, r2dirs->dval); } } r2dirs++; } return rc2; } static int set_autojoin(const char* value) { if (strcasecmp(value, "none") == 0) { config->rtjoinconfig = HB_JOIN_NONE; return HA_OK; } if (strcasecmp(value, "other") == 0) { config->rtjoinconfig = HB_JOIN_OTHER; return HA_OK; } if (strcasecmp(value, "any") == 0) { config->rtjoinconfig = HB_JOIN_ANY; return HA_OK; } cl_log(LOG_ERR, "Invalid %s directive [%s]", KEY_AUTOJOIN, value); return HA_FAIL; } static int 
set_uuidfrom(const char* value) { if (strcmp(value, "file") == 0) { config->uuidfromname = FALSE; return HA_OK; } if (strcmp(value, "nodename") == 0) { config->uuidfromname = TRUE; return HA_OK; } cl_log(LOG_ERR, "Invalid %s directive [%s]", KEY_UUIDFROM, value); return HA_FAIL; } /* Set the memory reserve amount for heartbeat (in kbytes) */ static int set_memreserve(const char * value) { config->memreserve = atoi(value); if (config->memreserve > 0) { return(HA_OK); } return(HA_FAIL); } static int ha_config_check_boolean(const char *value) { int result; if (value == NULL){ cl_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (cl_str_to_boolean(value, &result)!= HA_OK){ cl_log(LOG_ERR, "%s:Invalid directive value %s", __FUNCTION__,value); return HA_FAIL; } return HA_OK; } Heartbeat-3-0-7e3a82377fa8/heartbeat/ha_msg_internal.c0000644000000000000000000002436511576626513022446 0ustar00usergroup00000000000000/* * ha_msg_internal: heartbeat internal messaging functions * * Copyright (C) 2000 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define MINFIELDS 30 #define CRNL "\r\n" #define SEQ "seq" #define LOAD1 "load1" extern int netstring_format; #define IS_SEQ 1 /* the name is seq*/ #define IS_UUID 2 /* the value is uuid*/ /* The value functions are expected to return pointers to static data */ struct default_vals { const char * name; const char * (*value)(void); int flags; }; static const char * ha_msg_seq(void); static const char * ha_msg_timestamp(void); static const char * ha_msg_loadavg(void); static const char * ha_msg_from(void); static const char * ha_msg_fromuuid(void); static const char * ha_msg_ttl(void); static const char * ha_msg_hbgen(void); /* Each of these functions returns static data requiring copying */ struct default_vals defaults [] = { {F_ORIG, ha_msg_from, 0}, {F_ORIGUUID, ha_msg_fromuuid, 2}, {F_SEQ, ha_msg_seq, 1}, {F_HBGENERATION,ha_msg_hbgen, 0}, {F_TIME, ha_msg_timestamp,0}, {F_LOAD, ha_msg_loadavg, 1}, {F_TTL, ha_msg_ttl, 0}, }; struct ha_msg * add_control_msg_fields(struct ha_msg* ret) { const char * type; int j; int noseqno; const char * to; cl_uuid_t touuid; char uuidstr[UU_UNPARSE_SIZEOF]; /* if F_TO field is present this message is for one specific node attach the uuid for that node*/ if ((to = ha_msg_value(ret, F_TO)) != NULL ) { if (nodename2uuid(to, &touuid) == HA_OK){ cl_msg_moduuid(ret, F_TOUUID, &touuid); } else{ /* working with previous non-uuid version */ /* ha_log(LOG_WARNING, " destnation %s uuid not found", to); */ /* do nothing */ } } else if (cl_get_uuid(ret, F_TOUUID, &touuid) == HA_OK){ if ((to = uuid2nodename(&touuid)) != NULL){ if (ha_msg_mod(ret, F_TO, to) != HA_OK){ ha_log(LOG_WARNING, " adding field to message failed"); } }else { 
memset(uuidstr,0,UU_UNPARSE_SIZEOF); cl_uuid_unparse(&touuid, uuidstr); ha_log(LOG_WARNING,"nodename not found for uuid %s",uuidstr); } } if ((type = ha_msg_value(ret, F_TYPE)) == NULL) { ha_log(LOG_ERR, "No type (add_control_msg_fields): "); cl_log_message(LOG_ERR, ret); ha_msg_del(ret); return(NULL); } if (DEBUGPKTCONT) { ha_log(LOG_DEBUG, "add_control_msg_fields: input packet"); cl_log_message(LOG_DEBUG, ret); } noseqno = (strncmp(type, NOSEQ_PREFIX, sizeof(NOSEQ_PREFIX)-1) == 0); /* Add our default name=value pairs */ for (j=0; j < DIMOF(defaults); ++j) { /* * Should we skip putting a sequence number on this packet? * * We don't want requests for retransmission to be subject * to being retransmitted according to the protocol. They * need to be outside the normal retransmission protocol. * To accomplish that, we avoid giving them sequence numbers. */ if (noseqno && (defaults[j].flags & IS_SEQ)) { continue; } /* Don't put in duplicate values already gotten */ if (noseqno && ha_msg_value(ret, defaults[j].name) != NULL) { /* This keeps us from adding another "from" field */ continue; } if( defaults[j].flags & IS_UUID){ if (cl_msg_moduuid(ret, defaults[j].name, (const cl_uuid_t*)defaults[j].value()) != HA_OK ){ ha_msg_del(ret); return(NULL); } }else { if (ha_msg_mod(ret, defaults[j].name, defaults[j].value()) != HA_OK) { ha_msg_del(ret); return(NULL); } } } if (netstring_format || must_use_netstring(ret)){ goto out; } if ( add_msg_auth(ret) != HA_OK) { ha_msg_del(ret); ret = NULL; } if (DEBUGPKTCONT) { ha_log(LOG_DEBUG, "add_control_msg_fields: packet returned"); cl_log_message(LOG_DEBUG, ret); } out: return ret; } int add_msg_auth(struct ha_msg * m) { char msgbody[MAXLINE]; char authstring[MAXLINE]; char authtoken[MAXLINE]; char* msgbuf; int buf_malloced = 0; int buflen; const char * from; const char * ts; const char * type; int ret = HA_FAIL; /* Extract message type, originator, timestamp, auth */ type = ha_msg_value(m, F_TYPE); from = ha_msg_value(m, F_ORIG); ts = ha_msg_value(m, F_TIME); if (from == NULL || ts == NULL || type == NULL) { ha_log(LOG_ERR , "add_msg_auth: %s: from %s" , "missing from/ts/type" , (from? 
from : "")); cl_log_message(LOG_ERR, m); } buflen = get_stringlen(m); if (buflen < MAXLINE){ msgbuf = &msgbody[0]; }else{ msgbuf = malloc(get_stringlen(m)); if (msgbuf == NULL){ cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); goto out; } buf_malloced = 1; } check_auth_change(config); msgbuf[0] = EOS; if (msg2string_buf(m, msgbuf, buflen, 0, NOHEAD) != HA_OK){ ha_log(LOG_ERR , "add_msg_auth: compute string failed"); cl_log_message(LOG_ERR,m); goto out; } if (!config->authmethod->auth->auth(config->authmethod, msgbuf , strnlen(msgbuf, buflen) , authtoken, DIMOF(authtoken))) { ha_log(LOG_ERR , "Cannot compute message authentication [%s/%s/%s]" , config->authmethod->authname , config->authmethod->key , msgbuf); goto out; } sprintf(authstring, "%d %s", config->authnum, authtoken); /* It will add it if it's not there yet, or modify it if it is */ ret= ha_msg_mod(m, F_AUTH, authstring); out: if (msgbuf && buf_malloced){ free(msgbuf); } return ret; } gboolean isauthentic(const struct ha_msg * m) { char msgbody[MAXLINE]; char authstring[MAXLINE]; char authbuf[MAXLINE]; char* msgbuf; int buflen; int buf_malloced = 0; const char * authtoken = NULL; int j; int authwhich = 0; struct HBauth_info* which; gboolean ret =FALSE; buflen = get_stringlen(m); if (buflen < MAXLINE){ msgbuf = &msgbody[0]; }else{ msgbuf = malloc(get_stringlen(m)); if (msgbuf == NULL){ cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); goto out; } buf_malloced = 1; } /* Reread authentication? */ check_auth_change(config); if (msg2string_buf(m, msgbuf, buflen,0, NOHEAD) != HA_OK){ ha_log(LOG_ERR , "add_msg_auth: compute string failed"); goto out; } for (j=0; j < m->nfields; ++j) { if (strcmp(m->names[j], F_AUTH) == 0) { authtoken = m->values[j]; continue; } } if (authtoken == NULL || sscanf(authtoken, "%d %s", &authwhich, authstring) != 2) { if (!cl_msg_quiet_fmterr) { ha_log(LOG_WARNING , "Bad/invalid auth token, authtoken=%p" , authtoken); if (ANYDEBUG){ cl_log_message(LOG_INFO, m); } } goto out; } which = config->auth_config + authwhich; if (authwhich < 0 || authwhich >= MAXAUTH || which->auth == NULL) { ha_log(LOG_WARNING , "Invalid authentication type [%d] in message!" , authwhich); goto out; } if (!which->auth->auth(which , msgbuf, strnlen(msgbuf, buflen) , authbuf, DIMOF(authbuf))) { ha_log(LOG_ERR, "Failed to compute message authentication"); goto out; } if (strcmp(authstring, authbuf) == 0) { if (DEBUGAUTH) { ha_log(LOG_DEBUG, "Packet authenticated"); } ret = TRUE; goto out; } if (DEBUGAUTH) { ha_log(LOG_INFO, "Packet failed authentication check, " "authstring =%s,authbuf=%s ", authstring, authbuf); } out: if (buf_malloced && msgbuf){ free(msgbuf); } return ret; } /* Add field to say who this packet is from */ STATIC const char * ha_msg_from(void) { return localnodename; } /*Add field to say the node uuid this packet is from */ STATIC const char* ha_msg_fromuuid() { return (char*)&config->uuid; } /* Add sequence number field */ STATIC const char * ha_msg_seq(void) { static char seq[32]; static seqno_t seqno = 1; sprintf(seq, "%lx", seqno); ++seqno; return(seq); } /* Add local timestamp field */ STATIC const char * ha_msg_timestamp(void) { static char ts[32]; sprintf(ts, TIME_X, (TIME_T)time(NULL)); return(ts); } /* Add load average field */ STATIC const char * ha_msg_loadavg(void) { static char loadavg[64]; static int fd = -1; char * nlp; /* * NOTE: We never close 'fd' * We keep it open to avoid touching the real filesystem once we * are running, and avoid realtime problems. 
I don't know that * this was a significant problem, but if updates were being made * to the / or /proc directories, then we could get blocked, * and this was a very simple fix. * * We should probably get this information once every few seconds * and use that, but this is OK for now... */ if (fd < 0 && (fd=open(LOADAVG, O_RDONLY)) < 0 ) { strcpy(loadavg, "n/a"); }else{ lseek(fd, 0, SEEK_SET); if (read(fd, loadavg, sizeof(loadavg)) <= 0) { strcpy(loadavg, "n/a"); } loadavg[sizeof(loadavg)-1] = EOS; } if ((nlp = strchr(loadavg, '\n')) != NULL) { *nlp = EOS; } return(loadavg); } STATIC const char * ha_msg_ttl(void) { static char ttl[8]; snprintf(ttl, sizeof(ttl), "%d", config->hopfudge + config->nodecount); return(ttl); } STATIC const char * ha_msg_hbgen(void) { static char hbgen[32]; snprintf(hbgen, sizeof(hbgen), "%lx", config->generation); return(hbgen); } #ifdef TESTMAIN_MSGS int main(int argc, char ** argv) { struct ha_msg* m; while (!feof(stdin)) { if ((m=controlfifo2msg(stdin)) != NULL) { fprintf(stderr, "Got message!\n"); if (msg2stream(m, stdout) == HA_OK) { fprintf(stderr, "Message output OK!\n"); }else{ fprintf(stderr, "Could not output Message!\n"); } }else{ fprintf(stderr, "Could not get message!\n"); } } return(0); } #endif Heartbeat-3-0-7e3a82377fa8/heartbeat/ha_test.py0000755000000000000000000002231411576626513021144 0ustar00usergroup00000000000000#!/usr/bin/env python __copyright__=''' Copyright (C) 2000 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from hb_api import * import select # # This is an abstract class, you need to derive a class from it and set # RequestType and ResponseType in the derived class # class RequestMsg (ha_msg): #RequestType = ha_msg.T_TESTREQ #ResponseType = ha_msg.T_TESTRSP '''This class is a CTS request packet. It is an 'abstract' class. To make a derived class from it, set derived_class.RequestType and and derived_class.ResponseType. ''' def __init__(self, type): '''We initialize the packet type and request type.''' ha_msg.__init__(self) self.reqtype=type # Set the packet type and request type self.update( { ha_msg.F_TYPE : self.__class__.RequestType, ha_msg.F_APIREQ: type }) def WaitForReplies(self, api, nodelist, timeout): '''Wait for reply messages, and return them. The return is a 3-element tuple. The first element of the tuple is a list of reply messages. The second element of the tuple is the list of nodes that timed out. The third element of the tuple is the list of replies that we didn't expect. Unexpected replies are from machines not in 'nodes', or duplicates. ''' nodes = {} for node in nodelist: nodes[node] = None fd = api.get_inputfd() replies=[] extras=[] while 1: # Are there any messages waiting? inp,out,exc = select.select([fd,], [], [], timeout) if len(inp) > 0: # Read it. msg = api.readmsg(0) # May return None... 
try: msgtype = msg[ha_msg.F_TYPE] fromnode = msg[ha_msg.F_ORIG] reqtype = msg[ha_msg.F_APIREQ] except (TypeError, KeyError): continue if (msgtype != self.__class__.ResponseType or reqtype != self.reqtype): continue # Remember this message if nodes.has_key(fromnode): del nodes[fromnode] replies.append(msg) else: extras.append(msg) # Return if we've gotten replies from each node if len(nodes) == 0: return replies, [], extras else: return replies, nodes.keys(), extras def sendall(self, api, timeout, participants=None): '''Send the request packet to every node. We return the messages we get in reply. The list of expected participants is either the value of the 'participants' argument, or the list of 'up' machines. We return when all expected participants reply, or timeout expires. See WaitForReplies() for an explanation of the return value. ''' if participants == None: participants=api.nodes_with_status() api.sendclustermsg(self) return self.WaitForReplies(api, participants, timeout) def sendnode(self, api, node, timeout): '''Send the request to the given node. We return the messages we get in reply. See WaitForReplies() for an explanation of the return value. ''' if api.nodestatus(node) != hb_api.ActiveStatus: print "Attempt to send request to bad/down node" api.sendnodemsg(self, node) return self.WaitForReplies(api, [node], timeout) class ReplyMsg(ha_msg): '''This is an CTS reply packet. It is an "abstract" class. To make a derived class from it, set derived_class.RequestType and and derived_class.ResponseType. ''' #RequestType = ha_msg.T_TESTREQ #ResponseType = ha_msg.T_TESTRSP def __init__(self, req, result): '''Pass the constructor the packet you're responding to.''' self.data = req.data __str__ = UserDict.__repr__ # use default __str__ function if self[ha_msg.F_TYPE] != self.__class__.RequestType: raise ValueError("Inappropriate initialization packet") # # Change message type, add return code, and return to sender # self.update(req, { ha_msg.F_TYPE : self.__class__.ResponseType, ha_msg.F_TO : self[ha_msg.F_ORIG], ha_msg.F_TOID : self[ha_msg.F_FROMID], ha_msg.F_APIRESULT : result }) def send(self, api): # We've already set the return address above. # This means this is now effectively sendnodemsg ;-) api.sendclustermsg(self) class TestMappings(UserDict): '''A class to call the right function with the right arguments when presented with a message. Each data item in the mapping is a 2-element tuple of the form: (function, argument-to-function). When the function given in the mapping is actually called, it is called with three arguments: (message-to-handle, API object, argument-to-function) Argument-to-function was the second element of the 2-tuple originally associated with the message type (the first element was the function). In this class, everything depends on messages having an F_APIREQ field to be used as the message type when processing messages. ''' def __init__(self, api): self.Api = api self.data = {} def __setitem__(self, key, value): '''The values you assign to go with keys need to be 2-element tuples or lists. The first item has to be a callable thing (function), and the second can be anything that the function likes for an argument. This function is all about error checking. 
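
        For example (as used by the self-test at the bottom of this file):

            testmap["ping"] = (pingreply, None)

        i.e. a (callable, argument) pair; anything else raises ValueError.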
''' if ((not isinstance(value, types.ListType) and not isinstance(value, types.TupleType)) or len(value) != 2) : raise ValueError("inappropriate TestMappings tuple") if (not callable(value[0])) : raise ValueError("Non-callable TestMappings 'function'") self.data[key]=value def __call__(self, msg, dummyarg): '''Process the request that goes with the given message. We use the F_APIREQ field to determine the type of request we're processing. ''' reqtype=msg[ha_msg.F_APIREQ] if self.has_key(reqtype): self[reqtype][0](msg, self.Api, self[reqtype][1]) elif self.has_key(self.Api.BADREQ): self[self.Api.BADREQ][0](msg, self.Api, self[self.Api.BADREQ][1]) else: # It would be nice to do something better ;-) print "No handler for request type %s" % reqtype class CTSRequest (RequestMsg): '''A CTS request message. This class can be further subclassed to good effect. ''' RequestType = ha_msg.T_TESTREQ ResponseType = ha_msg.T_TESTRSP class CTSReply(ReplyMsg): '''A CTS reply message.''' RequestType = ha_msg.T_TESTREQ ResponseType = ha_msg.T_TESTRSP # # A little test code... # # This is a trivial 'ping' application... # # pingreply is called when a ping "request" is received # if __name__ == '__main__': class PingRequest(CTSRequest): '''A Ping request message''' def __init__(self): CTSRequest.__init__(self, "ping") class SpamRequest(CTSRequest): '''A Spam request message (which we won't handle)''' def __init__(self): CTSRequest.__init__(self, "spam") # Function to perform a ping reply... def pingreply(pingmsg, api, arg): '''Construct and send a ping reply message.''' reply=CTSReply(pingmsg, api.OK) reply.send(api) # Function to perform a Bad Request reply... def BadReq(badmsg, api, arg): '''Give 'em the bad news...''' reply=CTSReply(badmsg, api.BADREQ) reply[ha_msg.F_COMMENT]=arg reply.send(api) hb = hb_api() hb.signon() # Set up response functions to automatically reply to pings when # they arrive. testmap = TestMappings(hb) testmap["ping"] = (pingreply, None) testmap[hb.BADREQ] = (BadReq, "Invalid CTS request (we don't like spam)") # Set up our function to respond to CTSRequest packets hb.set_msg_callback(ha_msg.T_TESTREQ, testmap, None) print hb.cluster_config() req = PingRequest() # Same as CTSRequest("ping") print req.sendnode(hb, "kathyamy", 5) spam = SpamRequest() # Same as CTSRequest("spam") print spam.sendnode(hb, "kathyamy", 5) print req.sendall(hb, 5) hb.signoff() Heartbeat-3-0-7e3a82377fa8/heartbeat/harc0000644000000000000000000000135111576626513017776 0ustar00usergroup00000000000000#!/bin/sh # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script is patterned after the Red Hat SysV init script system # # It doesn't know how to do anything except to run other scripts... # # Basically, it notifies the world of something that was sent around # via the heartbeat cluster network... # . $HA_FUNCS RCDIR=$HA_DIR/rc.d if [ -n "$HA_DEBUGLOG" ]; then exec >>$HA_DEBUGLOG 2>&1 fi if [ ! -d $RCDIR ] then ha_log "ERROR: $0: $RCDIR does not exist" exit 1 else if [ ! -x $RCDIR/$1 ] then ha_log "ERROR: $0: $RCDIR/$1: not executable" exit 1 fi fi ha_log "info: Running $RCDIR/$1 $*" exec $RCDIR/$1 "$@" ha_log "ERROR: $0: $RCDIR/$1: cannot execute" Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_api.c0000644000000000000000000015414511576626513020536 0ustar00usergroup00000000000000/* * hb_api: Server-side heartbeat API code * * Copyright (C) 2000 Alan Robertson * Copyright (C) 2000 Marcelo Tosatti * * Thanks to Conectiva S.A. 
for sponsoring Marcelo Tosatti work * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /* * A little about the API FIFO structure... * * We have two kinds of API clients: casual and named * * Casual clients just attach and listen in to messages, and ask * the status of things. Casual clients are typically used as status * agents, or debugging agents. * * They can't send messages, and they are known only by their PID. * Anyone in the group that owns the casual FIFO directory can use * the casual API. Casual clients create and delete their own * FIFOs for the API (or are cleaned up after by heartbeat ;-)) * Hence, the casual client FIFO directory must be group writable, * and sticky. * * Named clients attach and listen in to messages, and they are also * allowed to send messages to the other clients in the cluster with * the same name. Named clients typically provide persistent services * in the cluster. A cluster manager would be an example * of such a persistent service. * * Their FIFOs are pre-created for them, and they neither create nor * delete them - nor should they be able to. * The named client FIFO directory must not be writable by group or other. * * We deliver messages from named clients to clients in the cluster * which are registered with the same name. Each named client * also receives the messages it sends. I could allow them to send * to any other service that they want, but right now that's overridden. * We mark each packet with the service name that the packet came from. * * A client can only register for a given name if their userid is the * owner of the named FIFO for that name. * * If a client has permissions to snoop on packets (debug mode), * then they are allowed to receive all packets, but otherwise only * clients registered with the same name will receive these messages. * * It is important to make sure that each named client FIFO is owned by the * same UID on each machine. 
*/ #include #include #define time FOOtime #include #undef time #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hb_signal.h" /* Definitions of API query handlers */ static int api_ping_iflist(const struct ha_msg* msg, struct node_info * node , struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_signoff (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char **failreason); static int api_setfilter (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char **failreason); static int api_setsignal (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_nodelist (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_nodestatus (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_nodeweight (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_nodesite (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_nodetype (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_ifstatus (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_iflist (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_clientstatus (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_num_nodes(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_get_parameter (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_get_resources (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_get_uuid (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_get_nodename (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason); static int api_set_sendqlen(const struct ha_msg* msg, struct ha_msg* resp, client_proc_t* client, const char** failreason); gboolean ProcessAnAPIRequest(client_proc_t* client); struct api_query_handler query_handler_list [] = { { API_SIGNOFF, api_signoff }, { API_SETFILTER, api_setfilter }, { API_SETSIGNAL, api_setsignal }, { API_NODELIST, api_nodelist }, { API_NODESTATUS, api_nodestatus }, { API_NODEWEIGHT, api_nodeweight }, { API_NODESITE, api_nodesite }, { API_NODETYPE, api_nodetype }, { API_IFSTATUS, api_ifstatus }, { API_IFLIST, api_iflist }, { API_CLIENTSTATUS, api_clientstatus }, { API_NUMNODES, api_num_nodes}, { API_GETPARM, api_get_parameter}, { API_GETRESOURCES, api_get_resources}, { API_GETUUID, api_get_uuid}, { API_GETNAME, api_get_nodename}, { API_SET_SENDQLEN, api_set_sendqlen} }; extern int UseOurOwnPoll; int debug_client_count = 0; int total_client_count = 0; client_proc_t* client_list = NULL; /* List of all our API clients */ /* TRUE when any client output still pending */ extern struct node_info *curnode; static unsigned long client_generation = 0; #define MAX_CLIENT_GEN 64 static void 
api_process_request(client_proc_t* client, struct ha_msg *msg); static void api_send_client_msg(client_proc_t* client, struct ha_msg *msg); static void api_send_client_status(client_proc_t* client , const char * status, const char * reason); static void api_remove_client_int(client_proc_t* client, const char * rsn); static int api_add_client(client_proc_t* chan, struct ha_msg* msg); static void G_remove_client(gpointer Client); static gboolean APIclients_input_dispatch(IPC_Channel* chan, gpointer udata); static void api_process_registration_msg(client_proc_t*, struct ha_msg *); static gboolean api_check_client_authorization(client_proc_t* client); static int create_seq_snapshot_table(GHashTable** ptable) ; static void destroy_seq_snapshot_table(GHashTable* table); extern GHashTable* APIAuthorization; struct seq_snapshot{ seqno_t generation; seqno_t last_seq; }; static int should_msg_sendto_client(client_proc_t* client, struct ha_msg* msg) { GHashTable* table; struct node_info * thisnode = NULL; const char * from; cl_uuid_t fromuuid; struct seq_snapshot* snapshot; const char * cseq; const char * cgen; seqno_t seq; seqno_t gen; int ret = 0; const char* type; struct seqtrack * t; if (!client || !msg){ cl_log(LOG_ERR, "should_msg_sendto_client:" " invalid arguemts"); return FALSE; } from = ha_msg_value(msg, F_ORIG); cseq = ha_msg_value(msg, F_SEQ); cgen = ha_msg_value(msg, F_HBGENERATION); if (!from || !cseq || !cgen){ /* some local generated status messages, * e.g. node dead status message, * return yes */ return TRUE; } if (sscanf(cseq, "%lx", &seq) <= 0 || sscanf(cgen, "%lx", &gen) <= 0) { cl_log(LOG_ERR, "should_msg_sendto_client:" "wrong seq/gen format"); return FALSE; } if (seq < 0 || gen < 0){ cl_log(LOG_ERR, "should_msg_sendto_client:" "wrong seq/gen number"); return FALSE; } cl_get_uuid(msg, F_ORIGUUID, &fromuuid); thisnode = lookup_tables(from, &fromuuid); if ( thisnode == NULL){ cl_log(LOG_ERR, "should_msg_sendto_client:" "node not found in table"); return FALSE; } t = &thisnode->track; /*if uuid is not found, then it always passes the first restriction*/ if ( cl_uuid_is_null(&fromuuid) || (table = client->seq_snapshot_table)== NULL || (snapshot= (struct seq_snapshot*) g_hash_table_lookup(table, &fromuuid)) == NULL){ goto nextstep; } ret = gen > snapshot->generation || (gen == snapshot->generation && seq >= snapshot->last_seq); /*check if there is any retransmission going on if not, we can delete this item */ if (t->nmissing == 0){ if (ANYDEBUG){ cl_log(LOG_DEBUG, "Removing one entry in seq snapshot hash table" "for node %s", thisnode->nodename); } if(!g_hash_table_remove(table, &fromuuid)){ cl_log(LOG_ERR,"should_msg_sendto_client:" "g_hash_table_remove failed"); return FALSE; } free(snapshot); if ( g_hash_table_size(table) ==0){ if (ANYDEBUG){ cl_log(LOG_DEBUG, "destroying the seq snapshot hash table"); } g_hash_table_destroy(table); client->seq_snapshot_table = NULL; } } if ( ret == 0 ){ /* hmmmm.... this message is dropped */ cl_log(LOG_WARNING, "message is dropped "); cl_log_message(LOG_WARNING, msg); return FALSE; } nextstep: /* We only worry about the ordering of certain types of messages * and then only when they arrive out of order. * Basically we implement a barrier at the receipt of each * message of this type. 
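 *
 * (Concretely, as the code below shows: only T_APICLISTAT client
 * join/leave messages are held back.  When earlier sequence numbers from
 * the originating node are still missing, the message is copied and
 * appended to t->client_status_msg_queue, to be delivered once the gap
 * has been filled; everything else is passed through immediately.)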
*/ if( (type = ha_msg_value(msg, F_TYPE)) == NULL){ cl_log(LOG_ERR, "no type field found"); return FALSE; } if ( strcmp(type, T_APICLISTAT) != 0 || t->nmissing == 0){ return TRUE; } if ( seq > t->first_missing_seq ){ /*We cannot deliver the message now, queue it*/ struct ha_msg* copymsg = ha_msg_copy(msg); if (!copymsg){ cl_log(LOG_ERR, "msg copy failed"); return FALSE; } t->client_status_msg_queue = g_list_append(t->client_status_msg_queue, copymsg); if (ANYDEBUG){ cl_log(LOG_DEBUG,"one entry added to " "client_status_msg_queue" "for node %s", thisnode->nodename); } return FALSE; } return TRUE; } /* * One client pointer per input FIFO. It's indexed by file descriptor, so * it's not densely populated. We use this in conjunction with select(2) */ /* * The original structure of this code was due to * Marcelo Tosatti * * It has been significantly and repeatedly mangled into nearly unrecognizable * oblivion by Alan Robertson * */ /* * Monitor messages. Pass them along to interested clients (if any) */ void api_heartbeat_monitor(struct ha_msg *msg, int msgtype, const char *iface) { const char* clientid; client_proc_t* client; client_proc_t* nextclient; /* This kicks out most messages, since debug clients are rare */ if ((msgtype&DEBUGTREATMENTS) != 0 && debug_client_count <= 0) { return; } /* Verify that we understand what kind of message we've got here */ if ((msgtype & ALLTREATMENTS) != msgtype || msgtype == 0) { cl_log(LOG_ERR, "heartbeat_monitor: unknown msgtype [%d]" , msgtype); return; } /* See who this message is addressed to (if anyone) */ clientid = ha_msg_value(msg, F_TOID); for (client=client_list; client != NULL; client=nextclient) { /* * "client" might be removed by api_send_client_msg() * so, we'd better fetch the next client now! */ nextclient=client->next; /* Is this message addressed to us? */ if (clientid != NULL && strcmp(clientid, client->client_id) != 0) { continue; } if (client->chan->ch_status != IPC_CONNECT) { continue; } /* Is this one of the types of messages we're interested in?*/ if ((msgtype & client->desired_types) != 0) { if (should_msg_sendto_client(client, msg)){ api_send_client_msg(client, msg); }else { /*This happens when join/leave messages is *received but there are messages before *that are missing. The join/leave messages *will be queued and not delivered until all *messages before them are received and *delivered. */ /*do nothing*/ } if (client->removereason && !client->isindispatch) { if (ANYDEBUG){ cl_log(LOG_DEBUG , "%s: client is %s" , __FUNCTION__ , client->client_id); } api_remove_client_pid(client->pid , client->removereason); } } /* If this is addressed to us, then no one else should get it */ if (clientid != NULL) { break; /* No one else should get it */ } } } /* * Periodically clean up after dead clients... * In case we somehow miss them... 
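 *
 * (The probe below is the usual kill(pid, 0) idiom: if it fails with
 * ESRCH the client process no longer exists, and the client entry is
 * reaped with reason "died-audit".  Returning TRUE presumably keeps
 * this registered as a recurring mainloop timeout source.)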
*/ gboolean api_audit_clients(gpointer p) { client_proc_t* client; client_proc_t* nextclient; for (client=client_list; client != NULL; client=nextclient) { nextclient=client->next; if (CL_KILL(client->pid, 0) < 0 && errno == ESRCH) { cl_log(LOG_INFO, "api_audit_clients: client %ld died" , (long) client->pid); client->removereason = NULL; api_remove_client_pid(client->pid, "died-audit"); client=NULL; } } return TRUE; } /********************************************************************** * API_SETFILTER: Set the types of messages we want to see **********************************************************************/ static int api_setfilter(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char **failreason) { const char * cfmask; unsigned mask; /* * Record the types of messages desired by this client * (desired_types) */ if ((cfmask = ha_msg_value(msg, F_FILTERMASK)) == NULL || (sscanf(cfmask, "%x", &mask) != 1) || (mask&ALLTREATMENTS) == 0) { *failreason = "EINVAL"; return I_API_BADREQ; } if ((client->desired_types & DEBUGTREATMENTS)== 0 && (mask&DEBUGTREATMENTS) != 0) { /* Only allowed to root and to our uid */ if (client->uid != 0 && client->uid != getuid()) { *failreason = "EPERM"; return I_API_BADREQ; } ++debug_client_count; }else if ((client->desired_types & DEBUGTREATMENTS) != 0 && (mask & DEBUGTREATMENTS) == 0) { --debug_client_count; } client->desired_types = mask; return I_API_RET; } /********************************************************************** * API_SIGNOFF: Sign off as a client **********************************************************************/ static int api_signoff(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char **failreason) { /* We send them no reply */ if (ANYDEBUG) { cl_log(LOG_DEBUG, "Signing client %ld off" , (long) client->pid); } if (client->seq_snapshot_table){ destroy_seq_snapshot_table(client->seq_snapshot_table); client->seq_snapshot_table = NULL; } client->removereason = API_SIGNOFF; return I_API_IGN; } /********************************************************************** * API_SETSIGNAL: Record the type of signal they want us to send. **********************************************************************/ static int api_setsignal(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * csignal; unsigned oursig; if ((csignal = ha_msg_value(msg, F_SIGNAL)) == NULL || (sscanf(csignal, "%u", &oursig) != 1)) { return I_API_BADREQ; } /* Validate the signal number in the message ... */ if (oursig < 0 || oursig == SIGKILL || oursig == SIGSTOP || oursig >= 32) { /* These can't be caught (or is a bad signal). */ *failreason = "EINVAL"; return I_API_BADREQ; } client->signal = oursig; return I_API_RET; } /*********************************************************************** * API_NODELIST: List the nodes in the cluster **********************************************************************/ static int api_nodelist(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { int j; int last = config->nodecount-1; for (j=0; j <= last; ++j) { if (ha_msg_mod(resp, F_NODENAME , config->nodes[j].nodename) != HA_OK) { cl_log(LOG_ERR , "api_nodelist: " "cannot mod field/5"); return I_API_IGN; } if (ha_msg_mod(resp, F_APIRESULT , (j == last ? 
API_OK : API_MORE)) != HA_OK) { cl_log(LOG_ERR , "api_nodelist: " "cannot mod field/6"); return I_API_IGN; } api_send_client_msg(client, resp); } return I_API_IGN; } /********************************************************************** * API_NODESTATUS: Return the status of the given node *********************************************************************/ static int api_nodestatus(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; struct node_info * node; const char * savedstat; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (ha_msg_add(resp, F_STATUS, node->status) != HA_OK) { cl_log(LOG_ERR , "api_nodestatus: cannot add field"); return I_API_IGN; } /* Give them the "real" (non-delayed) status */ if (node->saved_status_msg && (savedstat = ha_msg_value(node->saved_status_msg, F_STATUS))) { ha_msg_mod(resp, F_STATUS, savedstat); } return I_API_RET; } /********************************************************************** * API_NODEWEIGHT: Return the weight of the given node *********************************************************************/ static int api_nodeweight(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; struct node_info * node; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (ha_msg_add_int(resp, F_WEIGHT, node->weight) != HA_OK) { cl_log(LOG_ERR , "api_nodeweight: cannot add field"); return I_API_IGN; } return I_API_RET; } /********************************************************************** * API_NODESITE: Return the site of the given node *********************************************************************/ static int api_nodesite(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; struct node_info * node; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (ha_msg_add(resp, F_SITE, node->site) != HA_OK) { cl_log(LOG_ERR , "api_nodesite: cannot add field"); return I_API_IGN; } return I_API_RET; } /********************************************************************** * API_NODETYPE: Return the type of the given node *********************************************************************/ static int api_nodetype(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; struct node_info * node; const char * ntype; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } switch (node->nodetype) { case PINGNODE_I: ntype = PINGNODE; break; case NORMALNODE_I: ntype = NORMALNODE; break; default: ntype = UNKNOWNNODE; break; } if (ha_msg_add(resp, F_NODETYPE, ntype) != HA_OK) { cl_log(LOG_ERR , "api_nodetype: cannot add field"); return I_API_IGN; } return I_API_RET; } /********************************************************************** * API_IFLIST: List the interfaces for the given machine *********************************************************************/ static int api_iflist(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { struct link * lnk; int j; int last = config->nodecount-1; const char * cnode; 
struct node_info * node; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (node->nodetype == PINGNODE_I) { return api_ping_iflist ( msg, node, resp ,client, failreason); } /* Find last link... */ for(j=0; (lnk = &node->links[j], lnk->name); ++j) { last = j; } /* Don't report on ping links */ if (node->links[last].isping) { --last; } for (j=0; j <= last; ++j) { if (node->links[j].isping) { continue; } if (ha_msg_mod(resp, F_IFNAME , node->links[j].name) != HA_OK) { cl_log(LOG_ERR , "api_iflist: " "cannot mod field/1"); return I_API_IGN; } if (ha_msg_mod(resp, F_APIRESULT , (j == last ? API_OK : API_MORE)) != HA_OK) { cl_log(LOG_ERR , "api_iflist: " "cannot mod field/2"); return I_API_IGN; } api_send_client_msg(client, resp); } return I_API_IGN; } static int api_ping_iflist(const struct ha_msg* msg, struct node_info * node , struct ha_msg* resp , client_proc_t* client, const char** failreason) { int j; struct link * lnk; for(j=0; (lnk = &node->links[j], lnk->name); ++j) { if (strcmp(lnk->name, node->nodename) == 0) { if (ha_msg_mod(resp, F_IFNAME , lnk->name) != HA_OK) { cl_log(LOG_ERR , "api_ping_iflist: " "cannot mod field/1"); return I_API_IGN; } if (ha_msg_mod(resp, F_APIRESULT, API_OK)!= HA_OK) { cl_log(LOG_ERR , "api_ping_iflist: " "cannot mod field/2"); return I_API_IGN; } return I_API_RET; } } *failreason = "UhOh"; return I_API_BADREQ; } /********************************************************************** * API_IFSTATUS: Return the status of the given interface... *********************************************************************/ static int api_ifstatus(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; struct node_info * node; const char * ciface; struct link * iface; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (node = lookup_node(cnode)) == NULL || (ciface = ha_msg_value(msg, F_IFNAME)) == NULL || (iface = lookup_iface(node, ciface)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (ha_msg_mod(resp, F_STATUS, iface->status) != HA_OK) { cl_log(LOG_ERR , "api_ifstatus: cannot add field/1"); cl_log(LOG_ERR , "name: %s, value: %s (if=%s)" , F_STATUS, iface->status, ciface); return I_API_IGN; } return I_API_RET; } /********************************************************************** * API_CLIENTSTATUS: Return the status of the given client on a node *********************************************************************/ static int api_clientstatus(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * cnode; const char * cname; const char * our_clientid; struct node_info * node; struct ha_msg * m; int ret = HA_FAIL; if ((cnode = ha_msg_value(msg, F_NODENAME)) == NULL || (cname = ha_msg_value(msg, F_CLIENTNAME)) == NULL || (our_clientid = ha_msg_value(msg, F_FROMID)) == NULL || (node = lookup_node(cnode)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if (ha_msg_add(resp, F_SUBTYPE, T_RCSTATUS) != HA_OK) { ha_log(LOG_ERR, "api_clientstatus: cannot add field"); *failreason = "ENOMEM"; return I_API_BADREQ; } /* returns client status on local node */ if (strcmp(cnode, curnode->nodename) == 0) { if (find_client(cname, NULL) != NULL) ret = ha_msg_add(resp, F_CLIENTSTATUS, ONLINESTATUS); else ret = ha_msg_add(resp, F_CLIENTSTATUS, OFFLINESTATUS); if (ret != HA_OK) { ha_log(LOG_ERR, "api_clientstatus: cannot add field"); *failreason = 
"ENOMEM"; return I_API_BADREQ; } return I_API_RET; } if (strcmp(node->status, ACTIVESTATUS) != 0) { if (ha_msg_add(resp, F_CLIENTSTATUS, OFFLINESTATUS) != HA_OK) { ha_log(LOG_ERR, "api_clientstatus: cannot add field"); *failreason = "ENOMEM"; return I_API_BADREQ; } return I_API_RET; } if ((m = ha_msg_new(0)) == NULL || ha_msg_add(m, F_TYPE, T_QCSTATUS) != HA_OK || ha_msg_add(m, F_TO, cnode) != HA_OK || ha_msg_add(m, F_CLIENTNAME, cname) != HA_OK || ha_msg_add(m, F_FROMID, our_clientid) != HA_OK) { ha_log(LOG_ERR, "api_clientstatus: cannot add field"); *failreason = "ENOMEM"; return I_API_BADREQ; } if (send_cluster_msg(m) != HA_OK) { ha_log(LOG_ERR, "api_clientstatus: send_cluster_msg failed"); *failreason = "ECOMM"; return I_API_BADREQ; } /* * Here we return I_API_IGN because currently we don't know * the answer yet. */ return I_API_IGN; } /********************************************************************** * API_NUM_NODES: Return the number of normal nodes *********************************************************************/ static int api_num_nodes(const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { int ret; int num_nodes = 0; int i; for( i = 0; i < config->nodecount; i++){ if (config->nodes[i].nodetype == NORMALNODE_I){ num_nodes++; } } ret = ha_msg_add_int(resp, F_NUMNODES, num_nodes); if (ret != HA_OK){ cl_log(LOG_ERR, "%s: adding num_nodes field failed", __FUNCTION__); *failreason= "adding msg field failed"; return I_API_BADREQ; } return I_API_RET; } /********************************************************************** * API_GET_PARAMETER: Return the value of the given parameter... *********************************************************************/ static int api_get_parameter (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * pname; const char * pvalue; if ((pname = ha_msg_value(msg, F_PNAME)) == NULL) { *failreason = "EINVAL"; return I_API_BADREQ; } if ((pvalue = GetParameterValue(pname)) != NULL) { if (ha_msg_mod(resp, F_PVALUE, pvalue) != HA_OK) { cl_log(LOG_ERR , "api_parameter: cannot add " F_PVALUE " field to message"); } } return I_API_RET; } static int api_get_resources (const struct ha_msg* msg, struct ha_msg* resp , client_proc_t* client, const char** failreason) { const char * ret; if (!DoManageResources) { *failreason = "Resource is managed by crm." 
"Use crm tool to query resource"; return I_API_BADREQ; } ret = hb_rsc_resource_state(); if (ha_msg_mod(resp, F_RESOURCES, ret) != HA_OK) { cl_log(LOG_ERR , "api_get_resources: cannot add " F_RESOURCES " field to message"); } return I_API_RET; } static int api_get_uuid (const struct ha_msg* msg, struct ha_msg* resp, client_proc_t* client, const char** failreason) { const char* query_nodename; cl_uuid_t uuid; if ((query_nodename = ha_msg_value(msg, F_QUERYNAME))== NULL){ *failreason = "no query node name found"; return I_API_BADREQ; } if (nodename2uuid(query_nodename, &uuid) != HA_OK){ return I_API_RET; } if (cl_msg_moduuid(resp, F_QUERYUUID, &uuid) != HA_OK){ cl_log(LOG_ERR, "api_get_uuid: cannnot add" F_QUERYUUID " field to message"); return I_API_RET; } return I_API_RET; } static int api_get_nodename(const struct ha_msg* msg, struct ha_msg* resp, client_proc_t* client, const char** failreason) { const char* nodename; cl_uuid_t query_uuid; if (cl_get_uuid(msg, F_QUERYUUID, &query_uuid) != HA_OK){ *failreason = "no query node name found"; return I_API_BADREQ; } if ((nodename = uuid2nodename(&query_uuid)) == NULL){ cl_log(LOG_ERR, "api_get_nodename: nodename not found" " in map table"); return I_API_RET; } if (ha_msg_mod(resp, F_QUERYNAME, nodename) != HA_OK){ cl_log(LOG_ERR, "api_get_nodename: cannnot add" F_QUERYNAME " field to message"); return I_API_RET; } return I_API_RET; } static int api_set_sendqlen(const struct ha_msg* msg, struct ha_msg* resp, client_proc_t* client, const char** failreason) { int length; int ret = ha_msg_value_int(msg, F_SENDQLEN, &length); if (ret != HA_OK){ cl_log(LOG_ERR, "api_set_sendqlen: getting field F_SENDQLEN failed"); return I_API_IGN; } if (length <= 0){ cl_log(LOG_ERR, "api_set_sendqlen: invalid length value(%d)", length); return I_API_IGN; } cl_log(LOG_INFO, "the send queue length from heartbeat to client %s " "is set to %d", client->client_id, length); client->chan->ops->set_send_qlen(client->chan,length); return I_API_IGN; } static int add_client_gen(client_proc_t* client, struct ha_msg* msg) { char buf[MAX_CLIENT_GEN]; memset(buf, 0, MAX_CLIENT_GEN); snprintf(buf, MAX_CLIENT_GEN, "%d", client->cligen); return ha_msg_mod(msg, F_CLIENT_GENERATION, buf); } /* * Process an API request message from one of our clients */ static void api_process_request(client_proc_t* fromclient, struct ha_msg * msg) { const char * msgtype; const char * reqtype; const char * fromid; const char * pid; client_proc_t* client; struct ha_msg * resp = NULL; const char * failreason = NULL; int x; if (msg == NULL || (msgtype = ha_msg_value(msg, F_TYPE)) == NULL) { cl_log(LOG_ERR, "api_process_request: bad message type"); goto freeandexit; } /* Things that aren't T_APIREQ are general packet xmit requests... */ if (strcmp(msgtype, T_APIREQ) != 0) { /* Only named clients can send out packets to clients */ if (fromclient->iscasual) { cl_log(LOG_INFO, "api_process_request: " "general message from casual client!"); /* Bad Client! */ fromclient->removereason = "badclient"; goto freeandexit; } /* We put their client ID info in the packet as the F_FROMID*/ if (ha_msg_mod(msg, F_FROMID, fromclient->client_id) !=HA_OK){ cl_log(LOG_ERR, "api_process_request: " "cannot add F_FROMID field"); goto freeandexit; } /* Is this too restrictive? */ /* We also put their client ID info in the packet as F_TOID */ /* * N.B.: This restriction exists because of security concerns * It would be imprudent to remove it without a lot * of thought and proof of safety. 
In fact, as of right * now, without some enhancements, it would be quite unsafe. * * The right way to talk to something that's has an API * and is running on every machine is probably to use it's * API, and not talk crosstalk through the core heartbeat * messaging service, and compromise interface safety for * every application using the API. * (ALR - 17 January 2004) */ if (ha_msg_mod(msg, F_TOID, fromclient->client_id) != HA_OK) { cl_log(LOG_ERR, "api_process_request: " "cannot add F_TOID field"); goto freeandexit; } if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "Sending API message to cluster..."); cl_log_message(LOG_DEBUG, msg); } if (add_client_gen(fromclient, msg) != HA_OK){ cl_log(LOG_ERR, "api_process_request: " "add client generation to ha_msg failed"); ha_msg_del(msg); msg=NULL; return; } if (send_cluster_msg(msg) != HA_OK) { cl_log(LOG_ERR, "api_process_request: " "cannot forward message to cluster"); } msg = NULL; return; } /* It must be a T_APIREQ request */ fromid = ha_msg_value(msg, F_FROMID); pid = ha_msg_value(msg, F_PID); reqtype = ha_msg_value(msg, F_APIREQ); if ((fromid == NULL && pid == NULL) || reqtype == NULL) { cl_log(LOG_ERR, "api_process_request: no fromid/pid/reqtype" " in message."); goto freeandexit; } /* * Create the response message */ if ((resp = ha_msg_new(4)) == NULL) { cl_log(LOG_ERR, "api_process_request: out of memory/1"); goto freeandexit; } /* API response messages are of type T_APIRESP */ if (ha_msg_add(resp, F_TYPE, T_APIRESP) != HA_OK) { cl_log(LOG_ERR, "api_process_request: cannot add field/2"); goto freeandexitresp; } /* Echo back the type of API request we're responding to */ if (ha_msg_add(resp, F_APIREQ, reqtype) != HA_OK) { cl_log(LOG_ERR, "api_process_request: cannot add field/3"); goto freeandexitresp; } if ((client = find_client(fromid, pid)) == NULL) { cl_log(LOG_ERR, "api_process_request: msg from non-client"); goto freeandexitresp; } /* See if they correctly stated their client id information... */ if (client != fromclient) { cl_log(LOG_ERR, "Client mismatch! (impersonation?)"); cl_log(LOG_INFO, "pids (%ld vs %ld), Client IDs (%s vs %s)" , (long) client->pid , (long) fromclient->pid , client->client_id , fromclient->client_id); goto freeandexitresp; } for(x = 0 ; x < DIMOF(query_handler_list); x++) { int ret; if(strcmp(reqtype, query_handler_list[x].queryname) == 0) { ret = query_handler_list[x].handler(msg, resp, client , &failreason); switch(ret) { case I_API_IGN: goto freeandexitresp; case I_API_RET: if (ha_msg_mod(resp, F_APIRESULT, API_OK) != HA_OK) { cl_log(LOG_ERR , "api_process_request:" " cannot add field/8.1"); goto freeandexitresp; } api_send_client_msg(client, resp); goto freeandexitresp; case I_API_BADREQ: goto bad_req; } } } /******************************************************************** * Unknown request type... 
********************************************************************/ cl_log(LOG_ERR, "Unknown API request"); /* Common error return handling */ bad_req: cl_log(LOG_ERR, "api_process_request: bad request [%s]" , reqtype); cl_log_message(LOG_ERR, msg); if (ha_msg_add(resp, F_APIRESULT, API_BADREQ) != HA_OK) { cl_log(LOG_ERR , "api_process_request: cannot add field/11"); goto freeandexitresp; } if (failreason) { if (ha_msg_add(resp, F_COMMENT, failreason) != HA_OK) { cl_log(LOG_ERR , "api_process_request: cannot add failreason"); } } api_send_client_msg(client, resp); freeandexitresp: ha_msg_del(resp); resp=NULL; freeandexit: if (msg != NULL) { ha_msg_del(msg); msg=NULL; } } /* Process a registration request from a potential client */ void process_registerevent(IPC_Channel* chan, gpointer user_data) { client_proc_t* client; if ((client = MALLOCT(client_proc_t)) == NULL) { cl_log(LOG_ERR , "unable to add client [no memory]"); chan->ops->destroy(chan); return; } if (ANYDEBUG) { cl_log(LOG_DEBUG , "process_registerevent() {"); } /* FIXME do we want to set a different default send_qlen, * as per some configuration directive? */ /* Zap! */ memset(client, 0, sizeof(*client)); client->pid = 0; client->desired_types = DEFAULTREATMENT; client->signal = 0; client->chan = chan; client->gsource = G_main_add_IPC_Channel(PRI_CLIENTMSG , chan, FALSE , APIclients_input_dispatch , client, G_remove_client); G_main_setdescription((GSource*)client->gsource, "API client"); G_main_setmaxdispatchdelay((GSource*)client->gsource, config->heartbeat_ms); G_main_setmaxdispatchtime((GSource*)client->gsource, 100); if (ANYDEBUG) { cl_log(LOG_DEBUG , "client->gsource = 0x%lx" , (unsigned long)client->gsource); } client->next = client_list; client_list = client; total_client_count++; if (ANYDEBUG) { cl_log(LOG_DEBUG , "}/*process_registerevent*/;"); } } static void destroy_pair(gpointer key, gpointer value, gpointer user_data) { if(value){ free(value); } } static void destroy_seq_snapshot_table(GHashTable* table) { if (ANYDEBUG){ cl_log(LOG_DEBUG, "Destroying seq snapshot hash table"); } if(table){ g_hash_table_foreach(table, destroy_pair, NULL); g_hash_table_destroy(table); } return ; } static int create_seq_snapshot_table(GHashTable** ptable) { GHashTable* table = NULL; int i; if ( !ptable){ cl_log(LOG_ERR, "create_seq_snapshot_table: " "nvalid arguments"); return HA_FAIL; } *ptable = NULL; for (i = 0 ; i < config->nodecount; i++){ struct node_info* node = &config->nodes[i]; struct seqtrack* t = &node->track; if (cl_uuid_is_null(&node->uuid)){ continue; } if (t->nmissing > 0){ struct seq_snapshot* snapshot; snapshot = (struct seq_snapshot*) malloc(sizeof(struct seq_snapshot)); if (snapshot == NULL){ cl_log(LOG_ERR, "allocating memory for" " seq_snapshot failed"); return HA_FAIL; } snapshot->last_seq = t->last_seq; snapshot->generation = t->generation; if (table == NULL){ if (ANYDEBUG){ cl_log(LOG_DEBUG, "Creating seq snapshot hash table"); } table = g_hash_table_new(uuid_hash, uuid_equal); if (table == NULL){ cl_log(LOG_ERR, "creating hashtable for" " seq_snapshot failed"); return HA_FAIL; } } if (ANYDEBUG){ cl_log(LOG_DEBUG, "Creating one entry in seq snapshot hash table" "for node %s", node->nodename); } g_hash_table_insert(table, &node->uuid, snapshot); }else{ if (ANYDEBUG){ cl_log(LOG_DEBUG, "create_seq_snapshot_table:" "no missing packets found for " "node %s", node->nodename); } } } *ptable = table; return HA_OK; } /* * Register a new client. 
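 *
 * (That is, handle the API_SIGNON request that a newly connected IPC
 * client sends first.  On success the reply carries the cluster
 * deadtime, keepalive interval, local node name and log facility, as
 * the field additions near the end of this function show.)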
*/ static void api_process_registration_msg(client_proc_t* client, struct ha_msg * msg) { const char * msgtype; const char * reqtype; const char * fromid; const char * pid; struct ha_msg * resp = NULL; client_proc_t* fcli = NULL; const char * failreason = NULL; const char * api_retcode = API_OK; char deadtime[64]; char keepalive[64]; char logfacility[64]; /*set the client generation*/ client->cligen = client_generation++; if (msg == NULL || (msgtype = ha_msg_value(msg, F_TYPE)) == NULL || (reqtype = ha_msg_value(msg, F_APIREQ)) == NULL || strcmp(msgtype, T_APIREQ) != 0 || strcmp(reqtype, API_SIGNON) != 0) { cl_log(LOG_ERR, "api_process_registration_msg: bad message"); cl_log_message(LOG_ERR, msg); goto del_msg; } fromid = ha_msg_value(msg, F_FROMID); pid = ha_msg_value(msg, F_PID); if (fromid == NULL && pid == NULL) { cl_log(LOG_ERR , "api_process_registration_msg: no fromid in msg"); goto del_msg; } if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "api_process_registration_msg(%s, %s, %s)" , msgtype, pid, (fromid==NULL ?"nullfrom" : fromid)); } /* * Create the response message */ if ((resp = ha_msg_new(4)) == NULL) { cl_log(LOG_ERR , "api_process_registration_msg: out of memory/1"); goto del_msg; } if (ha_msg_add(resp, F_TYPE, T_APIRESP) != HA_OK) { cl_log(LOG_ERR , "api_process_registration_msg: cannot add field/2"); goto del_rsp_and_msg; } if (ha_msg_add(resp, F_APIREQ, reqtype) != HA_OK) { cl_log(LOG_ERR , "api_process_registration_msg: cannot add field/3"); goto del_rsp_and_msg; } client->pid = atoi(pid); /* * Sign 'em up. */ if (!api_add_client(client, msg)) { cl_log(LOG_ERR , "api_process_registration_msg: cannot add client(%s)" , client->client_id); failreason = "cannot add client"; } /* Make sure we can find them in the table... */ if (failreason == NULL && (fcli = find_client(fromid, pid)) == NULL) { cl_log(LOG_ERR , "api_process_registration_msg: cannot find client"); failreason = "cannot locate client"; } if (failreason == NULL && fcli != client) { cl_log(LOG_ERR , "api_process_registration_msg: found wrong client"); failreason = "found wrong client"; } /* Hopefully, everything went well. * Now create a table to record sequence/generation number * for each node if necessary */ client->seq_snapshot_table = NULL; if (failreason == NULL && create_seq_snapshot_table(&client->seq_snapshot_table) != HA_OK){ cl_log(LOG_ERR, "api_process_registration_msg" ": creating seq snapshot table failed"); failreason = "cannot create sequence snapshot table"; } if (failreason != NULL) { ha_msg_add(msg, F_COMMENT, failreason); api_retcode = API_BADREQ; } if (ha_msg_mod(resp, F_APIRESULT, api_retcode) != HA_OK) { cl_log(LOG_ERR , "api_process_registration_msg: cannot add field/4"); goto del_rsp_and_msg; return; } snprintf(deadtime, sizeof(deadtime), "%lx", config->deadtime_ms); snprintf(keepalive, sizeof(keepalive), "%lx", config->heartbeat_ms); snprintf(logfacility, sizeof(logfacility), "%d", config->log_facility); /* Add deadtime and keepalive time to the response */ if ( (ha_msg_add(resp, F_DEADTIME, deadtime) != HA_OK) || (ha_msg_add(resp, F_KEEPALIVE, keepalive) != HA_OK) || (ha_msg_mod(resp, F_NODENAME, localnodename) != HA_OK) || (ha_msg_add(resp, F_LOGFACILITY, logfacility) != HA_OK)) { cl_log(LOG_ERR, "api_process_registration_msg: cannot add field/4"); goto del_rsp_and_msg; } if (ANYDEBUG) { cl_log(LOG_DEBUG, "Signing on API client %ld (%s)" , (long) client->pid , (client->iscasual? 
"'casual'" : client->client_id)); } api_send_client_msg(client, resp); del_rsp_and_msg: if (resp != NULL) { ha_msg_del(resp); resp=NULL; } del_msg: if (msg != NULL) { ha_msg_del(msg); msg=NULL; } } static void api_send_client_status(client_proc_t* client, const char * status , const char * reason) { struct ha_msg* msg; if (client->iscasual) { return; } /* * Create the status message */ if ((msg = ha_msg_new(4)) == NULL) { cl_log(LOG_ERR, "api_send_client_status: out of memory/1"); return; } if (ha_msg_add(msg, F_TYPE, T_APICLISTAT) != HA_OK || ha_msg_add(msg, F_STATUS, status) != HA_OK || ha_msg_add(msg, F_FROMID, client->client_id) != HA_OK || ha_msg_add(msg, F_TOID, client->client_id) != HA_OK || ha_msg_add(msg, F_ORIG, curnode->nodename) != HA_OK || (reason != NULL && ha_msg_add(msg, F_COMMENT, reason) != HA_OK)) { cl_log(LOG_ERR, "api_send_client_status: cannot add fields"); ha_msg_del(msg); msg=NULL; return; } if (add_client_gen(client, msg) != HA_OK){ cl_log(LOG_ERR, "api_send_client_status: cannot add client generation"); ha_msg_del(msg); msg=NULL; return; } if (strcmp(status, LEAVESTATUS) == 0) { /* Make sure they know they're signed off... */ api_send_client_msg(client, msg); } if (send_cluster_msg(msg) != HA_OK) { cl_log(LOG_ERR, "api_send_client_status: " "cannot send message to cluster"); } msg = NULL; } /* * Send a message to a client process. */ static void api_send_client_msg(client_proc_t* client, struct ha_msg *msg) { if (msg2ipcchan(msg, client->chan) != HA_OK) { if (!client->removereason) { if (client->chan->failreason[0] == EOS){ client->removereason = "sendfail"; }else { client->removereason = client->chan->failreason; } } } if (CL_KILL(client->pid, client->signal) < 0 && errno == ESRCH) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "api_send_client: client %ld died" , (long) client->pid); } if (!client->removereason) { client->removereason = "died"; } } } int api_remove_client_pid(pid_t c_pid, const char * reason) { char cpid[20]; int backlog_processed = 0; client_proc_t* client; snprintf(cpid, sizeof(cpid)-1, "%d", c_pid); if ((client = find_client(NULL, cpid)) == NULL) { return 0; } client->removereason = reason; client->isindispatch = TRUE; /* avoid recursion */ while (client->chan && client->chan->recv_queue->current_qlen > 0 && (!reason || strcmp(reason, API_SIGNOFF) != 0)) { ProcessAnAPIRequest(client); if ((++backlog_processed & 0x3f) == 0) { /* FIXME backlog processing should be done from a * dedicated backlog processing callback from the * mainloop, so we won't stall other callback for too * long. For now, just try to avoid being killed by our * busy-loop protection. */ cl_cpu_limit_update(); } } client->isindispatch = FALSE; G_main_del_IPC_Channel(client->gsource); /* Should trigger G_remove_client (below) */ return 1; } static void G_remove_client(gpointer Client) { client_proc_t* client = Client; const char * reason; reason = client->removereason ? client->removereason : "?"; if (ANYDEBUG) { cl_log(LOG_DEBUG , "G_remove_client(pid=%d, reason='%s' gsource=0x%lx) {" , client->pid, reason, (unsigned long) client->gsource); } api_remove_client_int(client, reason); if (ANYDEBUG) { cl_log(LOG_DEBUG , "}/*G_remove_client;*/"); } } /* * Make this client no longer a client ;-) * Should only be called by G_remove_client(). * G_remove_client gets called by the API code when the API object * gets removed. It can also get called by G_main_del_fd(). 
*/ static void api_remove_client_int(client_proc_t* req, const char * reason) { client_proc_t* prev = NULL; client_proc_t* client; --total_client_count; if ((req->desired_types & DEBUGTREATMENTS) != 0) { --debug_client_count; } /* Locate the client data structure in our list */ for (client=client_list; client != NULL; client=client->next) { /* Is this the client? */ if (client->pid == req->pid) { if (ANYDEBUG) { const char *id = client->iscasual ? "casual" : client->client_id; cl_log(LOG_DEBUG , "api_remove_client_int: removing '%s'" " pid [%ld] reason: %s" , id, (long)req->pid, reason); } if (prev == NULL) { client_list = client->next; }else{ prev->next = client->next; } break; } prev = client; } if (req == client){ api_send_client_status(req, LEAVESTATUS, reason); /* Zap! */ memset(client, 0, sizeof(*client)); free(client); client = NULL; }else{ cl_log(LOG_ERR, "api_remove_client_int: could not find pid [%ld]" , (long) req->pid); } return; } /* Validate client credentials against the real world */ static gboolean api_check_client_credentials(client_proc_t* client, uid_t uid, gid_t gid) { IPC_Auth auth; guint id; int one = 1; gboolean result = TRUE; int auth_result = IPC_FAIL; GHashTable* uidlist = g_hash_table_new(g_direct_hash, g_direct_equal); id = (guint) uid; g_hash_table_insert(uidlist, GUINT_TO_POINTER(id), &one); auth.uid = uidlist; auth.gid = NULL; auth_result = client->chan->ops->verify_auth(client->chan, &auth); if (auth_result != IPC_OK) { result = FALSE; } else { GHashTable* gidlist = g_hash_table_new(g_direct_hash , g_direct_equal); id = (guint) gid; g_hash_table_insert(gidlist, GUINT_TO_POINTER(id), &one); auth.uid = NULL; auth.gid = gidlist; auth_result = client->chan->ops->verify_auth( client->chan, &auth); if (auth_result != IPC_OK && auth_result != IPC_BROKEN) { result = FALSE; } g_hash_table_destroy(gidlist); } g_hash_table_destroy(uidlist); return result; } /* * Add the process described in this message to our list of clients. * * The following fields are used: * F_PID: Mandantory. The client process id. * F_FROMID: The client's identifying handle. * If omitted, it defaults to the F_PID field as a * decimal integer. */ static int api_add_client(client_proc_t* client, struct ha_msg* msg) { pid_t pid = 0; const char* cpid; const char * fromid; const char * cgid = NULL; const char * cuid = NULL; long luid = -1; long lgid = -1; int uid = -1; int gid = -1; if ((cpid = ha_msg_value(msg, F_PID)) != NULL) { pid = atoi(cpid); } if (pid <= 0 || (CL_KILL(pid, 0) < 0 && errno == ESRCH)) { cl_log(LOG_WARNING , "api_add_client: bad pid [%ld]", (long) pid); return FALSE; } fromid = ha_msg_value(msg, F_FROMID); if (find_client(fromid, NULL) != NULL) { cl_log(LOG_WARNING , "duplicate client add request [%s] [%s]" , (fromid ? fromid : "(nullfromid)") , (cpid ? 
cpid : "(nullcpid)")); client->removereason = "duplicate add request"; return FALSE; } if (fromid != NULL) { strncpy(client->client_id, fromid, sizeof(client->client_id)); if (atoi(client->client_id) == pid) { client->iscasual = 1; }else{ client->iscasual = 0; } }else{ snprintf(client->client_id, sizeof(client->client_id) , "%d", pid); client->iscasual = 1; } /* Encourage better realtime behavior by heartbeat */ client->chan->ops->set_recv_qlen(client->chan, 0); if ((cuid = ha_msg_value(msg, F_UID)) == NULL || (cgid = ha_msg_value(msg, F_GID)) == NULL || sscanf(cuid, "%ld", &luid) != 1 || sscanf(cgid, "%ld", &lgid) != 1) { cl_log_message(LOG_ERR, msg); client->removereason = "invalid id info"; cl_log(LOG_ERR, "Client user/group id is incorrect" " [%s] => %ld [%s] => %ld" , cuid == NULL ? "" : cuid, luid , cgid == NULL ? "" : cgid, lgid); return FALSE; } uid = (uid_t)luid; gid = (gid_t)lgid; if (!api_check_client_credentials(client, uid, gid)) { client->removereason = "incorrect/false credentials"; return FALSE; } client->uid = uid; client->gid = gid; if (api_check_client_authorization(client)) { api_send_client_status(client, JOINSTATUS, API_SIGNON); }else{ cl_log(LOG_WARNING , "Client [%s] pid %d failed authorization [%s]" , client->client_id, pid, client->removereason); return FALSE; } return TRUE; } static gboolean api_check_client_authorization(client_proc_t* client) { gpointer gauth = NULL; IPC_Auth* auth; int auth_result = IPC_FAIL; /* If client is casual, or type of client not in authorization table * then default to the "default" authorization category. * otherwise, use the client type's authorization list */ if (client->iscasual){ gauth = g_hash_table_lookup(APIAuthorization, "anon"); if (gauth == NULL){ cl_log(LOG_ERR, "NO auth found for anonymous"); return FALSE; } }else if((gauth = g_hash_table_lookup(APIAuthorization, client->client_id)) == NULL) { if ((gauth = g_hash_table_lookup(APIAuthorization, "default")) == NULL) { client->removereason = "no default client auth"; return FALSE; } } auth = gauth; if ((long)auth->gid == (long)-1L) { cl_log(LOG_DEBUG, "Darn! -1 gid ptr in api_check_client_authorization"); abort(); } if (ANYDEBUG) { cl_log(LOG_DEBUG , "Checking client authorization for client %s (%ld:%ld)" , client->client_id , (long)client->uid, (long)client->gid); } auth_result = client->chan->ops->verify_auth( client->chan, auth); if (auth_result == IPC_OK) { #ifndef GETPID_INCONSISTENT if (client->chan->farside_pid > 0) { if (client->chan->farside_pid != client->pid) { client->removereason = "pid mismatch"; cl_log(LOG_INFO , "PID mismatch: %d vs farside_pid: %d" , client->pid , client->chan->farside_pid); return FALSE; } } #endif return TRUE; } else if(auth_result == IPC_BROKEN) { return TRUE; } client->removereason = "client failed authorization"; return FALSE; } /* * Find the client that goes with this client id/pid */ client_proc_t* find_client(const char * fromid, const char * cpid) { pid_t pid = -1; client_proc_t* client; if (cpid != NULL) { pid = atoi(cpid); } for (client=client_list; client != NULL; client=client->next) { if (cpid && client->pid == pid) { return(client); } if (fromid && strcmp(fromid, client->client_id) == 0) { return(client); } } return(NULL); } static gboolean APIclients_input_dispatch(IPC_Channel* chan, gpointer user_data) { client_proc_t* client = user_data; gboolean ret = TRUE; hb_signal_process_pending(); if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "APIclients_input_dispatch() {"); } if (chan != client->chan) { /* Bad boojum! 
*/ cl_log(LOG_ERR , "APIclients_input_dispatch chan mismatch"); ret = FALSE; goto getout; } if (client->removereason) { ret = FALSE; goto getout; } /* Process a single API client request */ client->isindispatch = TRUE; ProcessAnAPIRequest(client); client->isindispatch = FALSE; if (client->removereason) { ret = FALSE; goto getout; } getout: if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "return %d;", ret); cl_log(LOG_DEBUG , "}/*APIclients_input_dispatch*/;"); } return ret; } static gboolean all_clients_running = TRUE; gboolean all_clients_pause(void) { client_proc_t* client; if (!all_clients_running ){ return TRUE; } cl_log(LOG_INFO, "all clients are now paused"); for (client=client_list; client != NULL; client=client->next) { G_main_IPC_Channel_pause(client->gsource); } all_clients_running = FALSE; return TRUE; } gboolean all_clients_resume(void) { client_proc_t* client; if (all_clients_running ){ return TRUE; } cl_log(LOG_INFO, "all clients are now resumed"); for (client=client_list; client != NULL; client=client->next) { G_main_IPC_Channel_resume(client->gsource); } all_clients_running = TRUE; return TRUE; } gboolean ProcessAnAPIRequest(client_proc_t* client) { struct ha_msg* msg; static int consecutive_failures = 0; gboolean rc = FALSE; if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "ProcessAnAPIRequest() {"); } if (!client->chan->ops->is_message_pending(client->chan)) { goto getout; } /* See if we can read the message */ if ((msg = msgfromIPC(client->chan, 0)) == NULL) { /* EOF? */ if (!IPC_ISRCONN(client->chan)) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "EOF from client pid %ld" , (long)client->pid); } if (!client->removereason) { client->removereason = "EOF"; } goto getout; } /* None of the above... */ cl_log(LOG_INFO, "No message from pid %ld" , (long)client->pid); ++consecutive_failures; /* * This used to happen because of EOF, * which is now handled above. This is * good protection to have anyway ;-) */ if (consecutive_failures >= 10) { cl_log(LOG_ERR , "Removing client pid %ld" , (long)client->pid); client->removereason = "noinput"; consecutive_failures = 0; } goto getout; } consecutive_failures = 0; /* Process the API request message... */ api_heartbeat_monitor(msg, APICALL, ""); /* First message must be a registration msg */ if (client->pid == 0) { api_process_registration_msg(client, msg); }else{ api_process_request(client, msg); } msg = NULL; rc = TRUE; if (!all_clients_running){ /* This is a new client, * this allows a client to sign on but further action will be blocked */ G_main_IPC_Channel_pause(client->gsource); rc = FALSE; } getout: /* May have gotten a message from 'client' */ if (CL_KILL(client->pid, 0) < 0 && errno == ESRCH) { /* Oops... he's dead */ if (ANYDEBUG) { cl_log(LOG_DEBUG , "Client pid %ld died (input)" , (long)client->pid); } client->removereason = "died"; } if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "\treturn %s;" , (rc ? "TRUE" : "FALSE")); cl_log(LOG_DEBUG, "}/*ProcessAnAPIRequest*/;"); } return rc; } Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_api.py0000755000000000000000000005215111576626513020741 0ustar00usergroup00000000000000#!/usr/bin/env python '''Heartbeat related classes. What we have here is a handful of classes related to the heartbeat cluster membership services. These classes are: ha_msg: The heartbeat messaging class hb_api: The heartbeat API class ''' __copyright__=''' Copyright (C) 2000 Alan Robertson Licensed under the GNU GPL. 
''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import types, string, os, sys from UserDict import UserDict import select class ha_msg (UserDict): '''ha_msg is the Heartbeat messaging class. It is the bottle into which you put messages before throwing them out onto the sea of cluster :-) Not surprisingly, it is also the bottle which you receive them in. It is also the way you communicate with heartbeat itself using its API All heartbeat messages are name value pairs (~Python dicts) Not too surprisingly, this is much nicer in python than in 'C' These objects are the fundamental units of heartbeat communication, and also the fundamental units of communication with heartbeat itself (via the heartbeat API). This class is basically a restricted dictionary type with a few minor twists to make it fit a little better into the heartbeat message paradigm. These twists are: We only allow strings as names and values We require a particular canonical string representation so we can transport them compatibly on the network We allow a wide variety of __init__() and update() args including strings in our canonical network format See the update member function for more details. We are picky about what kinds of things you want to shove into our bottle. Everything needs to be strings, and need to be somewhat restricted in content from the Python point of view. For example, no nulls, no newlines, etc. Constructor arguments: dictionaries, ha_msg objects, 2-element lists/tuples, files strings (in canonical msg format) Exceptions raised: ValueError: For every bad parameter we see, we raise a ValueError. This can happen when the string you've given us doesn't meet our expectations in various ways. Be prepared to deal with it when you give us messages you can't guarantee are perfect. ''' # Field names start with F_... F_TYPE="t" F_ORIG="src" F_NODE="node" F_TO="dest" F_FROMID="from_id" F_IFNAME="ifname" F_NODENAME="node" F_TOID="to_id" F_PID="pid" F_STATUS="st" F_APIREQ="reqtype" F_APIRESULT="result" F_COMMENT="info" # Message types start with T_... T_APIREQ="hbapi-req" T_APIRESP="hbapi-resp" T_TESTREQ="cltest-req" T_TESTRSP="cltest-rsp" # # Things we need for making network-compatible strings # from ha_msg objects # max_reprlen = 1024 # Maximum length string for an ha_msg startstr=">>>\n" endstr="<<<\n" __str__ = UserDict.__repr__ # use default __str__ function def __init__(self, *args): '''Initialize the ha_msg according to the parameters we're given''' self.data = {} for arg in args: self.update(arg) def update(self, *args): '''Update the message from info in our arguments We currently allow these kinds of arguments: dictionary, ha_msg, tuple, list, string, file... ''' # # It would be nice to check for type attributes rather than # for specific types... # for arg in args: # Do we have a String? 
if isinstance(arg, types.StringType): self.fromstring(arg) # Do we have a 2-element Tuple/List? elif (isinstance(arg, types.TupleType) or isinstance(arg, types.ListType)): if len(arg) != 2: raise ValueError("wrong size tuple/list") self[arg[0]] = arg[1] # Do we have a dictionary or ha_msg object? elif (isinstance(arg, types.DictType) or (isinstance(arg, types.InstanceType) and issubclass(arg.__class__, UserDict))): for key in arg.keys(): self[key] = arg[key] # How about a file? elif isinstance(arg, types.FileType): self.fromfile(arg) elif isinstance(arg, types.FileType): self.fromfile(arg) else: raise ValueError("bad type in update") # I can imagine more validation being useful... # The strings have more constraints than this code enforces... # They can't contain NULLs, or \r or \n # # The names should be legitimate environment var names # (for example, can't contain '=') # etc... def __setitem__(self, k, value): if (not isinstance(k, types.StringType) or not isinstance(k, types.StringType)): raise ValueError("non-string data") self.data[k] = value def __repr__(self): '''Convert to the canonical network-format string that heartbeat expects us to use. ''' ret = ha_msg.startstr for i in self.items(): ret = ret + i[0] + "=" + i[1] + "\n" ret = ret + ha_msg.endstr if len(ret) <= ha_msg.max_reprlen: return ret raise ValueError("message length error") # Convert from canonical-message-string to ha_msg def fromstring(self, s): '''Update an ha_msg from a string The string must be in our "well-known" network format (like comes from heartbeat or __repr__()) ''' # # It should start w/ha_msg.startstr, and end w/ha_msg.endstr # if (s[:len(ha_msg.startstr)] != ha_msg.startstr or s[-len(ha_msg.endstr):] != ha_msg.endstr) : raise ValueError("message format error") # # Split up the string into lines, and process each # line as a name=value pair # strings = string.split(s, '\n')[1:-2] for astring in strings: # Update-from-list is handy here... self.update(string.split(astring, '=')) def fromfile(self, f): '''Read an ha_msg from a file. This means that we read from the file until we find an ha_msg string, then plop it into 'self' ''' delimfound=0 while not delimfound: line = f.readline() if line == "" : raise ValueError("EOF") delimfound = (line == ha_msg.startstr) delimfound=0 line="?" while not delimfound and line != "": line = f.readline() if line == "" : raise ValueError("EOF") delimfound = (line == ha_msg.endstr) if not delimfound: self.update(string.split(line[:-1], '=')) def tofile(self, f): '''Write an ha_msg to a file, and flush it.''' f.write(repr(self)) f.flush() return 1 class hb_api: '''The heartbeat API class. This interesting and useful class is a python client side implementation of the heartbeat API. It allows one to inquire concerning the valid set of nodes and interfaces, and in turn allows one to inquire about the status of these things. Additionally, it allows one to send messages to the cluster, and to receive messages from the cluster. ''' # # Probably the exceptions we trap should have messages that # go along with them, since they shouldn't happen. 
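    #
    # Everything exchanged with heartbeat over these FIFOs is an ha_msg
    # in its canonical string form (see ha_msg.__repr__ above).  As a
    # rough sketch -- field order is arbitrary and the pid/from_id
    # values below are made up -- the sign-on request written to the
    # registration FIFO looks like:
    #
    #   >>>
    #   t=hbapi-req
    #   reqtype=signon
    #   pid=1234
    #   from_id=ipfail
    #   <<<
    #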
# FIFO_BASE_DIR = "/var/lib/heartbeat/" # # Various constants that are part of the heartbeat API # SIGNON="signon" SIGNOFF="signoff" SETFILTER="setfilter" SETSIGNAL="setsignal" NODELIST="nodelist" NODESTATUS="nodestatus" IFLIST="iflist" IFSTATUS="ifstatus" ActiveStatus="active" OK="OK" FAILURE="fail" BADREQ="badreq" MORE="ok/more" _pid=os.getpid() API_REGFIFO = FIFO_BASE_DIR + "register" NAMEDCLIENTDIR = FIFO_BASE_DIR + "api" CASUALCLIENTDIR = FIFO_BASE_DIR + "casual" def __init__(self): self.SignedOn=0 self.iscasual=1 self.MsgQ = [] self.Callbacks = {} self.NodeCallback = None self.IFCallback = None def __del__(self): '''hb_api class destructor. NOTE: If you're going to let an hb_api object go out of scope, and not sign off, then don't let it go out of scope from the highest level but instead make sure it goes out of scope from a function. This is because some of the classes this destructor needs may have already disappeared if you wait until the bitter end to __del__ us :-( ''' print "Destroying hb_api object" self.signoff() def __api_msg(self, msgtype): '''Create a standard boilerplate API message''' return ha_msg( { ha_msg.F_TYPE : ha_msg.T_APIREQ, ha_msg.F_APIREQ : msgtype, ha_msg.F_PID : repr(hb_api._pid), ha_msg.F_FROMID : self.OurClientID }) def __get_reply(self): '''Return the reply to the current API request''' try: while 1: reply = ha_msg(self.ReplyFIFO) if reply[ha_msg.F_TYPE] == ha_msg.T_APIRESP: return reply # Not an API reply. Queue it up for later... self.MsgQ.append(reply) except (KeyError,ValueError): return None def __CallbackCall(self, msg): '''Perform the callback calls (if any) associated with the given message. We never do more than one callback per message. and we return true if we did any callbacks, and None otherwise. ''' msgtype = msg[ha_msg.F_TYPE] if self.NodeCallback and (msgtype == ha_msg.T_STATUS or msgtype == T_NS_STATUS): node=msg[ha_msg.F_ORIG] self.NodeCallback[0](node, self.NodeCallback[1]) return 1 if self.IFCallback and msgtype == ha_msg.T_IFSTATUS: node=msg[ha_msg.F_ORIG] stat=msg[ha_msg.F_STATUS] self.IFCallback[0](node, stat, self.IFCallback[1]) return 1 if self.Callbacks.has_key(msgtype): entry = self.Callbacks[msgtype] entry[0](msg, entry[1]) return 1 return None def __read_hb_msg(self, blocking): '''Return the next message from heartbeat.''' if len(self.MsgQ) > 0: return self.MsgQ.pop(0) if not blocking and not self.msgready(): return None try: return ha_msg(self.ReplyFIFO) except (ValueError): return None def readmsg(self, blocking): '''Return the next message to the caller for which there were no active callbacks. Call the callbacks for those messages which might have been read along the way that *do* have callbacks. Because this is Python, and this member function also replaces the 'rcvmsg' function in the 'C' code. 
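
        A minimal receive loop might look like this (a sketch only,
        assuming "hb" is an hb_api object that has already signed on):

            while 1:
                msg = hb.readmsg(1)     # 1 = block until something arrives
                if msg == None:
                    continue            # nothing usable this time around
                print msg[ha_msg.F_TYPE], msg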
''' while(1): rc=self.__read_hb_msg(blocking) if rc == None: return None if not self.__CallbackCall(rc): return rc def signoff(self): '''Sign off of the heartbeat API.''' if self.SignedOn: msg = self.__api_msg(hb_api.SIGNOFF) msg.tofile(self.MsgFIFO) self.SignedOn=0 def signon(self, service=None): '''Sign on to heartbeat (register as a client)''' if service == None: self.OurClientID = repr(hb_api._pid) self.iscasual = 1 else: self.OurClientID = service self.iscasual = 0 msg = self.__api_msg(hb_api.SIGNON) # Compute FIFO directory if self.iscasual: self.FIFOdir = hb_api.CASUALCLIENTDIR else: self.FIFOdir = hb_api.NAMEDCLIENTDIR self.ReqFIFOName = self.FIFOdir + os.sep + self.OurClientID + ".req" self.ReplyFIFOName = self.FIFOdir + os.sep + self.OurClientID + ".rsp" self.OurNode = lower(os.uname()[1]) # # For named clients, lock the request/response fifos # (FIXME!!) # if self.iscasual: # Make the registration, request FIFOs os.mkfifo(self.ReqFIFOName, 0600) os.mkfifo(self.ReplyFIFOName, 0600) # # Open the reply FIFO with fdopen... # (this keeps it from hanging) fd = os.open(self.ReplyFIFOName, os.O_RDWR) self.ReplyFIFO = os.fdopen(fd, "r") msg = hb_api.__api_msg(self, hb_api.SIGNON) # Open the registration FIFO RegFIFO = open(hb_api.API_REGFIFO, "w"); # Send the registration request msg.tofile(RegFIFO) RegFIFO.close() try: # Read the reply reply = self.__get_reply() # Read the return code rc = reply[ha_msg.F_APIRESULT] if rc == hb_api.OK : self.SignedOn=1 self.MsgFIFO = open(self.ReqFIFOName, "w") return 1 return None except (KeyError,ValueError): return None def setfilter(self, fmask): '''Set message reception filter mask This is the 'raw' interface. I guess I should implement a higher-level one, too... :-) ''' msg = hb_api.__api_msg(self, hb_api.SETFILTER) msg[ha_msg.F_FILTERMASK] = "%x" % fmask msg.tofile(self.MsgFIFO) try: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc == hb_api.OK: return 1 return None except (KeyError, ValueError): return None def setsignal(self, signal): '''Set message notification signal (0 to cancel)''' msg = hb_api.__api_msg(self, hb_api.SETSIGNAL) msg[ha_msg.F_SIGNAL] = "%d" % signal msg.tofile(self.MsgFIFO) try: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc == hb_api.OK : return 1 return None except (KeyError, ValueError): return None def nodelist(self): '''Retrieve the list of nodes in the cluster''' Nodes = [] msg = hb_api.__api_msg(self, hb_api.NODELIST) msg.tofile(self.MsgFIFO) try: while 1: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc != hb_api.OK and rc != hb_api.MORE: return None Nodes.append(reply[ha_msg.F_NODENAME]) if rc == hb_api.OK : return Nodes elif rc == hb_api.MORE: continue else: return None except (KeyError, ValueError): return None def iflist(self, node): '''Retrieve the list of interfaces to the given node''' Interfaces = [] msg = hb_api.__api_msg(self, hb_api.IFLIST) msg[ha_msg.F_NODENAME] = node msg.tofile(self.MsgFIFO) try: while 1: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc != hb_api.OK and rc != hb_api.MORE : return None Interfaces.append(reply[ha_msg.F_IFNAME]) if rc == hb_api.OK : return Interfaces elif rc == hb_api.MORE: continue else: return None except (KeyError, ValueError): return None def nodestatus(self, node): '''Retrieve the status of the given node''' msg = hb_api.__api_msg(self, hb_api.NODESTATUS) msg[ha_msg.F_NODENAME]=node msg.tofile(self.MsgFIFO) try: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc == hb_api.FAILURE : return None 
return reply[ha_msg.F_STATUS] except (KeyError, ValueError): return None def ifstatus(self, node, interface): '''Retrieve the status of the given interface on the given node''' msg = hb_api.__api_msg(self, hb_api.IFSTATUS) msg[ha_msg.F_NODENAME]=node msg[ha_msg.F_IFNAME]=interface msg.tofile(self.MsgFIFO) try: reply = self.__get_reply() rc = reply[ha_msg.F_APIRESULT] if rc == hb_api.FAILURE : return None return reply[ha_msg.F_STATUS] except (KeyError, ValueError): return None def cluster_config(self): '''Return the whole current cluster configuration. This call not present in the 'C' API. It could probably give a better structured return value. ''' ret = {} for node in self.nodelist(): nstat = {} nstat["status"] = self.nodestatus(node) interfaces={} for intf in self.iflist(node): interfaces[intf] = self.ifstatus(node, intf) nstat["interfaces"] = interfaces ret[node] = nstat return ret def nodes_with_status(self, status=None): '''Return the list of nodes with the given status. Default status is hb_api.ActiveStatus (i.e., "active") ''' if status == None: status=hb_api.ActiveStatus ret = [] for node in self.nodelist(): if self.nodestatus(node) == status: ret.append(node) return ret def get_inputfd(self): '''Return the input file descriptor associated with this object''' if not self.SignedOn: return None return self.ReplyFIFO.fileno() def fileno(self): return self.get_inputfd() def msgready(self): '''Returns TRUE if a message is waiting to be read.''' if len(self.MsgQ) > 0: return 1 ifd = self.get_inputfd() inp, out, exc = select.select([ifd,], [], [], 0) if len(inp) > 0 : return 1 return None def sendclustermsg(self, origmsg): '''Send a message to all cluster members. This is not allowed for casual clients.''' if not self.SignedOn or self.iscasual: return None msg =ha_msg(origmsg) msg[ha_msg.F_ORIG] = self.OurNode return msg.tofile(self.MsgFIFO) def sendnodemsg(self, origmsg, node): '''Send a message to a specific node in the cluster. This is not allowed for casual clients.''' if not self.SignedOn or self.iscasual: return None msg = ha_msg(origmsg) msg[ha_msg.F_ORIG] = self.OurNode msg[ha_msg.F_TO] = node return msg.tofile(self.MsgFIFO) def set_msg_callback(self, msgtype, callback, data): '''Define a callback for a specific message type. It returns the previous (callback,data) for that particular message type. ''' if self.Callbacks.has_key(msgtype) : ret=self.Callbacks[msgtype] else: ret=None if callback == None : if self.Callbacks.has_key(msgtype) : del self.Callbacks[msgtype] return ret self.Callbacks[msgtype] = (callback, data) return ret def set_nstatus_callback(self, callback, data): '''Define a callback for node status changes. It returns the previous (callback,data) for the previous nstatus_callback. ''' ret = self.NodeCallback if callback == None: self.NodeCallback = None return ret self.NodeCallback = (callback, data) return ret def set_ifstatus_callback(self, callback, data): '''Define a callback for interface status changes. It returns the previous (callback,data) for the previous ifstatus_callback. ''' ret = self.IFCallback if callback == None: self.IFCallback = None return ret self.IFCallback = (callback, data) return ret # # A little test code... # if __name__ == '__main__': hb = hb_api() hb.signon() print "Now signed on to heartbeat API..." 
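    # NB: this smoke test needs a running heartbeat daemon to sign on
    # to, and the node/interface names hard-coded further down
    # ("localhost", "kathyamy", "/dev/ttyS0") only match the cluster
    # it was originally written against.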
print "Nodes in cluster:", hb.nodelist() for node in hb.nodelist(): print "\nStatus of %s: %s" % (node, hb.nodestatus(node)) print "\tInterfaces to %s: %s" % (node, hb.iflist(node)) for intf in hb.iflist(node): print "\tInterface %s: %s" % (intf, hb.ifstatus(node, intf)) print "\nCluster Config:" config = hb.cluster_config() print config print "\n" print config["localhost"]["interfaces"]["localhost"], ":-)" print config["kathyamy"]["interfaces"]["/dev/ttyS0"] Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_config.h0000644000000000000000000000235511576626513021232 0ustar00usergroup00000000000000/* * Parse various heartbeat configuration files... * * Copyright (C) 2000 Alan Robertson * portions (c) 1999,2000 Mitja Sarp * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _HB_CONFIG_H #define _HB_CONFIG_H int parse_ha_resources(const char * cfgfile); void dump_config(void); void dump_default_config(int wikiout); int add_node(const char * value, int nodetype); int parse_authfile(void); int init_config(const char * cfgfile); int StringToBaud(const char * baudstr); const char * GetParameterValue(const char * name); #endif /* _HB_CONFIG_H */ Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_module.h0000644000000000000000000000154511576626513021252 0ustar00usergroup00000000000000/* Copyright (C) 2000 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef HB_MODULE_H #define HB_MODULE_H 1 #include int module_init(void); #endif Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_proc.h0000644000000000000000000000405111576626513020723 0ustar00usergroup00000000000000/* * hb_proc.h: definitions of heartbeat child process info * * These are the things that let us manage our child processes well. * * Copyright (C) 2001 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _HB_PROC_H # define _HB_PROC_H 1 #include #include #include enum process_type { PROC_UNDEF=0, /* OOPS! ;-) */ PROC_MST_CONTROL, /* Master control process */ PROC_HBREAD, /* Read process */ PROC_HBWRITE, /* Write process */ PROC_HBFIFO, /* FIFO process */ PROC_PPP /* (Obsolete) PPP process */ }; enum process_status { FORKED=1, /* This process is forked, but not yet really running */ RUNNING=2, /* This process is fully active, and open for business*/ PROCDEAD=3, /* This process has died, perhaps been respawned */ }; struct process_info { enum process_type type; /* Type of process */ enum process_status pstat; /* Is it running yet? */ pid_t pid; /* Process' PID */ int medianum; /* Which media index does this process go with? */ hb_msg_stats_t msgstats; }; struct pstat_shm { int nprocs; int restart_after_shutdown; int giveup_resources; int i_hold_resources; struct process_info info [MAXPROCS]; }; /* These are volatile because they're in shared memory */ volatile extern struct pstat_shm * procinfo; volatile extern struct process_info * curproc; #endif /*_HB_PROC_H*/ Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_resource.c0000644000000000000000000017164111576626513021614 0ustar00usergroup00000000000000/* * hb_resource: Linux-HA heartbeat resource management code * * Copyright (C) 2001-2002 Luis Claudio R. Goncalves * * Copyright (C) 1999-2002 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /************************************************************************** * * This file contains almost all the resource management code for * heartbeat. * * It contains code to handle: * resource takeover * standby processing * STONITH operations. * performing notify_world() type notifications of status changes. * * We're planning on replacing it with an external process * to perform resource management functions as a heartbeat client. * * In the mean time, we're planning on disentangling it from the main * heartbeat code and cleaning it up some. * * Here are my favorite cleanup tasks: * * Get rid of the "standby_running" timer, and replace it with a gmainloop * timer. * * Make hooks for processing incoming messages (in heartbeat.c) cleaner * and probably hook them in through a hash table callback hook * or something. * * Make registration hooks to allow notify_world to be called by pointer. * * Reduce the dependency on global variables shared between heartbeat.c * and here. 
* * Generally Reduce the number of interactions between this code and * heartbeat.c as evidenced by heartbeat_private.h and hb_resource.h * **************************************************************************/ extern struct node_info * curnode; int DoManageResources = TRUE; int nice_failback = FALSE; int auto_failback = FALSE; int failback_in_progress = FALSE; static gboolean rsc_needs_failback = FALSE; /* * These are true when all our initial work for taking over local * or foreign resources is completed, or found to be unnecessary. */ static gboolean local_takeover_work_done = FALSE; static gboolean foreign_takeover_work_done = FALSE; static gboolean rsc_needs_shutdown = FALSE; int other_holds_resources = HB_NO_RSC; int other_is_stable = FALSE; /* F_ISSTABLE */ int takeover_in_progress = FALSE; enum hb_rsc_state resourcestate = HB_R_INIT; enum standby going_standby = NOT; longclock_t standby_running = 0L; static int standby_rsctype = HB_ALL_RSC; #define INITMSG "Initial resource acquisition complete" /* * A helper to allow us to pass things into the anonproc * environment without any warnings about passing const strings * being passed into a plain old (non-const) gpointer. */ struct hb_const_string { const char * str; }; #define HB_RSCMGMTPROC(p, s) \ { \ static struct hb_const_string cstr = {(s)}; \ NewTrackedProc((p), 1 \ , (debug_level ? PT_LOGVERBOSE : PT_LOGNORMAL) \ , &cstr, &hb_rsc_RscMgmtProcessTrackOps); \ } #define RSC_MGR HA_NOARCHDATAHBDIR "/ResourceManager" /* * A helper function which points at a malloced string. */ struct StonithProcHelper { char * nodename; }; extern ProcTrack_ops ManagedChildTrackOps; static int ResourceMgmt_child_count = 0; static void StartNextRemoteRscReq(void); static void InitRemoteRscReqQueue(void); static int send_standby_msg(enum standby state); static void send_stonith_msg(const char *, const char *); static void go_standby(enum standby who, int resourceset); static int send_local_starting(void); static void RscMgmtProcessRegistered(ProcTrack* p); static void RscMgmtProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged); static const char * RscMgmtProcessName(ProcTrack* p); static void StonithProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged); static const char * StonithProcessName(ProcTrack* p); static void StonithStatProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged); static const char * StonithStatProcessName(ProcTrack* p); void Initiate_Reset(Stonith* s, const char * nodename, gboolean doreset); static int FilterNotifications(const char * msgtype); static int countbystatus(const char * status, int matchornot); static gboolean hb_rsc_isstable(void); static void PerformAutoFailback(void); ProcTrack_ops hb_rsc_RscMgmtProcessTrackOps = { RscMgmtProcessDied, RscMgmtProcessRegistered, RscMgmtProcessName }; static ProcTrack_ops StonithProcessTrackOps = { StonithProcessDied, NULL, StonithProcessName }; static ProcTrack_ops StonithStatProcessTrackOps = { StonithStatProcessDied, NULL, StonithStatProcessName }; static void HBDoMsg_T_STARTING_or_RESOURCES(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { /* * process_resources() will deal with T_STARTING * and T_RESOURCES messages appropriately. */ process_resources(type, msg, fromnode); heartbeat_monitor(msg, KEEPIT, iface); } /* Someone wants to go standby!!! 
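 * A standby request arrives as a T_ASKRESOURCES message and is simply
 * handed to ask_for_resources() below.  Note that auto-failback reuses
 * the same machinery: PerformAutoFailback() (further down) sets
 * standby_rsctype to HB_FOREIGN_RSC and calls send_standby_msg(ME), so
 * a failback is, in effect, a standby request covering only the
 * foreign resource set.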
*/ static void HBDoMsg_T_ASKRESOURCES(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { heartbeat_monitor(msg, KEEPIT, iface); ask_for_resources(msg); } static void HBDoMsg_T_ASKRELEASE(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { heartbeat_monitor(msg, KEEPIT, iface); if (fromnode != curnode) { /* * Queue for later handling... */ QueueRemoteRscReq(PerformQueuedNotifyWorld, msg); } } static void HBDoMsg_T_ACKRELEASE(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { /* Ignore this if we're shutting down! */ if (shutdown_in_progress) { return; } heartbeat_monitor(msg, KEEPIT, iface); QueueRemoteRscReq(PerformQueuedNotifyWorld, msg); } /* Process a message no one recognizes */ static void HBDoMsg_default(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { heartbeat_monitor(msg, KEEPIT, iface); QueueRemoteRscReq(PerformQueuedNotifyWorld, msg); } /* Received a "SHUTDONE" message from someone... */ static void HBDoMsg_T_SHUTDONE(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { if (heartbeat_comm_state == COMM_LINKSUP) { process_resources(type, msg, fromnode); } heartbeat_monitor(msg, KEEPIT, iface); if (fromnode == curnode) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "Received T_SHUTDONE from us."); } if (ANYDEBUG) { cl_log(LOG_DEBUG , "Calling hb_mcp_final_shutdown" " in a second."); } /* Trigger next phase of final shutdown process in a second */ Gmain_timeout_add(1000, hb_mcp_final_shutdown, NULL); /* phase 0 - normal */ }else{ fromnode->has_resources = FALSE; other_is_stable = 0; other_holds_resources= HB_NO_RSC; cl_log(LOG_INFO , "Received shutdown notice from '%s'." , fromnode->nodename); takeover_from_node(fromnode->nodename); } } void init_resource_module(void) { hb_register_msg_callback(T_SHUTDONE, HBDoMsg_T_SHUTDONE); hb_register_comm_up_callback(comm_up_resource_action); } #ifndef WCOREDUMP # define WCOREDUMP(rc) 0 #endif static const char * rctomsg(int waitrc) { static char retval[64]; if (WIFSIGNALED(waitrc)) { snprintf(retval, sizeof(retval) , "killed by signal %d%s" , WTERMSIG(waitrc) , WCOREDUMP(waitrc) ? " (core dumped)" : ""); }else{ snprintf(retval, sizeof(retval) , "exited with return code %d" , WEXITSTATUS(waitrc)); } return retval; } static const char * rsc_msg[] = {HB_NO_RESOURCES, HB_LOCAL_RESOURCES , HB_FOREIGN_RESOURCES, HB_ALL_RESOURCES}; /* * We look at the directory /etc/ha.d/rc.d to see what * scripts are there to avoid trying to run anything * which isn't there. 
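 *
 * CreateInitialFilter() scans HA_RC_DIR once (skipping dot files) and
 * records every script name in the RCScriptNames hash table;
 * FilterNotifications(msgtype) then reports whether a script of that
 * name exists, so notify_world() never bothers forking the rc script
 * for message types nothing is listening for.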
*/ static GHashTable* RCScriptNames = NULL; static void CreateInitialFilter(void) { DIR* dp; struct dirent* dep; static char foo[] = "bar"; RCScriptNames = g_hash_table_new(g_str_hash, g_str_equal); if ((dp = opendir(HA_RC_DIR)) == NULL) { cl_perror("Cannot open directory " HA_RC_DIR); return; } while((dep = readdir(dp)) != NULL) { if (dep->d_name[0] == '.') { continue; } if (ANYDEBUG) { cl_log(LOG_DEBUG , "CreateInitialFilter: %s", dep->d_name); } g_hash_table_insert(RCScriptNames, g_strdup(dep->d_name),foo); } closedir(dp); } static int FilterNotifications(const char * msgtype) { int rc; if (RCScriptNames == NULL) { CreateInitialFilter(); } rc = g_hash_table_lookup(RCScriptNames, msgtype) != NULL; if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "FilterNotifications(%s) => %d" , msgtype, rc); } return rc; } static gboolean AutoFailbackProc(gpointer dummy) { PerformAutoFailback(); return FALSE; } static void PerformAutoFailback(void) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "Calling PerformAutoFailback()"); } if (shutdown_in_progress || (procinfo->i_hold_resources & HB_FOREIGN_RSC) == 0 || !rsc_needs_failback || !auto_failback) { rsc_needs_failback = FALSE; hb_shutdown_if_needed(); return; } if (going_standby != NOT || !other_is_stable || resourcestate != HB_R_STABLE) { cl_log(LOG_DEBUG, "Auto failback delayed."); Gmain_timeout_add(1*1000, AutoFailbackProc, NULL); return; } if (ANYDEBUG) { cl_log(LOG_DEBUG, "Auto failback triggered."); } failback_in_progress = TRUE; standby_rsctype = HB_FOREIGN_RSC; send_standby_msg(ME); rsc_needs_failback = FALSE; } /* Notify the (external) world of an HA event */ void notify_world(struct ha_msg * msg, const char * ostatus) { /* * We invoke our "rc" script with the following arguments: * * 0: RC_ARG0 (always the same) * 1: lowercase version of command ("type" field) * * All message fields get put into environment variables * * The rc script, in turn, runs the scripts it finds in the rc.d * directory (or whatever we call it... ) with the same arguments. * * We set the following environment variables for the RC script: * HA_CURHOST: the node name we're running on * HA_OSTATUS: Status of node (before this change) * */ struct sigaction sa; /* We only run one of these commands at a time */ static char command[STATUSLENG]; char rc_arg0 [] = RC_ARG0; char * const argv[MAXFIELDS+3] = {rc_arg0, command, NULL}; const char * fp; char * tp; int pid; #if WAITFORCOMMANDS int status; #endif if (!DoManageResources) { return; } tp = command; fp = ha_msg_value(msg, F_TYPE); ASSERT(fp != NULL && strlen(fp) < STATUSLENG); if (fp == NULL || strlen(fp) >= STATUSLENG || !FilterNotifications(fp)) { return; } if (ANYDEBUG) { cl_log(LOG_DEBUG , "notify_world: invoking %s: OLD status: %s" , RC_ARG0, (ostatus ? 
ostatus : "(none)")); } /* FIXME: No check on length of command */ while (*fp) { if (isupper((unsigned int)*fp)) { *tp = tolower((unsigned int)*fp); }else{ *tp = *fp; } ++fp; ++tp; } *tp = EOS; switch ((pid=fork())) { case -1: cl_perror("Can't fork to notify world!"); break; case 0: { /* Child */ int j; hb_setup_child(); set_proc_title("%s: notify_world()", cmdname); setpgid(0,0); CL_SIGACTION(SIGCHLD, NULL, &sa); if (sa.sa_handler != SIG_DFL) { cl_log(LOG_DEBUG , "notify_world: setting SIGCHLD" " Handler to SIG_DFL"); CL_SIGNAL(SIGCHLD,SIG_DFL); } for (j=0; j < msg->nfields; ++j) { char ename[64]; snprintf(ename, sizeof(ename), "HA_%s" , msg->names[j]); if (msg->types[j] == FT_STRING){ setenv(ename, msg->values[j], 1); } } if (ostatus) { setenv(OLDSTATUS, ostatus, 1); } if (nice_failback) { setenv(HANICEFAILBACK, "yes", 1); } /*should we use logging daemon or not in script*/ setenv(HALOGD, cl_log_get_uselogd()? "yes":"no", 1); if (ANYDEBUG) { cl_log(LOG_DEBUG , "notify_world: Running %s %s" , argv[0], argv[1]); } execv(RCSCRIPT, argv); cl_log(LOG_ERR, "cannot exec %s", RCSCRIPT); cleanexit(1); /*NOTREACHED*/ break; } default: /* We're the Parent. */ /* We run these commands at a time */ /* So this use of "command" is OK */ HB_RSCMGMTPROC(pid, command); if (ANYDEBUG) { cl_log(LOG_DEBUG , "Starting notify process [%s]" , command); } } #if WAITFORCOMMANDS waitpid(pid, &status, 0); #endif } /* * Node 'hip' has died. Take over its resources (if any) * This may mean we have to STONITH them. */ void hb_rsc_recover_dead_resources(struct node_info* hip) { gboolean need_stonith = TRUE; struct ha_msg * hmsg; char timestamp[16]; if ((hmsg = ha_msg_new(6)) == NULL) { cl_log(LOG_ERR, "no memory to takeover_from_node"); return; } snprintf(timestamp, sizeof(timestamp), TIME_X, (TIME_T) time(NULL)); if ( ha_msg_add(hmsg, F_TYPE, T_STATUS) != HA_OK || ha_msg_add(hmsg, F_SEQ, "1") != HA_OK || ha_msg_add(hmsg, F_TIME, timestamp) != HA_OK || ha_msg_add(hmsg, F_ORIG, hip->nodename) != HA_OK || ha_msg_add(hmsg, F_STATUS, DEADSTATUS) != HA_OK) { cl_log(LOG_ERR, "no memory to takeover_from_node"); ha_msg_del(hmsg); return; } if (hip->nodetype == PINGNODE_I) { if (ha_msg_add(hmsg, F_COMMENT, "ping") != HA_OK) { cl_log(LOG_ERR, "no memory to mark ping node dead"); ha_msg_del(hmsg); return; } } else if (going_standby != NOT) { cl_log(LOG_INFO, "Cancelling pending standby operation"); going_standby = NOT; standby_running = zero_longclock; if ((!other_is_stable) && ((procinfo->i_hold_resources & HB_ALL_RSC) == HB_ALL_RSC)) { other_is_stable = TRUE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "hb_rsc_recover_dead_resources:" " other now stable"); } } hb_shutdown_if_needed(); } /*deliver this message to clients*/ heartbeat_monitor(hmsg, KEEPIT, ""); ha_msg_del(hmsg); hmsg = NULL; if (!DoManageResources) { return; } if (hip->nodetype == PINGNODE_I) { takeover_from_node(hip->nodename); return; } /* * We can get confused by a dead node when we're * not fully started, unless we're careful. */ if (shutdown_in_progress) { switch(resourcestate) { case HB_R_SHUTDOWN: case HB_R_STABLE: return; default: cl_log(LOG_ERR , "recover_dead_resources()" " during shutdown" ": state %d", resourcestate); /* FALL THROUGH! */ case HB_R_INIT: case HB_R_BOTHSTARTING: case HB_R_RSCRCVD: case HB_R_STARTING: hb_giveup_resources(); return; } } rsc_needs_failback = TRUE; /* * If we haven't heard anything from them - they might be holding * resources - we have no way of knowing. 
*/ if (hip->anypacketsyet) { if (nice_failback) { if (other_holds_resources == HB_NO_RSC) { need_stonith = FALSE; } }else if (!hip->has_resources) { need_stonith = FALSE; } } if (need_stonith) { /* We have to Zap them before we take the resources */ /* This often takes a few seconds. */ if (config->stonith) { Initiate_Reset(config->stonith, hip->nodename, TRUE); /* It will call takeover_from_node() later */ return; }else{ send_stonith_msg(hip->nodename, T_STONITH_NOTCONFGD); cl_log(LOG_WARNING, "No STONITH device configured."); cl_log(LOG_WARNING, "Shared disks are not protected."); /* nice_failback needs us to do this anyway... */ takeover_from_node(hip->nodename); } }else{ cl_log(LOG_INFO, "Dead node %s gave up resources." , hip->nodename); send_stonith_msg(hip->nodename, T_STONITH_UNNEEDED); if (nice_failback) { if ((procinfo->i_hold_resources & HB_ALL_RSC) == HB_ALL_RSC) { other_is_stable = TRUE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "hb_rsc_recover_dead_resources:" " other now stable"); } return; } /* These might happen due to timing weirdnesses */ if (! (procinfo->i_hold_resources & HB_LOCAL_RSC)){ req_our_resources(TRUE); } if (! (procinfo->i_hold_resources & HB_FOREIGN_RSC)){ takeover_from_node(hip->nodename); } }else{ /* With nice_failback disabled, we always * own our own (local) resources */ takeover_from_node(hip->nodename); } } } static gboolean hb_rsc_isstable(void) { /* Is this the "legacy" case? */ if (!nice_failback) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "hb_rsc_isstable" ": ResourceMgmt_child_count: %d" , ResourceMgmt_child_count); } return ResourceMgmt_child_count == 0; } if (ANYDEBUG) { cl_log(LOG_DEBUG , "hb_rsc_isstable: ResourceMgmt_child_count: %d" ", other_is_stable: %d" ", takeover_in_progress: %d, going_standby: %d" ", standby running(ms): %ld, resourcestate: %d" , ResourceMgmt_child_count, other_is_stable , takeover_in_progress, going_standby , longclockto_ms(standby_running) , resourcestate); } /* Special case for early shutdown requests */ if (shutdown_in_progress && resourcestate == HB_R_INIT) { return TRUE; } return other_is_stable && !takeover_in_progress && going_standby == NOT && standby_running == 0L && ResourceMgmt_child_count == 0 && (resourcestate == HB_R_STABLE||resourcestate==HB_R_SHUTDOWN || resourcestate == HB_R_INIT); } const char * hb_rsc_resource_state(void) { return (hb_rsc_isstable() ? decode_resources(procinfo->i_hold_resources) : "transition"); } /* * Here starts the nice_failback thing. The main purpouse of * nice_failback is to create a controlled failback. This * means that when the primary comes back from an outage it * stays quiet and acts as a secondary/backup server. * There are some more comments about it in nice_failback.txt */ /* * At this point nice failback deals with two nodes and is * an interim measure. The new version using the API is coming soon! * * This piece of code treats five different situations: * * 1. Node1 is starting and Node2 is down (or vice-versa) * Take the resources. req_our_resources(), mark_node_dead() * * 2. Node1 and Node2 are starting at the same time * Let both machines req_our_resources(). * * 3. Node1 is starting and Node2 holds no resources * Just like #2 * * 4. Node1 is starting and Node2 has (his) local resources * Let's ask for our local resources. req_our_resources() * * 5. 
Node1 is starting and Node2 has both local and foreign * resources (all resources) * Do nothing :) * */ /* * About the nice_failback resource takeover model: * * There are two principles that seem to guarantee safety: * * 1) Take all unclaimed resources if the other side is stable. * [Once you do this, you are also stable]. * * 2) Take only unclaimed local resources when a timer elapses * without things becoming stable by (1) above. * [Once this occurs, you're stable]. * * Stable means that we have taken the resources we think we ought to, and * won't take any more without another transition ocurring. * * The other side is stable whenever it says it is (in its RESOURCE * message), or if it is dead. * * The nice thing about the stable bit in the resources message is that it * enables you to tell if the other side is still messing around, or if * they think they're done messing around. If they're done, then it's safe * to proceed. If they're not, then you need to wait until they say * they're done, or until a timeout occurs (because no one has become stable). * * When the timeout occurs, you're both deadlocked each waiting for the * other to become stable. Then it's safe to take your local resources * (unless, of course, for some unknown reason, the other side has taken * them already). * * If a node dies die, then they'll be marked dead, and its resources will * be marked unclaimed. In this case, you'll take over everything - whether * local resources through mark_node_dead() or remote resources through * mach_down. */ #define HB_UPD_RSC(full, cur, up) ((full) ? up : (up == HB_NO_RSC) ? HB_NO_RSC : ((up)|(cur))) void comm_up_resource_action(void) { static int resources_requested_yet = 0; int deadcount = countbystatus(DEADSTATUS, TRUE); hb_register_msg_callback(T_STARTING, HBDoMsg_T_STARTING_or_RESOURCES); hb_register_msg_callback(T_RESOURCES, HBDoMsg_T_STARTING_or_RESOURCES); hb_register_msg_callback(T_ASKRESOURCES, HBDoMsg_T_ASKRESOURCES); hb_register_msg_callback(T_ASKRELEASE, HBDoMsg_T_ASKRELEASE); hb_register_msg_callback(T_ACKRELEASE, HBDoMsg_T_ACKRELEASE); hb_register_msg_callback("", HBDoMsg_default); if (deadcount == 0) { /* * If all nodes are up, we won't have to acquire * anyone else's resources. We're done with that. */ foreign_takeover_work_done = TRUE; } if (nice_failback) { send_local_starting(); }else{ /* Original ("normal") starting behavior */ if (!WeAreRestarting && !resources_requested_yet) { resources_requested_yet=1; req_our_resources(FALSE); } } if (config->stonith) { /* This will get called every hour from now on... 
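 * -- note the FALSE: with no nodename and doreset FALSE this is
 * presumably just a periodic exercise/status check of the STONITH
 * device rather than an actual reset; the real resets are issued from
 * hb_rsc_recover_dead_resources() above with doreset TRUE.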
*/ Initiate_Reset(config->stonith, NULL, FALSE); } } static void AnnounceTakeover(const char * reason) { static gboolean init_takeover_announced = FALSE; if (ANYDEBUG) { cl_log(LOG_INFO , "AnnounceTakeover(local %d, foreign %d" ", reason '%s' (%d))" , local_takeover_work_done , foreign_takeover_work_done , reason , init_takeover_announced); } if (init_takeover_announced || !local_takeover_work_done || !foreign_takeover_work_done) { return; } cl_log(LOG_INFO, INITMSG " (%s)", reason); init_takeover_announced = TRUE; } void process_resources(const char * type, struct ha_msg* msg , struct node_info * thisnode) { enum hb_rsc_state newrstate = resourcestate; static int first_time = 1; hb_shutdown_if_needed(); if (!DoManageResources || !nice_failback) { return; } /* Otherwise, we're in the nice_failback case */ /* This first_time switch might still be buggy -- FIXME */ if (first_time && WeAreRestarting) { resourcestate = newrstate = HB_R_STABLE; } /* * Deal with T_STARTING messages coming from the other side. * * These messages are a request for resource usage information. * The appropriate reply is a T_RESOURCES message. */ if (strcasecmp(type, T_STARTING) == 0 && (thisnode != curnode)) { switch(resourcestate) { case HB_R_RSCRCVD: case HB_R_STABLE: case HB_R_SHUTDOWN: break; case HB_R_STARTING: newrstate = HB_R_BOTHSTARTING; foreign_takeover_work_done = TRUE; AnnounceTakeover("HB_R_BOTHSTARTING"); /* ??? req_our_resources(); ??? */ break; default: cl_log(LOG_ERR, "Received '%s' message in state %d" , T_STARTING, resourcestate); return; } other_is_stable = FALSE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "process_resources: other now unstable"); } if (takeover_in_progress) { cl_log(LOG_WARNING , "T_STARTING received during takeover."); } hb_send_resources_held(resourcestate == HB_R_STABLE, NULL); } /* Manage resource related messages... */ if (strcasecmp(type, T_RESOURCES) == 0) { const char *p; int fullupdate = FALSE; int n; /* * There are four possible resource answers: * * "I don't hold any resources" HB_NO_RSC * "I hold only LOCAL resources" HB_LOCAL_RSC * "I hold only FOREIGN resources" HB_FOREIGN_RSC * "I hold ALL resources" (local+foreign) HB_ALL_RSC */ p=ha_msg_value(msg, F_RESOURCES); if (p == NULL) { cl_log(LOG_ERR , T_RESOURCES " message without " F_RESOURCES " field."); return; } n = encode_resources(p); if ((p = ha_msg_value(msg, F_RTYPE)) && strcmp(p, "full") == 0) { fullupdate = TRUE; } switch (resourcestate) { case HB_R_BOTHSTARTING: case HB_R_STARTING: newrstate = HB_R_RSCRCVD; if (nice_failback && !auto_failback) { foreign_takeover_work_done = TRUE; AnnounceTakeover ("T_RESOURCES"); } case HB_R_RSCRCVD: case HB_R_STABLE: case HB_R_SHUTDOWN: break; default: cl_log(LOG_ERR, T_RESOURCES " message received in state %d" , resourcestate); return; } if (thisnode != curnode) { /* * This T_RESOURCES message is from the other side. 
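 * Whatever set of resources it reports is folded into
 * other_holds_resources through the HB_UPD_RSC() macro defined above.
 * With the bit values from hb_resource.h (none=0, local=1, foreign=2,
 * all=3), a few worked cases:
 *
 *     HB_UPD_RSC(TRUE,  HB_LOCAL_RSC, HB_FOREIGN_RSC) == HB_FOREIGN_RSC
 *     HB_UPD_RSC(FALSE, HB_LOCAL_RSC, HB_FOREIGN_RSC) == HB_ALL_RSC
 *     HB_UPD_RSC(FALSE, HB_ALL_RSC,   HB_NO_RSC)      == HB_NO_RSC
 *
 * i.e. a "full" update replaces the whole set, an incremental update
 * ORs the new bits in, and "none" always clears the set.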
*/ const char * f_stable; other_holds_resources = HB_UPD_RSC(fullupdate, other_holds_resources, n); /* f_stable is NULL when msg from takeover script */ if ((f_stable = ha_msg_value(msg, F_ISSTABLE)) != NULL){ if (strcmp(f_stable, "1") == 0) { if (!other_is_stable) { cl_log(LOG_INFO , "remote resource" " transition completed."); other_is_stable = TRUE; hb_send_resources_held(resourcestate == HB_R_STABLE, NULL); PerformAutoFailback(); } }else{ other_is_stable = FALSE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "process_resources(2): %s" , " other now unstable"); } } } if (ANYDEBUG) { cl_log(LOG_INFO , "other_holds_resources: %d" , other_holds_resources); } if ((resourcestate != HB_R_STABLE && resourcestate != HB_R_SHUTDOWN) && other_is_stable) { cl_log(LOG_INFO , "remote resource transition completed." ); req_our_resources(FALSE); newrstate = HB_R_STABLE; hb_send_resources_held(TRUE, NULL); PerformAutoFailback(); foreign_takeover_work_done = TRUE; if (!auto_failback) { if (other_holds_resources & HB_FOREIGN_RSC) { local_takeover_work_done = TRUE; } } AnnounceTakeover("T_RESOURCES(them)"); } }else{ /* This message is from us... */ const char * comment = ha_msg_value(msg, F_COMMENT); /* * This T_RESOURCES message is from us. It might be * from the "mach_down" script or our own response to * the other side's T_STARTING message. The mach_down * script sets the info (F_COMMENT) field to "mach_down" * We set it to "shutdown" in giveup_resources(). * * We do this so the audits work cleanly AND we can * avoid a potential race condition. * * Also, we could now time how long a takeover is * taking to occur, and complain if it takes "too long" * [ whatever *that* means ] */ /* Probably unnecessary */ procinfo->i_hold_resources = HB_UPD_RSC(fullupdate , procinfo->i_hold_resources, n); if (procinfo->i_hold_resources & HB_LOCAL_RSC) { /* This may sometimes be slightly premature. * The problem is that if the machine has * no local resources we will receive no * ip-addr-resp messages for resource * releases from the far side, so we * have to do something to cover that case. */ local_takeover_work_done = TRUE; AnnounceTakeover("T_RESOURCES(us)"); } if (comment) { if (strcmp(comment, "mach_down") == 0) { cl_log(LOG_INFO , "mach_down takeover complete."); takeover_in_progress = FALSE; /* FYI: This also got noted earlier */ procinfo->i_hold_resources |= HB_FOREIGN_RSC; rsc_needs_failback = TRUE; other_is_stable = TRUE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "process_resources(3): %s" , " other now stable"); } foreign_takeover_work_done = TRUE; AnnounceTakeover("mach_down"); }else if (strcmp(comment, "shutdown") == 0) { resourcestate = newrstate = HB_R_SHUTDOWN; } } } } if (strcasecmp(type, T_SHUTDONE) == 0) { if (thisnode != curnode) { /* * It seems other_is_stable should be set to TRUE * when we come here because the other side * declared they are shutting down and no longer * own any resources. 
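 * Since the departing node no longer owns anything, we also take over
 * whatever we are not yet holding: the switch just below picks the
 * complement of our current holdings and hands it to go_standby().
 * Concretely:
 *
 *     i_hold_resources == HB_FOREIGN_RSC  ->  rtype = HB_LOCAL_RSC
 *     i_hold_resources == HB_LOCAL_RSC    ->  rtype = HB_FOREIGN_RSC
 *     i_hold_resources == HB_NO_RSC       ->  rtype = HB_ALL_RSC
 *
 * (Nothing is done if we already hold HB_ALL_RSC, or if we are
 * shutting down ourselves.)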
*/ other_is_stable = TRUE; other_holds_resources = HB_NO_RSC; if (ANYDEBUG) { cl_log(LOG_DEBUG , "process_resources(4): %s" , " other now stable - T_SHUTDONE"); } if ((procinfo->i_hold_resources != HB_ALL_RSC) && !shutdown_in_progress) { int rtype; switch (procinfo->i_hold_resources) { case HB_FOREIGN_RSC: rtype = HB_LOCAL_RSC; break; case HB_LOCAL_RSC: rtype = HB_FOREIGN_RSC; break; default: case HB_NO_RSC: rtype = HB_ALL_RSC; break; } /* Take over resources immediately */ going_standby = DONE; go_standby(OTHER, rtype); } }else{ resourcestate = newrstate = HB_R_SHUTDOWN; procinfo->i_hold_resources = 0; } } if (resourcestate != newrstate) { if (ANYDEBUG) { cl_log(LOG_INFO , "STATE %d => %d", resourcestate, newrstate); } } resourcestate = newrstate; if (resourcestate == HB_R_RSCRCVD && local_takeover_time == 0L) { local_takeover_time = add_longclock(time_longclock() , secsto_longclock(RQSTDELAY)); } AuditResources(); hb_shutdown_if_needed(); } void AuditResources(void) { if (!nice_failback) { return; } /******************************************************* * Look for for duplicated or orphaned resources *******************************************************/ /* * Do both nodes own our local resources? */ if ((procinfo->i_hold_resources & HB_LOCAL_RSC) != 0 && (other_holds_resources & HB_FOREIGN_RSC) != 0) { cl_log(LOG_ERR, "Both machines own our resources!"); } /* * Do both nodes own foreign resources? */ if ((other_holds_resources & HB_LOCAL_RSC) != 0 && (procinfo->i_hold_resources & HB_FOREIGN_RSC) != 0) { cl_log(LOG_ERR, "Both machines own foreign resources!"); } /* * If things are stable, look for orphaned resources... */ if (hb_rsc_isstable() && !shutdown_in_progress && (resourcestate != HB_R_SHUTDOWN)) { /* * Does someone own local resources? */ if ((procinfo->i_hold_resources & HB_LOCAL_RSC) == 0 && (other_holds_resources & HB_FOREIGN_RSC) == 0) { cl_log(LOG_ERR, "No one owns our local resources!"); } /* * Does someone own foreign resources? */ if ((other_holds_resources & HB_LOCAL_RSC) == 0 && (procinfo->i_hold_resources & HB_FOREIGN_RSC) == 0) { cl_log(LOG_ERR, "No one owns foreign resources!"); } } } const char * decode_resources(int i) { return (i < 0 || i >= DIMOF(rsc_msg))? "(undefined)" : rsc_msg[i]; } int encode_resources(const char *p) { int i; for (i=0; i < DIMOF(rsc_msg); i++) { if (strcmp(rsc_msg[i], p) == 0) { return i; break; } } cl_log(LOG_ERR, "encode_resources: bad resource type [%s]", p); return 0; } /* Send the "I hold resources" or "I don't hold" resource messages */ int hb_send_resources_held(int stable, const char * comment) { struct ha_msg * m; int rc = HA_OK; char timestamp[16]; const char * str; if (!nice_failback) { return HA_OK; } str = rsc_msg[procinfo->i_hold_resources]; snprintf(timestamp, sizeof(timestamp), TIME_X, (TIME_T) time(NULL)); if (ANYDEBUG) { cl_log(LOG_DEBUG , "Sending hold resources msg: %s, stable=%d # %s" , str, stable, (comment ? comment : "")); } if ((m=ha_msg_new(0)) == NULL) { cl_log(LOG_ERR, "Cannot send local starting msg"); return(HA_FAIL); } if ((ha_msg_add(m, F_TYPE, T_RESOURCES) != HA_OK) || (ha_msg_add(m, F_RESOURCES, str) != HA_OK) || (ha_msg_add(m, F_RTYPE, "full") != HA_OK) || (ha_msg_add(m, F_ISSTABLE, (stable ? 
"1" : "0")) != HA_OK)) { cl_log(LOG_ERR, "hb_send_resources_held: Cannot create local msg"); rc = HA_FAIL; }else if (comment) { rc = ha_msg_add(m, F_COMMENT, comment); } if (rc == HA_OK) { rc = send_cluster_msg(m); m = NULL; }else{ ha_msg_del(m); m = NULL; } return(rc); } /* Send the starting msg out to the cluster */ static int send_local_starting(void) { struct ha_msg * m; int rc; if (!nice_failback) { return HA_OK; } if (ANYDEBUG) { cl_log(LOG_DEBUG , "Sending local starting msg: resourcestate = %d" , resourcestate); } if ((m=ha_msg_new(0)) == NULL) { cl_log(LOG_ERR, "Cannot send local starting msg"); return(HA_FAIL); } if ((ha_msg_add(m, F_TYPE, T_STARTING) != HA_OK)) { cl_log(LOG_ERR, "send_local_starting: " "Cannot create local starting msg"); rc = HA_FAIL; ha_msg_del(m); m = NULL; }else{ rc = send_cluster_msg(m); m = NULL; } resourcestate = HB_R_STARTING; return(rc); } /* We take all resources over from a given node */ void takeover_from_node(const char * nodename) { struct node_info * hip = lookup_node(nodename); struct ha_msg * hmsg; char timestamp[16]; if (hip == 0) { return; } if (shutdown_in_progress) { cl_log(LOG_INFO , "Resource takeover cancelled - shutdown in progress."); hb_shutdown_if_needed(); return; }else if (hip->nodetype != PINGNODE_I) { cl_log(LOG_INFO , "Resources being acquired from %s." , hip->nodename); } if ((hmsg = ha_msg_new(6)) == NULL) { cl_log(LOG_ERR, "no memory to takeover_from_node"); return; } snprintf(timestamp, sizeof(timestamp), TIME_X, (TIME_T) time(NULL)); if ( ha_msg_add(hmsg, F_TYPE, T_STATUS) != HA_OK || ha_msg_add(hmsg, F_SEQ, "1") != HA_OK || ha_msg_add(hmsg, F_TIME, timestamp) != HA_OK || ha_msg_add(hmsg, F_ORIG, hip->nodename) != HA_OK || ha_msg_add(hmsg, F_STATUS, DEADSTATUS) != HA_OK) { cl_log(LOG_ERR, "no memory to takeover_from_node"); ha_msg_del(hmsg); return; } if (hip->nodetype == PINGNODE_I) { if (ha_msg_add(hmsg, F_COMMENT, "ping") != HA_OK) { cl_log(LOG_ERR, "no memory to mark ping node dead"); ha_msg_del(hmsg); return; } } /* Sending this message triggers the "mach_down" script */ /*heartbeat_monitor(hmsg, KEEPIT, "");*/ QueueRemoteRscReq(PerformQueuedNotifyWorld, hmsg); /* * STONITH has already successfully completed, or wasn't needed... */ if (hip->nodetype != PINGNODE_I) { if (nice_failback) { /* mach_down is out there acquiring foreign resources */ /* So, make a note of it... */ procinfo->i_hold_resources |= HB_FOREIGN_RSC; other_holds_resources = HB_NO_RSC; other_is_stable = TRUE; /* Not going anywhere */ takeover_in_progress = TRUE; if (ANYDEBUG) { cl_log(LOG_DEBUG , "takeover_from_node: other now stable"); } /* * We MUST do this now, or the other side might come * back up and think they can own their own resources * when we do due to receiving an interim * T_RESOURCE message from us. */ /* case 1 - part 1 */ /* part 2 is done by the mach_down script... */ } /* This is here because we might not have gotten our * resources yet - waiting for the other side to give them * up. Fortunately, req_our_resources() won't cause a * race condition because it queues its work. 
*/ req_our_resources(TRUE); /* req_our_resources turns on the HB_LOCAL_RSC bit */ } hip->anypacketsyet = 1; ha_msg_del(hmsg); } void req_our_resources(int getthemanyway) { FILE * rkeys; char cmd[MAXLINE]; char getcmd[MAXLINE]; char buf[MAXLINE]; int finalrc = HA_OK; int rc; int rsc_count = 0; int pid; int upcount; if (!DoManageResources || shutdown_in_progress) { return; } if (nice_failback) { if (((other_holds_resources & HB_FOREIGN_RSC) != 0 || (procinfo->i_hold_resources & HB_LOCAL_RSC) != 0) && !getthemanyway) { if (going_standby == NOT) { /* Someone already owns our resources */ cl_log(LOG_INFO , "Local Resource acquisition completed" ". (none)"); return; } } /* * We MUST do this now, or the other side might think they * can have our resources, due to an interim T_RESOURCE * message */ procinfo->i_hold_resources |= HB_LOCAL_RSC; } upcount = countbystatus(ACTIVESTATUS, TRUE); /* Our status update is often not done yet */ if (strcmp(curnode->status, ACTIVESTATUS) != 0) { upcount++; } /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: cl_log(LOG_ERR, "Cannot fork."); return; default: if (upcount < 2) { HB_RSCMGMTPROC(pid , "req_our_resources"); }else{ HB_RSCMGMTPROC(pid , "req_our_resources(ask)"); } return; case 0: /* Child */ break; } hb_setup_child(); set_proc_title("%s: req_our_resources()", cmdname); setpgid(0,0); CL_SIGNAL(SIGCHLD, SIG_DFL); alarm(0); CL_IGNORE_SIG(SIGALRM); CL_SIGINTERRUPT(SIGALRM, 0); /* Are we all alone in the world? */ if (upcount < 2) { setenv(HADONTASK, "yes", 1); } if (nice_failback) { setenv(HANICEFAILBACK, "yes", 1); } snprintf(cmd, sizeof(cmd), RSC_MGR " listkeys %s" , curnode->nodename); if (ANYDEBUG) { cl_log(LOG_DEBUG, "req_our_resources(%s)" , cmd); } if ((rkeys = popen(cmd, "r")) == NULL) { cl_log(LOG_ERR, "Cannot run command %s", cmd); exit(1); } for (;;) { if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "req_our_resources() before fgets()"); } errno = 0; if (fgets(buf, MAXLINE, rkeys) == NULL) { if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "req_our_resources() fgets => NULL"); } if (ferror(rkeys)) { cl_perror("req_our_resources: fgets failure"); } break; } ++rsc_count; if (buf[strlen(buf)-1] == '\n') { buf[strlen(buf)-1] = EOS; } snprintf(getcmd, sizeof(getcmd) , HA_NOARCHDATAHBDIR "/req_resource %s", buf); if (ANYDEBUG) { cl_log(LOG_DEBUG, "req_our_resources()" ": running [%s]", getcmd); } /*should we use logging daemon or not in script*/ setenv(HALOGD, cl_log_get_uselogd()? 
"yes":"no", 1); if ((rc=system(getcmd)) != 0) { cl_perror("%s %s", getcmd, rctomsg(rc)); finalrc=HA_FAIL; } } if ((rc = pclose(rkeys)) != 0) { cl_log(LOG_ERR, "pclose(%s) %s", cmd, rctomsg(rc)); } rkeys = NULL; if (rc < 0 && errno != ECHILD) { cl_perror("pclose(%s) [%s?]", cmd, rctomsg(rc)); }else if (rc > 0) { cl_log(LOG_ERR, "[%s] %s", cmd, rctomsg(rc)); } if (rsc_count == 0) { cl_log(LOG_INFO, "No local resources [%s] to acquire.", cmd); }else{ if (ANYDEBUG) { cl_log(LOG_INFO, "%d local resources from [%s]" , rsc_count, cmd); } cl_log(LOG_INFO, "Local Resource acquisition completed."); } hb_send_resources_held(TRUE, "req_our_resources()"); exit(0); } /* Send "standby" related msgs out to the cluster */ static int send_standby_msg(enum standby state) { const char * standby_msg[] = { "not", "me", "other", "done"}; struct ha_msg * m; int rc; char timestamp[16]; snprintf(timestamp, sizeof(timestamp), TIME_X, (TIME_T) time(NULL)); if (ANYDEBUG) { cl_log(LOG_DEBUG, "Sending standby [%s] msg" , standby_msg[state]); } if ((m=ha_msg_new(0)) == NULL) { cl_log(LOG_ERR, "Cannot send standby [%s] msg" , standby_msg[state]); return(HA_FAIL); } if (ha_msg_add(m, F_TYPE, T_ASKRESOURCES) != HA_OK || ha_msg_add(m, F_RESOURCES, decode_resources(standby_rsctype)) != HA_OK || ha_msg_add(m, F_COMMENT, standby_msg[state]) != HA_OK) { cl_log(LOG_ERR, "send_standby_msg: " "Cannot create standby reply msg"); rc = HA_FAIL; ha_msg_del(m); m = NULL; }else{ rc = send_cluster_msg(m); m = NULL; } return(rc); } void send_stonith_msg(const char *nodename, const char *result) { struct ha_msg* hmsg; if ((hmsg = ha_msg_new(6)) == NULL) { cl_log(LOG_ERR, "no memory for " T_STONITH); } if ( hmsg != NULL && ha_msg_add(hmsg, F_TYPE, T_STONITH) == HA_OK && ha_msg_add(hmsg, F_NODE, nodename) == HA_OK && ha_msg_add(hmsg, F_APIRESULT, result) == HA_OK) { if (send_cluster_msg(hmsg) != HA_OK) { cl_log(LOG_ERR, "cannot send " T_STONITH " request for %s", nodename); } hmsg = NULL; }else{ cl_log(LOG_ERR , "Cannot send reset reply message [%s] for %s", result , nodename); if (hmsg != NULL) { ha_msg_del(hmsg); hmsg = NULL; } } return; } #define STANDBY_INIT_TO_MS 10000L /* ms timeout for initial reply */ #define HB_STANDBY_RSC_TO_MS 60L*(60L*1000L) /* resource handling timeout */ /* (An hour in ms)*/ void ask_for_resources(struct ha_msg *msg) { const char * info; const char * from; int msgfromme; longclock_t now = time_longclock(); int message_ignored = 0; const enum standby orig_standby = going_standby; const longclock_t standby_rsc_to = msto_longclock(HB_STANDBY_RSC_TO_MS); const longclock_t init_to = msto_longclock(STANDBY_INIT_TO_MS); const char * rsctype; int rtype; if (!nice_failback) { cl_log(LOG_INFO , "Standby mode only implemented when nice_failback on"); return; } if (resourcestate == HB_R_SHUTDOWN) { if (ANYDEBUG){ cl_log(LOG_DEBUG , "standby message ignored during shutdown"); } return; } info = ha_msg_value(msg, F_COMMENT); from = ha_msg_value(msg, F_ORIG); rsctype=ha_msg_value(msg, F_RESOURCES); if (rsctype == NULL) { rtype = HB_ALL_RSC; }else{ rtype = encode_resources(rsctype); } if (info == NULL || from == NULL) { cl_log(LOG_ERR, "Received standby message without info/from"); return; } msgfromme = strcmp(from, curnode->nodename) == 0; if (ANYDEBUG){ cl_log(LOG_DEBUG , "Received standby message %s from %s in state %d " , info, from, going_standby); } if (cmp_longclock(standby_running, zero_longclock) != 0 && cmp_longclock(now, standby_running) < 0 && strcasecmp(info, "me") == 0) { unsigned long secs_left; secs_left = 
longclockto_ms(sub_longclock(standby_running, now)); secs_left = (secs_left+999)/1000; cl_log(LOG_WARNING , "Standby in progress" "- new request from %s ignored [%ld seconds left]" , from, secs_left); return; } /* Starting the STANDBY 3-phased protocol */ switch(going_standby) { case NOT: if (!other_is_stable) { cl_log(LOG_WARNING, "standby message [%s] from %s" " ignored. Other side is in flux.", info, from); return; } if (resourcestate != HB_R_STABLE) { cl_log(LOG_WARNING, "standby message [%s] from %s" " ignored. local resources in flux.", info, from); return; } standby_rsctype = rtype; if (strcasecmp(info, "me") == 0) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "ask_for_resources: other now unstable"); } other_is_stable = FALSE; cl_log(LOG_INFO, "%s wants to go standby [%s]" , from, decode_resources(rtype)); if (msgfromme) { /* We want to go standby */ if (ANYDEBUG) { cl_log(LOG_INFO , "i_hold_resources: %d" , procinfo->i_hold_resources); } standby_running = add_longclock(now, init_to); going_standby = ME; }else{ if (ANYDEBUG) { cl_log(LOG_INFO , "standby" ": other_holds_resources: %d" , other_holds_resources); } /* Other node wants to go standby */ going_standby = OTHER; send_standby_msg(going_standby); standby_running = add_longclock(now , standby_rsc_to); } }else{ message_ignored = 1; } break; case ME: /* Other node is alive, so give up our resources */ if (!msgfromme) { standby_rsctype = rtype; standby_running = add_longclock(now, standby_rsc_to); if (strcasecmp(info,"other") == 0) { cl_log(LOG_INFO , "standby: %s can take our %s resources" , from, decode_resources(rtype)); go_standby(ME, rtype); /* Our child proc sends a "done" message */ /* after all the resources are released */ }else{ message_ignored = 1; } }else if (strcasecmp(info, "done") == 0) { /* * The "done" message came from our child process * indicating resources are completely released now. */ cl_log(LOG_INFO , "Local standby process completed [%s]." , decode_resources(rtype)); going_standby = DONE; procinfo->i_hold_resources &= ~standby_rsctype; standby_running = add_longclock(now, standby_rsc_to); }else{ message_ignored = 1; } break; case OTHER: standby_rsctype = rtype; if (strcasecmp(info, "done") == 0) { standby_running = add_longclock(now, standby_rsc_to); if (!msgfromme) { /* It's time to acquire resources */ cl_log(LOG_INFO , "standby: acquire [%s] resources" " from %s" , decode_resources(rtype), from); /* go_standby gets requested resources */ go_standby(OTHER, standby_rsctype); going_standby = DONE; }else{ message_ignored = 1; } }else if (!msgfromme || strcasecmp(info, "other") != 0) { /* We expect an "other" message from us */ /* But, that's not what this one is ;-) */ message_ignored = 1; } break; case DONE: standby_rsctype = rtype; if (strcmp(info, "done")== 0) { standby_running = zero_longclock; going_standby = NOT; if (msgfromme) { int rup = HB_NO_RSC; cl_log(LOG_INFO , "Standby resource" " acquisition done [%s]." , decode_resources(rtype)); if (auto_failback) { local_takeover_work_done = TRUE; AnnounceTakeover("auto_failback"); } switch(rtype) { case HB_LOCAL_RSC: rup=HB_FOREIGN_RSC; break; case HB_FOREIGN_RSC: rup=HB_LOCAL_RSC; break; case HB_ALL_RSC: rup=HB_ALL_RSC; break; } procinfo->i_hold_resources |= rup; }else{ cl_log(LOG_INFO , "Other node completed standby" " takeover of %s resources." 
, decode_resources(rtype)); } hb_send_resources_held(TRUE, NULL); going_standby = NOT; }else{ message_ignored = 1; } break; } if (message_ignored){ cl_log(LOG_ERR , "Ignored standby message '%s' from %s in state %d" , info, from, orig_standby); } if (ANYDEBUG) { cl_log(LOG_INFO, "New standby state: %d", going_standby); } hb_shutdown_if_needed(); } static int countbystatus(const char * status, int matchornot) { int count = 0; int matches; int j; matchornot = (matchornot ? TRUE : FALSE); for (j=0; j < config->nodecount; ++j) { if (config->nodes[j].nodetype == PINGNODE_I) { continue; } matches = (strcmp(config->nodes[j].status, status) == 0); if (matches == matchornot) { ++count; } } return count; } static void go_standby(enum standby who, int resourceset) /* Which resources to give up */ { FILE * rkeys; char cmd[MAXLINE]; char buf[MAXLINE]; int finalrc = HA_OK; int rc = 0; pid_t pid; int actresources; /* Resources to act on */ const char * querycmd = "allkeys"; #define ACTION_ACQUIRE 0 #define ACTION_GIVEUP 1 int action; /* Action to take */ static const char* actionnames[2] = {"acquire", "give up"}; static const char* actioncmds [2] = {"takegroup", "givegroup"}; /* * We consider them unstable because they're about to pick up * our resources. */ if (who == ME) { other_is_stable = FALSE; if (ANYDEBUG) { cl_log(LOG_DEBUG, "go_standby: other is unstable"); } /* Make sure they know what we're doing and that we're * not done yet (not stable) * Since heartbeat doesn't guarantee message ordering * this could theoretically have problems, but all that * happens if it gets out of order is that we get * a funky warning message (or maybe two). */ procinfo->i_hold_resources &= ~resourceset; hb_send_resources_held(FALSE, "standby"); action = ACTION_GIVEUP; }else{ action = ACTION_ACQUIRE; } /* We need to fork so we can make child procs not real time */ switch((pid=fork())) { case -1: cl_log(LOG_ERR, "Cannot fork."); return; /* * We can't block here, because then we * aren't sending heartbeats out... */ default: HB_RSCMGMTPROC(pid, "go_standby"); return; case 0: /* Child */ break; } hb_setup_child(); setpgid(0,0); CL_SIGNAL(SIGCHLD, SIG_DFL); /* Figure out which resources to inquire about */ switch(resourceset) { case HB_FOREIGN_RSC: actresources = (who == ME ? HB_FOREIGN_RSC : HB_LOCAL_RSC); break; case HB_LOCAL_RSC: actresources = (who == ME ? HB_LOCAL_RSC : HB_FOREIGN_RSC); break; case HB_ALL_RSC: actresources = HB_ALL_RSC; break; default: cl_log(LOG_ERR, "no resources to %s" , actionnames[action]); exit(10); } /* Figure out what command to issue to get resource list... */ switch (actresources) { case HB_FOREIGN_RSC: querycmd = "otherkeys"; break; case HB_LOCAL_RSC: querycmd = "ourkeys"; break; case HB_ALL_RSC: querycmd = "allkeys"; break; } cl_log(LOG_INFO , "%s %s HA resources (standby)." , actionnames[action] , rsc_msg[actresources]); if (ANYDEBUG) { cl_log(LOG_INFO, "go_standby: who: %d resource set: %s" , who, rsc_msg[actresources]); cl_log(LOG_INFO, "go_standby: (query/action): (%s/%s)" , querycmd, actioncmds[action]); } /* * We could do this ourselves fairly easily... */ snprintf(cmd, sizeof(cmd), RSC_MGR " %s", querycmd); if ((rkeys = popen(cmd, "r")) == NULL) { cl_log(LOG_ERR, "Cannot run command %s", cmd); return; } while (fgets(buf, MAXLINE, rkeys) != NULL) { if (buf[strlen(buf)-1] == '\n') { buf[strlen(buf)-1] = EOS; } snprintf(cmd, sizeof(cmd), RSC_MGR " %s %s" , actioncmds[action], buf); /*should we use logging daemon or not in script*/ setenv(HALOGD, cl_log_get_uselogd()? 
"yes":"no", 1); if ((rc=system(cmd)) != 0) { cl_log(LOG_ERR, "%s %s", cmd, rctomsg(rc)); finalrc=HA_FAIL; } } if ((rc = pclose(rkeys)) != 0) { cl_log(LOG_ERR, "pclose(%s) %s", cmd, rctomsg(rc)); } cl_log(LOG_INFO, "%s HA resource %s completed (standby)." , rsc_msg[actresources] , action == ACTION_ACQUIRE ? "acquisition" : "release"); send_standby_msg(DONE); exit(rc); } void hb_shutdown_if_needed(void) { if (rsc_needs_shutdown) { hb_giveup_resources(); } } /* * This is the first part of the graceful shutdown process * * We cannot shut down right now if resource actions are pending... * * Examples: * - initial resource acquisition * - hb_standby in progress * - req_our_resources() in progress * - notify_world() in progress * * All these ideas are encapsulated by hb_rsc_isstable() */ void hb_giveup_resources(void) { FILE * rkeys; char cmd[MAXLINE]; char buf[MAXLINE]; int finalrc = HA_OK; int rc; pid_t pid; struct ha_msg * m; static int resource_shutdown_in_progress = FALSE; if (!DoManageResources){ if (!shutdown_in_progress) { hb_initiate_shutdown(FALSE); } return; } if (!hb_rsc_isstable()) { /* Try again later... */ /* (through hb_shutdown_if_needed()) */ if (!rsc_needs_shutdown) { cl_log(LOG_WARNING , "Shutdown delayed until current" " resource activity finishes."); rsc_needs_shutdown = TRUE; } return; } rsc_needs_shutdown = FALSE; shutdown_in_progress = TRUE; if (resource_shutdown_in_progress) { cl_log(LOG_INFO, "Heartbeat shutdown already underway."); return; } resource_shutdown_in_progress = TRUE; if (ANYDEBUG) { cl_log(LOG_INFO, "hb_giveup_resources(): " "current status: %s", curnode->status); } hb_close_watchdog(); DisableProcLogging(); /* We're shutting down */ procinfo->i_hold_resources = HB_NO_RSC ; resourcestate = HB_R_SHUTDOWN; /* or we'll get a whiny little comment out of the resource management code */ if (nice_failback) { hb_send_resources_held(FALSE, "shutdown"); } cl_log(LOG_INFO, "Heartbeat shutdown in progress. (%d)" , (int) getpid()); /* We need to fork so we can make child procs not real time */ switch((pid=fork())) { case -1: cl_log(LOG_ERR, "Cannot fork."); return; default: HB_RSCMGMTPROC(pid , "hb_giveup_resources"); return; case 0: /* Child */ break; } hb_setup_child(); setpgid(0,0); set_proc_title("%s: hb_signal_giveup_resources()", cmdname); /* We don't want to be interrupted while shutting down */ CL_SIGNAL(SIGCHLD, SIG_DFL); CL_SIGINTERRUPT(SIGCHLD, 0); alarm(0); CL_IGNORE_SIG(SIGALRM); CL_SIGINTERRUPT(SIGALRM, 0); CL_IGNORE_SIG(SIGTERM); /* CL_SIGINTERRUPT(SIGTERM, 0); */ cl_log(LOG_INFO, "Giving up all HA resources."); /* * We could do this ourselves fairly easily... */ strlcpy(cmd, RSC_MGR " listkeys '.*'" , sizeof(cmd)); if ((rkeys = popen(cmd, "r")) == NULL) { cl_log(LOG_ERR, "Cannot run command %s", cmd); exit(1); } while (fgets(buf, MAXLINE, rkeys) != NULL) { if (buf[strlen(buf)-1] == '\n') { buf[strlen(buf)-1] = EOS; } /*should we use logging daemon or not in script*/ setenv(HALOGD, cl_log_get_uselogd()? 
"yes":"no", 1); snprintf(cmd, sizeof(buf) , RSC_MGR " givegroup %s" , buf); if ((rc=system(cmd)) != 0) { cl_log(LOG_ERR, "%s %s", cmd, rctomsg(rc)); finalrc=HA_FAIL; } } if ((rc = pclose(rkeys)) != 0) { cl_log(LOG_ERR, "pclose(%s) %s", cmd, rctomsg(rc)); } cl_log(LOG_INFO, "All HA resources relinquished."); if ((m=ha_msg_new(0)) == NULL) { cl_log(LOG_ERR, "Cannot send final shutdown msg"); exit(1); } if ((ha_msg_add(m, F_TYPE, T_SHUTDONE) != HA_OK || ha_msg_add(m, F_STATUS, DEADSTATUS) != HA_OK)) { cl_log(LOG_ERR, "hb_signal_giveup_resources: " "Cannot create local msg"); ha_msg_del(m); }else{ if (ANYDEBUG) { cl_log(LOG_DEBUG, "Sending T_SHUTDONE."); } rc = send_cluster_msg(m); m = NULL; } exit(0); } void Initiate_Reset(Stonith* s, const char * nodename, gboolean doreset) { const char* result = "bad"; int pid; int exitcode = 0; struct StonithProcHelper * h; int rc; ProcTrack_ops * track; /* * We need to fork because the stonith operations block for a long * time (10 seconds in common cases) */ track = (doreset ? &StonithProcessTrackOps : &StonithStatProcessTrackOps); switch((pid=fork())) { case -1: cl_log(LOG_ERR, "Cannot fork."); return; default: h = g_new(struct StonithProcHelper, 1); h->nodename = g_strdup(nodename); NewTrackedProc(pid, 1, PT_LOGVERBOSE, h, track); /* StonithProcessDied is called when done */ return; case 0: /* Child */ break; } /* Guard against possibly hanging Stonith code, etc... */ hb_setup_child(); setpgid(0,0); set_proc_title("%s: Initiate_Reset()", cmdname); CL_SIGNAL(SIGCHLD,SIG_DFL); if (doreset) { cl_log(LOG_INFO , "Resetting node %s with [%s]" , nodename , stonith_get_info(s, ST_DEVICEID)); }else{ cl_log(LOG_INFO , "Checking status of STONITH device [%s]" , stonith_get_info(s, ST_DEVICEID)); } if (doreset) { rc = stonith_req_reset(s, ST_GENERIC_RESET, nodename); }else{ rc = stonith_get_status(s); } switch (rc) { case S_OK: result=T_STONITH_OK; if (doreset) { cl_log(LOG_INFO , "node %s now reset.", nodename); } exitcode = 0; break; case S_BADHOST: cl_log(LOG_ERR , "Device %s cannot reset host %s." , stonith_get_info(s, ST_DEVICEID) , nodename); exitcode = 100; result = T_STONITH_BADHOST; break; default: if (doreset) { cl_log(LOG_ERR, "Host %s not reset!", nodename); }else{ cl_log(LOG_ERR, "STONITH device %s not operational!" , stonith_get_info(s, ST_DEVICEID)); } exitcode = 1; result = T_STONITH_BAD; } if (doreset) { send_stonith_msg(nodename, result); } exit (exitcode); } static void RscMgmtProcessRegistered(ProcTrack* p) { ResourceMgmt_child_count ++; if (ANYDEBUG) { cl_log(LOG_DEBUG, "Process [%s] started pid %d" , p->ops->proctype(p) , p->pid ); } } /* Handle the death of a resource management process */ static void RscMgmtProcessDied(ProcTrack* p, int status, int signo, int exitcode , int waslogged) { const char * pname = RscMgmtProcessName(p); ResourceMgmt_child_count --; if (ANYDEBUG) { cl_log(LOG_DEBUG, "RscMgmtProc '%s' exited code %d" , pname, exitcode); } if (strcmp(pname, "req_our_resources") == 0 || strcmp(pname, "ip-request-resp") == 0) { local_takeover_work_done = TRUE; AnnounceTakeover(pname); }else if (!nice_failback && strcmp(pname, "status") == 0) { int deadcount = countbystatus(DEADSTATUS, TRUE); if (deadcount > 0) { /* Must be our partner is dead... 
* Status would have invoked mach_down * and now all their resource are belong to us */ foreign_takeover_work_done = TRUE; AnnounceTakeover(pname); } } p->privatedata = NULL; StartNextRemoteRscReq(); hb_shutdown_if_needed(); } static const char * RscMgmtProcessName(ProcTrack* p) { struct hb_const_string * s = p->privatedata; return (s && s->str ? s->str : "heartbeat resource child"); } /*********************************************************************** * * RemoteRscRequests are resource management requests from other nodes * * Our "privatedata" is a GHook. This GHook points back to the * queue entry for this object. Its "data" element points to the message * which we want to give to the function which the hook points to... * QueueRemoteRscReq is the function which sets up the hook, then queues * it for later execution. * * StartNextRemoteRscReq() is the function which runs the hook, * when the time is right. Basically, we won't run the hook if any * other asynchronous resource management operations are going on. * This solves the problem of a remote request coming in and conflicting * with a different local resource management request. It delays * it until the local startup/takeover/etc. operations are complete. * At this time, it has a clear picture of what's going on, and * can safely do its thing. * * So, we queue the job to do in a Ghook. When the Ghook runs, it * will create a ProcTrack object to track the completion of the process. * * When the process completes, it will clean up the ProcTrack, which in * turn will remove the GHook from the queue, destroying it and the * associated struct ha_msg* from the original message. * ***********************************************************************/ static GHookList RemoteRscReqQueue = {0,0,0}; static GHook* RunningRemoteRscReq = NULL; /* Initialized the remote resource request queue */ static void InitRemoteRscReqQueue(void) { if (RemoteRscReqQueue.is_setup) { return; } g_hook_list_init(&RemoteRscReqQueue, sizeof(GHook)); } /* Queue a remote resource request */ void QueueRemoteRscReq(RemoteRscReqFunc func, struct ha_msg* msg) { GHook* hook; const char * fp; if (!DoManageResources) { return; } InitRemoteRscReqQueue(); hook = g_hook_alloc(&RemoteRscReqQueue); fp = ha_msg_value(msg, F_TYPE); if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "Queueing remote resource request (hook = 0x%p) %s" , (void *)hook, fp); cl_log_message(LOG_DEBUG, msg); } if (fp == NULL || !FilterNotifications(fp)) { if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "%s: child process unneeded.", fp); cl_log_message(LOG_DEBUG, msg); } g_hook_free(&RemoteRscReqQueue, hook); return; } hook->func = func; hook->data = ha_msg_copy(msg); hook->destroy = (GDestroyNotify)(ha_msg_del); g_hook_append(&RemoteRscReqQueue, hook); StartNextRemoteRscReq(); } /* If the time is right, start the next remote resource request */ static void StartNextRemoteRscReq(void) { GHook* hook; RemoteRscReqFunc func; /* We can only run one of these at a time... */ if (ResourceMgmt_child_count != 0) { cl_log(LOG_DEBUG, "StartNextRemoteRscReq(): child count %d" , ResourceMgmt_child_count); return; } RunningRemoteRscReq = NULL; /* Run the first hook in the list... */ hook = g_hook_first_valid(&RemoteRscReqQueue, FALSE); if (hook == NULL) { ResourceMgmt_child_count = 0; hb_shutdown_if_needed(); return; } RunningRemoteRscReq = hook; func = hook->func; if (ANYDEBUG) { cl_log(LOG_DEBUG, "StartNextRemoteRscReq() - calling hook"); } /* Call the hook... 
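 * Each queued request is a GHook whose data field holds a private copy
 * of the triggering message; destroying the hook right after the call
 * below is what frees that copy, through the GDestroyNotify installed
 * when it was queued.  The enqueue side in QueueRemoteRscReq() above
 * boils down to:
 *
 *     hook = g_hook_alloc(&RemoteRscReqQueue);
 *     hook->func    = func;                    e.g. PerformQueuedNotifyWorld
 *     hook->data    = ha_msg_copy(msg);        private copy of the request
 *     hook->destroy = (GDestroyNotify)ha_msg_del;
 *     g_hook_append(&RemoteRscReqQueue, hook);
 *     StartNextRemoteRscReq();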
*/ func(hook); g_hook_destroy_link(&RemoteRscReqQueue, hook); g_hook_unref(&RemoteRscReqQueue, hook); } /* * Perform a queued notify_world() call * * The Ghook and message are automatically destroyed by our * caller. */ void PerformQueuedNotifyWorld(GHook* hook) { struct ha_msg* m = hook->data; /* * We have been asked to run a notify_world() which * we would like to have done earlier... */ if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "PerformQueuedNotifyWorld() msg follows"); cl_log_message(LOG_DEBUG, m); } notify_world(m, curnode->status); /* "m" is automatically destroyed when "hook" is */ } static gboolean StonithProc(gpointer gph) { struct StonithProcHelper* h = gph; Initiate_Reset(config->stonith, h->nodename, TRUE); return FALSE; } /* Handle the death of a STONITH process */ static void StonithProcessDied(ProcTrack* p, int status, int signo, int exitcode, int waslogged) { struct StonithProcHelper* h = p->privatedata; if (signo != 0 || exitcode != 0) { cl_log(LOG_ERR, "STONITH of %s failed. Retrying..." , h->nodename); Gmain_timeout_add(5*1000, StonithProc, h); /* Don't free 'h' - we still need it */ p->privatedata = NULL; return; }else{ /* We need to finish taking over the other side's resources */ takeover_from_node(h->nodename); } g_free(h->nodename); h->nodename=NULL; g_free(p->privatedata); p->privatedata = NULL; } static const char * StonithProcessName(ProcTrack* p) { static char buf[100]; struct StonithProcHelper * h = p->privatedata; snprintf(buf, sizeof(buf), "STONITH %s", h->nodename); return buf; } static gboolean StonithStatProc(gpointer dummy) { Initiate_Reset(config->stonith, "?", FALSE); return FALSE; } static void StonithStatProcessDied(ProcTrack* p, int status, int signo, int exitcode, int waslogged) { struct StonithProcHelper* h = p->privatedata; if ((signo != 0 && signo != SIGTERM) || exitcode != 0) { cl_log(LOG_ERR, "STONITH status operation failed."); cl_log(LOG_INFO, "This may mean that the STONITH device has failed!"); } g_free(h->nodename); h->nodename=NULL; g_free(p->privatedata); p->privatedata = NULL; Gmain_timeout_add(3600*1000, StonithStatProc, NULL); } static const char * StonithStatProcessName(ProcTrack* p) { static char buf[100]; snprintf(buf, sizeof(buf), "STONITH-stat"); return buf; } Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_resource.h0000644000000000000000000000557311576626513021621 0ustar00usergroup00000000000000/* * Copyright (C) 2001-2002 Luis Claudio R. Goncalves * * Copyright (C) 1999-2002 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _HB_RESOURCE_H #define _HB_RESOURCE_H #include #include /* Resource-related types and variables */ enum standby { NOT, ME, OTHER, DONE }; enum hb_rsc_state { HB_R_INIT, /* Links not up yet */ HB_R_STARTING, /* Links up, start message issued */ HB_R_BOTHSTARTING, /* Links up, start msg received & issued */ /* BOTHSTARTING now equiv to STARTING (?) 
*/ HB_R_RSCRCVD, /* Resource Message received */ HB_R_STABLE, /* Local resources acquired, too... */ HB_R_SHUTDOWN /* We're in shutdown... */ }; /* * Note that the _RSC defines below are bit fields! */ #define HB_NO_RESOURCES "none" #define HB_NO_RSC 0 #define HB_LOCAL_RESOURCES "local" #define HB_LOCAL_RSC 1 #define HB_FOREIGN_RESOURCES "foreign" #define HB_FOREIGN_RSC 2 #define HB_ALL_RSC (HB_LOCAL_RSC|HB_FOREIGN_RSC) #define HB_ALL_RESOURCES "all" typedef void (*RemoteRscReqFunc) (GHook * data); extern int DoManageResources; extern int nice_failback; extern int other_holds_resources; extern int other_is_stable; extern int takeover_in_progress; extern enum hb_rsc_state resourcestate; extern enum standby going_standby; extern longclock_t standby_running; extern longclock_t local_takeover_time; extern int DoManageResources; /* Also: procinfo->i_hold_resources */ /* Resource-related functions */ void notify_world(struct ha_msg * msg, const char * ostatus); void PerformQueuedNotifyWorld(GHook* g); int parse_ha_resources(const char * cfgfile); int encode_resources(const char *p); const char * decode_resources(int); void comm_up_resource_action(void); void process_resources(const char * type, struct ha_msg* msg , struct node_info * thisnode); void takeover_from_node(const char * nodename); void req_our_resources(int getthemanyway); void ask_for_resources(struct ha_msg *msg); void AuditResources(void); void QueueRemoteRscReq(RemoteRscReqFunc, struct ha_msg* data); void hb_rsc_recover_dead_resources(struct node_info* hip); const char * hb_rsc_resource_state(void); #endif Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_rexmit.c0000644000000000000000000001550311576626513021267 0ustar00usergroup00000000000000/* * Heartbeat retransmission mechanism * * Copyright (C) 2005 Guochun Shi * * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void schedule_rexmit_request(struct node_info* node, seqno_t seq, int delay); int max_rexmit_delay = 250; static GHashTable* rexmit_hash_table = NULL; void hb_set_max_rexmit_delay(int); struct rexmit_info{ seqno_t seq; struct node_info* node; }; void hb_set_max_rexmit_delay(int value) { if (value <= 0){ cl_log(LOG_ERR, "%s: invalid value (%d)", __FUNCTION__, value); return; } if (ANYDEBUG){ cl_log(LOG_DEBUG, "Setting max_rexmit_delay to %d ms", value); } max_rexmit_delay =value; return; } static guint rexmit_hash_func(gconstpointer key) { const struct rexmit_info* ri; guint hashvalue; ri = (const struct rexmit_info*) key; hashvalue= ri->seq* g_str_hash(ri->node->nodename); return hashvalue; } static gboolean rexmit_info_equal(gconstpointer a, gconstpointer b){ const struct rexmit_info* ri1 ; const struct rexmit_info* ri2 ; ri1 = (const struct rexmit_info*) a; ri2 = (const struct rexmit_info*) b; if (ri1->seq == ri2->seq && strcmp(ri1->node->nodename, ri2->node->nodename)== 0){ return TRUE; } return FALSE; } static void free_data_func(gpointer data) { if (data){ free(data); data = NULL; } } static void entry_display(gpointer key, gpointer value, gpointer user_data) { struct rexmit_info* ri = (struct rexmit_info*)key; unsigned long tag = (unsigned long) value; cl_log(LOG_INFO, "seq, node, nodename (%ld, %p, %s), tag = %ld", ri->seq, ri->node, ri->node->nodename, tag); } static void rexmit_hash_table_display(void) { cl_log(LOG_INFO, "Dumping rexmit hash table:"); if (rexmit_hash_table == NULL){ cl_log(LOG_INFO, "rexmit_hash_table is NULL"); return; } g_hash_table_foreach(rexmit_hash_table, entry_display, NULL); return; } int init_rexmit_hash_table(void) { rexmit_hash_table = g_hash_table_new_full(rexmit_hash_func, rexmit_info_equal, free_data_func, NULL); if (rexmit_hash_table == NULL){ cl_log(LOG_ERR, "%s: creating rexmit hash_table failed",__FUNCTION__); return HA_FAIL; } return HA_OK; } int destroy_rexmit_hash_table(void) { if (rexmit_hash_table){ g_hash_table_destroy(rexmit_hash_table); } return HA_OK; } static gboolean send_rexmit_request( gpointer data) { struct rexmit_info* ri = (struct rexmit_info*) data; seqno_t seq = (seqno_t) ri->seq; struct node_info* node = ri->node; struct ha_msg* hmsg; if (STRNCMP_CONST(node->status, UPSTATUS) != 0 && STRNCMP_CONST(node->status, ACTIVESTATUS) !=0) { /* no point requesting rexmit from a dead node. 
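 * Otherwise this function sends one T_REXMIT request for the missing
 * sequence number, drops its entry from rexmit_hash_table and then
 * re-schedules itself max_rexmit_delay ms later, so the request keeps
 * being repeated until the packet finally arrives and
 * remove_msg_rexmit() cancels the timer.  The first request for a gap
 * is deliberately delayed by a random interval (roughly 50 ms up to
 * max_rexmit_delay ms, see schedule_rexmit_request() below) so that
 * packets which merely arrived out of order do not trigger needless
 * retransmission traffic.  The two non-static entry points are used
 * along these lines (argument names here are only illustrative):
 *
 *     request_msg_rexmit(node, firstmissing, lastmissing);  on a gap
 *     remove_msg_rexmit(node, seq);           once the packet shows up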
*/ return FALSE; } if ((hmsg = ha_msg_new(6)) == NULL) { cl_log(LOG_ERR, "%s: no memory for " T_REXMIT, __FUNCTION__); return FALSE; } if (ha_msg_add(hmsg, F_TYPE, T_REXMIT) != HA_OK || ha_msg_add(hmsg, F_TO, node->nodename) !=HA_OK || ha_msg_add_int(hmsg, F_FIRSTSEQ, seq) != HA_OK || ha_msg_add_int(hmsg, F_LASTSEQ, seq) != HA_OK) { cl_log(LOG_ERR, "%s: adding fields to msg failed", __FUNCTION__); ha_msg_del(hmsg); return FALSE; } if (send_cluster_msg(hmsg) != HA_OK) { cl_log(LOG_ERR, "%s: cannot send " T_REXMIT " request to %s",__FUNCTION__, node->nodename); ha_msg_del(hmsg); return FALSE; } node->track.last_rexmit_req = time_longclock(); if (!g_hash_table_remove(rexmit_hash_table, ri)){ cl_log(LOG_ERR, "%s: entry not found in rexmit_hash_table" "for seq/node(%ld %s)", __FUNCTION__, ri->seq, ri->node->nodename); return FALSE; } schedule_rexmit_request(node, seq, max_rexmit_delay); return FALSE; } #ifndef HAVE_CL_RAND_FROM_INTERVAL /* you should grab latest glue headers! */ static inline int cl_rand_from_interval(const int a, const int b) { /* RAND_MAX may be INT_MAX */ long long r = get_next_random(); return a + (r * (b-a) + RAND_MAX/2)/RAND_MAX; } #endif static void schedule_rexmit_request(struct node_info* node, seqno_t seq, int delay) { unsigned long sourceid; struct rexmit_info* ri; if (delay == 0) { /* generate some random delay, * 50ms offset to allow for out-of-order arrival * without actually sending the rexmit requests, * which happens more often than one might think. */ const int a = max_rexmit_delay < 100 ? 0 : 50; const int b = max_rexmit_delay; delay = cl_rand_from_interval(a,b); } ri = malloc(sizeof(struct rexmit_info)); if (ri == NULL){ cl_log(LOG_ERR, "%s: memory allocation failed", __FUNCTION__); return; } ri->seq = seq; ri->node = node; sourceid = Gmain_timeout_add_full(G_PRIORITY_HIGH - 1, delay, send_rexmit_request, ri, NULL); G_main_setall_id(sourceid, "retransmit request", config->heartbeat_ms/2, 10); if (sourceid == 0){ cl_log(LOG_ERR, "%s: scheduling a timeout event failed", __FUNCTION__); return; } if (rexmit_hash_table == NULL){ init_rexmit_hash_table(); } g_hash_table_insert(rexmit_hash_table, (gpointer)ri, (gpointer)sourceid); return ; } void request_msg_rexmit(struct node_info *node, seqno_t lowseq, seqno_t hiseq) { int i; for (i = lowseq; i <= hiseq; i++){ schedule_rexmit_request(node, i, 0); } return; } int remove_msg_rexmit(struct node_info *node, seqno_t seq) { struct rexmit_info ri; gpointer value; unsigned long sourceid; ri.seq = seq; ri.node =node; (void)rexmit_hash_table_display; value = g_hash_table_lookup(rexmit_hash_table, &ri); if (value == NULL){ cl_log(LOG_ERR, "%s: no entry found in rexmit hash_table for the missing packet(%ld)", __FUNCTION__, seq); return HA_FAIL; }else { sourceid = (unsigned long) value; Gmain_timeout_remove(sourceid); g_hash_table_remove(rexmit_hash_table, &ri); } return HA_OK; } Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_signal.c0000644000000000000000000003337411576626513021242 0ustar00usergroup00000000000000/* * hb_signal.c: signal handling routines to be used by Heartbeat * * Copyright (C) 2002 Horms * * Derived from code in heartbeat.c in this tree * Copyright (C) 2000 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #define _USE_BSD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static volatile unsigned int __hb_signal_pending = 0; #define HB_SIG_REAPER_SIG 0x0001UL #define HB_SIG_TERM_SIG 0x0002UL #define HB_SIG_DEBUG_USR1_SIG 0x0004UL #define HB_SIG_DEBUG_USR2_SIG 0x0008UL #define HB_SIG_PARENT_DEBUG_USR1_SIG 0x0010UL #define HB_SIG_PARENT_DEBUG_USR2_SIG 0x0020UL #define HB_SIG_REREAD_CONFIG_SIG 0x0040UL #define HB_SIG_FALSE_ALARM_SIG 0x0080UL /* * This function does NOT have the same semantics as setting SIG_IGN. * Signals set to SIG_IGN never interrupt system calls. * Setting this signal handler and calling siginterrupt(nsig, TRUE) * will result in the signal interrupting system calls but otherwise * being ignored. This is nice for interrupting writes to serial * ports that might otherwise hang forever (for example). */ static void hb_ignoresig(int sig) { } void hb_signal_signal_all(int sig) { int us = getpid(); int j; extern pid_t processes[MAXPROCS]; if (ANYDEBUG) { ha_log(LOG_DEBUG, "pid %d: received signal %d", us, sig); if (curproc) { ha_log(LOG_DEBUG, "pid %d: type is %d", us , curproc->type); } } if (sig == SIGTERM) { CL_IGNORE_SIG(SIGTERM); cl_make_normaltime(); } for (j=0; j < procinfo->nprocs; ++j) { if (processes[j] != us && processes[j] != 0) { if (ANYDEBUG) { ha_log(LOG_DEBUG , "%d: Signalling process %d [%d]" , us, (int) processes[j], (int) sig); } return_to_orig_privs(); CL_KILL(processes[j], sig); return_to_dropped_privs(); } } switch (sig) { case SIGTERM: /* Shouldn't happen... */ if (curproc && curproc->type == PROC_MST_CONTROL) { return; } cleanexit(1); break; } return; } /* Signal handler to use with SIGCHLD to free the * resources of any exited children using wait3(2). * This stops zombie processes from hanging around */ void hb_signal_reaper_handler(int sig) { __hb_signal_pending|=HB_SIG_REAPER_SIG; } /* * We need to handle the case of the exiting process is one of our * client children that we spawn as requested when we started up. */ void hb_signal_reaper_action(int waitflags) { int status; pid_t pid; while((pid=wait3(&status, waitflags, NULL)) > 0 || (pid == -1 && errno == EINTR)) { if (pid > 0) { /* If they're in the API client table, * remove them... 
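 * (Note that, as with every signal in this file, the real work is done
 * in this *_action() routine rather than in the handler itself:
 * hb_signal_reaper_handler() only sets HB_SIG_REAPER_SIG in
 * __hb_signal_pending, which keeps the handler async-signal-safe, and
 * the pending bits are drained later, outside signal context, when
 * hb_signal_process_pending() is called; it invokes this function with
 * WNOHANG so every child that has exited since the bit was set is
 * reaped in one non-blocking pass.  Schematically, with the caller
 * sketched only for illustration:
 *
 *     void hb_signal_reaper_handler(int sig)
 *     {
 *             __hb_signal_pending |= HB_SIG_REAPER_SIG;
 *     }
 *     ...
 *     if (hb_signal_pending())
 *             hb_signal_process_pending();    runs the queued actions
 * )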
*/ api_remove_client_pid(pid, "died"); ReportProcHasDied(pid, status); } } } void hb_signal_term_handler(int sig) { __hb_signal_pending |= HB_SIG_TERM_SIG; } void hb_signal_term_action(void) { extern volatile struct process_info *curproc; return_to_orig_privs(); cl_make_normaltime(); if (ANYDEBUG) { ha_log(LOG_DEBUG, "Process %d processing SIGTERM" , (int) getpid()); } if (curproc->type == PROC_MST_CONTROL) { hb_initiate_shutdown(FALSE); }else{ cleanexit(SIGTERM); } } static void __hb_signal_debug_action(int sig) { extern PILPluginUniv *PluginLoadingSystem; switch(sig) { case SIGUSR1: ++debug_level; break; case SIGUSR2: if (debug_level > 0) { --debug_level; }else{ debug_level=0; } break; } PILSetDebugLevel(PluginLoadingSystem, NULL, NULL , debug_level); { static char cdebug[8]; snprintf(cdebug, sizeof(debug_level), "%d", debug_level); setenv(HADEBUGVAL, cdebug, TRUE); } if (debug_level <= 0) { unsetenv(HADEBUGVAL); } } void hb_signal_debug_usr1_handler(int sig) { __hb_signal_pending|=HB_SIG_DEBUG_USR1_SIG; } void hb_signal_debug_usr1_action(void) { __hb_signal_debug_action(SIGUSR1); } void hb_signal_debug_usr2_handler(int sig) { __hb_signal_pending|=HB_SIG_DEBUG_USR2_SIG; } void hb_signal_debug_usr2_action(void) { __hb_signal_debug_action(SIGUSR2); } static void __parent_hb_signal_debug_action(int sig) { int olddebug = debug_level; __hb_signal_debug_action(sig); hb_signal_signal_all(sig); ha_log(LOG_DEBUG, "debug now set to %d [pid %d]", debug_level , (int) getpid()); if (debug_level == 1 && olddebug == 0) { hb_versioninfo(); } } void parent_hb_signal_debug_usr1_handler(int sig) { __hb_signal_pending|=HB_SIG_PARENT_DEBUG_USR1_SIG; } void parent_hb_signal_debug_usr1_action(void) { __parent_hb_signal_debug_action(SIGUSR1); } void parent_hb_signal_debug_usr2_handler(int sig) { __hb_signal_pending|=HB_SIG_PARENT_DEBUG_USR2_SIG; } void parent_hb_signal_debug_usr2_action(void) { __parent_hb_signal_debug_action(SIGUSR2); } void hb_signal_reread_config_handler(int sig) { __hb_signal_pending|=HB_SIG_REREAD_CONFIG_SIG; } void hb_signal_reread_config_action(void) { int j; int signal_children = 0; /* If we're the master control process, tell our children */ if (curproc->type == PROC_MST_CONTROL) { struct stat buf; if (stat(CONFIG_NAME, &buf) < 0) { ha_perror("Cannot stat " CONFIG_NAME); return; } if (ANYDEBUG) { ha_log(LOG_DEBUG , "stat of %s: %lu versus old %lu" , CONFIG_NAME , (unsigned long)buf.st_mtime , (unsigned long)config->cfg_time); } if ((TIME_T)buf.st_mtime != config->cfg_time) { procinfo->giveup_resources = FALSE; procinfo->restart_after_shutdown = TRUE; hb_initiate_shutdown(TRUE); return; } if (stat(KEYFILE, &buf) < 0) { ha_perror("Cannot stat " KEYFILE); }else if ((TIME_T)buf.st_mtime != config->auth_time) { config->rereadauth = TRUE; ha_log(LOG_INFO, "Rereading authentication file."); signal_children = TRUE; }else{ ha_log(LOG_INFO, "Configuration unchanged."); } }else{ /* * We are not the control process, and we received a SIGHUP * signal. This means the authentication file has changed. 
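 * (It is the control process that actually examines the files: on
 * SIGHUP it stats CONFIG_NAME and KEYFILE and, as the branch above
 * shows, reacts roughly as follows:
 *
 *     CONFIG_NAME mtime changed  ->  restart_after_shutdown = TRUE,
 *                                    hb_initiate_shutdown(TRUE)
 *     KEYFILE mtime changed      ->  rereadauth = TRUE, SIGHUP children
 *     neither changed            ->  "Configuration unchanged."
 *
 * The SIGHUP it forwards to its children is what brings us here.)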
*/ ha_log(LOG_INFO, "Child rereading authentication file."); config->rereadauth = TRUE; check_auth_change(config); } if (ParseTestOpts() && curproc->type == PROC_MST_CONTROL) { signal_children = 1; } if (signal_children) { return_to_orig_privs(); for (j=0; j < procinfo->nprocs; ++j) { if (procinfo->info+j != curproc) { CL_KILL(procinfo->info[j].pid, SIGHUP); } } return_to_dropped_privs(); } } void hb_signal_false_alarm_handler(int sig) { __hb_signal_pending|=HB_SIG_FALSE_ALARM_SIG; } void hb_signal_false_alarm_action(void) { ha_log(LOG_ERR, "Unexpected alarm in process %d", (int) getpid()); } static sigset_t __hb_signal_process_pending_mask; int __hb_signal_process_pending_mask_set = 0; void hb_signal_process_pending_set_mask_set(const sigset_t *set) { if (!set) { return; } memcpy(&__hb_signal_process_pending_mask, set, sizeof(sigset_t)); __hb_signal_process_pending_mask_set = 1; } unsigned int hb_signal_pending(void) { return(__hb_signal_pending); } void hb_signal_process_pending(void) { while (__hb_signal_pending) { unsigned long handlers; if (__hb_signal_process_pending_mask_set && cl_signal_block_set(SIG_BLOCK , &__hb_signal_process_pending_mask, NULL) < 0) { ha_log(LOG_ERR, "hb_signal_process_pending(): " "cl_signal_block_set(): " "Could not block signals"); } handlers = __hb_signal_pending; __hb_signal_pending=0; /* Allow signals */ if (__hb_signal_process_pending_mask_set && cl_signal_block_set(SIG_UNBLOCK , &__hb_signal_process_pending_mask, NULL) < 0) { ha_log(LOG_ERR, "hb_signal_process_pending(): " "cl_signal_block_set(): " "Could not unblock signals"); } if (handlers&HB_SIG_TERM_SIG) { hb_signal_term_action(); } if (handlers&HB_SIG_DEBUG_USR1_SIG) { hb_signal_debug_usr1_action(); } if (handlers&HB_SIG_DEBUG_USR2_SIG) { hb_signal_debug_usr2_action(); } if (handlers&HB_SIG_PARENT_DEBUG_USR1_SIG) { parent_hb_signal_debug_usr1_action(); } if (handlers&HB_SIG_PARENT_DEBUG_USR2_SIG) { parent_hb_signal_debug_usr2_action(); } if (handlers&HB_SIG_REREAD_CONFIG_SIG) { hb_signal_reread_config_action(); } if (handlers&HB_SIG_FALSE_ALARM_SIG) { hb_signal_false_alarm_action(); } if (handlers&HB_SIG_REAPER_SIG) { hb_signal_reaper_action(WNOHANG); } } } int hb_signal_set_common(sigset_t *set) { sigset_t our_set; sigset_t *use_set; const cl_signal_mode_t mode [] = { {SIGHUP, hb_signal_reread_config_handler,1} , {SIGPIPE, SIG_IGN, 0} #ifdef SIGSTP , {SIGSTP, SIG_IGN, 0} #endif #ifdef SIGTTOU , {SIGTTOU, SIG_IGN, 0} #endif #ifdef SIGTTIN , {SIGTTIN, SIG_IGN, 0} #endif , {SIGINT, SIG_IGN, 0} , {SIGTERM, hb_signal_term_handler, 1} , {SIGALRM, hb_signal_false_alarm_handler, 1} , {SIGUSR1, hb_signal_debug_usr1_handler, 1} , {SIGUSR2, hb_signal_debug_usr2_handler, 1} , {0, 0, 0} }; if (set) { use_set = set; }else{ use_set = &our_set; if (CL_SIGEMPTYSET(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_common(): " "CL_SIGEMPTYSET(): %s", strerror(errno)); return(-1); } } if (cl_signal_set_handler_mode(mode, use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_common(): " "cl_signal_set_handler_mode()"); return(-1); } hb_signal_process_pending_set_mask_set(use_set); /* * This signal is generated by our ttys in order to cause output * flushing, but we don't want to see it in our software. * I don't think this next function call is needed any more because * it's covered by the cl_signal_mode_t above. 
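 * (The mode table above already installs SIG_IGN for SIGINT; the two
 * calls below additionally mark it as non-interrupting and then leave
 * it blocked via cl_signal_block(SIG_BLOCK, SIGINT, NULL), so stray
 * tty-generated interrupts cannot disturb heartbeat either way.)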
*/ if (cl_signal_set_interrupt(SIGINT, 0) < 0) { ha_log(LOG_ERR, "hb_signal_set_common(): " "cl_signal_set_interrupt()"); return(-1); } if (cl_signal_block(SIG_BLOCK, SIGINT, NULL) < 0) { ha_log(LOG_ERR, "hb_signal_set_common(): " "cl_signal_block()"); return(-1); } return(0); } int hb_signal_set_write_child(sigset_t *set) { sigset_t our_set; sigset_t *use_set; const cl_signal_mode_t mode [] = { {SIGALRM, hb_ignoresig, 1} , {0, 0, 0} }; if (set) { use_set = set; }else{ use_set = &our_set; if (CL_SIGEMPTYSET(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_write_child(): " "CL_SIGEMPTYSET(): %s", strerror(errno)); return(-1); } } if (hb_signal_set_common(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_write_child(): " "hb_signal_set_common()"); return(-1); } if (cl_signal_set_handler_mode(mode, use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_write_child(): " "cl_signal_set_handler_mode()"); return(-1); } hb_signal_process_pending_set_mask_set(use_set); return(0); } int hb_signal_set_read_child(sigset_t *set) { if (hb_signal_set_common(set) < 0) { ha_log(LOG_ERR, "hb_signal_set_read_child(): " "hb_signal_set_common()"); return(-1); } hb_signal_process_pending_set_mask_set(set); return(0); } int hb_signal_set_fifo_child(sigset_t *set) { sigset_t *use_set; sigset_t our_set; const cl_signal_mode_t mode [] = { {SIGALRM, hb_ignoresig, 1} , {0, 0, 0} }; if (set) { use_set = set; }else{ use_set = &our_set; if (CL_SIGEMPTYSET(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_write_child(): " "CL_SIGEMPTYSET(): %s", strerror(errno)); return(-1); } } if (hb_signal_set_common(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_fifo_child(): " "hb_signal_set_common()"); return(-1); } if (cl_signal_set_handler_mode(mode, use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_fifo_child(): " "cl_signal_set_handler_mode()"); return(-1); } if (cl_signal_set_handler_mode(mode, use_set) < 0) { ha_log(LOG_ERR, "%s(): cl_signal_set_handler_mode() failed." 
, __FUNCTION__); return(-1); } hb_signal_process_pending_set_mask_set(set); return(0); } int hb_signal_set_master_control_process(sigset_t *set) { sigset_t our_set; sigset_t *use_set; const cl_signal_mode_t mode [] = { {SIGTERM, hb_signal_term_handler, 1} , {SIGUSR1, parent_hb_signal_debug_usr1_handler, 1} , {SIGUSR2, parent_hb_signal_debug_usr2_handler, 1} , {SIGALRM, hb_signal_false_alarm_handler, 1} , {0, 0, 0} }; if (set) { use_set = set; }else{ use_set = &our_set; if (CL_SIGEMPTYSET(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_master_control_process(): " "CL_SIGEMPTYSET(): %s", strerror(errno)); return(-1); } } if (hb_signal_set_common(use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_master_control_process(): " "hb_signal_set_common()"); return(-1); } if (cl_signal_set_handler_mode(mode, use_set) < 0) { ha_log(LOG_ERR, "hb_signal_set_master_control_process(): " "cl_signal_set_handler_mode()"); return(-1); } set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); hb_signal_process_pending_set_mask_set(use_set); return(0); } Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_signal.h0000644000000000000000000000421111576626513021233 0ustar00usergroup00000000000000/* * hb_signal.h: signal handling routines to be used by Heartbeat * * Copyright (C) 2002 Horms * * Derived from code in heartbeat.c in this tree * Copyright (C) 2000 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _HB_SIGNAL_H #define _HB_SIGNAL_H #include void hb_signal_signal_all(int sig); void hb_signal_reaper_handler(int sig); void hb_signal_reaper_action(int waitflags); void hb_signal_term_handler(int sig); void hb_signal_term_action(void); void hb_signal_debug_usr1_handler(int sig); void hb_signal_debug_usr1_action(void); void hb_signal_debug_usr2_handler(int sig); void hb_signal_debug_usr2_action(void); void parent_hb_signal_debug_usr1_handler(int sig); void parent_hb_signal_debug_usr1_action(void); void parent_hb_signal_debug_usr2_handler(int sig); void parent_hb_signal_debug_usr2_action(void); void hb_signal_reread_config_handler(int sig); void hb_signal_reread_config_action(void); void hb_signal_false_alarm_handler(int sig); void hb_signal_false_alarm_action(void); void hb_signal_process_pending_set_mask_set(const sigset_t *set); unsigned int hb_signal_pending(void); void hb_signal_process_pending(void); int hb_signal_set_common(sigset_t *set); int hb_signal_set_write_child(sigset_t *set); int hb_signal_set_read_child(sigset_t *set); int hb_signal_set_fifo_child(sigset_t *set); int hb_signal_set_master_control_process(sigset_t *set); #endif /* _HB_SIGNAL_H */ Heartbeat-3-0-7e3a82377fa8/heartbeat/hb_uuid.c0000644000000000000000000003672311576626513020734 0ustar00usergroup00000000000000/* * heartbeat: Linux-HA uuid code * * Copyright (C) 2004 Guochun Shi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include extern int DoManageResources; #ifndef O_SYNC # define O_SYNC 0 #endif static GHashTable* name_table = NULL; static GHashTable* uuid_table = NULL; static gboolean nodecache_read_yet = FALSE; static gboolean delcache_read_yet = FALSE; extern GList* del_node_list; static int read_node_uuid_file(struct sys_config * cfg); static int read_delnode_file(struct sys_config* cfg); static void remove_all(void); guint uuid_hash(gconstpointer key) { const char *p = key; const char *pmax = p + sizeof(cl_uuid_t); guint h = *p; if (h){ for (p += 1; p < pmax; p++){ h = (h << 5) - h + *p; } } return h; } static int string_hash(const char* key) { const char *p = key; const char *pmax = p + strlen(key); guint h = *p; if (h){ for (p += 1; p < pmax; p++){ h = (h << 5) - h + *p; } } return h; } gint uuid_equal(gconstpointer v, gconstpointer v2) { return (cl_uuid_compare(v, v2) == 0 ); } #if 0 static void print_key_value(gpointer key, gpointer value, gpointer user_data) { struct node_info* hip = (struct node_info*)value; cl_log(LOG_INFO, "key=%s, value=%s", (char*)key, uuid_is_null(&hip->uuid)?"null":"not null"); } static void printout(void){ cl_log(LOG_INFO, " printing out name table:"); g_hash_table_foreach(name_table, print_key_value, NULL); cl_log(LOG_INFO, " printing out uuidname table:"); g_hash_table_foreach(uuid_table, print_key_value, NULL); } #endif static void uuidtable_entry_display( gpointer key, gpointer value, gpointer userdata) { cl_uuid_t* uuid =(cl_uuid_t*) key; struct node_info* node= (struct node_info*)value; char tmpstr[UU_UNPARSE_SIZEOF]; memset(tmpstr , 0, UU_UNPARSE_SIZEOF); cl_uuid_unparse(uuid, tmpstr); cl_log(LOG_DEBUG, "uuid=%s, name=%s", tmpstr, node->nodename); } static void uuidtable_display(void) { cl_log(LOG_DEBUG,"displaying uuid table"); g_hash_table_foreach(uuid_table, uuidtable_entry_display,NULL); return; } static void nametable_display(void) { return; } static struct node_info* lookup_nametable(const char* nodename) { return (struct node_info*)g_hash_table_lookup(name_table, nodename); } static struct node_info* lookup_uuidtable(cl_uuid_t* uuid) { return (struct node_info*)g_hash_table_lookup(uuid_table, uuid); } struct node_info* lookup_tables(const char* nodename, cl_uuid_t* uuid) { struct node_info* hip = NULL; if(!nodename){ cl_log(LOG_ERR,"lookup_tables: bad parameters"); } /*printout();*/ if(uuid){ hip = lookup_uuidtable(uuid); } if(!hip){ hip = lookup_nametable(nodename); } return hip; } /*return value indicates whether tables are changed*/ gboolean update_tables(const char* nodename, cl_uuid_t* uuid) { struct node_info* hip ; if (uuid == NULL){ cl_log(LOG_ERR, "%s: NULL uuid pointer", __FUNCTION__); return FALSE; } if(cl_uuid_is_null(uuid)){ return FALSE; } hip = (struct node_info*) lookup_uuidtable(uuid); if (hip != NULL){ if (strncmp(hip->nodename, nodename, sizeof(hip->nodename)) ==0){ return FALSE; } cl_log(LOG_WARNING, "nodename %s uuid changed to %s" , hip->nodename, nodename); uuidtable_display(); strncpy(hip->nodename, nodename, sizeof(hip->nodename)); add_nametable(nodename, hip); return TRUE; } hip = (struct node_info*) lookup_nametable(nodename); if(!hip){ cl_log(LOG_WARNING, "node %s not found in table", nodename); return FALSE; } if 
(cl_uuid_is_null(&hip->uuid)){ cl_uuid_copy(&hip->uuid, uuid); }else if (cl_uuid_compare(&hip->uuid, uuid) != 0){ char tmpstr[UU_UNPARSE_SIZEOF]; memset(tmpstr , 0, UU_UNPARSE_SIZEOF); cl_uuid_unparse(uuid, tmpstr); cl_log(LOG_INFO, "node %s changed its uuid to %s" , nodename, tmpstr); cl_uuid_copy(&hip->uuid, uuid); nametable_display(); } add_uuidtable(uuid, hip); return TRUE; } int tables_remove(const char* nodename, cl_uuid_t* uuid) { int i; remove_all(); for (i = 0; i< config->nodecount; i++){ add_nametable(config->nodes[i].nodename, &config->nodes[i]); add_uuidtable(&config->nodes[i].uuid, &config->nodes[i]); } return HA_OK; } void add_nametable(const char* nodename, struct node_info* value) { char * ds = strdup(nodename); g_hash_table_insert(name_table, ds, value); } void add_uuidtable(cl_uuid_t* uuid, struct node_info* value) { cl_uuid_t* du ; if (cl_uuid_is_null(uuid)){ return; } du = (cl_uuid_t*)malloc(sizeof(cl_uuid_t)); cl_uuid_copy(du, uuid); g_hash_table_insert(uuid_table, du, value); } static void free_data(gpointer data) { if (data){ g_free(data); } } int inittable(void) { if( uuid_table || name_table){ cleanuptable(); } uuid_table = g_hash_table_new_full(uuid_hash, uuid_equal, free_data, NULL); if (!uuid_table){ cl_log(LOG_ERR, "ghash table allocation error"); return HA_FAIL; } name_table = g_hash_table_new_full(g_str_hash, g_str_equal, free_data, NULL); if (!name_table){ cl_log(LOG_ERR, "ghash table allocation error"); return HA_FAIL; } return HA_OK; } static gboolean always_true(gpointer key, gpointer value, gpointer userdata) { return 1; } static void remove_all(void) { g_hash_table_foreach_remove(name_table, always_true, NULL); g_hash_table_foreach_remove(uuid_table, always_true, NULL); } void cleanuptable(void){ g_hash_table_destroy(name_table); name_table = NULL; g_hash_table_destroy(uuid_table); uuid_table = NULL; } const char* uuid2nodename(cl_uuid_t* uuid) { struct node_info* hip; hip = g_hash_table_lookup(uuid_table, uuid); if (hip){ return hip->nodename; } else{ return NULL; } } int nodename2uuid(const char* nodename, cl_uuid_t* id) { struct node_info* hip; if (nodename == NULL){ cl_log(LOG_ERR, "nodename2uuid:" "nodename is NULL "); return HA_FAIL; } cl_uuid_clear(id); hip = g_hash_table_lookup(name_table, nodename); if (!hip){ return HA_FAIL; } cl_uuid_copy(id, &hip->uuid); return HA_OK; } static int gen_uuid_from_name(const char* nodename, cl_uuid_t* uu) { int seed; int value; int loops[]={8,4,4, 4, 12}; char buf[UU_UNPARSE_SIZEOF]; char *p = buf; int i; int j; seed = string_hash(nodename); cl_log(LOG_INFO, "seed is %d", seed); srand(seed); for(i = 0; i < 5; i++){ for (j = 0; j < loops[i]; j++){ value = rand(); p +=sprintf(p, "%01x", value%16); } if (i != 4){ p += sprintf(p,"-"); } } if (cl_uuid_parse(buf, uu) < 0){ cl_log(LOG_INFO, "cl_uuid_parse failed"); return HA_FAIL; } return HA_OK; } #ifndef HB_UUID_FILE #define HB_UUID_FILE HA_VARLIBHBDIR "/hb_uuid" #endif int GetUUID(struct sys_config* cfg, const char* nodename, cl_uuid_t* uuid) { int fd; int flags = 0; int uuid_len = sizeof(uuid->uuid); if (cfg->uuidfromname){ return gen_uuid_from_name(nodename, uuid); } if ((fd = open(HB_UUID_FILE, O_RDONLY)) > 0 && read(fd, uuid->uuid, uuid_len) == uuid_len) { close(fd); return HA_OK; } cl_log(LOG_INFO, "No uuid found for current node" " - generating a new uuid."); flags = O_CREAT; if ((fd = open(HB_UUID_FILE, O_WRONLY|O_SYNC|flags, 0644)) < 0) { return HA_FAIL; } cl_uuid_generate(uuid); if (write(fd, uuid->uuid, uuid_len) != uuid_len) { close(fd); return HA_FAIL; 
} /* * Some UNIXes don't implement O_SYNC. * So we do an fsync here for good measure. It can't hurt ;-) */ if (fsync(fd) < 0) { cl_perror("fsync failure on " HB_UUID_FILE); return HA_FAIL; } if (close(fd) < 0) { cl_perror("close failure on " HB_UUID_FILE); return HA_FAIL; } return HA_OK; } /* * Functions for writing out our current node/uuid configuration to a file * as nodes are added/deleted to the configuration and for reading it back * in at startup. */ static int node_uuid_file_out(FILE *f, const char * nodename, const cl_uuid_t * uu , int weight, const char* site) { char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_unparse(uu, uuid_str); if (fprintf(f, "%s\t%s\t%d\t%s\n", nodename, uuid_str, weight, site?site:"") > sizeof(uuid_str)) { return HA_OK; } return HA_FAIL; } static int /* Returns -, 0 + *; 0 = EOF, + = OK, - = ERROR */ node_uuid_file_in(FILE *f, char* nodename, cl_uuid_t * uu, int* weight, char* site) { char linebuf[MAXLINE]; char * name_end; char * uuid_end; char * weight_end; int len; int hlen; if (fgets(linebuf, MAXLINE, f) == NULL) { if (feof(f)) { return 0; } cl_perror("Cannot read line from node/uuid file"); return -1; } len = strlen(linebuf); if (len < UU_UNPARSE_SIZEOF+2) { cl_log(LOG_ERR, "Malformed (short) node/uuid line [%s] (1)" , linebuf); return -1; } len -=1; /* fgets leaves '\n' on end of line */ if (linebuf[len] != '\n') { cl_log(LOG_ERR, "Malformed (long) node/uuid line [%s] (2)" , linebuf); return -1; } linebuf[len] = EOS; name_end = strchr(linebuf, '\t'); if (name_end == NULL || (hlen=(name_end - linebuf)) > (HOSTLENG-1) || hlen < 1){ cl_log(LOG_ERR, "Malformed node/uuid line [%s] (3)", linebuf); return -1; } *name_end = EOS; strncpy(nodename, linebuf, HOSTLENG); uuid_end = strchr(name_end+1, '\t'); if (uuid_end != NULL) { *uuid_end = EOS; } if (cl_uuid_parse(name_end+1, uu) < 0) { cl_log(LOG_ERR, "Malformed uuid in line [%s] (5)", linebuf); return -1; } if (uuid_end == NULL) { /* old format, no weight and site columns */ *weight = 100; *site = 0; return 1; } weight_end = strchr(uuid_end+1, '\t'); if (weight_end != NULL) { *weight_end = EOS; } *weight = atoi(uuid_end+1); if (weight_end == NULL) { /* no site columns */ *site = 0; return 1; } strncpy(site, weight_end+1, HOSTLENG); return 1; } static int write_node_uuid_file(struct sys_config * cfg) { int j; const char * tmpname = HOSTUUIDCACHEFILETMP; const char * finalname = HOSTUUIDCACHEFILE; FILE * f; if (!nodecache_read_yet && cfg->rtjoinconfig != HB_JOIN_NONE) { read_node_uuid_file(cfg); } (void)unlink(tmpname); if ((f=fopen(tmpname, "w")) == NULL) { cl_perror("%s: Cannot fopen %s for writing" , __FUNCTION__, tmpname); return HA_FAIL; } for (j=0; j < cfg->nodecount; ++j) { if (cfg->nodes[j].nodetype != NORMALNODE_I) { continue; } if (node_uuid_file_out(f, cfg->nodes[j].nodename , &cfg->nodes[j].uuid, cfg->nodes[j].weight , cfg->nodes[j].site) != HA_OK) { fclose(f); unlink(tmpname); return HA_FAIL; } } if (fflush(f) < 0) { cl_perror("fflush error on %s", tmpname); fclose(f); unlink(tmpname); return HA_FAIL; } if (fsync(fileno(f)) < 0) { cl_perror("fsync error on %s", tmpname); fclose(f); unlink(tmpname); return HA_FAIL; } if (fclose(f) < 0) { cl_perror("fclose error on %s", tmpname); unlink(tmpname); return HA_FAIL; } if (rename(tmpname, finalname) < 0) { cl_perror("Cannot rename %s to %s [errno %d]" , tmpname, finalname, errno); unlink(tmpname); return HA_FAIL; } return HA_OK; } static int read_node_uuid_file(struct sys_config * cfg) { FILE * f; char host[HOSTLENG]; cl_uuid_t uu; int weight; char 
site[HOSTLENG]; int rc; const char * uuidcachename = HOSTUUIDCACHEFILE; gboolean outofsync = FALSE; nodecache_read_yet = TRUE; if (!cl_file_exists(uuidcachename)){ return HA_OK; } if ((f=fopen(uuidcachename, "r")) == NULL) { cl_perror("%s: Cannot fopen %s for reading" , __FUNCTION__, uuidcachename); return HA_FAIL; } while ((rc=node_uuid_file_in(f, host, &uu, &weight, site)) > 0) { struct node_info * thisnode = lookup_tables(host, &uu); cl_uuid_t curuuid; if (thisnode == NULL) { /* auto-added node */ add_node(host, NORMALNODE_I); set_node_weight(host, weight); set_node_site(host, site); update_tables(host, &uu); continue; } nodename2uuid(host, &curuuid); if (cl_uuid_compare(&uu, &curuuid) != 0) { if (!cl_uuid_is_null(&uu)) { update_tables(host, &uu); outofsync=TRUE; } } thisnode->weight = weight; strncpy(thisnode->site, site, sizeof(thisnode->site)); } fclose(f); /* * If outofsync is TRUE, then we need to write out a new * uuid cache file. */ if (outofsync) { write_node_uuid_file(cfg); } if (ANYDEBUG) { uuidtable_display(); } return rc < 0 ? HA_FAIL: HA_OK; } int write_delnode_file(struct sys_config* cfg) { const char * tmpname = DELHOSTCACHEFILETMP; const char * finalname = DELHOSTCACHEFILE; FILE * f; GList* list = NULL; const struct node_info* hip; if (!delcache_read_yet && cfg->rtjoinconfig != HB_JOIN_NONE) { read_delnode_file(cfg); } (void)unlink(tmpname); if ((f=fopen(tmpname, "w")) == NULL) { cl_perror("%s: Cannot fopen %s for writing", __FUNCTION__, tmpname); return HA_FAIL; } list = del_node_list; while(list != NULL){ hip = (const struct node_info*) list->data; if (hip == NULL){ break; /*list empty*/ } if (node_uuid_file_out(f, hip->nodename, &hip->uuid, hip->weight, hip->site) != HA_OK) { fclose(f); unlink(tmpname); return HA_FAIL; } list = g_list_next(list); } if (fflush(f) < 0) { cl_perror("fflush error on %s", tmpname); fclose(f); unlink(tmpname); return HA_FAIL; } if (fsync(fileno(f)) < 0) { cl_perror("fsync error on %s", tmpname); fclose(f); unlink(tmpname); return HA_FAIL; } if (fclose(f) < 0) { cl_perror("fclose error on %s", tmpname); unlink(tmpname); return HA_FAIL; } if (rename(tmpname, finalname) < 0) { cl_perror("Cannot rename %s to %s", tmpname, finalname); unlink(tmpname); return HA_FAIL; } return HA_OK; } static int read_delnode_file(struct sys_config* cfg) { FILE * f; char host[HOSTLENG]; cl_uuid_t uu; int weight; char site[HOSTLENG]; int rc; const char * filename = DELHOSTCACHEFILE; struct node_info thisnode; delcache_read_yet = TRUE; if (!cl_file_exists(filename)){ return HA_OK; } if ((f=fopen(filename, "r")) == NULL) { cl_perror("%s: Cannot fopen %s for reading" , __FUNCTION__, filename); return HA_FAIL; } memset(site, 0, sizeof(site)); while ((rc=node_uuid_file_in(f, host, &uu, &weight, site)) > 0) { strncpy(thisnode.nodename, host, HOSTLENG); cl_uuid_copy(&thisnode.uuid, &uu); thisnode.weight = weight; strncpy(thisnode.site, site, HOSTLENG); remove_node(thisnode.nodename, TRUE); } fclose(f); return rc < 0 ? 
HA_FAIL: HA_OK; } int write_cache_file(struct sys_config* cfg) { if (DoManageResources){ return HA_OK; } if (write_node_uuid_file(cfg) != HA_OK){ return HA_FAIL; } return HA_OK; } int read_cache_file(struct sys_config* cfg) { if (DoManageResources || cfg->rtjoinconfig == HB_JOIN_NONE){ return HA_OK; } if (read_node_uuid_file(cfg) != HA_OK){ return HA_FAIL; } return read_delnode_file(cfg); } Heartbeat-3-0-7e3a82377fa8/heartbeat/heartbeat.c0000644000000000000000000047676611576626513021273 0ustar00usergroup00000000000000/* * heartbeat: Linux-HA heartbeat code * * Copyright (C) 1999-2002 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * * The basic facilities for heartbeats and intracluster communication * are contained within. * * There is a master configuration file which we open to tell us * what to do. * * It has lines like this in it: * * serial /dev/cua0, /dev/cua1 * udp eth0 * * node amykathy, kathyamy * node dralan * keepalive 2 * deadtime 10 * hopfudge 2 * baud 19200 * udpport 694 * * "Serial" lines tell us about our heartbeat configuration. * If there is more than one serial port configured, we are in a "ring" * configuration, every message not originated on our node is echoed * to the other serial port(s) * * "Node" lines tell us about the cluster configuration. * We had better find our uname -n nodename here, or we won't start up. * * We complain if we find extra nodes in the stream that aren't * in the master configuration file. * * keepalive lines specify the keepalive interval * deadtime lines specify how long we wait before declaring * a node dead * hopfudge says how much larger than #nodes we allow hopcount * to grow before dropping the message * * I need to separate things into a "global" configuration file, * and a "local" configuration file, so I can double check * the global against the cluster when changing configurations. * Things like serial port assignments may be node-specific... * * This has kind of happened over time. Haresources and authkeys are * decidely global, whereas ha.cf has remained more local. * */ /* * Here's our process structure: * * * Master Status process - manages protocol and controls everything. * * hb channel read processes - each reads a hb channel, and * copies messages to the master status process. The tty * version of this cross-echos to the other ttys * in the ring (ring passthrough) * * hb channel write processes - one per hb channel, each reads * its own IPC channel and send the result to its medium * * The result of all this hoorah is that we have the following procs: * * One Master Control process * One FIFO reader process * "n" hb channel read processes * "n" hb channel write processes * * For the usual 2 ttys in a ring configuration, this is 6 processes * * For a system using only UDP for heartbeats this is 4 processes. 
* * For a system using 2 ttys and UDP, this is 8 processes. * * If every second, each node writes out 150 chars of status, * and we have 8 nodes, and the data rate would be about 1200 chars/sec. * This would require about 12000 bps. Better run faster than that. * * for such a cluster... With good UARTs and CTS/RTS, and good cables, * you should be able to. Maybe 56K would be a good choice... * * ****** Wish List: ********************************************************** * [not necessarily in priority order] * * Heartbeat API conversion to unix domain sockets: * We ought to convert to UNIX domain sockets because we get * better verification of the user, and we would get notified * when they die. This should use the now-written IPC libary. * (NOTE: this is currently in progress) * * Fuzzy heartbeat timing * Right now, the code works in such a way that it * systematically gets everyone heartbeating on the same time * intervals, so that they happen at precisely the same time. * This isn't too good for non-switched ethernet (CSMA/CD) * environments, where it generates gobs of collisions, packet * losses and retransmissions. It's especially bad if all the * clocks are in sync, which of course, every good system * administrator strives to do ;-) This description is due to * Alan Cox who pointed out section 3.3 "Timers" in RFC 1058, * which it states: * * "It is undesirable for the update messages to become * synchronized, since it can lead to unnecessary collisions * on broadcast networks." * * In particular, on Linux, if you set your all the clocks in * your cluster via NTP (as you should), and heartbeat every * second, then all the machines in the world will all try and * heartbeat at precisely the same time, because alarm(2) wakes * up on even second boundaries, which combined with the use of * NTP (recommended), will systematically cause LOTS of * unnecessary collisions. * * Martin Lichtin suggests: * Could you skew the heartbeats, based on the interface IP#? * * AlanR replied: * * I thought that perhaps I could set each machine to a * different interval in a +- 0.25 second range. For example, * one machine might heartbeat at 0.75 second interval, and * another at a 1.25 second interval. The tendency would be * then for the timers to wander across second boundaries, * and even if they started out in sync, they would be unlikely * to stay in sync. [but in retrospect, I'm not 100% sure * about this approach] * * This would keep me from having to generate a random number * for every single heartbeat as the RFC suggests. * * Of course, there are only 100 ticks/second, so if the clocks * get closely synchronized, you can only have 100 different * times to heartbeat. I suppose if you have something like * 50-100 nodes, you ought to use a switch, and not a hub, and * this would likely mitigate the problems. * * Nearest Neighbor heartbeating (? maybe?) * This is a candidate to replace the current policy of * full-ring heartbeats In this policy, each machine only * heartbeats to it's nearest neighbors. The nearest neighbors * only forward on status CHANGES to their neighbors. * This means that the total ring traffic in the non-error * case is reduced to the same as a 3-node cluster. * This is a huge improvement. It probably means that * 19200 would be fast enough for almost any size * network. Non-heartbeat admin traffic would need to be * forwarded to all members of the ring as it was before. 
* * IrDA heartbeats * This is a near-exact replacement for ethernet with lower * bandwidth, low costs and fewer points of failure. * The role of an ethernet hub is replaced by a mirror, which * is less likely to fail. But if it does, it might mean * seven years of bad luck :-) On the other hand, the "mirror" * could be a white painted board ;-) * * The idea would be to make a bracket with the IrDA * transceivers on them all facing the same way, then mount * the bracket with the transceivers all facing the mirror. * Then each of the transceivers would be able to "see" each * other. * * I do kind of wonder if the kernel's IrDA stacks would be up * to so much contention as it seems unlikely that they'd ever * been tested in such a stressful environment. But, it seems * really cool to me, and it only takes one port per machine * rather than two like we need for serial rings. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _POSIX_MEMLOCK # include #endif #ifdef _POSIX_PRIORITY_SCHEDULING # include #endif #if HAVE_LINUX_WATCHDOG_H # include # include # include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "clplumbing/setproctitle.h" #include #define OPTARGS "dDkMrRsvWlC:V" #define ONEDAY (24*60*60) /* Seconds in a day */ #define REAPER_SIG 0x0001UL #define TERM_SIG 0x0002UL #define DEBUG_USR1_SIG 0x0004UL #define DEBUG_USR2_SIG 0x0008UL #define PARENT_DEBUG_USR1_SIG 0x0010UL #define PARENT_DEBUG_USR2_SIG 0x0020UL #define REREAD_CONFIG_SIG 0x0040UL #define FALSE_ALARM_SIG 0x0080UL #define MAX_MISSING_PKTS 20 #define ALWAYSRESTART_ON_SPLITBRAIN 1 #define FLOWCONTROL_LIMIT ((seqno_t)(MAXMSGHIST/2)) static char hbname []= "heartbeat"; const char * cmdname = hbname; char * localnodename = NULL; static int Argc = -1; extern int optind; void (*localdie)(void); extern PILPluginUniv* PluginLoadingSystem; struct hb_media* sysmedia[MAXMEDIA]; struct msg_xmit_hist msghist; extern struct hb_media_fns** hbmedia_types; extern int num_hb_media_types; int nummedia = 0; struct sys_config config_init_value; struct sys_config * config = &config_init_value; struct node_info * curnode = NULL; pid_t processes[MAXPROCS]; volatile struct pstat_shm * procinfo = NULL; volatile struct process_info * curproc = NULL; struct TestParms * TestOpts; extern int debug_level; gboolean verbose = FALSE; int timebasedgenno = FALSE; int parse_only = FALSE; static gboolean killrunninghb = FALSE; static gboolean rpt_hb_status = FALSE; int RestartRequested = FALSE; int hb_realtime_prio = -1; int UseApphbd = FALSE; static gboolean RegisteredWithApphbd = FALSE; char * watchdogdev = NULL; static int watchdogfd = -1; static int watchdog_timeout_ms = 0L; gboolean shutdown_in_progress = FALSE; gboolean shutting_down_comm=FALSE; int startup_complete = FALSE; int WeAreRestarting = FALSE; enum comm_state heartbeat_comm_state = COMM_STARTING; static gboolean get_reqnodes_reply = FALSE; static int CoreProcessCount = 0; static int managed_child_count= 0; int UseOurOwnPoll = FALSE; static longclock_t NextPoll = 0UL; static int ClockJustJumped = FALSE; longclock_t local_takeover_time = 0L; static int deadtime_tmpadd_count = 0; gboolean 
enable_flow_control = TRUE; static int send_cluster_msg_level = 0; static int live_node_count = 1; /* namely us... */ static void print_a_child_client(gpointer childentry, gpointer unused); static seqno_t timer_lowseq = 0; static gboolean init_deadtime_passed = FALSE; static int PrintDefaults = FALSE; static int WikiOutput = FALSE; GTRIGSource* write_hostcachefile = NULL; GTRIGSource* write_delcachefile = NULL; extern GSList* del_node_list; #undef DO_AUDITXMITHIST #ifdef DO_AUDITXMITHIST # define AUDITXMITHIST audit_xmit_hist() void audit_xmit_hist(void); #else # define AUDITXMITHIST /* Nothing */ #endif static void restart_heartbeat(void); static void usage(void); static void init_procinfo(void); static int initialize_heartbeat(void); static const char* core_proc_name(enum process_type t); static void CoreProcessRegistered(ProcTrack* p); static void CoreProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged); static const char* CoreProcessName(ProcTrack* p); void hb_kill_managed_children(int nsig); void hb_kill_rsc_mgmt_children(int nsig); void hb_kill_core_children(int nsig); gboolean hb_mcp_final_shutdown(gpointer p); static void ManagedChildRegistered(ProcTrack* p); static void ManagedChildDied(ProcTrack* p, int status , int signo, int exitcode, int waslogged); static const char* ManagedChildName(ProcTrack* p); static void check_for_timeouts(void); static void check_comm_isup(void); static int send_local_status(void); static int set_local_status(const char * status); static void check_rexmit_reqs(void); static void mark_node_dead(struct node_info* hip); static void change_link_status(struct node_info* hip, struct link *lnk , const char * new); static void comm_now_up(void); static void make_daemon(void); static void hb_del_ipcmsg(IPC_Message* m); static IPC_Message* hb_new_ipcmsg(const void* data, int len, IPC_Channel* ch , int refcnt); static void send_to_all_media(const char * smsg, int len); static int should_drop_message(struct node_info* node , const struct ha_msg* msg, const char *iface, int *); static int is_lost_packet(struct node_info * thisnode, seqno_t seq); static void cause_shutdown_restart(void); static gboolean CauseShutdownRestart(gpointer p); static void add2_xmit_hist (struct msg_xmit_hist * hist , struct ha_msg* msg, seqno_t seq); static void init_xmit_hist (struct msg_xmit_hist * hist); static void process_rexmit(struct msg_xmit_hist * hist , struct ha_msg* msg); static void update_ackseq(seqno_t new_ackseq) ; static void process_clustermsg(struct ha_msg* msg, struct link* lnk); extern void process_registerevent(IPC_Channel* chan, gpointer user_data); static void nak_rexmit(struct msg_xmit_hist * hist, seqno_t seqno, const char*, const char * reason); static int IncrGeneration(seqno_t * generation); static int GetTimeBasedGeneration(seqno_t * generation); static int process_outbound_packet(struct msg_xmit_hist* hist , struct ha_msg * msg); static void start_a_child_client(gpointer childentry, gpointer dummy); static gboolean shutdown_last_client_child(int nsig); static void LookForClockJumps(void); static void get_localnodeinfo(void); static gboolean EmergencyShutdown(gpointer p); static void hb_check_mcp_alive(void); static gboolean hb_reregister_with_apphbd(gpointer dummy); static void hb_add_deadtime(int increment); static gboolean hb_pop_deadtime(gpointer p); static void dump_missing_pkts_info(void); static int write_hostcachedata(gpointer ginfo); static int write_delcachedata(gpointer ginfo); static GHashTable* message_callbacks = NULL; 
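/*
 * A minimal sketch of the dispatch pattern implied by the declarations
 * around this point: message_callbacks appears to be a hash table keyed
 * by the message type string, mapping each type to a handler such as
 * HBDoMsg_T_STATUS or HBDoMsg_T_REXMIT below, with HBDoMsgCallback()
 * looking the incoming type up and calling the handler if one is
 * registered.  The #if 0 block only illustrates that string-keyed
 * dispatch with plain GLib calls; example_register(), example_dispatch()
 * and example_handler_t are hypothetical names, not heartbeat APIs.
 */
#if 0
typedef void (*example_handler_t)(const char *type, struct ha_msg *msg);

static GHashTable *example_handlers = NULL;

/* Remember which handler processes a given message type string. */
static void
example_register(const char *type, example_handler_t handler)
{
	if (example_handlers == NULL) {
		/* Keys are constant strings, so no destroy notifiers are needed. */
		example_handlers = g_hash_table_new(g_str_hash, g_str_equal);
	}
	g_hash_table_insert(example_handlers, (gpointer)type, (gpointer)handler);
}

/* Look the type up and run its handler; FALSE means "no handler known". */
static gboolean
example_dispatch(const char *type, struct ha_msg *msg)
{
	example_handler_t handler;

	if (example_handlers == NULL) {
		return FALSE;
	}
	handler = (example_handler_t)g_hash_table_lookup(example_handlers, type);
	if (handler == NULL) {
		return FALSE;
	}
	handler(type, msg);
	return TRUE;
}
#endif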
static gboolean HBDoMsgCallback(const char * type, struct node_info* fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg); static void HBDoMsg_T_REXMIT(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg); static void HBDoMsg_T_STATUS(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg); static void HBDoMsg_T_QCSTATUS(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg); static void (*comm_up_callback)(void) = NULL; static gboolean set_init_deadtime_passed_flag(gpointer p); /* * Glib Mainloop Source functions... */ static gboolean polled_input_prepare(GSource* source, gint* timeout); static gboolean polled_input_check(GSource* source); static gboolean polled_input_dispatch(GSource* source, GSourceFunc callback, gpointer user_data); static gboolean APIregistration_dispatch(IPC_Channel* chan, gpointer user_data); static gboolean FIFO_child_msg_dispatch(IPC_Channel* chan, gpointer udata); static gboolean read_child_dispatch(IPC_Channel* chan, gpointer user_data); static gboolean hb_update_cpu_limit(gpointer p); static int SetupFifoChild(void); /* * The biggies */ static void read_child(struct hb_media* mp, int medianum); static void write_child(struct hb_media* mp, int medianum); static void fifo_child(IPC_Channel* chan); /* Reads from FIFO */ /* The REAL biggie ;-) */ static void master_control_process(void); extern void dellist_destroy(void); extern int dellist_add(const char* nodename); #define CHECK_HA_RESOURCES() (DoManageResources \ ? (parse_ha_resources(RESOURCE_CFG) == HA_OK) : TRUE) /* * Structures initialized to function pointer values... */ ProcTrack_ops ManagedChildTrackOps = { ManagedChildDied, ManagedChildRegistered, ManagedChildName }; static ProcTrack_ops CoreProcessTrackOps = { CoreProcessDied, CoreProcessRegistered, CoreProcessName }; static GSourceFuncs polled_input_SourceFuncs = { polled_input_prepare, polled_input_check, polled_input_dispatch, NULL, }; static void init_procinfo() { int ipcid; struct pstat_shm * shm; if ((ipcid = shmget(IPC_PRIVATE, sizeof(*procinfo), 0600)) < 0) { cl_perror("Cannot shmget for process status"); return; } /* * Casting this address into a long stinks, but there's no other * way because of the way the shared memory API is designed. */ if (((long)(shm = shmat(ipcid, NULL, 0))) == -1L) { cl_perror("Cannot shmat for process status"); shm = NULL; return; } if (shm) { procinfo = shm; memset(shm, 0, sizeof(*procinfo)); } /* * Go ahead and "remove" our shared memory now... * * This is cool because the manual says: * * IPC_RMID is used to mark the segment as destroyed. It * will actually be destroyed after the last detach. * * Not all the Shared memory implementations have as clear a * description of this fact as Linux, but they all work this way * anyway (for all we've tested). */ if (shmctl(ipcid, IPC_RMID, NULL) < 0) { cl_perror("Cannot IPC_RMID proc status shared memory id"); } /* THIS IS RESOURCE WORK! 
FIXME */ procinfo->giveup_resources = 1; procinfo->i_hold_resources = HB_NO_RSC; } void hb_versioninfo(void) { cl_log(LOG_INFO, "%s: version %s", cmdname, VERSION); } /* * Look up the interface in the node struct, * returning the link info structure */ struct link * lookup_iface(struct node_info * hip, const char *iface) { struct link * lnk; int j; for (j=0; (lnk = &hip->links[j], lnk->name); ++j) { if (strcmp(lnk->name, iface) == 0) { return lnk; } } return NULL; } /* * Look up the node in the configuration, returning the node * info structure */ struct node_info * lookup_node(const char * h) { int j; char *shost; if ( (shost = strdup(h)) == NULL) { return NULL; } g_strdown(shost); for (j=0; j < config->nodecount; ++j) { if (strcmp(shost, config->nodes[j].nodename) == 0) break; } free(shost); if (j == config->nodecount) { return NULL; } else { return (config->nodes+j); } } static int write_hostcachedata(gpointer notused) { hb_setup_child(); return write_cache_file(config); } static int write_delcachedata(gpointer notused) { hb_setup_child(); return write_delnode_file(config); } void hb_setup_child(void) { close(watchdogfd); cl_make_normaltime(); cl_cpu_limit_disable(); } static void change_logfile_ownership(void) { struct passwd * entry; const char* apiuser = HA_CCMUSER; entry = getpwnam(apiuser); if (entry == NULL){ cl_log(LOG_ERR, "%s: entry for user %s not found", __FUNCTION__, apiuser); return; } if (config->use_logfile){ if (chown(config->logfile, entry->pw_uid, entry->pw_gid) < 0) { cl_log(LOG_WARNING, "%s: failed to chown logfile: %s", __FUNCTION__, strerror(errno)); } } if (config->use_dbgfile){ if (chown(config->dbgfile, entry->pw_uid, entry->pw_gid) < 0) { cl_log(LOG_WARNING, "%s: failed to chown dbgfile: %s", __FUNCTION__, strerror(errno)); } } } /* * We can call this function once when we first start up and we can * also be called later to restart the FIFO process if it dies. * For R1-style clusters, the FIFO process is necessary for graceful * shutdown and restart. */ static int SetupFifoChild(void) { static IPC_Channel* fifochildipc[2] = {NULL, NULL}; static GCHSource* FifoChildSource = NULL; static int fifoproc = -1; int pid; if (FifoChildSource != NULL) { /* Not sure if this is really right... */ G_main_del_IPC_Channel(FifoChildSource); fifochildipc[P_READFD] = NULL; } if (fifochildipc[P_WRITEFD] != NULL) { IPC_Channel* ch = fifochildipc[P_WRITEFD]; ch->ops->destroy(ch); fifochildipc[P_WRITEFD] = NULL; } if (ipc_channel_pair(fifochildipc) != IPC_OK) { cl_perror("cannot create FIFO ipc channel"); return HA_FAIL; } /* Encourage better real-time behavior */ fifochildipc[P_READFD]->ops->set_recv_qlen(fifochildipc[P_READFD], 0); /* Fork FIFO process... 
*/ if (fifoproc < 0) { fifoproc = procinfo->nprocs; } procinfo->nprocs++; switch ((pid=fork())) { case -1: cl_perror("Can't fork FIFO process!"); return HA_FAIL; break; case 0: /* Child */ hb_close_watchdog(); curproc = &procinfo->info[fifoproc]; cl_msg_setstats(&curproc->msgstats); curproc->type = PROC_HBFIFO; while (curproc->pid != getpid()) { sleep(1); } fifo_child(fifochildipc[P_WRITEFD]); cl_perror("FIFO child process exiting!"); cleanexit(1); default: fifochildipc[P_READFD]->farside_pid = pid; } NewTrackedProc(pid, 0, PT_LOGVERBOSE, GINT_TO_POINTER(fifoproc) , &CoreProcessTrackOps); if (ANYDEBUG) { cl_log(LOG_DEBUG, "FIFO process pid: %d", pid); } /* We only read from this source, we never write to it */ FifoChildSource = G_main_add_IPC_Channel(PRI_FIFOMSG , fifochildipc[P_READFD] , FALSE, FIFO_child_msg_dispatch, NULL, NULL); G_main_setmaxdispatchdelay((GSource*)FifoChildSource, config->heartbeat_ms); G_main_setmaxdispatchtime((GSource*)FifoChildSource, 50); G_main_setdescription((GSource*)FifoChildSource, "FIFO"); return HA_OK; } static void shutdown_io_childpair(int medianum) { struct hb_media* mp = sysmedia[medianum]; if (mp->wchan[P_WRITEFD] && mp->wchan[P_WRITEFD]->farside_pid) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "Killing pid %d" , mp->wchan[P_WRITEFD]->farside_pid); } CL_KILL(mp->wchan[P_WRITEFD]->farside_pid, SIGKILL); } if (mp->rchan[P_WRITEFD] && mp->rchan[P_WRITEFD]->farside_pid) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "Killing pid %d" , mp->rchan[P_WRITEFD]->farside_pid); } CL_KILL(mp->rchan[P_WRITEFD]->farside_pid, SIGKILL); } if (mp->readsource) { if (ANYDEBUG && mp->rchan[P_WRITEFD]) { cl_log(LOG_DEBUG, "%s: Closing socket %d" , __FUNCTION__ , mp->rchan[P_WRITEFD]->ops->get_recv_select_fd(mp->rchan[P_WRITEFD])); } G_main_del_IPC_Channel(mp->readsource); mp->readsource = NULL; } if (mp->writesource) { if (ANYDEBUG && mp->wchan[P_WRITEFD]) { cl_log(LOG_DEBUG, "%s: Closing socket %d" , __FUNCTION__ , mp->wchan[P_WRITEFD]->ops->get_recv_select_fd(mp->wchan[P_WRITEFD])); } G_main_del_IPC_Channel(mp->writesource); mp->writesource = NULL; mp->vf->close(mp); } mp->wchan[0] = mp->rchan[0] = mp->wchan[1] = mp->rchan[1] = NULL; } static int make_io_childpair(int medianum, int ourproc) { struct hb_media* mp = sysmedia[medianum]; int pid; GCHSource* s; int rc = HA_OK; if (medianum < 0 || medianum >= MAXMEDIA) { cl_log(LOG_ERR, "%s : media index is %d" , __FUNCTION__, medianum); return HA_FAIL; } if (mp->recovery_state != MEDIA_OK) { cl_log(LOG_ERR, "Attempt to start read/write children while in recovery"); } shutdown_io_childpair(medianum); /* Just in case... 
*/ if (ANYDEBUG) { cl_log(LOG_DEBUG, "opening %s %s (%s)", mp->type , mp->name, mp->description); } if ((mp->vf->mopen)(mp) != HA_OK){ cl_log(LOG_ERR, "%s: cannot open %s %s" , __FUNCTION__, mp->type, mp->name); return HA_FAIL; } if (ipc_channel_pair(mp->wchan) != IPC_OK) { cl_perror("%s: cannot create hb write channel IPC", __FUNCTION__); goto failexit; } if (ipc_channel_pair(mp->rchan) != IPC_OK) { cl_perror("%s: cannot create hb read channel IPC", __FUNCTION__); goto failexit; } mp->ourproc = ourproc; switch ((pid=fork())) { case -1: cl_perror("Can't fork write proc."); goto failexit; break; case 0: /* Child */ hb_close_watchdog(); curproc = &procinfo->info[ourproc]; cl_msg_setstats(&curproc->msgstats); curproc->type = PROC_HBWRITE; while (curproc->pid != getpid()) { sleep(1); } write_child(mp, medianum); cl_perror("write process exiting"); cleanexit(1); default: mp->wchan[P_WRITEFD]->farside_pid = pid; } NewTrackedProc(pid, 0, PT_LOGVERBOSE , GINT_TO_POINTER(ourproc) , &CoreProcessTrackOps); if (ANYDEBUG) { cl_log(LOG_DEBUG, "write process pid: %d", pid); } /* ourproc = procinfo->nprocs; */ ourproc++; switch ((pid=fork())) { case -1: cl_perror("Can't fork read process"); goto failexit; break; case 0: /* Child */ hb_close_watchdog(); curproc = &procinfo->info[ourproc]; cl_msg_setstats(&curproc->msgstats); curproc->type = PROC_HBREAD; while (curproc->pid != getpid()) { sleep(1); } read_child(mp, medianum); cl_perror("read_child() exiting"); cleanexit(1); default: mp->rchan[P_WRITEFD]->farside_pid = pid; } if (ANYDEBUG) { cl_log(LOG_DEBUG, "read child process pid: %d", pid); } NewTrackedProc(pid, 0, PT_LOGVERBOSE, GINT_TO_POINTER(ourproc) , &CoreProcessTrackOps); if (mp->vf->close(mp) != HA_OK){ cl_log(LOG_ERR, "%s: cannot close %s %s" , __FUNCTION__, mp->type, mp->name); goto failexit; } /* * We cannot share a socket between the write and read * children, though it might sound like it would work ;-) */ if (ANYDEBUG) { cl_log(LOG_DEBUG, "%s: CREATED childpair wchan socket %d" , __FUNCTION__ , mp->wchan[P_WRITEFD]->ops -> get_recv_select_fd(mp->wchan[P_WRITEFD])); cl_log(LOG_DEBUG, "%s: CREATED childpair rchan socket %d" , __FUNCTION__ , mp->wchan[P_WRITEFD]->ops -> get_recv_select_fd(mp->rchan[P_WRITEFD])); } /* Connect up the write child IPC channel... */ s = G_main_add_IPC_Channel(PRI_SENDPKT , mp->wchan[P_WRITEFD], FALSE , NULL, sysmedia+medianum, NULL); G_main_setmaxdispatchdelay((GSource*)s, config->heartbeat_ms/4); G_main_setmaxdispatchtime((GSource*)s, 50); G_main_setdescription((GSource*)s, "write child"); /* Ensure that a hanging write process does not livelock * the MCP yet doesn't get kicked out */ mp->wchan[P_WRITEFD]->should_send_block = FALSE; mp->wchan[P_WRITEFD]->should_block_fail = FALSE; mp->writesource=s; /* Connect up the read child IPC channel... 
*/ s = G_main_add_IPC_Channel(PRI_READPKT , mp->rchan[P_WRITEFD], FALSE , read_child_dispatch, sysmedia+medianum, NULL); /* Encourage better real-time behavior */ mp->rchan[P_WRITEFD]->ops->set_recv_qlen(mp->rchan[P_WRITEFD], 0); G_main_setmaxdispatchdelay((GSource*)s, config->heartbeat_ms/4); G_main_setmaxdispatchtime((GSource*)s, 50); G_main_setdescription((GSource*)s, "read child"); mp->readsource=s; cleanandexit: if (mp->rchan[P_READFD]) { mp->rchan[P_READFD]->ops->destroy(mp->rchan[P_READFD]); mp->rchan[P_READFD]= NULL; } if (mp->wchan[P_READFD]) { mp->wchan[P_READFD]->ops->destroy(mp->wchan[P_READFD]); mp->wchan[P_READFD]= NULL; } return rc; failexit: shutdown_io_childpair(medianum); rc = HA_FAIL; goto cleanandexit; } /* * This routine starts everything up and kicks off the heartbeat * process. */ static int initialize_heartbeat() { /* * Things we have to do: * * Create all our pipes * Open all our heartbeat channels * fork all our children, and start the old ticker going... * * Everything is forked from the parent process. That's easier to * monitor, and easier to shut down. */ int j; struct stat buf; int ourproc = 0; int (*getgen)(seqno_t * generation) = IncrGeneration; localdie = NULL; change_logfile_ownership(); if (timebasedgenno) { getgen = GetTimeBasedGeneration; } if (getgen(&config->generation) != HA_OK) { cl_perror("Cannot get/increment generation number"); return HA_FAIL; } cl_log(LOG_INFO, "Heartbeat generation: %lu", config->generation); if(GetUUID(config, curnode->nodename, &config->uuid) != HA_OK){ cl_log(LOG_ERR, "getting uuid for the local node failed"); return HA_FAIL; } if (ANYDEBUG){ char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_unparse(&config->uuid, uuid_str); cl_log(LOG_DEBUG, "uuid is:%s", uuid_str); } add_uuidtable(&config->uuid, curnode); cl_uuid_copy(&curnode->uuid, &config->uuid); if (stat(FIFONAME, &buf) < 0 || !S_ISFIFO(buf.st_mode)) { cl_log(LOG_INFO, "Creating FIFO %s.", FIFONAME); unlink(FIFONAME); if (mkfifo(FIFONAME, FIFOMODE) < 0) { cl_perror("Cannot make fifo %s.", FIFONAME); return HA_FAIL; } }else{ chmod(FIFONAME, FIFOMODE); } if (stat(FIFONAME, &buf) < 0) { cl_log(LOG_ERR, "FIFO %s does not exist", FIFONAME); return HA_FAIL; }else if (!S_ISFIFO(buf.st_mode)) { cl_log(LOG_ERR, "%s is not a FIFO", FIFONAME); return HA_FAIL; } /* THIS IS RESOURCE WORK! FIXME */ /* Clean up tmp files from our resource scripts */ if (system("rm -fr " RSC_TMPDIR) != 0) { cl_log(LOG_INFO, "Removing %s failed, recreating.", RSC_TMPDIR); } /* Remake the temporary directory ... */ mkdir(RSC_TMPDIR , S_IRUSR|S_IWUSR|S_IXUSR | S_IRGRP|S_IWGRP|S_IXGRP | S_IROTH|S_IWOTH|S_IXOTH | S_ISVTX /* sticky bit */); PILSetDebugLevel(PluginLoadingSystem, NULL, NULL, debug_level); CoreProcessCount = 0; procinfo->nprocs = 0; ourproc = procinfo->nprocs; curproc = &procinfo->info[ourproc]; curproc->type = PROC_MST_CONTROL; cl_msg_setstats(&curproc->msgstats); NewTrackedProc(getpid(), 0, PT_LOGVERBOSE, GINT_TO_POINTER(ourproc) , &CoreProcessTrackOps); procinfo->nprocs++; curproc->pstat = RUNNING; /* We need to at least ignore SIGINTs early on */ hb_signal_set_common(NULL); /* Now the fun begins... 
*/ /* * Optimal starting order: * fifo_child(); * write_child(); * read_child(); * master_control_process(); * */ SetupFifoChild(); /* Start up all read/write children */ for (j=0; j < nummedia; ++j) { if (make_io_childpair(j, procinfo->nprocs) != HA_OK) { return HA_FAIL; } procinfo->nprocs += 2; } master_control_process(); /*NOTREACHED*/ cl_log(LOG_ERR, "master_control_process exiting?"); cleanexit(LSB_EXIT_GENERIC); /*NOTREACHED*/ return HA_FAIL; } /* Create a read child process (to read messages from hb medium) */ static void read_child(struct hb_media* mp, int medianum) { IPC_Channel* ourchan = mp->rchan[P_READFD]; int nullcount=0; const int maxnullcount=10000; if (hb_signal_set_read_child(NULL) < 0) { cl_log(LOG_ERR, "read_child(): hb_signal_set_read_child(): " "Soldiering on..."); } cl_make_realtime(-1 , (hb_realtime_prio > 1 ? hb_realtime_prio-1 : hb_realtime_prio) , 16, 64); set_proc_title("%s: read: %s %s", cmdname, mp->type, mp->name); cl_cdtocoredir(); cl_set_all_coredump_signal_handlers(); drop_privs(0, 0); /* Become nobody */ hb_signal_process_pending(); curproc->pstat = RUNNING; curproc->medianum = medianum; if (ANYDEBUG) { /* Limit ourselves to 10% of the CPU */ cl_cpu_limit_setpercent(10); } for (;;) { void *pkt; IPC_Message *imsg; int rc; int rc2; int pktlen; hb_signal_process_pending(); if ((pkt=mp->vf->read(mp, &pktlen)) == NULL) { ++nullcount; if (nullcount > maxnullcount) { cl_perror("%d NULL vf->read() returns in a" " row. Exiting." , maxnullcount); exit(10); } continue; } hb_signal_process_pending(); imsg = wirefmt2ipcmsg(pkt, pktlen, ourchan); if (NULL == imsg) { ++nullcount; if (nullcount > maxnullcount) { cl_perror("%d NULL wirefmt2ipcmsg() returns" " in a row. Exiting.", maxnullcount); exit(10); } }else{ nullcount = 0; /* Send frees "imsg" "at the right time" */ rc = ourchan->ops->send(ourchan, imsg); rc2 = ourchan->ops->waitout(ourchan); if (rc != IPC_OK || rc2 != IPC_OK) { cl_log(LOG_ERR, "read_child send: RCs: %d %d" , rc, rc2); } if (ourchan->ch_status != IPC_CONNECT) { cl_log(LOG_ERR , "read_child channel status: %d" " - returning.", ourchan->ch_status); return; } } cl_cpu_limit_update(); cl_realtime_malloc_check(); } } /* Create a write child process (to write messages to hb medium) */ static void write_child(struct hb_media* mp, int medianum) { IPC_Channel* ourchan = mp->wchan[P_READFD]; int failcount=0; int supp_flushedmsgs=0; if (hb_signal_set_write_child(NULL) < 0) { cl_perror("write_child(): hb_signal_set_write_child(): " "Soldiering on..."); } set_proc_title("%s: write: %s %s", cmdname, mp->type, mp->name); cl_make_realtime(-1 , hb_realtime_prio > 1 ? hb_realtime_prio-1 : hb_realtime_prio , 16, 64); cl_cdtocoredir(); cl_set_all_coredump_signal_handlers(); drop_privs(0, 0); /* Become nobody */ curproc->pstat = RUNNING; curproc->medianum = medianum; if (ANYDEBUG) { /* Limit ourselves to 40% of the CPU */ /* This seems like a lot, but pings are expensive :-( */ cl_cpu_limit_setpercent(40); } for (;;) { IPC_Message* ipcmsg = ipcmsgfromIPC(ourchan); int rc; int saveerrno; hb_signal_process_pending(); if (ipcmsg == NULL) { continue; } cl_cpu_limit_update(); setmsalarm(config->heartbeat_ms); errno = 0; rc = mp->vf->write(mp, ipcmsg->msg_body, ipcmsg->msg_len); saveerrno=errno; cancelmstimer(); hb_signal_process_pending(); if (rc != HA_OK) { if (saveerrno == EINTR) { int flushcount = 0; if (!mp->suppresserrs) { errno=saveerrno; cl_perror("Write timeout on %s %s." 
, mp->type, mp->name); } /* Throw away messages currently in our input queue */ while (ourchan->recv_queue->current_qlen > 0) { IPC_Message* fmsg; ++flushcount; cl_cpu_limit_update(); if (NULL == (fmsg = ipcmsgfromIPC(ourchan))) { break; } if(fmsg->msg_done) { fmsg->msg_done(ipcmsg); } } if (flushcount && !mp->suppresserrs) { cl_log(LOG_WARNING , "%d messages discarded due to write errors on %s %s" , flushcount, mp->type, mp->name); }else{ supp_flushedmsgs += flushcount; } }else{ if (!mp->suppresserrs) { cl_perror("%s: write failure on %s %s." , __FUNCTION__ , mp->type, mp->name); } } if (failcount == 10) { cl_log(LOG_WARNING , "Temporarily Suppressing write error messages"); cl_log(LOG_WARNING, "Is a cable unplugged on %s %s?" , mp->type, mp->name); mp->suppresserrs=TRUE; }else if (failcount >= 1000) { if (supp_flushedmsgs) { cl_log(LOG_WARNING, "%s: %d %s %s messages discarded while suppressed." , __FUNCTION__ , supp_flushedmsgs , mp->type, mp->name); supp_flushedmsgs=0; } supp_flushedmsgs=0; failcount=0; mp->suppresserrs=FALSE; switch (errno) { case EBADF: case ENODEV: cl_perror( "%s: Exiting due to persistent errors" , __FUNCTION__); cleanexit(LSB_EXIT_GENERIC); break; default: /* Keep trying */ break; } } failcount++; }else{ /* Write succeeded! */ failcount=0; mp->suppresserrs=FALSE; if (supp_flushedmsgs) { cl_log(LOG_WARNING, "%s: %d %s %s messages discarded while suppressed." , __FUNCTION__ , supp_flushedmsgs , mp->type, mp->name); supp_flushedmsgs=0; } } if(ipcmsg->msg_done) { ipcmsg->msg_done(ipcmsg); } hb_signal_process_pending(); cl_cpu_limit_update(); cl_realtime_malloc_check(); } } /* * Read FIFO stream messages and translate to IPC msgs * Maybe in the future after all is merged together, we'll just poll for * these every second or so. Once we only use them for messages from * shell scripts, that would be good enough. * But, for now, we'll create this extra process... */ static void fifo_child(IPC_Channel* chan) { int fiforfd; FILE * fifo; int flags; struct ha_msg * msg = NULL; if (hb_signal_set_fifo_child(NULL) < 0) { cl_perror("fifo_child(): hb_signal_set_fifo_child()" ": Soldiering on..."); } set_proc_title("%s: FIFO reader", cmdname); fiforfd = open(FIFONAME, O_RDONLY|O_NDELAY|O_NONBLOCK); if (fiforfd < 0) { cl_perror("FIFO open (O_RDONLY) failed."); exit(1); } open(FIFONAME, O_WRONLY); /* Keep reads from getting EOF */ flags = fcntl(fiforfd, F_GETFL); flags &= ~(O_NONBLOCK|O_NDELAY); fcntl(fiforfd, F_SETFL, flags); fifo = fdopen(fiforfd, "r"); if (fifo == NULL) { cl_perror("FIFO fdopen failed."); exit(1); } cl_make_realtime(-1 , (hb_realtime_prio > 1 ? hb_realtime_prio-1 : hb_realtime_prio) , 16, 8); cl_cdtocoredir(); cl_set_all_coredump_signal_handlers(); drop_privs(0, 0); /* Become nobody */ curproc->pstat = RUNNING; if (ANYDEBUG) { /* Limit ourselves to 10% of the CPU */ cl_cpu_limit_setpercent(10); } /* Make sure we check for death of parent every so often... 
*/ for (;;) { setmsalarm(1000L); msg = msgfromstream(fifo); setmsalarm(0L); hb_check_mcp_alive(); hb_signal_process_pending(); if (msg) { IPC_Message* m; if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "fifo_child message:"); cl_log_message(LOG_DEBUG, msg); } m = hamsg2ipcmsg(msg, chan); if (m) { /* Send frees "m" "at the right time" */ chan->ops->send(chan, m); hb_check_mcp_alive(); hb_signal_process_pending(); chan->ops->waitout(chan); hb_check_mcp_alive(); hb_signal_process_pending(); } ha_msg_del(msg); msg = NULL; }else if (feof(fifo)) { if (ANYDEBUG) { return_to_orig_privs(); cl_log(LOG_DEBUG , "fifo_child: EOF on FIFO"); } hb_check_mcp_alive(); exit(2); } cl_cpu_limit_update(); cl_realtime_malloc_check(); hb_check_mcp_alive(); hb_signal_process_pending(); } /*notreached*/ } static gboolean Gmain_hb_signal_process_pending(void *unused) { hb_signal_process_pending(); return TRUE; } /* * We read a packet from the fifo (via fifo_child() process) */ static gboolean FIFO_child_msg_dispatch(IPC_Channel* source, gpointer user_data) { struct ha_msg* msg; if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "FIFO_child_msg_dispatch() {"); } if (!source->ops->is_message_pending(source)) { return TRUE; } msg = msgfromIPC(source, 0); if (msg != NULL) { /* send_cluster_msg disposes of "msg" */ send_cluster_msg(msg); } if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "}/*FIFO_child_msg_dispatch*/;"); } return TRUE; } /* * We read a packet from a read child */ static gboolean read_child_dispatch(IPC_Channel* source, gpointer user_data) { struct ha_msg* msg = NULL; struct hb_media** mp = user_data; int media_idx = mp - &sysmedia[0]; if (media_idx < 0 || media_idx >= MAXMEDIA) { cl_log(LOG_ERR, "read child_dispatch: media index is %d" , media_idx); ha_msg_del(msg); msg = NULL; return TRUE; } if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "read_child_dispatch() {"); } if (!source->ops->is_message_pending(source)) { if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "}/*read_child_dispatch(0)*/;"); } return TRUE; } msg = msgfromIPC(source, MSG_NEEDAUTH); if (msg != NULL) { const char * from = ha_msg_value(msg, F_ORIG); struct link* lnk = NULL; struct node_info* nip; if (from != NULL && (nip=lookup_node(from)) != NULL) { lnk = lookup_iface(nip, (*mp)->name); } process_clustermsg(msg, lnk); ha_msg_del(msg); msg = NULL; } if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "}/*read_child_dispatch*/;"); } return TRUE; } #define SEQARRAYCOUNT 5 static gboolean Gmain_update_msgfree_count(void *unused) { static int seqarray[SEQARRAYCOUNT]= {0,0,0,0,0}; static int lastcount = -1; lastcount = (lastcount + 1) % SEQARRAYCOUNT; timer_lowseq = seqarray[lastcount]; seqarray[lastcount] = msghist.hiseq; return TRUE; } /* * What are our abstract event sources? * * Queued signals to be handled ("polled" high priority) * * Sending a heartbeat message (timeout-based) (high priority) * * Retransmitting packets for the protocol (timed medium priority) * * Timing out on heartbeats from other nodes (timed low priority) * * We currently combine all our timed/polled events together. 
* The only one that has critical timing needs is sending * out heartbeat messages * * Messages from the network (file descriptor medium-high priority) * * API requests from clients (file descriptor medium-low priority) * * Registration requests from clients (file descriptor low priority) * */ static void master_control_process(void) { /* * Create glib sources for: * - API clients * - communication with read/write_children * - various signals ala polled_input_dispatch * * Create timers for: * - sending out local status * - checking for dead nodes (one timer per node?) * - checking for dead links (one timer per link?) * - initial "comm is up" timer * - retransmission request timers (?) * (that is, timers for requesting that nodes * try retransmitting to us again) * * Set up signal handling for: * SIGINT termination * SIGUSR1 increment debugging * SIGUSR2 decrement debugging * SIGCHLD process termination * SIGHUP reread configuration * (should this propagate to client children?) * */ volatile struct process_info * pinfo; int allstarted; int j; GMainLoop* mainloop; guint id; write_hostcachefile = G_main_add_tempproc_trigger(PRI_WRITECACHE , write_hostcachedata, "write_hostcachedata" , NULL, NULL, NULL, NULL); write_delcachefile = G_main_add_tempproc_trigger(PRI_WRITECACHE , write_delcachedata, "write_delcachedata" , NULL, NULL, NULL, NULL); /* * We _really_ only need to write out the uuid file if we're not yet * in the host cache file on disk. */ G_main_set_trigger(write_hostcachefile); init_xmit_hist (&msghist); hb_init_watchdog(); /*add logging channel into mainloop*/ cl_log_set_logd_channel_source(NULL, NULL); if (hb_signal_set_master_control_process(NULL) < 0) { cl_log(LOG_ERR, "master_control_process(): " "hb_signal_set_master_control_process(): " "Soldiering on..."); } if (ANYDEBUG) { /* Limit ourselves to 70% of the CPU */ cl_cpu_limit_setpercent(70); /* Update our CPU limit periodically */ id=Gmain_timeout_add_full(G_PRIORITY_HIGH-5 , cl_cpu_limit_ms_interval() , hb_update_cpu_limit, NULL, NULL); G_main_setall_id(id, "cpu limit", 50, 20); } cl_make_realtime(-1, hb_realtime_prio, 32, config->memreserve); set_proc_title("%s: master control process", cmdname); if (ANYDEBUG) { cl_log(LOG_DEBUG, "Waiting for child processes to start"); } /* Wait until all the child processes are really running */ do { allstarted = 1; for (pinfo=procinfo->info; pinfo < curproc; ++pinfo) { if (pinfo->pstat != RUNNING) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "Wait for pid %d type %d stat %d" , (int) pinfo->pid, pinfo->type , pinfo->pstat); } allstarted=0; sleep(1); } } }while (!allstarted); hb_add_deadtime(2000); id = Gmain_timeout_add(5000, hb_pop_deadtime, NULL); G_main_setall_id(id, "hb_pop_deadtime", 500, 100); set_local_status(UPSTATUS); /* We're pretty sure we're up ;-) */ if (ANYDEBUG) { cl_log(LOG_DEBUG , "All your child process are belong to us"); } send_local_status(); if (G_main_add_input(G_PRIORITY_HIGH, FALSE, &polled_input_SourceFuncs) ==NULL){ cl_log(LOG_ERR, "master_control_process: G_main_add_input failed"); } if (ANYDEBUG) { cl_log(LOG_DEBUG , "Starting local status message @ %ld ms intervals" , config->heartbeat_ms); } /* * Things to do on a periodic basis... 
*/ /* Send local status at the "right time" */ id=Gmain_timeout_add_full(PRI_SENDSTATUS, config->heartbeat_ms , hb_send_local_status, NULL, NULL); G_main_setall_id(id, "send local status", 10+config->heartbeat_ms/2, 50); id=Gmain_timeout_add_full(PRI_AUDITCLIENT , config->initial_deadtime_ms , set_init_deadtime_passed_flag , NULL , NULL); G_main_setall_id(id, "init deadtime passed", config->warntime_ms, 50); /* Audit clients for liveness periodically */ id=Gmain_timeout_add_full(PRI_AUDITCLIENT, 9*1000 , api_audit_clients, NULL, NULL); G_main_setall_id(id, "client audit", 5000, 100); /* Reset timeout times to "now" */ for (j=0; j < config->nodecount; ++j) { struct node_info * hip; hip= &config->nodes[j]; hip->local_lastupdate = time_longclock(); } /* Check for pending signals */ id=Gmain_timeout_add_full(PRI_CHECKSIGS, config->heartbeat_ms , Gmain_hb_signal_process_pending, NULL, NULL); G_main_setall_id(id, "check for signals", 10+config->heartbeat_ms/2, 50); id=Gmain_timeout_add_full(PRI_FREEMSG, 500 , Gmain_update_msgfree_count, NULL, NULL); G_main_setall_id(id, "update msgfree count", config->deadtime_ms, 50); if (UseApphbd) { Gmain_timeout_add_full(PRI_DUMPSTATS , 60*(1000-10) /* Not quite on a minute boundary */ , hb_reregister_with_apphbd , NULL, NULL); } if (UseOurOwnPoll) { g_main_set_poll_func(cl_glibpoll); ipc_set_pollfunc(cl_poll); } mainloop = g_main_new(TRUE); g_main_run(mainloop); } static void hb_del_ipcmsg(IPC_Message* m) { /* this is perfectly safe in our case - reference counts are small ints */ int refcnt = POINTER_TO_SIZE_T(m->msg_private); /*pointer cast as int*/ if (DEBUGPKTCONT) { cl_log(LOG_DEBUG , "Message 0x%lx: refcnt %d" , (unsigned long)m, refcnt); } if (refcnt <= 1) { if (DEBUGPKTCONT) { cl_log(LOG_DEBUG, "Message 0x%lx freed." 
, (unsigned long)m); } memset(m->msg_body, 0, m->msg_len); free(m->msg_buf); memset(m, 0, sizeof(*m)); free(m); }else{ refcnt--; m->msg_private = GINT_TO_POINTER(refcnt); } } static IPC_Message* hb_new_ipcmsg(const void* data, int len, IPC_Channel* ch, int refcnt) { IPC_Message* hdr; char* copy; if (ch == NULL){ cl_log(LOG_ERR, "hb_new_ipcmsg:" " invalid parameter"); return NULL; } if (ch->msgpad > MAX_MSGPAD){ cl_log(LOG_ERR, "hb_new_ipcmsg: too many pads " "something is wrong"); return NULL; } if ((hdr = (IPC_Message*)malloc(sizeof(*hdr))) == NULL) { return NULL; } memset(hdr, 0, sizeof(*hdr)); if ((copy = (char*)malloc(ch->msgpad + len)) == NULL) { free(hdr); return NULL; } memcpy(copy + ch->msgpad, data, len); hdr->msg_len = len; hdr->msg_buf = copy; hdr->msg_body = copy + ch->msgpad; hdr->msg_ch = ch; hdr->msg_done = hb_del_ipcmsg; hdr->msg_private = GINT_TO_POINTER(refcnt); if (DEBUGPKTCONT) { cl_log(LOG_DEBUG, "Message allocated: 0x%lx: refcnt %d" , (unsigned long)hdr, refcnt); } return hdr; } /* Send this message to all of our heartbeat media */ static void send_to_all_media(const char * smsg, int len) { int j; IPC_Message* outmsg = NULL; int numwrites = 0; int nowritecount = 0; /* Throw away some packets if testing is enabled */ if (TESTSEND) { if (TestRand(send_loss_prob)) { if( '\0' == TestOpts->allow_nodes[0] || ';' == TestOpts->allow_nodes[0] ) { return; } } } /* Send the message to all our heartbeat interfaces */ for (j=0; j < nummedia; ++j) { IPC_Channel* wch; struct hb_media* mp; int wrc; mp = sysmedia[j]; if (mp == NULL || mp->recovery_state != MEDIA_OK || NULL == (wch = mp->wchan[P_WRITEFD])) { ++nowritecount; continue; } wch = mp->wchan[P_WRITEFD]; /* Take the first media write channel as this msg's chan * assumption all channel's msgpad is the same */ if (outmsg == NULL){ outmsg = hb_new_ipcmsg(smsg, len, wch, nummedia); } if (outmsg == NULL) { cl_log(LOG_ERR, "Out of memory. Shutting down."); hb_initiate_shutdown(FALSE); return ; } outmsg->msg_ch = wch; wrc=wch->ops->send(wch, outmsg); if (wrc != IPC_OK) { if (!shutting_down_comm) { cl_perror("Cannot write to media pipe %d", j); if (mp->recovery_state == MEDIA_OK) { cl_perror("Killing and restarting communications processes."); shutdown_io_childpair(j); } } }else if (!mp->vf->isping()) { ++numwrites; } alarm(0); } for (j=0; j < nowritecount && outmsg; ++j) { /* Decrement reference count */ hb_del_ipcmsg(outmsg); } if (numwrites == 0 && !shutting_down_comm) { cl_log(LOG_CRIT, "%s: No working comm channels to write to." , __FUNCTION__); } } static void LookForClockJumps(void) { static TIME_T lastnow = 0L; TIME_T now = time(NULL); /* Check for clock jumps */ if (now < lastnow) { cl_log(LOG_INFO , "Clock jumped backwards. Compensating."); ClockJustJumped = 1; }else{ ClockJustJumped = 0; } lastnow = now; } #define POLL_INTERVAL 250 /* milliseconds */ static gboolean polled_input_prepare(GSource* source, gint* timeout) { if (DEBUGPKT){ cl_log(LOG_DEBUG,"polled_input_prepare(): timeout=%d" , *timeout); } LookForClockJumps(); return ((hb_signal_pending() != 0) || ClockJustJumped); } static gboolean polled_input_check(GSource* source) { longclock_t now = time_longclock(); LookForClockJumps(); if (DEBUGPKT) { cl_log(LOG_DEBUG,"polled_input_check(): result = %d" , cmp_longclock(now, NextPoll) >= 0); } /* FIXME:?? should this say pending_handlers || cmp...? 
*/ return (cmp_longclock(now, NextPoll) >= 0); } static gboolean polled_input_dispatch(GSource* source, GSourceFunc callback, gpointer user_data) { longclock_t now = time_longclock(); if (DEBUGPKT){ cl_log(LOG_DEBUG,"polled_input_dispatch() {"); } NextPoll = add_longclock(now, msto_longclock(POLL_INTERVAL)); LookForClockJumps(); cl_realtime_malloc_check(); hb_signal_process_pending(); /* Scan nodes and links to see if any have timed out */ if (!ClockJustJumped) { /* We'll catch it again next time around... */ /* I'm not sure we really need to check for clock jumps * any more since we now use longclock_t for everything * and don't use time_t or clock_t for anything critical. */ check_for_timeouts(); } /* Check to see we need to resend any rexmit requests... */ (void)check_rexmit_reqs; /* See if our comm channels are working yet... */ if (heartbeat_comm_state != COMM_LINKSUP) { check_comm_isup(); } /* THIS IS RESOURCE WORK! FIXME */ /* Check for "time to take over local resources */ if (nice_failback && resourcestate == HB_R_RSCRCVD && cmp_longclock(now, local_takeover_time) > 0) { resourcestate = HB_R_STABLE; req_our_resources(0); cl_log(LOG_INFO,"local resource transition completed."); hb_send_resources_held(TRUE, NULL); AuditResources(); } if (DEBUGPKT){ cl_log(LOG_DEBUG,"}/*polled_input_dispatch*/;"); } return TRUE; } /* * This should be something the code can register for. * and a nice set of hooks to call, etc... */ static void comm_now_up() { static int linksupbefore = 0; char regsock[] = API_REGSOCK; char path[] = IPC_PATH_ATTR; GHashTable* wchanattrs; GWCSource* regsource; IPC_WaitConnection* regwchan = NULL; if (linksupbefore) { return; } linksupbefore = 1; cl_log(LOG_INFO , "Comm_now_up(): updating status to " ACTIVESTATUS); /* Update our local status... */ set_local_status(ACTIVESTATUS); if (comm_up_callback) { comm_up_callback(); } /* Start to listen to the socket for clients*/ wchanattrs = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_insert(wchanattrs, path, regsock); regwchan = ipc_wait_conn_constructor(IPC_DOMAIN_SOCKET, wchanattrs); if (regwchan == NULL) { cl_log(LOG_DEBUG , "Cannot open registration socket at %s" , regsock); cleanexit(LSB_EXIT_EPERM); } regsource = G_main_add_IPC_WaitConnection(PRI_APIREGISTER, regwchan , NULL, FALSE, APIregistration_dispatch, NULL, NULL); G_main_setmaxdispatchdelay((GSource*)regsource, config->deadtime_ms); G_main_setmaxdispatchtime((GSource*)regsource, 20); G_main_setdescription((GSource*)regsource, "client registration"); if (regsource == NULL) { cl_log(LOG_DEBUG , "Cannot create registration source from IPC"); cleanexit(LSB_EXIT_GENERIC); } /* Start each of our known child clients */ if (!shutdown_in_progress) { g_list_foreach(config->client_list , start_a_child_client, NULL); } if (!startup_complete) { startup_complete = TRUE; if (shutdown_in_progress) { shutdown_in_progress = FALSE; hb_initiate_shutdown(FALSE); } } } static gboolean APIregistration_dispatch(IPC_Channel* chan, gpointer user_data) { /* * This channel must be non-blocking as * we don't want to block for a client */ chan->should_send_block = FALSE; if (ANYDEBUG) { cl_log(LOG_DEBUG, "APIregistration_dispatch() {"); } process_registerevent(chan, user_data); if (ANYDEBUG) { cl_log(LOG_DEBUG, "}/*APIregistration_dispatch*/;"); } return TRUE; } void hb_kill_managed_children(int nsig) { /* Kill our managed children... 
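	 * (one hb_kill_tracked_process() call per tracked child; it signals
	 * the child's whole process group where one was set up)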
	 */
	ForEachProc(&ManagedChildTrackOps
	,	hb_kill_tracked_process
	,	GINT_TO_POINTER(nsig));
}

void
hb_kill_rsc_mgmt_children(int nsig)
{
	extern ProcTrack_ops hb_rsc_RscMgmtProcessTrackOps;

	ForEachProc(&hb_rsc_RscMgmtProcessTrackOps
	,	hb_kill_tracked_process
	,	GINT_TO_POINTER(nsig));
}

void
hb_kill_core_children(int nsig)
{
	ForEachProc(&CoreProcessTrackOps
	,	hb_kill_tracked_process
	,	GINT_TO_POINTER(nsig));
}

/*
 * Shutdown sequence:
 *   If non-quick shutdown:
 *	Give up resources (if requested)
 *	Wait for resources to be released
 *	delay
 *
 *   Final shutdown sequence:
 *	Kill managed client children with SIGTERM
 *	If non-quick, kill rsc_mgmt children with SIGTERM
 *	Delay
 *	If non-quick, kill rsc_mgmt children with SIGKILL
 *	Kill core processes (except self) with SIGTERM
 *	Delay
 *	Kill core processes (except self) with SIGKILL
 *	Wait for all children to die.
 */
void
hb_initiate_shutdown(int quickshutdown)
{
	if (ANYDEBUG) {
		cl_log(LOG_DEBUG, "hb_initiate_shutdown() called.");
	}
	if (shutdown_in_progress) {
		if (ANYDEBUG) {
			cl_log(LOG_DEBUG, "hb_initiate_shutdown"
			"(): shutdown already in progress");
		}
		return;
	}
	/* THINK maybe even add "need_shutdown", because it is not yet in
	 * progress; or do a Gmain_timeout_add, or something like that.
	 * A cleanexit(LSB_EXIT_OK) won't do, our children will continue
	 * without us.
	 */
	shutdown_in_progress = TRUE;
	if (!startup_complete) {
		cl_log(LOG_WARNING
		,	"Shutdown delayed until Communication is up.");
		return;
	}
	send_local_status();
	if (!quickshutdown && DoManageResources) {
		/* THIS IS RESOURCE WORK!  FIXME */
		procinfo->giveup_resources = TRUE;
		hb_giveup_resources();
		/* Do something more drastic in 60 minutes */
		Gmain_timeout_add(1000*60*60, EmergencyShutdown, NULL);
		return;
	}
	if (ANYDEBUG) {
		cl_log(LOG_DEBUG
		,	"hb_initiate_shutdown(): calling hb_mcp_final_shutdown()");
	}
	/* Trigger initial shutdown process for quick shutdown */
	hb_mcp_final_shutdown(NULL);	/* phase 0 (quick) */
}

/*
 * The general idea of this code is that we go through several shutdown phases:
 *
 *	0: We've given up release 1 style local resources
 *		Action: we shut down our client children
 *			each one in reverse start order
 *
 *	1: We've shut down all our client children
 *		Action: delay one second to let
 *			messages be received
 *
 *	2: We have delayed one second after phase 1
 *		Action: we kill all our "core" children
 *			(read, write, fifo)
 *
 * We exit/restart after the last of our core children dies.
 */
gboolean
hb_mcp_final_shutdown(gpointer p)
{
	static int	shutdown_phase = 0;
	guint		id;

	if (ANYDEBUG) {
		cl_log(LOG_DEBUG, "hb_mcp_final_shutdown() phase %d"
		,	shutdown_phase);
	}
	DisableProcLogging();	/* We're shutting down */
	CL_IGNORE_SIG(SIGTERM);

	switch (shutdown_phase) {

	case 0:		/* From hb_initiate_shutdown -- quickshutdown */
			/* OR HBDoMsg_T_SHUTDONE -- long shutdown */
		shutdown_phase = 1;
		send_local_status();
		if (!shutdown_last_client_child(SIGTERM)) {
			if (ANYDEBUG) {
				cl_log(LOG_DEBUG
				,	"hb_mcp_final_shutdown()"
				"- immediate completion.");
			}
			return hb_mcp_final_shutdown(p); /* phase 1 (no children) */
		}
		return FALSE;

	case 1:		/* From ManagedChildDied() (or above) */
		if (NULL != config->last_client) {
			g_list_foreach(config->client_list
			,	print_a_child_client, NULL);
			abort();
		}
		shutdown_phase = 2;
		if (procinfo->restart_after_shutdown) {
			hb_add_deadtime(30000);
		}
		send_local_status();
		/* THIS IS RESOURCE WORK!
FIXME */ if (procinfo->giveup_resources) { /* Shouldn't *really* need this */ hb_kill_rsc_mgmt_children(SIGTERM); } id=Gmain_timeout_add(1000, hb_mcp_final_shutdown /* phase 2 */ , NULL); G_main_setall_id(id, "shutdown phase 2", 500, 100); return FALSE; case 2: /* From 1-second delay above */ shutdown_phase = 3; shutting_down_comm=TRUE; if (procinfo->giveup_resources) { /* THIS IS RESOURCE WORK! FIXME */ /* Shouldn't *really* need this either ;-) */ hb_kill_rsc_mgmt_children(SIGKILL); } /* Kill any lingering processes in our process group */ CL_KILL(-getpid(), SIGTERM); hb_kill_core_children(SIGTERM); /* Is this redundant? */ hb_tickle_watchdog(); /* Ought to go down fast now... */ Gmain_timeout_add(30*1000, EmergencyShutdown, NULL); return FALSE; default: /* This should also never be reached */ hb_emergency_shutdown(); break; } hb_close_watchdog(); /* Whack 'em */ hb_kill_core_children(SIGKILL); cl_log(LOG_INFO,"%s Heartbeat shutdown complete.", localnodename); cl_flush_logs(); if (procinfo->restart_after_shutdown) { cl_log(LOG_INFO, "Heartbeat restart triggered."); restart_heartbeat(); } else{ cleanuptable(); } /*NOTREACHED*/ cleanexit(0); /* NOTREACHED*/ return FALSE; } static void hb_remove_msg_callback(const char * mtype) { if (message_callbacks == NULL) { return; } g_hash_table_remove(message_callbacks, mtype); } void hb_register_msg_callback(const char * mtype, HBmsgcallback callback) { char * msgtype = g_strdup(mtype); if (message_callbacks == NULL) { message_callbacks = g_hash_table_new(g_str_hash, g_str_equal); } g_hash_table_insert(message_callbacks, msgtype, callback); } void hb_register_comm_up_callback(void (*callback)(void)) { comm_up_callback = callback; } static gboolean HBDoMsgCallback(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { HBmsgcallback cb; if ((cb = g_hash_table_lookup(message_callbacks, type))) { cb(type, fromnode, msgtime, seqno, iface, msg); return TRUE; } /* It's OK to register for "no one else wants it" with "" */ if ((cb = g_hash_table_lookup(message_callbacks, ""))) { cb(type, fromnode, msgtime, seqno, iface, msg); return TRUE; } return FALSE; } static void free_one_hist_slot(struct msg_xmit_hist* hist, int slot ) { struct ha_msg* msg; msg = hist->msgq[slot]; if (msg){ hist->lowseq = hist->seqnos[slot]; hist->msgq[slot] = NULL; if (!msg) { cl_log(LOG_CRIT, "Unallocated slotmsg in %s", __FUNCTION__); return; }else{ ha_msg_del(msg); } } return; } static void hist_display(struct msg_xmit_hist * hist) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "hist->ackseq =%ld", hist->ackseq); cl_log(LOG_DEBUG, "hist->lowseq =%ld, hist->hiseq=%ld", hist->lowseq, hist->hiseq); dump_missing_pkts_info(); if (hist->lowest_acknode){ cl_log(LOG_DEBUG,"expecting from %s",hist->lowest_acknode->nodename); cl_log(LOG_DEBUG,"it's ackseq=%ld", hist->lowest_acknode->track.ackseq); } cl_log(LOG_DEBUG, " "); } } static void reset_lowest_acknode(void) { struct msg_xmit_hist* hist = &msghist; hist->lowest_acknode = NULL; return; } static void HBDoMsg_T_ACKMSG(const char * type, struct node_info * fromnode, TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { const char* ackseq_str = ha_msg_value(msg, F_ACKSEQ); seqno_t ackseq; struct msg_xmit_hist* hist = &msghist; const char* to = (const char*)ha_msg_value(msg, F_TO); struct node_info* tonode; seqno_t new_ackseq = hist->ackseq; if (!to || (tonode = lookup_tables(to, NULL)) == NULL || tonode != curnode){ return; } if (ackseq_str == NULL || 
sscanf(ackseq_str, "%lx", &ackseq) != 1){ goto out; } if (ackseq == fromnode->track.ackseq){ /*dup message*/ goto out; } if (ackseq <= new_ackseq){ /* late or dup ack * ignored */ goto out; }else if (ackseq > hist->hiseq){ cl_log(LOG_ERR, "HBDoMsg_T_ACK" ": corrupted ackseq" " current hiseq = %ld" " ackseq =%ld in this message", hist->hiseq, ackseq); goto out; } if (ackseq < fromnode->track.ackseq) { /* late or dup ack * ignored */ goto out; } fromnode->track.ackseq = ackseq; if (hist->lowest_acknode != NULL && STRNCMP_CONST(hist->lowest_acknode->status,DEADSTATUS)==0){ /* the lowest acked node is dead * we cannot count on that node * to update our ackseq */ hist->lowest_acknode = NULL; } if (hist->lowest_acknode == NULL || hist->lowest_acknode == fromnode){ /*find the new lowest and update hist->ackseq*/ seqno_t minseq; int minidx; int i; hist->lowest_acknode = NULL; minidx = -1; minseq = 0; for (i = 0; i < config->nodecount; i++){ struct node_info* hip = &config->nodes[i]; if (hip->nodetype == PINGNODE_I || STRNCMP_CONST(hip->status, DEADSTATUS) == 0) { continue; } if (minidx == -1 || hip->track.ackseq < minseq){ minseq = hip->track.ackseq; minidx = i; } } if (minidx == -1) { /* Every node is DEADSTATUS */ goto out; } if (live_node_count < 2) { /* * Update hist->ackseq so we don't hang onto * messages indefinitely and flow control clients */ if ((hist->hiseq - new_ackseq) >= FLOWCONTROL_LIMIT) { new_ackseq = hist->hiseq - (FLOWCONTROL_LIMIT-1); } hist->lowest_acknode = NULL; goto cleanupandout; } if (minidx >= config->nodecount) { cl_log(LOG_ERR, "minidx out of bound" "minidx=%d",minidx ); goto out; } if (minseq > 0){ new_ackseq = minseq; } hist->lowest_acknode = &config->nodes[minidx]; } cleanupandout: update_ackseq(new_ackseq); out: return; } static void update_ackseq(seqno_t new_ackseq) { struct msg_xmit_hist* hist = &msghist; long count; seqno_t start; seqno_t old_ackseq = hist->ackseq; #if 0 cl_log(LOG_INFO, "new_ackseq = %ld, old_ackseq=%ld" , new_ackseq, old_ackseq); #endif if (new_ackseq <= old_ackseq){ return; } hist->ackseq = new_ackseq; if ((hist->hiseq - hist->ackseq) < FLOWCONTROL_LIMIT){ all_clients_resume(); } count = hist->ackseq - hist->lowseq - send_cluster_msg_level; if (old_ackseq == 0){ start = 0; count = count - 1; }else{ start = hist->lowseq; } while(count -- > 0){ /* * If the seq number is greater than the lowseq number * the timer set, we should not free any more messages */ if (start > timer_lowseq){ break; } free_one_hist_slot(hist, start%MAXMSGHIST); start++; if (hist->lowseq > hist->ackseq){ cl_log(LOG_ERR, "lowseq cannnot be greater than ackseq"); cl_log(LOG_INFO, "hist->ackseq =%ld, old_ackseq=%ld" , hist->ackseq, old_ackseq); cl_log(LOG_INFO, "hist->lowseq =%ld, hist->hiseq=%ld" ", send_cluster_msg_level=%d" , hist->lowseq, hist->hiseq, send_cluster_msg_level); abort(); } } (void)dump_missing_pkts_info; #ifdef DEBUG_FOR_GSHI if (ANYDEBUG){ cl_log(LOG_DEBUG, "hist->ackseq =%ld, node %s's ackseq=%ld", hist->ackseq, fromnode->nodename, fromnode->track.ackseq); cl_log(LOG_DEBUG, "hist->lowseq =%ld, hist->hiseq=%ld", hist->lowseq, hist->hiseq); dump_missing_pkts_info(); if (hist->lowest_acknode){ cl_log(LOG_DEBUG,"expecting from %s",hist->lowest_acknode->nodename); } cl_log(LOG_DEBUG, " "); } #endif } static int getnodes(const char* nodelist, char** nodes, int* num){ const char* p; int i; int j; memset(nodes, 0, *num); i = 0; p = nodelist ; while(*p != 0){ int nodelen; while(*p == ' ') { p++; } if (*p == 0){ break; } nodelen = strcspn(p, " \t\0") ; if (i >= 
*num){
			cl_log(LOG_ERR,
			       "%s: more memory needed (%d given but %d required)",
			       __FUNCTION__, *num, i+1);
			goto errexit;
		}
		nodes[i] = malloc(nodelen + 1);
		if (nodes[i] == NULL){
			cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__);
			goto errexit;
		}
		memcpy(nodes[i], p, nodelen);
		nodes[i][nodelen] = 0;
		p += nodelen;
		i++;
	}
	*num = i;
	return HA_OK;

errexit:
	for (j = 0; j < i; j++){
		if (nodes[j]){
			free(nodes[j]);
			nodes[j] = NULL;
		}
	}
	return HA_FAIL;
}

static int
hb_add_one_node(const char* node)
{
	struct node_info*	thisnode = NULL;

	cl_log(LOG_INFO, "%s: Adding new node [%s] to configuration.",
	       __FUNCTION__, node);
	thisnode = lookup_node(node);
	if (thisnode){
		cl_log(LOG_ERR, "%s: node(%s) already exists",
		       __FUNCTION__, node);
		return HA_FAIL;
	}
	add_node(node, NORMALNODE_I);
	thisnode = lookup_node(node);
	if (thisnode == NULL) {
		cl_log(LOG_ERR, "%s: adding node(%s) failed",
		       __FUNCTION__, node);
		return HA_FAIL;
	}
	return HA_OK;
}

/*
 * Process a request to add a node to the cluster.
 * This can _only_ come from a manual addnode request.
 */
static void
HBDoMsg_T_ADDNODE(const char * type, struct node_info * fromnode
,	TIME_T msgtime, seqno_t seqno, const char * iface
,	struct ha_msg * msg)
{
	const char*	nodelist;
	char*		nodes[MAXNODE];
	int		num = MAXNODE;
	int		i;

	nodelist = ha_msg_value(msg, F_NODELIST);
	if (nodelist == NULL){
		cl_log(LOG_ERR, "%s: nodelist not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	if (getnodes(nodelist, nodes, &num) != HA_OK){
		cl_log(LOG_ERR, "%s: parsing failed", __FUNCTION__);
		return;
	}
	for (i = 0; i < num; i++){
		if (ANYDEBUG) {
			cl_log(LOG_DEBUG, "%s: adding node %s"
			,	__FUNCTION__, nodes[i]);
		}
		if (hb_add_one_node(nodes[i]) != HA_OK){
			cl_log(LOG_ERR, "Add node %s failed", nodes[i]);
		}
		free(nodes[i]);
		nodes[i] = NULL;
	}
	G_main_set_trigger(write_hostcachefile);
	return;
}

/*
 * Process a request to set the quorum vote weight for a node.
 * This can only come from a manual setweight command.
 */
static void
HBDoMsg_T_SETWEIGHT(const char * type, struct node_info * fromnode,
	TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg)
{
	const char*	node;
	int		weight;

	node = ha_msg_value(msg, F_NODE);
	if (node == NULL){
		cl_log(LOG_ERR, "%s: node not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	if (ha_msg_value_int(msg, F_WEIGHT, &weight) != HA_OK){
		cl_log(LOG_ERR, "%s: weight not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	if (set_node_weight(node, weight) == HA_OK) {
		G_main_set_trigger(write_hostcachefile);
	}
	return;
}

/*
 * Process a request to set the site for a node.
 * This can only come from a manual setsite command.
 */
static void
HBDoMsg_T_SETSITE(const char * type, struct node_info * fromnode,
	TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg)
{
	const char*	node;
	const char*	site;

	node = ha_msg_value(msg, F_NODE);
	if (node == NULL){
		cl_log(LOG_ERR, "%s: node not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	site = ha_msg_value(msg, F_SITE);
	if (site == NULL){
		cl_log(LOG_ERR, "%s: site not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	if (set_node_site(node, site) == HA_OK) {
		G_main_set_trigger(write_hostcachefile);
	}
	return;
}

/*
 * Remove a single node from the configuration - for whatever reason.
 * "deletion" is TRUE if it is to be permanently deleted from the
 * configuration and not allowed to autojoin back again.
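 * When "deletion" is FALSE the node is only dropped from the running
 * configuration (e.g. while syncing node lists from a T_REPNODES reply);
 * when TRUE it is also recorded as semi-permanently deleted.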
 */
static int
hb_remove_one_node(const char* node, int deletion)
{
	struct node_info*	thisnode = NULL;
	struct ha_msg*		removemsg;

	cl_log(LOG_INFO, "Removing node [%s] from configuration.", node);
	thisnode = lookup_node(node);
	if (thisnode == NULL){
		cl_log(LOG_ERR, "%s: node %s not found in config",
		       __FUNCTION__, node);
		return HA_FAIL;
	}
	if (remove_node(node, deletion) != HA_OK){
		cl_log(LOG_ERR, "%s: Deleting node(%s) failed",
		       __FUNCTION__, node);
		return HA_FAIL;
	}
	removemsg = ha_msg_new(0);
	if (removemsg == NULL){
		cl_log(LOG_ERR, "%s: creating new message failed", __FUNCTION__);
		return HA_FAIL;
	}
	/*
	 * This message only goes to the CCM, etc. NOT to the network.
	 */
	if (ha_msg_add(removemsg, F_TYPE, T_DELNODE) != HA_OK
	||  ha_msg_add(removemsg, F_NODE, node) != HA_OK){
		cl_log(LOG_ERR, "%s: adding fields to msg failed",
		       __FUNCTION__);
		ha_msg_del(removemsg);
		return HA_FAIL;
	}
	heartbeat_monitor(removemsg, KEEPIT, NULL);
	reset_lowest_acknode();
	return HA_OK;
}

/*
 * Process a message requesting a node deletion.
 * This can come ONLY from a manual node deletion.
 */
static void
HBDoMsg_T_DELNODE(const char * type, struct node_info * fromnode,
	TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg)
{
	const char*	nodelist;
	char*		nodes[MAXNODE];
	int		num = MAXNODE;
	int		i;
	int		j;

	nodelist = ha_msg_value(msg, F_NODELIST);
	if (nodelist == NULL){
		cl_log(LOG_ERR, "%s: nodelist not found in msg", __FUNCTION__);
		cl_log_message(LOG_INFO, msg);
		return;
	}
	if (getnodes(nodelist, nodes, &num) != HA_OK){
		cl_log(LOG_ERR, "%s: parsing failed", __FUNCTION__);
		return;
	}
	for (i = 0; i < config->nodecount; i++){
		gboolean isdelnode = FALSE;

		for (j = 0; j < num; j++){
			if (strncmp(config->nodes[i].nodename,
				    nodes[j], HOSTLENG) == 0){
				isdelnode = TRUE;
				break;
			}
		}
		if (isdelnode){
			if (STRNCMP_CONST(config->nodes[i].status,
					  DEADSTATUS) != 0){
				cl_log(LOG_WARNING,
				       "deletion failed: node %s is not dead",
				       config->nodes[i].nodename);
				goto out;
			}
		}
		if (!isdelnode){
			if (STRNCMP_CONST(config->nodes[i].status, UPSTATUS) != 0
			&&  STRNCMP_CONST(config->nodes[i].status, ACTIVESTATUS) != 0
			&&  config->nodes[i].nodetype == NORMALNODE_I){
				cl_log(LOG_ERR,
				       "%s: deletion failed. We don't have"
				       " all required nodes alive (%s is dead)",
				       __FUNCTION__, config->nodes[i].nodename);
				goto out;
			}
		}
	}
	for (i = 0; i < num; i++){
		if (strncmp(nodes[i], curnode->nodename, HOSTLENG) == 0){
			cl_log(LOG_ERR, "I am being deleted from the cluster."
" This should not happen"); hb_initiate_shutdown(FALSE); return; } if (hb_remove_one_node(nodes[i], TRUE)!= HA_OK){ cl_log(LOG_ERR, "Deleting node %s failed", nodes[i]); } } out: for (i = 0; i < num; i++){ free(nodes[i]); nodes[i]= NULL; } G_main_set_trigger(write_hostcachefile); G_main_set_trigger(write_delcachefile); return ; } static int get_nodelist( char* nodelist, int len) { int i; char* p; int numleft = len; p = nodelist; for (i = 0; i< config->nodecount; i++){ int tmplen; if (config->nodes[i].nodetype != NORMALNODE_I) { continue; } tmplen= snprintf(p, numleft, "%s ", config->nodes[i].nodename); p += tmplen; numleft -= tmplen; if (tmplen <= 0){ cl_log(LOG_ERR, "%s: not enough buffer", __FUNCTION__); return HA_FAIL; } } return HA_OK; } static int get_delnodelist(char* delnodelist, int len) { char* p = delnodelist; int numleft = len; GSList* list = NULL; if (del_node_list == NULL){ delnodelist[0]= ' '; delnodelist[1]=0; goto out; } list = del_node_list; while( list){ struct node_info* hip; int tmplen; hip = (struct node_info*)list->data; if (hip == NULL){ cl_log(LOG_ERR, "%s: null data in del node list", __FUNCTION__); return HA_FAIL; } tmplen = snprintf(p, numleft, "%s ", hip->nodename); if (tmplen <= 0){ cl_log(LOG_ERR, "%s: not enough buffer", __FUNCTION__); return HA_FAIL; } p += tmplen; numleft -=tmplen; list = list->next; } out: cl_log(LOG_DEBUG, "%s: delnodelist=%s", __FUNCTION__, delnodelist); return HA_OK; } /* * Someone has joined the cluster and asked us for the current set of nodes * as modified by addnode and delnode commands or the autojoin option * (if enabled), and also the set of semi-permanently deleted nodes. * * We send them a T_REPNODES message in response - containing that information. * * We allow dynamic node configuration even if autojoin is disabled. In that * case you need to use the addnode and delnode commands. */ static void HBDoMsg_T_REQNODES(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface , struct ha_msg * msg) { char nodelist[MAXLINE]; char delnodelist[MAXLINE]; struct ha_msg* repmsg; if (fromnode == curnode){ cl_log(LOG_ERR, "%s: get reqnodes msg from myself!", __FUNCTION__); return; } if (ANYDEBUG){ cl_log(LOG_DEBUG, "Get a reqnodes message from %s" , fromnode->nodename); } if (get_nodelist(nodelist, MAXLINE) != HA_OK || get_delnodelist(delnodelist, MAXLINE) != HA_OK){ cl_log(LOG_ERR, "%s: get node list or del node list from config failed", __FUNCTION__); return; } repmsg = ha_msg_new(0); if ( repmsg == NULL || ha_msg_add(repmsg, F_TO, fromnode->nodename) != HA_OK || ha_msg_add(repmsg, F_TYPE, T_REPNODES) != HA_OK || ha_msg_add(repmsg, F_NODELIST, nodelist) != HA_OK || ha_msg_add(repmsg, F_DELNODELIST, delnodelist) != HA_OK){ cl_log(LOG_ERR, "%s: constructing REPNODES msg failed", __FUNCTION__); ha_msg_del(repmsg); return; } send_cluster_msg(repmsg); return; } /* * Got our requested reply (T_REPNODES) to our T_REQNODES request. * It has the current membership as modified by addnode/delnode commands and * autojoin options * * We allow dynamic node configuration even if autojoin is disabled. In that * case you need to use the addnode and delnode commands. 
*/ static void HBDoMsg_T_REPNODES(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { const char* nodelist = ha_msg_value(msg, F_NODELIST); const char* delnodelist = ha_msg_value(msg, F_DELNODELIST); char* nodes[MAXNODE]; char* delnodes[MAXNODE]; int num = MAXNODE; int delnum = MAXNODE; int i; int j; if (ANYDEBUG){ cl_log(LOG_DEBUG,"Get a repnodes msg from %s", fromnode->nodename); } if (fromnode == curnode){ /*our own REPNODES msg*/ return; } /* process nodelist*/ /* our local node list needs to be updated... * Any node that is in nodelist but not in local node list should be * added * Any node that is in local node list but not in nodelist should be * removed (but not deleted) */ /* term definition*/ /* added: a node in config->nodes[] deleted: a node in del_node_list removed: remove a node either from config->nodes[] or del_node_list */ if (nodelist != NULL){ memset(nodes, 0, MAXNODE); if (ANYDEBUG){ cl_log(LOG_DEBUG, "nodelist received:%s", nodelist); } if (getnodes(nodelist, nodes, &num) != HA_OK){ cl_log(LOG_ERR, "%s: get nodes from nodelist failed", __FUNCTION__); return; } for (i=0; i < num; i++){ for (j = 0; j < config->nodecount; j++){ if (strncmp(nodes[i], config->nodes[j].nodename , HOSTLENG) == 0){ break; } } if (j == config->nodecount){ /* * This node is not found in config - * we need to add it... */ if (ANYDEBUG) { cl_log(LOG_DEBUG , "%s: adding node %s" , __FUNCTION__, nodes[i]); } hb_add_one_node(nodes[i]); }else if (config->nodes[j].nodetype != NORMALNODE_I){ cl_log(LOG_ERR , "%s: Incoming %s node list contains %s" , __FUNCTION__ , T_REPNODES , config->nodes[i].nodename); } } for (i=0; i < config->nodecount; i++){ if (config->nodes[i].nodetype != NORMALNODE_I){ continue; } for (j=0; j < num; j++){ if (strncmp(config->nodes[i].nodename , nodes[j], HOSTLENG) == 0){ break; } } if (j == num) { /* * This node is not found in incoming nodelist, * therefore, we need to remove it from * config->nodes[] * * This assumes everyone the partner node we * sent the reqnodes message to has the current * configuration. * * The moral of the story is that you need to * not add and delete nodes by ha.cf on live * systems. * * If you use addnode and delnode commands then * everything should be OK here. */ hb_remove_one_node(config->nodes[i].nodename , FALSE); } } for (i = 0; i< num; i++){ if (nodes[i]) { free(nodes[i]); nodes[i] = NULL; } } get_reqnodes_reply = TRUE; G_main_set_trigger(write_hostcachefile); } if (delnodelist != NULL) { memset(delnodes, 0, MAXNODE); if (getnodes(delnodelist, delnodes, &delnum) != HA_OK){ cl_log(LOG_ERR, "%s: get del nodes from nodelist failed", __FUNCTION__); return; } /* process delnodelist*/ /* update our del node list to be the exact same list as the received one */ dellist_destroy(); for (i = 0; i < delnum; i++){ dellist_add(delnodes[i]); } for (i = 0; i < delnum; i++){ if (delnodes[i]){ free(delnodes[i]); delnodes[i] = NULL; } } get_reqnodes_reply = TRUE; G_main_set_trigger(write_delcachefile); } comm_now_up(); return; } static void HBDoMsg_T_REXMIT(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { heartbeat_monitor(msg, PROTOCOL, iface); if (fromnode != curnode) { process_rexmit(&msghist, msg); } } /* Process status update (i.e., "heartbeat") message? 
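 * Every live node broadcasts one of these on each heartbeat interval.
 * It carries the sender's status string (F_STATUS), its deadtime in hex
 * milliseconds (F_DT) and, when flow control is enabled, the protocol
 * version (F_PROTOCOL) -- see send_local_status() below for the fields
 * actually added.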
*/ static void HBDoMsg_T_STATUS(const char * type, struct node_info * fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg) { const char * status; longclock_t messagetime = time_longclock(); const char *tmpstr; long deadtime; int protover; status = ha_msg_value(msg, F_STATUS); if (status == NULL) { cl_log(LOG_ERR, "HBDoMsg_T_STATUS: " "status update without " F_STATUS " field"); return; } /* Does it contain F_PROTOCOL field?*/ /* Do we already have a newer status? */ if (msgtime < fromnode->rmt_lastupdate && seqno < fromnode->status_seqno) { return; } /* Have we seen an update from here before? */ if (fromnode->nodetype != PINGNODE_I && enable_flow_control && ha_msg_value_int(msg, F_PROTOCOL, &protover) != HA_OK){ cl_log(LOG_INFO, "flow control disabled due to different version heartbeat"); enable_flow_control = FALSE; hb_remove_msg_callback(T_ACKMSG); } if (fromnode->local_lastupdate) { long heartbeat_ms; heartbeat_ms = longclockto_ms(sub_longclock ( messagetime, fromnode->local_lastupdate)); if (heartbeat_ms > config->warntime_ms) { cl_log(LOG_WARNING , "Late heartbeat: Node %s:" " interval %ld ms" , fromnode->nodename , heartbeat_ms); } } /* Is this a second status msg from a new node? */ if (fromnode->status_suppressed && fromnode->saved_status_msg) { fromnode->status_suppressed = FALSE; QueueRemoteRscReq(PerformQueuedNotifyWorld , fromnode->saved_status_msg); heartbeat_monitor(fromnode->saved_status_msg, KEEPIT, iface); ha_msg_del(fromnode->saved_status_msg); fromnode->saved_status_msg = NULL; } /* Is the node status the same? */ if (strcasecmp(fromnode->status, status) != 0 && fromnode != curnode) { cl_log(LOG_INFO , "Status update for node %s: status %s" , fromnode->nodename , status); if (ANYDEBUG) { cl_log(LOG_DEBUG , "Status seqno: %ld msgtime: %ld" , seqno, msgtime); } /* * If the restart of a node is faster than deadtime, * the previous status of node would be still ACTIVE * while current status is INITSTATUS. * So we reduce the live_node_count here. */ if (fromnode->nodetype == NORMALNODE_I && fromnode != curnode && ( STRNCMP_CONST(fromnode->status, ACTIVESTATUS) == 0 || STRNCMP_CONST(fromnode->status, UPSTATUS) == 0) && ( STRNCMP_CONST(status, INITSTATUS) == 0)) { --live_node_count; if (live_node_count < 1) { cl_log(LOG_ERR , "live_node_count too small (%d)" , live_node_count); } } /* * IF * It's from a normal node * It isn't from us * The node's old status was dead or init * The node's new status is up or active * THEN * increment the count of live nodes. */ if (fromnode->nodetype == NORMALNODE_I && fromnode != curnode && ( STRNCMP_CONST(fromnode->status, DEADSTATUS) == 0 || STRNCMP_CONST(fromnode->status, INITSTATUS) == 0) && ( STRNCMP_CONST(status, UPSTATUS) == 0 || STRNCMP_CONST(status, ACTIVESTATUS) == 0)) { ++live_node_count; if (live_node_count > config->nodecount) { cl_log(LOG_ERR , "live_node_count too big (%d)" , live_node_count); } } strncpy(fromnode->status, status, sizeof(fromnode->status)); if (!fromnode->status_suppressed) { QueueRemoteRscReq(PerformQueuedNotifyWorld, msg); heartbeat_monitor(msg, KEEPIT, iface); }else{ /* We know we don't already have a saved msg */ fromnode->saved_status_msg = ha_msg_copy(msg); } }else{ heartbeat_monitor(msg, NOCHANGE, iface); } if ((tmpstr = ha_msg_value(msg, F_DT)) != NULL && sscanf(tmpstr, "%lx", (unsigned long*)&deadtime) == 1) { fromnode->dead_ticks = msto_longclock(deadtime); } /* Did we get a status update on ourselves? 
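	 * If so, one of our own packets made it back through a read child,
	 * so the communications path is working and it is safe to tickle
	 * the watchdog.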
	 */
	if (fromnode == curnode) {
		hb_tickle_watchdog();
	}
	fromnode->rmt_lastupdate = msgtime;
	fromnode->local_lastupdate = messagetime;
	fromnode->status_seqno = seqno;
}

/* This is a client status query from a remote client */
static void
HBDoMsg_T_QCSTATUS(const char * type, struct node_info * fromnode
,	TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg)
{
	const char *	clientid;
	const char *	fromclient;
	struct ha_msg *	m = NULL;
	int		ret = HA_FAIL;

	if ((clientid = ha_msg_value(msg, F_CLIENTNAME)) == NULL
	||  (fromclient = ha_msg_value(msg, F_FROMID)) == NULL) {
		cl_log(LOG_ERR, "%s: ha_msg_value failed", __FUNCTION__);
		return;
	}
	if ((m = ha_msg_new(0)) == NULL){
		cl_log(LOG_ERR, "%s: cannot create message", __FUNCTION__);
		return;
	}
	if (ha_msg_add(m, F_TYPE, T_RCSTATUS) != HA_OK
	||  ha_msg_add(m, F_TO, fromnode->nodename) != HA_OK
	||  ha_msg_add(m, F_APIRESULT, API_OK) != HA_OK
	||  ha_msg_add(m, F_CLIENTNAME, clientid) != HA_OK
	||  ha_msg_add(m, F_TOID, fromclient) != HA_OK) {
		cl_log(LOG_ERR, "Cannot create client status msg");
		ha_msg_del(m);
		return;
	}
	if (find_client(clientid, NULL) != NULL) {
		ret = ha_msg_add(m, F_CLIENTSTATUS, ONLINESTATUS);
	}else{
		ret = ha_msg_add(m, F_CLIENTSTATUS, OFFLINESTATUS);
	}
	if (ret != HA_OK) {
		cl_log(LOG_ERR, "Cannot create client status msg");
		ha_msg_del(m);
		return;
	}
	send_cluster_msg(m);
}

static void
update_client_status_msg_list(struct node_info* thisnode)
{
	struct seqtrack *	t = &thisnode->track;

	if (t->client_status_msg_queue){
		struct ha_msg*	msg;
		GList*		listrunner;
		seqno_t		seq;
		const char *	cseq;

		while ((listrunner = g_list_first(t->client_status_msg_queue))
		       != NULL){
			msg = (struct ha_msg*) listrunner->data;
			cseq = ha_msg_value(msg, F_SEQ);
			if (cseq == NULL
			||  sscanf(cseq, "%lx", &seq) != 1
			||  seq <= 0) {
				cl_log(LOG_ERR, "bad sequence number");
				if (cseq){
					cl_log(LOG_INFO, "cseq =%s", cseq);
				}
				return;
			}
			if (t->first_missing_seq == 0
			||  seq < t->first_missing_seq){
				/* deliver the message to the client */
				cl_log(LOG_DEBUG, "delivering client status "
				       "message to a client"
				       " from queue");
				heartbeat_monitor(msg, KEEPIT, NULL);
				ha_msg_del(msg);
				t->client_status_msg_queue =
					g_list_delete_link(
						t->client_status_msg_queue,
						listrunner);
			}else{
				break;
			}
		}
		if (g_list_length(t->client_status_msg_queue) == 0){
			g_list_free(t->client_status_msg_queue);
			t->client_status_msg_queue = NULL;
			cl_log(LOG_DEBUG, "client_status_msg_queue "
			       "for node %s destroyed",
			       thisnode->nodename);
		}
	}
	return;
}

static void
send_ack(struct node_info* thisnode, seqno_t seq)
{
	struct ha_msg*	hmsg;
	char		seq_str[32];

	if ((hmsg = ha_msg_new(0)) == NULL) {
		cl_log(LOG_ERR, "no memory for " T_ACKMSG);
		return;
	}
	sprintf(seq_str, "%lx", seq);
	if (ha_msg_add(hmsg, F_TYPE, T_ACKMSG) == HA_OK
	&&  ha_msg_add(hmsg, F_TO, thisnode->nodename) == HA_OK
	&&  ha_msg_add(hmsg, F_ACKSEQ, seq_str) == HA_OK) {
		if (send_cluster_msg(hmsg) != HA_OK) {
			cl_log(LOG_ERR, "cannot send " T_ACKMSG
			       " request to %s", thisnode->nodename);
		}
	}else{
		ha_msg_del(hmsg);
		cl_log(LOG_ERR, "Cannot create " T_ACKMSG " message.");
	}
	return;
}

static void
send_ack_if_needed(struct node_info* thisnode, seqno_t seq)
{
	struct seqtrack*	t = &thisnode->track;
	seqno_t			fm_seq = t->first_missing_seq;

	if (!enable_flow_control){
		return;
	}
	if ((fm_seq != 0 && seq > fm_seq)
	||  seq % ACK_MSG_DIV != thisnode->track.ack_trigger){
		/* no need to send an ACK */
		return;
	}
	send_ack(thisnode, seq);
	return;
}

static void
send_ack_if_necessary(const struct ha_msg* m)
{
	const char*	fromnode = ha_msg_value(m, F_ORIG);
	cl_uuid_t	fromuuid;
	const char*	seq_str = ha_msg_value(m, F_SEQ);
	seqno_t		seq;
	struct
node_info* thisnode = NULL; if (!enable_flow_control){ return; } if ( cl_get_uuid(m, F_ORIGUUID, &fromuuid) != HA_OK){ cl_uuid_clear(&fromuuid); } if (fromnode == NULL || seq_str == NULL || sscanf( seq_str, "%lx", &seq) != 1){ return; } thisnode = lookup_tables(fromnode, &fromuuid); if (thisnode == NULL){ cl_log(LOG_ERR, "node %s not found " "bad message", fromnode); return; } send_ack_if_needed(thisnode, seq); } /* * Process an incoming message from our read child processes * That is, packets coming from other nodes. */ static void process_clustermsg(struct ha_msg* msg, struct link* lnk) { struct node_info * thisnode = NULL; const char* iface; TIME_T msgtime = 0; longclock_t now = time_longclock(); const char * from; cl_uuid_t fromuuid; const char * ts; const char * type; int action; const char * cseq; seqno_t seqno = 0; longclock_t messagetime = now; int missing_packet =0 ; if (lnk == NULL) { iface = "?"; }else{ iface = lnk->name; } /* FIXME: We really ought to use gmainloop timers for this */ if (cmp_longclock(standby_running, zero_longclock) != 0) { if (DEBUGDETAILS) { unsigned long msleft; msleft = longclockto_ms(sub_longclock(standby_running , now)); cl_log(LOG_WARNING, "Standby timer has %ld ms left" , msleft); } /* * If there's a standby timer running, verify if it's * time to enable the standby messages again... */ if (cmp_longclock(now, standby_running) >= 0) { standby_running = zero_longclock; other_is_stable = 1; going_standby = NOT; cl_log(LOG_WARNING, "No reply to standby request" ". Standby request cancelled."); hb_shutdown_if_needed(); } } /* Extract message type, originator, timestamp, auth */ type = ha_msg_value(msg, F_TYPE); from = ha_msg_value(msg, F_ORIG); if ( cl_get_uuid(msg, F_ORIGUUID, &fromuuid) != HA_OK){ cl_uuid_clear(&fromuuid); } ts = ha_msg_value(msg, F_TIME); cseq = ha_msg_value(msg, F_SEQ); if (DEBUGDETAILS) { cl_log(LOG_DEBUG , "process_clustermsg: node [%s]" , from ? from :"?"); } if (from == NULL || ts == NULL || type == NULL) { cl_log(LOG_ERR , "process_clustermsg: %s: iface %s, from %s" , "missing from/ts/type" , iface , (from? from : "")); cl_log_message(LOG_ERR, msg); return; } if (cseq != NULL) { if (sscanf(cseq, "%lx", &seqno) <= 0) { cl_log(LOG_ERR , "process_clustermsg: %s: iface %s, from %s" , "has bad cseq" , iface , (from? from : "")); cl_log_message(LOG_ERR, msg); return; } }else{ seqno = 0L; if (strncmp(type, NOSEQ_PREFIX, STRLEN_CONST(NOSEQ_PREFIX)) != 0) { cl_log(LOG_ERR , "process_clustermsg: %s: iface %s, from %s" , "missing seqno" , iface , (from? from : "")); cl_log_message(LOG_ERR, msg); return; } } if (sscanf(ts, TIME_X, &msgtime) != 1 || ts == 0 || msgtime == 0) { return; } thisnode = lookup_tables(from, &fromuuid); if (thisnode == NULL) { if (config->rtjoinconfig == HB_JOIN_NONE) { /* If a node isn't in our config - whine */ cl_log(LOG_ERR , "process_status_message: bad node [%s] in message" , from); cl_log_message(LOG_ERR, msg); return; }else{ /* If a node isn't in our config, then add it... */ cl_log(LOG_INFO , "%s: Adding new node [%s] to configuration." , __FUNCTION__, from); add_node(from, NORMALNODE_I); thisnode = lookup_node(from); if (thisnode == NULL) { return; } /* * Suppress status updates to our clients until we * hear the second heartbeat from the new node. * * We've already updated the node table and we will * report its status if asked... * * This may eliminate an extra round of the membership * protocol. 
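			 * (The first, suppressed status message is stashed
			 * in saved_status_msg by HBDoMsg_T_STATUS() and
			 * delivered as soon as the second one arrives.)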
*/ thisnode->status_suppressed = TRUE; update_tables(from, &fromuuid); G_main_set_trigger(write_hostcachefile); return; } } /* Throw away some incoming packets if testing is enabled */ if (TESTRCV) { if (thisnode != curnode && TestRand(rcv_loss_prob)) { char* match = strstr(TestOpts->allow_nodes,from); if ( NULL == match || ';' != *(match+strlen(from)) ) { return; } } } thisnode->anypacketsyet = 1; lnk = lookup_iface(thisnode, iface); /* Is this message a duplicate, or destined for someone else? */ action=should_drop_message(thisnode, msg, iface, &missing_packet); switch (action) { case DROPIT: /* Ignore it */ heartbeat_monitor(msg, action, iface); return; case DUPLICATE: heartbeat_monitor(msg, action, iface); /* fall through */ case KEEPIT: /* Even though it's a DUP, it could update link status*/ if (lnk) { lnk->lastupdate = messagetime; /* Is this from a link which was down? */ if (strcasecmp(lnk->status, LINKUP) != 0) { change_link_status(thisnode, lnk , LINKUP); } } if (action == DUPLICATE) { return; } break; } thisnode->track.last_iface = iface; if (HBDoMsgCallback(type, thisnode, msgtime, seqno, iface, msg)) { /* See if our comm channels are working yet... */ if (heartbeat_comm_state != COMM_LINKSUP) { check_comm_isup(); } }else{ /* Not a message anyone wants (yet) */ if (heartbeat_comm_state != COMM_LINKSUP) { check_comm_isup(); /* Make sure we don't lose this one message... */ if (heartbeat_comm_state == COMM_LINKSUP) { /* Someone may have registered for this one */ if (!HBDoMsgCallback(type, thisnode, msgtime , seqno, iface,msg)) { heartbeat_monitor(msg, action, iface); } } }else{ heartbeat_monitor(msg, action, iface); } } /* if this packet is a missing packet, * need look at * client status message list to see * if we can deliver any */ if (missing_packet){ update_client_status_msg_list(thisnode); } } void check_auth_change(struct sys_config *conf) { if (conf->rereadauth) { return_to_orig_privs(); /* parse_authfile() resets 'rereadauth' */ if (parse_authfile() != HA_OK) { /* OOPS. Sayonara. */ cl_log(LOG_ERR , "Authentication reparsing error, exiting."); hb_initiate_shutdown(FALSE); cleanexit(1); } return_to_dropped_privs(); conf->rereadauth = FALSE; } } static int hb_compute_authentication(int authindex, const void * data, size_t datalen , char * authstr, size_t authlen) { struct HBAuthOps * at; check_auth_change(config); if (authindex < 0) { authindex = config->authnum; } if (authindex < 0 || authindex >= MAXAUTH || ((at = config->auth_config[authindex].auth)) == NULL) { return HA_FAIL; } if (!at->auth(config->authmethod, data, datalen, authstr, authlen)) { ha_log(LOG_ERR , "Cannot compute message auth string [%s/%s/%s]" , config->authmethod->authname , config->authmethod->key , (const char *)data); return -2; } return authindex; } /*********************************************************************** * Track the core heartbeat processes ***********************************************************************/ static const char * CoreProcessName(ProcTrack* p) { /* This is perfectly safe - procindex is a small int */ int procindex = POINTER_TO_SIZE_T(p->privatedata);/*pointer cast as int*/ volatile struct process_info * pi = procinfo->info+procindex; return (pi ? 
core_proc_name(pi->type) : "Core heartbeat process"); } /* Log things about registered core processes */ static void CoreProcessRegistered(ProcTrack* p) { int procindex = POINTER_TO_SIZE_T(p->privatedata);/*pointer cast as int*/ ++CoreProcessCount; if (procindex < 0 || procindex >= MAXPROCS) { cl_log(LOG_ERR, "%s: invalid procindex [%d]", __FUNCTION__, procindex); return; } if (p->pid > 0) { processes[procindex] = p->pid; procinfo->info[procindex].pstat = FORKED; procinfo->info[procindex].pid = p->pid; } } static gboolean restart_comm_medium(gpointer data) { int medianum = POINTER_TO_SIZE_T(data);/*pointer cast as int*/ struct hb_media* mp; if (medianum < 0 || medianum >= MAXMEDIA || (mp=sysmedia[medianum]) == NULL || (mp->recovery_state != MEDIA_DELAYEDRECOVERY)) { cl_log(LOG_ERR, "%s: media index is invalid [%d]" , __FUNCTION__, medianum); cause_shutdown_restart(); return FALSE; } mp->recovery_state = MEDIA_OK; if (make_io_childpair(medianum, mp->ourproc) == HA_OK) { /* We succeeded. Stop repeating. */ return FALSE; } return TRUE; } /* Handle the death of a core heartbeat process */ static void CoreProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged) { int procindex = POINTER_TO_SIZE_T(p->privatedata);/*pointer cast as int*/ volatile struct process_info * pi = procinfo->info+procindex; -- CoreProcessCount; pi->pstat = PROCDEAD; if (shutdown_in_progress) { p->privatedata = NULL; cl_log(LOG_INFO,"Core process %d exited. %d remaining" , (int) p->pid, CoreProcessCount); if (CoreProcessCount <= 1) { cl_log(LOG_INFO,"%s Heartbeat shutdown complete.", localnodename); if (procinfo->restart_after_shutdown) { cl_log(LOG_INFO , "Heartbeat restart triggered."); restart_heartbeat(); } cl_flush_logs(); cleanexit(0); } return; } /* Was it the fifo process that died? */ if (pi->type == PROC_HBFIFO) { p->privatedata = NULL; cl_log(LOG_WARNING , "Restarting %s process.", core_proc_name(pi->type)); if (SetupFifoChild() != HA_OK) { cl_log(LOG_ERR, "%s restart failed. Restarting heartbeat." , core_proc_name(pi->type)); goto restart; } return; /* Was it an I/O child that died? */ }else if (pi->type == PROC_HBREAD || pi->type == PROC_HBWRITE) { int medianum = pi->medianum; struct hb_media* mp; p->privatedata = NULL; if (medianum < 0 || medianum >= MAXMEDIA || (mp=sysmedia[medianum]) == NULL) { cl_log(LOG_ERR, "%s: media index is invalid [%d]" , __FUNCTION__, medianum); goto restart; } switch(mp->recovery_state) { case MEDIA_DELAYEDRECOVERY: return; case MEDIA_OK: cl_log(LOG_ERR, "%s process died. Beginning" " communications restart process for comm channel %d." , core_proc_name(pi->type), medianum); shutdown_io_childpair(medianum); mp->recovery_state = MEDIA_INRECOVERY; return; case MEDIA_INRECOVERY: cl_log(LOG_ERR , "Both comm processes for channel %d have died" ". Restarting." , medianum); mp->recovery_state = MEDIA_OK; if (make_io_childpair(medianum,mp->ourproc)!=HA_OK) { mp->recovery_state = MEDIA_DELAYEDRECOVERY; cl_log(LOG_ERR , "Communications restart failed" ". Will try again later."); Gmain_timeout_add(10000, restart_comm_medium , GINT_TO_POINTER(medianum)); goto restart; } cl_log(LOG_INFO, "Communications restart succeeded."); return; } } restart: /* UhOh... */ cl_log(LOG_ERR , "Core heartbeat process %s (pid %d) died! Restarting." , CoreProcessName(p), p->pid); cause_shutdown_restart(); p->privatedata = NULL; return; } /*********************************************************************** * Track our managed child processes... 
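 * (These are the managed client children from the configuration, started
 * by start_a_child_client() and respawned by ManagedChildDied() unless
 * they exit with code 100 or a shutdown is in progress.)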
***********************************************************************/ static void ManagedChildRegistered(ProcTrack* p) { struct client_child* managedchild = p->privatedata; managed_child_count++; managedchild->pid = p->pid; managedchild->proctrack = p; } /* Handle the death of one of our managed child processes */ static void ManagedChildDied(ProcTrack* p, int status, int signo, int exitcode , int waslogged) { struct client_child* managedchild = p->privatedata; /*remove the child from API client table*/ api_remove_client_pid(p->pid, "died"); managedchild->pid = 0; managedchild->proctrack = NULL; managed_child_count --; /* Log anything out of the ordinary... */ if ((!shutdown_in_progress && !waslogged) || (ANYDEBUG)) { if (0 != exitcode) { cl_log(shutdown_in_progress ? LOG_DEBUG : LOG_ERR , "Client %s exited with return code %d." , managedchild->command , exitcode); } if (0 != signo) { cl_log(shutdown_in_progress ? LOG_DEBUG : LOG_ERR , "Client %s (pid=%d) killed by signal %d." , managedchild->command , (int)p->pid , signo); } } if (managedchild->rebootifitdies) { if (signo != 0 || ((exitcode != 0 && !shutdown_in_progress))) { /* Fail fast and safe - reboot this machine. * I'm not 100% sure whether we should do this for all * exits outside of shutdown intervals, but it's * clear that we should reboot in case of abnormal * exits... */ cl_reboot(config->heartbeat_ms, managedchild->command); } } /* If they exit 100 we won't restart them */ if (managedchild->respawn && !shutdown_in_progress && exitcode != 100) { longclock_t now = time_longclock(); longclock_t minticks = msto_longclock(30000); longclock_t shorttime = add_longclock(p->startticks, minticks); ++managedchild->respawncount; if (cmp_longclock(now, shorttime) < 0) { ++managedchild->shortrcount; }else{ managedchild->shortrcount = 0; } if (managedchild->shortrcount > 10) { cl_log(LOG_ERR , "Client %s \"%s\"" , managedchild->command , "respawning too fast"); managedchild->shortrcount = 0; }else{ cl_log(LOG_ERR , "Respawning client \"%s\":" , managedchild->command); start_a_child_client(managedchild, NULL); } } p->privatedata = NULL; if (shutdown_in_progress) { if (g_list_find(config->client_list, managedchild) != config->last_client){ /* Child died prematurely, ignore it and return */ if (ANYDEBUG) { cl_log(LOG_DEBUG , "client \"%s\" died early during" " shutdown." , managedchild->command); } return; } config->last_client = config->last_client->prev; if (!shutdown_last_client_child(SIGTERM)) { if (config->last_client) { cl_log(LOG_ERR , "ManagedChildDied()" ": config->last_client != NULL"); } if (ANYDEBUG) { cl_log(LOG_DEBUG , "Final client \"%s\" died." 
, managedchild->command); } /* Trigger next shutdown phase */ hb_mcp_final_shutdown(NULL); /* phase 1 - */ /* last child died */ } } } /* Handle the death of one of our managed child processes */ static const char * ManagedChildName(ProcTrack* p) { struct client_child* managedchild = p->privatedata; return managedchild->command; } void hb_kill_tracked_process(ProcTrack* p, void * data) { /* This is perfectly safe - procindex is a small int */ int nsig = POINTER_TO_SIZE_T(data); /*pointer cast as int*/ int pid = p->pid; const char * porg; const char * pname; pname = p->ops->proctype(p); if (p->isapgrp) { pid = -p->pid; porg = "process group"; }else{ pid = p->pid; porg = "process"; /* We never signal ourselves */ if (pid == getpid()) { return; } } cl_log(LOG_INFO, "killing %s %s %d with signal %d", pname, porg , (int) p->pid, nsig); /* Suppress logging this process' death */ p->loglevel = PT_LOGNONE; return_to_orig_privs(); CL_KILL(pid, nsig); return_to_dropped_privs(); } static void print_a_child_client(gpointer childentry, gpointer unused) { struct client_child* centry = childentry; if (centry->proctrack) { cl_log(LOG_DEBUG , "RUNNING Child client \"%s\" (%d,%d) pid %d" , centry->command, (int) centry->u_runas , (int) centry->g_runas , centry->pid); }else{ cl_log(LOG_DEBUG , "Idle Child client \"%s\" (%d,%d)" , centry->command, (int) centry->u_runas , (int) centry->g_runas); } } static void start_a_child_client(gpointer childentry, gpointer dummy) { struct client_child* centry = childentry; pid_t pid; struct passwd* pwent; cl_log(LOG_INFO, "Starting child client \"%s\" (%d,%d)" , centry->command, (int) centry->u_runas , (int) centry->g_runas); if (centry->pid != 0) { cl_log(LOG_ERR, "OOPS! client %s already running as pid %d" , centry->command, (int) centry->pid); } /* * We need to ensure that the exec will succeed before * we bother forking. We don't want to respawn something that * won't exec in the first place. 
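	 * An access(2) check for existence and execute permission is a cheap
	 * way to catch the common misconfigurations up front.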
*/ if (access(centry->path, F_OK|X_OK) < 0) { cl_perror("Cannot exec %s", centry->command); return; } hb_add_deadtime(2000); /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: cl_log(LOG_ERR , "start_a_child_client: Cannot fork."); return; default: /* Parent */ NewTrackedProc(pid, 1, PT_LOGVERBOSE , centry, &ManagedChildTrackOps); hb_pop_deadtime(NULL); return; case 0: /* Child */ break; } /* Child process: start the managed child */ hb_setup_child(); setpgid(0,0); /* Limit peak resource usage, maximize success chances */ if (centry->shortrcount > 0) { alarm(0); sleep(1); } cl_log(LOG_INFO, "Starting \"%s\" as uid %d gid %d (pid %d)" , centry->command, (int) centry->u_runas , (int) centry->g_runas, (int) getpid()); if ( (pwent = getpwuid(centry->u_runas)) == NULL || initgroups(pwent->pw_name, centry->g_runas) < 0 || setgid(centry->g_runas) < 0 || setuid(centry->u_runas) < 0 || CL_SIGINTERRUPT(SIGALRM, 0) < 0) { cl_perror("Cannot setup child process %s" , centry->command); }else{ const char * devnull = "/dev/null"; unsigned int j; struct rlimit oflimits; char *cmdexec = NULL; size_t cmdsize; #define CMDPREFIX "exec " CL_SIGNAL(SIGCHLD, SIG_DFL); alarm(0); CL_IGNORE_SIG(SIGALRM); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=0; j < oflimits.rlim_cur; ++j) { close(j); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ cmdsize = STRLEN_CONST(CMDPREFIX)+strlen(centry->command)+1; cmdexec = malloc(cmdsize); if (cmdexec != NULL) { strlcpy(cmdexec, CMDPREFIX, cmdsize); strlcat(cmdexec, centry->command, cmdsize); (void)execl("/bin/sh", "sh", "-c", cmdexec , (const char *)NULL); } /* Should not happen */ cl_perror("Cannot exec %s", centry->command); } /* Suppress respawning */ exit(100); } static gboolean /* return TRUE if any child was signalled */ shutdown_last_client_child(int nsig) { GList* last; struct client_child* lastclient; if (NULL == (last = config->last_client)) { return FALSE; } lastclient = last->data; if (lastclient) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "Shutting down client %s" , lastclient->command); } lastclient->respawn = FALSE; if (lastclient->proctrack) { hb_kill_tracked_process(lastclient->proctrack , GINT_TO_POINTER(nsig)); return TRUE; } cl_log(LOG_INFO, "client [%s] is not running." , lastclient->command); }else{ cl_log(LOG_ERR, "shutdown_last_clent_child(NULL client)"); } if (ANYDEBUG) { cl_log(LOG_DEBUG, "shutdown_last_client_child: Try next one."); } /* OOPS! Couldn't kill a process this time... Try the next one... */ config->last_client = config->last_client->prev; return shutdown_last_client_child(nsig); } static const char * core_proc_name(enum process_type t) { const char * ct = "huh?"; switch(t) { case PROC_UNDEF: ct = "UNDEF"; break; case PROC_MST_CONTROL: ct = "MST_CONTROL"; break; case PROC_HBREAD: ct = "HBREAD"; break; case PROC_HBWRITE: ct = "HBWRITE"; break; case PROC_HBFIFO: ct = "HBFIFO"; break; case PROC_PPP: ct = "PPP"; break; default: ct = "core process??"; break; } return ct; } /* * Restart heartbeat - we never return from this... */ static void restart_heartbeat(void) { unsigned int j; struct rlimit oflimits; int quickrestart; shutdown_in_progress = 1; cl_make_normaltime(); return_to_orig_privs(); /* Remain privileged 'til the end */ cl_log(LOG_INFO, "Restarting heartbeat."); /* THIS IS RESOURCE WORK! FIXME */ quickrestart = (procinfo->giveup_resources ? 
FALSE : TRUE); cl_log(LOG_INFO, "Performing heartbeat restart exec."); hb_close_watchdog(); getrlimit(RLIMIT_NOFILE, &oflimits); for (j=3; j < oflimits.rlim_cur; ++j) { close(j); } if (quickrestart) { /* THIS IS RESOURCE WORK! FIXME */ if (nice_failback) { cl_log(LOG_INFO, "Current resources: -R -C %s" , decode_resources(procinfo->i_hold_resources)); execl(HA_LIBHBDIR "/heartbeat", "heartbeat", "-R" , "-C" , decode_resources(procinfo->i_hold_resources) , (const char *)NULL); }else{ execl(HA_LIBHBDIR "/heartbeat", "heartbeat", "-R" , (const char *)NULL); } }else{ /* Make sure they notice we're dead */ sleep((config->deadtime_ms+999)/1000+1); /* "Normal" restart (not quick) */ cl_unlock_pidfile(PIDFILE); execl(HA_LIBHBDIR "/heartbeat", "heartbeat", (const char *)NULL); } cl_log(LOG_ERR, "Could not exec " HA_LIBHBDIR "/heartbeat"); cl_log(LOG_ERR, "Shutting down..."); hb_emergency_shutdown(); } /* See if any nodes or links have timed out */ static void check_for_timeouts(void) { longclock_t now = time_longclock(); struct node_info * hip; longclock_t dead_ticks; longclock_t TooOld = msto_longclock(0); int j; for (j=0; j < config->nodecount; ++j) { hip= &config->nodes[j]; if (heartbeat_comm_state != COMM_LINKSUP) { /* * Compute alternative dead_ticks value for very first * dead interval. * * We do this because for some unknown reason * sometimes the network is slow to start working. * Experience indicates that 30 seconds is generally * enough. It would be nice to have a better way to * detect that the network isn't really working, but * I don't know any easy way. * Patches are being accepted ;-) */ dead_ticks = msto_longclock(config->initial_deadtime_ms); }else{ dead_ticks = hip->dead_ticks; } if (cmp_longclock(now, dead_ticks) <= 0) { TooOld = zero_longclock; }else{ TooOld = sub_longclock(now, dead_ticks); } /* If it's recently updated, or already dead, ignore it */ if (cmp_longclock(hip->local_lastupdate, TooOld) >= 0 || strcmp(hip->status, DEADSTATUS) == 0 ) { continue; } mark_node_dead(hip); } /* Check all links status of all nodes */ for (j=0; j < config->nodecount; ++j) { struct link * lnk; int i; hip = &config->nodes[j]; if (hip == curnode) { continue; } for (i=0; (lnk = &hip->links[i], lnk->name); i++) { if (lnk->lastupdate > now) { lnk->lastupdate = 0L; } if (cmp_longclock(lnk->lastupdate, TooOld) >= 0 || strcmp(lnk->status, DEADSTATUS) == 0 ) { continue; } change_link_status(hip, lnk, DEADSTATUS); } } } /* * Pick a machine, and ask it what the current ha.cf configuration is. * This is needed because of autojoin and also because of addnode/delnode * commands * * We allow dynamic node configuration even if autojoin is disabled. In that * case you need to use the addnode and delnode commands to update the * configuration. 
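 * send_reqnodes_msg() below retries roughly once a second, moving on to the
 * next live normal node, until a T_REPNODES reply sets get_reqnodes_reply
 * (or no candidate node is left, in which case comm is declared up
 * immediately).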
*/ static gboolean send_reqnodes_msg(gpointer data){ struct ha_msg* msg; const char* destnode = NULL; unsigned long i; unsigned long startindex = POINTER_TO_ULONG(data); guint id; if (get_reqnodes_reply){ return FALSE; } if (startindex >= config->nodecount){ startindex = 0; } for (i = startindex; i< config->nodecount; i++){ if (STRNCMP_CONST(config->nodes[i].status, DEADSTATUS) != 0 && (&config->nodes[i]) != curnode && config->nodes[i].nodetype == NORMALNODE_I){ destnode = config->nodes[i].nodename; break; } } if (destnode == NULL){ get_reqnodes_reply = TRUE; comm_now_up(); return FALSE; } msg = ha_msg_new(0); if (msg == NULL){ cl_log(LOG_ERR, "%s: creating msg failed", __FUNCTION__); return FALSE; } if (ANYDEBUG){ cl_log(LOG_DEBUG, "sending reqnodes msg to node %s", destnode); } if (ha_msg_add(msg, F_TYPE, T_REQNODES) != HA_OK || ha_msg_add(msg, F_TO, destnode)!= HA_OK){ cl_log(LOG_ERR, "%s: Adding filed failed", __FUNCTION__); ha_msg_del(msg); return FALSE; } send_cluster_msg(msg); id = Gmain_timeout_add(1000, send_reqnodes_msg, (gpointer)i); G_main_setall_id(id, "send_reqnodes_msg", config->heartbeat_ms, 100); return FALSE; } static void check_comm_isup(void) { struct node_info * hip; int j; int heardfromcount = 0; if (heartbeat_comm_state == COMM_LINKSUP) { return; } if (config->rtjoinconfig != HB_JOIN_NONE && !init_deadtime_passed){ return; } for (j=0; j < config->nodecount; ++j) { hip= &config->nodes[j]; if (hip->anypacketsyet || strcmp(hip->status, DEADSTATUS) ==0){ ++heardfromcount; } } if (heardfromcount >= config->nodecount) { heartbeat_comm_state = COMM_LINKSUP; if (enable_flow_control){ send_reqnodes_msg(0); }else{ /*we have a mixed version of heartbeats *Disable request/reply node list feature and mark comm up now */ comm_now_up(); } } } /* Set our local status to the given value, and send it out */ static int set_local_status(const char * newstatus) { if (strcmp(newstatus, curnode->status) != 0 && strlen(newstatus) > 1 && strlen(newstatus) < STATUSLENG) { /* * We can't do this because of conflicts between the two * paths the updates otherwise arrive through... * (Is this still true? ? ?) */ strncpy(curnode->status, newstatus, sizeof(curnode->status)); send_local_status(); cl_log(LOG_INFO, "Local status now set to: '%s'", newstatus); return HA_OK; } cl_log(LOG_INFO, "Unable to set local status to: %s", newstatus); return HA_FAIL; } /* * send_cluster_msg: sends out a message to the cluster * First we add some necessary fields to the message, then * we "send it out" via process_outbound_packet. * * send_cluster_msg disposes of the message * */ int send_cluster_msg(struct ha_msg* msg) { const char * type; int rc = HA_OK; pid_t ourpid = getpid(); send_cluster_msg_level ++; if (msg == NULL || (type = ha_msg_value(msg, F_TYPE)) == NULL) { cl_perror("Invalid message in send_cluster_msg"); if (msg != NULL) { ha_msg_del(msg); } rc = HA_FAIL; goto out; } /* * Only the parent process can send messages directly to the cluster. * * Everyone else needs to write to the FIFO instead. * Sometimes we get called from the parent process, and sometimes * from child processes. */ if (ourpid == processes[0]) { /* Parent process... 
Write message directly */ if ((msg = add_control_msg_fields(msg)) != NULL) { rc = process_outbound_packet(&msghist, msg); } }else{ /* We're a child process - copy it to the FIFO */ int ffd = -1; char * smsg = NULL; int needprivs = !cl_have_full_privs(); size_t len; ssize_t writerc = -2; if (needprivs) { return_to_orig_privs(); } if (DEBUGDETAILS) { cl_log(LOG_INFO, "Writing type [%s] message to FIFO" , type); } /* * Convert the message to a string, and write it to the FIFO * It will then get written to the cluster properly. */ if ((smsg = msg2wirefmt_noac(msg, &len)) == NULL) { cl_log(LOG_ERR , "send_cluster_msg: cannot convert" " message to wire format (pid %d)", (int)getpid()); rc = HA_FAIL; }else if ((ffd = open(FIFONAME,O_WRONLY|O_APPEND)) < 0) { cl_perror("send_cluster_msg: cannot open " FIFONAME); rc = HA_FAIL; }else if ((writerc=write(ffd, smsg, len-1)) != (ssize_t)(len -1)){ cl_perror("send_cluster_msg: cannot write to " FIFONAME " [rc = %d]", (int)writerc); cl_log_message(LOG_ERR, msg); rc = HA_FAIL; } if (smsg) { if (ANYDEBUG) { /* FIXME - ANYDEBUG! */ cl_log(LOG_INFO , "FIFO message [type %s] written rc=%ld" , type, (long) writerc); } free(smsg); } if (ffd > 0) { if (close(ffd) < 0) { cl_perror("%s close failure", FIFONAME); } } /* Dispose of the original message */ ha_msg_del(msg); if (needprivs) { return_to_dropped_privs(); } } out: send_cluster_msg_level --; return rc; } /* Send our local status out to the cluster */ static int send_local_status() { struct ha_msg * m; int rc; char deadtime[64]; long cur_deadtime; if (DEBUGDETAILS){ cl_log(LOG_DEBUG, "PID %d: Sending local status" " curnode = %lx status: %s" , (int) getpid(), (unsigned long)curnode , curnode->status); } if ((m=ha_msg_new(0)) == NULL) { cl_log(LOG_ERR, "Cannot send local status."); return HA_FAIL; } cur_deadtime = longclockto_ms(curnode->dead_ticks); snprintf(deadtime, sizeof(deadtime), "%lx", cur_deadtime); if (ha_msg_add(m, F_TYPE, T_STATUS) != HA_OK || ha_msg_add(m, F_STATUS, curnode->status) != HA_OK || ha_msg_add(m, F_DT, deadtime) != HA_OK) { cl_log(LOG_ERR, "send_local_status: " "Cannot create local status msg"); rc = HA_FAIL; ha_msg_del(m); }else{ if (enable_flow_control && ha_msg_add_int(m, F_PROTOCOL, PROTOCOL_VERSION) != HA_OK){ cl_log(LOG_ERR, "send_local_status: " "Adding protocol number failed"); } rc = send_cluster_msg(m); } return rc; } gboolean hb_send_local_status(gpointer p) { if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "hb_send_local_status() {"); } send_local_status(); if (DEBUGDETAILS) { cl_log(LOG_DEBUG, "}/*hb_send_local_status*/;"); } return TRUE; } static gboolean set_init_deadtime_passed_flag(gpointer p) { init_deadtime_passed =TRUE; return FALSE; } static gboolean hb_update_cpu_limit(gpointer p) { cl_cpu_limit_update(); return TRUE; } static gboolean EmergencyShutdown(gpointer p) { hb_emergency_shutdown(); return TRUE; /* Shouldn't get called twice, but... 
*/ } /* Mark the given link dead */ static void change_link_status(struct node_info *hip, struct link *lnk , const char * newstat) { struct ha_msg * lmsg; if ((lmsg = ha_msg_new(8)) == NULL) { cl_log(LOG_ERR, "no memory to mark link dead"); return; } strncpy(lnk->status, newstat, sizeof(lnk->status)); cl_log(LOG_INFO, "Link %s:%s %s.", hip->nodename , lnk->name, lnk->status); if ( ha_msg_add(lmsg, F_TYPE, T_IFSTATUS) != HA_OK || ha_msg_add(lmsg, F_NODE, hip->nodename) != HA_OK || ha_msg_add(lmsg, F_IFNAME, lnk->name) != HA_OK || ha_msg_add(lmsg, F_STATUS, lnk->status) != HA_OK) { cl_log(LOG_ERR, "no memory to change link status"); ha_msg_del(lmsg); return; } heartbeat_monitor(lmsg, KEEPIT, ""); QueueRemoteRscReq(PerformQueuedNotifyWorld, lmsg); ha_msg_del(lmsg); lmsg = NULL; } /* Mark the given node dead */ static void mark_node_dead(struct node_info *hip) { cl_log(LOG_WARNING, "node %s: is dead", hip->nodename); if (hip == curnode) { /* Uh, oh... we're dead! */ cl_log(LOG_ERR, "No local heartbeat. Forcing restart."); cl_log(LOG_INFO, "See URL: %s" , HAURL("FAQ#No_Local_Heartbeat")); if (!shutdown_in_progress) { cause_shutdown_restart(); } return; } if (hip->nodetype == NORMALNODE_I && STRNCMP_CONST(hip->status, DEADSTATUS) != 0 && STRNCMP_CONST(hip->status, INITSTATUS) != 0) { --live_node_count; } strncpy(hip->status, DEADSTATUS, sizeof(hip->status)); /* THIS IS RESOURCE WORK! FIXME */ hb_rsc_recover_dead_resources(hip); hip->rmt_lastupdate = 0L; hip->anypacketsyet = 0; hip->track.nmissing = 0; hip->track.last_seq = NOSEQUENCE; hip->track.ackseq = 0; } static gboolean CauseShutdownRestart(gpointer p) { cause_shutdown_restart(); return FALSE; } static void cause_shutdown_restart() { /* Give up our resources, and restart ourselves */ /* This is cleaner than lots of other options. */ /* And, it really should work every time... :-) */ procinfo->restart_after_shutdown = 1; /* THIS IS RESOURCE WORK! FIXME */ procinfo->giveup_resources = 1; hb_giveup_resources(); /* Do something more drastic in 60 minutes */ Gmain_timeout_add(1000*60*60, EmergencyShutdown, NULL); } /* * Values of msgtype: * KEEPIT * DROPIT * DUPLICATE */ void heartbeat_monitor(struct ha_msg * msg, int msgtype, const char * iface) { api_heartbeat_monitor(msg, msgtype, iface); } extern const char *get_hg_version(void); static void printversion(void) { printf("%s (%s)\n", VERSION, get_hg_version()); return; } /* * Print our usage statement. */ static void usage(void) { const char * optionargs = OPTARGS; const char * thislet; fprintf(stderr, "\nUsage: %s [-", cmdname); for (thislet=optionargs; *thislet; ++thislet) { if (thislet[0] != ':' && thislet[1] != ':') { fputc(*thislet, stderr); } } fputc(']', stderr); for (thislet=optionargs; *thislet; ++thislet) { if (thislet[1] == ':') { const char * desc = "unknown-flag-argument"; /* THIS IS RESOURCE WORK! FIXME */ /* Put a switch statement here eventually... 
*/ switch(thislet[0]) { case 'C': desc = "Current-resource-state"; break; } fprintf(stderr, " [-%c %s]", *thislet, desc); } } fprintf(stderr, "\n"); fprintf(stderr, "\t-C only valid with -R\n"); fprintf(stderr, "\t-r is mutually exclusive with -R\n"); cleanexit(LSB_EXIT_EINVAL); } int main(int argc, char * argv[], char **envp) { int flag; unsigned j; struct rlimit oflimits; int argerrs = 0; char * CurrentStatus=NULL; char * tmp_cmdname; long running_hb_pid = cl_read_pidfile(PIDFILE); int generic_error = LSB_EXIT_GENERIC; num_hb_media_types = 0; /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=FD_STDERR+1; j < oflimits.rlim_cur; ++j) { close(j); } /* Redirect messages from glib functions to our handler */ g_log_set_handler(NULL , G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG | G_LOG_FLAG_RECURSION | G_LOG_FLAG_FATAL , cl_glib_msg_handler, NULL); cl_log_enable_stderr(TRUE); /* Weird enum (bitfield) */ g_log_set_always_fatal((GLogLevelFlags)0); /*value out of range*/ if ((tmp_cmdname = strdup(argv[0])) == NULL) { cl_perror("Out of memory in main."); exit(1); } if ((cmdname = strrchr(tmp_cmdname, '/')) != NULL) { ++cmdname; }else{ cmdname = tmp_cmdname; } cl_log_set_entity(cmdname); if (module_init() != HA_OK) { cl_log(LOG_ERR, "Heartbeat not started: module init error."); cleanexit(generic_error); } init_procinfo(); cl_set_oldmsgauthfunc(isauthentic); cl_set_authentication_computation_method(hb_compute_authentication); Argc = argc; while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'C': /* THIS IS RESOURCE WORK! FIXME */ CurrentStatus = optarg; procinfo->i_hold_resources = encode_resources(CurrentStatus); if (ANYDEBUG) { cl_log(LOG_DEBUG , "Initializing resource state to %s" , decode_resources(procinfo->i_hold_resources)); } break; case 'd': ++debug_level; break; case 'D': ++PrintDefaults; break; case 'k': ++killrunninghb; break; case 'M': DoManageResources=0; break; case 'r': ++RestartRequested; break; case 'R': ++WeAreRestarting; cl_log_enable_stderr(FALSE); break; case 's': ++rpt_hb_status; generic_error = LSB_STATUS_UNKNOWN; break; case 'l': cl_disable_realtime(); break; case 'v': verbose=TRUE; break; case 'V': printversion(); cleanexit(LSB_EXIT_OK); case 'W': ++WikiOutput; break; default: ++argerrs; break; } } if (optind > argc) { ++argerrs; } if (argerrs || (CurrentStatus && !WeAreRestarting)) { usage(); } if (PrintDefaults) { dump_default_config(WikiOutput); cleanexit(LSB_EXIT_OK); } get_localnodeinfo(); SetParameterValue(KEY_HBVERSION, VERSION); /* Default message handling... 
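 * Each cluster message type is dispatched through a handler registered
 * with hb_register_msg_callback(); handlers use the HBmsgcallback
 * signature declared in heartbeat_private.h.  As a rough illustration
 * only (T_EXAMPLE and HBDoMsg_T_EXAMPLE are made-up names, not part of
 * heartbeat):
 *
 *     static void
 *     HBDoMsg_T_EXAMPLE(const char *type, struct node_info *fromnode,
 *         TIME_T msgtime, seqno_t seqno, const char *iface,
 *         struct ha_msg *msg)
 *     {
 *             cl_log(LOG_DEBUG, "%s from %s", type, fromnode->nodename);
 *     }
 *     ...
 *     hb_register_msg_callback(T_EXAMPLE, HBDoMsg_T_EXAMPLE);
 *
 * The real registrations follow: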
*/ hb_register_msg_callback(T_REXMIT, HBDoMsg_T_REXMIT); hb_register_msg_callback(T_STATUS, HBDoMsg_T_STATUS); hb_register_msg_callback(T_NS_STATUS, HBDoMsg_T_STATUS); hb_register_msg_callback(T_QCSTATUS, HBDoMsg_T_QCSTATUS); hb_register_msg_callback(T_ACKMSG, HBDoMsg_T_ACKMSG); hb_register_msg_callback(T_ADDNODE, HBDoMsg_T_ADDNODE); hb_register_msg_callback(T_SETWEIGHT, HBDoMsg_T_SETWEIGHT); hb_register_msg_callback(T_SETSITE, HBDoMsg_T_SETSITE); hb_register_msg_callback(T_DELNODE, HBDoMsg_T_DELNODE); hb_register_msg_callback(T_REQNODES, HBDoMsg_T_REQNODES); hb_register_msg_callback(T_REPNODES, HBDoMsg_T_REPNODES); if (init_set_proc_title(argc, argv, envp) < 0) { cl_log(LOG_ERR, "Allocation of proc title failed."); cleanexit(generic_error); } set_proc_title("%s", cmdname); hbmedia_types = malloc(sizeof(struct hbmedia_types **)); if (hbmedia_types == NULL) { cl_log(LOG_ERR, "Allocation of hbmedia_types failed."); cleanexit(generic_error); } if (debug_level > 0) { static char cdebug[8]; snprintf(cdebug, sizeof(debug_level), "%d", debug_level); setenv(HADEBUGVAL, cdebug, TRUE); } /* * We've been asked to shut down the currently running heartbeat * process */ if (killrunninghb) { int err; if (running_hb_pid < 0) { fprintf(stderr , "INFO: Heartbeat already stopped.\n"); cleanexit(LSB_EXIT_OK); } if (CL_KILL((pid_t)running_hb_pid, SIGTERM) >= 0) { /* Wait for the running heartbeat to die */ alarm(0); do { sleep(1); continue; }while (CL_KILL((pid_t)running_hb_pid, 0) >= 0); cleanexit(LSB_EXIT_OK); } err = errno; fprintf(stderr, "ERROR: Could not kill pid %ld", running_hb_pid); perror(" "); cleanexit((err == EPERM || err == EACCES) ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); } /* * Report status of heartbeat processes, etc. * We report in both Red Hat and SuSE formats... */ if (rpt_hb_status) { if (running_hb_pid < 0) { printf("%s is stopped. No process\n", cmdname); cleanexit(-running_hb_pid); }else{ struct utsname u; if (uname(&u) < 0) { cl_perror("uname(2) call failed"); cleanexit(LSB_EXIT_EPERM); } g_strdown(u.nodename); printf("%s OK [pid %ld et al] is running on %s [%s]...\n" , cmdname, running_hb_pid, u.nodename, localnodename); cleanexit(LSB_STATUS_OK); } /*NOTREACHED*/ } /*init table for nodename/uuid lookup*/ inittable(); /* srand will be done several times at a few other places in the code, * e.g. in gen_uuid_from_name(); * it will be properly initialized once in cl_init_random(). */ srand(time(NULL)); /* * We think we just performed an "exec" of ourselves to restart. */ if (WeAreRestarting) { if (init_config(CONFIG_NAME) != HA_OK /* THIS IS RESOURCE WORK! FIXME */ || ! CHECK_HA_RESOURCES()){ int err = errno; cl_log(LOG_INFO , "Config errors: Heartbeat" " NOT restarted"); cleanexit((err == EPERM || err == EACCES) ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); } if (running_hb_pid < 0) { fprintf(stderr, "ERROR: %s is not running.\n" , cmdname); cleanexit(LSB_EXIT_NOTCONFIGED); } if (running_hb_pid != getpid()) { fprintf(stderr , "ERROR: Heartbeat already running" " [pid %ld].\n" , running_hb_pid); cleanexit(LSB_EXIT_GENERIC); } /* LOTS OF RESOURCE WORK HERE! FIXME */ /* * Nice_failback complicates things a bit here... 
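 * In short, the code below handles four combinations of the current
 * nice_failback setting and the resource state saved across the exec
 * (the -C argument):
 *   nice_failback on,  no -C state:  assume HB_LOCAL_RSC and announce it
 *   nice_failback on,  -C state:     nothing special to do
 *   nice_failback off, no -C state:  nothing special to do
 *   nice_failback off, -C state:     re-request local resources if the
 *                                    saved state says we held them
 *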
* We need to allow for the possibility that the user might * have changed nice_failback options in the config file */ if (CurrentStatus && ANYDEBUG) { cl_log(LOG_INFO, "restart: i_hold_resources = %s" , decode_resources(procinfo->i_hold_resources)); } if (nice_failback) { /* nice_failback is currently ON */ if (CurrentStatus == NULL) { /* From !nice_failback to nice_failback */ procinfo->i_hold_resources = HB_LOCAL_RSC; hb_send_resources_held(TRUE, NULL); cl_log(LOG_INFO , "restart: assuming HB_LOCAL_RSC"); }else{ /* * From nice_failback to nice_failback. * Cool. Nothing special to do. */ } }else{ /* nice_failback is currently OFF */ if (CurrentStatus == NULL) { /* * From !nice_failback to !nice_failback. * Cool. Nothing special to do. */ }else{ /* From nice_failback to not nice_failback */ if ((procinfo->i_hold_resources & HB_LOCAL_RSC)) { /* We expect to have those */ cl_log(LOG_INFO, "restart: acquiring" " local resources."); req_our_resources(0); }else{ cl_log(LOG_INFO, "restart: " " local resources already acquired."); } } } } /* * We've been asked to restart currently running heartbeat * process (or at least get it to reread it's configuration * files) */ if (RestartRequested) { if (running_hb_pid < 0) { goto StartHeartbeat; } errno = 0; if (init_config(CONFIG_NAME) /* THIS IS RESOURCE WORK! FIXME */ && CHECK_HA_RESOURCES()){ cl_log(LOG_INFO , "Signalling heartbeat pid %ld to reread" " config files", running_hb_pid); if (CL_KILL(running_hb_pid, SIGHUP) >= 0) { cleanexit(0); } cl_perror("Unable to send SIGHUP to pid %ld" , running_hb_pid); }else{ int err = errno; cl_log(LOG_INFO , "Config errors: Heartbeat pid %ld" " NOT restarted" , running_hb_pid); cleanexit((err == EPERM || err == EACCES) ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); } cleanexit(LSB_EXIT_GENERIC); } StartHeartbeat: /* We have already initialized configs in case WeAreRestarting. */ if (WeAreRestarting || (init_config(CONFIG_NAME) /* THIS IS RESOURCE WORK! FIXME */ && CHECK_HA_RESOURCES())) { if (ANYDEBUG) { cl_log(LOG_DEBUG , "HA configuration OK. Heartbeat starting."); cl_log(LOG_INFO , "Heartbeat Hg Version: %s", get_hg_version()); } if (verbose) { dump_config(); } make_daemon(); /* Only child processes returns. */ setenv(LOGFENV, config->logfile, 1); setenv(DEBUGFENV, config->dbgfile, 1); if (config->log_facility >= 0) { char facility[40]; snprintf(facility, sizeof(facility) , "%s", config->facilityname); setenv(LOGFACILITY, facility, 1); } ParseTestOpts(); hb_versioninfo(); if (initialize_heartbeat() != HA_OK) { cleanexit((errno == EPERM || errno == EACCES) ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); } }else{ int err = errno; cl_log(LOG_ERR , "Configuration error, heartbeat not started."); cleanexit((err == EPERM || err == EACCES) ? 
LSB_EXIT_EPERM : LSB_EXIT_NOTCONFIGED); } /*NOTREACHED*/ return generic_error; } void cleanexit(rc) int rc; { hb_close_watchdog(); if (localdie) { if (ANYDEBUG) { cl_log(LOG_DEBUG, "Calling localdie() function"); } (*localdie)(); } if (ANYDEBUG) { cl_log(LOG_DEBUG, "Exiting from pid %d [rc=%d]" , (int) getpid(), rc); } if (config && config->log_facility >= 0) { closelog(); } exit(rc); } void hb_emergency_shutdown(void) { cl_make_normaltime(); return_to_orig_privs(); CL_IGNORE_SIG(SIGTERM); cl_log(LOG_CRIT, "Emergency Shutdown: " "Attempting to kill everything ourselves"); CL_KILL(-getpgrp(), SIGTERM); hb_kill_rsc_mgmt_children(SIGKILL); hb_kill_managed_children(SIGKILL); hb_kill_core_children(SIGKILL); sleep(2); CL_KILL(-getpgrp(), SIGKILL); /*NOTREACHED*/ cleanexit(100); } static void hb_check_mcp_alive(void) { pid_t ourpid = getpid(); int j; if (CL_PID_EXISTS(procinfo->info[0].pid)) { return; } return_to_orig_privs(); cl_log(LOG_CRIT, "Emergency Shutdown: Master Control process died."); for (j=0; j < procinfo->nprocs; ++j) { if (procinfo->info[j].pid == ourpid) { continue; } cl_log(LOG_CRIT, "Killing pid %d with SIGTERM" , (int)procinfo->info[j].pid); CL_KILL(procinfo->info[j].pid, SIGTERM); } /* We saved the best for last :-) */ cl_log(LOG_CRIT, "Emergency Shutdown(MCP dead): Killing ourselves."); CL_KILL(ourpid, SIGTERM); } extern pid_t getsid(pid_t); static void make_daemon(void) { long pid; const char * devnull = "/dev/null"; /* See if heartbeat is already running... */ if ((pid=cl_read_pidfile(PIDFILE)) > 0 && pid != getpid()) { cl_log(LOG_INFO, "%s: already running [pid %ld]." , cmdname, pid); exit(LSB_EXIT_OK); } /* Guess not. Go ahead and start things up */ if (!WeAreRestarting) { #if 1 pid = fork(); #else pid = 0; #endif if (pid < 0) { cl_log(LOG_ERR, "%s: could not start daemon\n" , cmdname); cl_perror("fork"); exit(LSB_EXIT_GENERIC); }else if (pid > 0) { exit(LSB_EXIT_OK); } } if ( cl_lock_pidfile(PIDFILE) < 0){ cl_log(LOG_ERR,"%s: could not create pidfile [%s]\n", cmdname, PIDFILE); exit(LSB_EXIT_EPERM); } cl_log_enable_stderr(FALSE); setenv(HADIRENV, HA_HBCONF_DIR, TRUE); setenv(DATEFMT, HA_DATEFMT, TRUE); setenv(HAFUNCENV, HA_FUNCS, TRUE); setenv("OCF_ROOT", OCF_ROOT_DIR, TRUE); umask(022); close(FD_STDIN); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(FD_STDOUT); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(FD_STDERR); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ cl_cdtocoredir(); /* We need to at least ignore SIGINTs early on */ hb_signal_set_common(NULL); if (getsid(0) != pid) { if (setsid() < 0) { cl_perror("setsid() failure."); } } } #define APPHBINSTANCE "master_control_process" static void hb_init_register_with_apphbd(void) { static int failcount = 0; if (!UseApphbd || RegisteredWithApphbd) { return; } if (apphb_register(hbname, APPHBINSTANCE) != 0) { /* Log attempts once an hour or so... */ if ((failcount % 60) == 0) { cl_perror("Unable to register with apphbd."); cl_log(LOG_INFO, "Continuing to try and register."); } ++failcount; return; } RegisteredWithApphbd = TRUE; cl_log(LOG_INFO, "Registered with apphbd as %s/%s." 
, hbname, APPHBINSTANCE); if (apphb_setinterval(config->deadtime_ms) < 0 || apphb_setwarn(config->warntime_ms) < 0) { cl_perror("Unable to setup with apphbd."); apphb_unregister(); RegisteredWithApphbd = FALSE; ++failcount; }else{ failcount = 0; } } static gboolean hb_reregister_with_apphbd(gpointer dummy) { if (UseApphbd) { hb_init_register_with_apphbd(); } return UseApphbd; } static void hb_unregister_from_apphb(void) { if (RegisteredWithApphbd == TRUE ) { UseApphbd = FALSE; apphb_unregister(); } } static void hb_apphb_hb(void) { if (UseApphbd) { if (RegisteredWithApphbd) { if (apphb_hb() < 0) { /* apphb_hb() will fail if apphbd exits */ cl_perror("apphb_hb() failed."); apphb_unregister(); RegisteredWithApphbd = FALSE; } } /* * Our timeout job (hb_reregister_with_apphbd) will * reregister us if we become unregistered somehow... */ } } static void hb_init_watchdog_interval(void) { if (watchdogfd < 0) { return; } if (watchdog_timeout_ms == 0L) { watchdog_timeout_ms = config->deadtime_ms + 10; } #ifdef WDIOC_SETTIMEOUT { int timeout_secs; timeout_secs = (watchdog_timeout_ms+999)/1000; if (ANYDEBUG) { cl_log(LOG_DEBUG, "Set watchdog timer to %d seconds." , timeout_secs); } if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout_secs) < 0) { cl_perror( "WDIOC_SETTIMEOUT" ": Failed to set watchdog timer to %d seconds." , timeout_secs); } } #endif } void hb_init_watchdog(void) { if (watchdogfd < 0 && watchdogdev != NULL) { watchdogfd = open(watchdogdev, O_WRONLY); if (watchdogfd >= 0) { if (fcntl(watchdogfd, F_SETFD, FD_CLOEXEC)) { cl_perror("Error setting the " "close-on-exec flag for watchdog"); } cl_log(LOG_NOTICE, "Using watchdog device: %s" , watchdogdev); hb_init_watchdog_interval(); hb_tickle_watchdog(); }else{ cl_log(LOG_ERR, "Cannot open watchdog device: %s" , watchdogdev); } } if ( UseApphbd == TRUE ) { hb_init_register_with_apphbd(); } } void hb_tickle_watchdog(void) { if (watchdogfd >= 0) { if (write(watchdogfd, "", 1) != 1) { cl_perror("Watchdog write failure: closing %s!" , watchdogdev); hb_close_watchdog(); watchdogfd=-1; } } hb_apphb_hb(); } void hb_close_watchdog(void) { if (watchdogfd >= 0) { if (write(watchdogfd, "V", 1) != 1) { cl_perror( "Watchdog write magic character failure: closing %s!" , watchdogdev); }else{ if (ANYDEBUG) { cl_log(LOG_INFO, "Successful watchdog 'V' write"); } } if (close(watchdogfd) < 0) { cl_perror("Watchdog close(2) failed."); }else{ if (ANYDEBUG) { cl_log(LOG_INFO, "Successful watchdog close"); } } watchdogfd=-1; } if (RegisteredWithApphbd) { hb_unregister_from_apphb(); } } void ha_assert(const char * assertion, int line, const char * file) { cl_log(LOG_ERR, "Assertion \"%s\" failed on line %d in file \"%s\"" , assertion, line, file); cleanexit(1); } /* * Check to see if we should copy this packet further into the ring */ int should_ring_copy_msg(struct ha_msg *m) { const char * us = curnode->nodename; const char * from; /* Originating Node name */ const char * ttl; /* Time to live */ /* Get originator and time to live field values */ if ((from = ha_msg_value(m, F_ORIG)) == NULL || (ttl = ha_msg_value(m, F_TTL)) == NULL) { cl_log(LOG_ERR , "bad packet in should_copy_ring_pkt"); return 0; } /* Is this message from us? */ if (strcmp(from, us) == 0 || ttl == NULL || atoi(ttl) <= 0) { /* Avoid infinite loops... Ignore this message */ return 0; } /* Must be OK */ return 1; } /* * From here to the end is protocol code. It implements our reliable * multicast protocol. * * This protocol is called from master_control_process(). 
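 * A brief sketch of the mechanism, summarizing the functions below:
 * every packet we originate carries a sequence number (F_SEQ) and our
 * generation number (F_HBGENERATION), and is remembered in a transmit
 * history (add2_xmit_hist) of up to MAXMSGHIST entries.  Each receiver
 * keeps a per-node seqtrack; should_drop_message() accepts in-order
 * packets, records gaps and asks the sender to retransmit them
 * (request_msg_rexmit / process_rexmit), and the sender answers with
 * T_NAKREXMIT when a requested packet has already fallen out of its
 * history.  Acknowledgements (hist->ackseq) let the sender pause its
 * clients when more than FLOWCONTROL_LIMIT packets are outstanding.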
*/ static void client_status_msg_queue_cleanup(GList* list) { struct ha_msg* msg; GList* list_runner; if (list == NULL){ return; } while((list_runner = g_list_first(list))!= NULL) { msg = (struct ha_msg*) list_runner->data; if (msg){ ha_msg_del(msg); } list = g_list_delete_link(list, list_runner); } g_list_free(list); return; } static void reset_seqtrack(struct node_info *n) { struct seqtrack *t = &n->track; seqno_t seq; int i; for (i = 0; i < t->nmissing; ++i) { seq = t->seqmissing[i]; if (seq == NOSEQUENCE) continue; remove_msg_rexmit(n, seq); t->seqmissing[i] = NOSEQUENCE; } t->nmissing = 0; t->last_rexmit_req = zero_longclock; t->first_missing_seq = 0; if (t->client_status_msg_queue) { GList* mq = t->client_status_msg_queue; client_status_msg_queue_cleanup(mq); t->client_status_msg_queue = NULL; } } /* * Right now, this function is a little too simple. There is no * provision for sequence number wraparounds. But, it will take a very * long time to wrap around (~ 100 years) * * I suspect that there are better ways to do this, but this will * do for now... */ /* * Should we ignore this packet, or pay attention to it? */ static int should_drop_message(struct node_info * thisnode, const struct ha_msg *msg, const char *iface, int* is_missing_packet) { struct seqtrack * t = &thisnode->track; const char * cseq = ha_msg_value(msg, F_SEQ); const char * to = ha_msg_value(msg, F_TO); cl_uuid_t touuid; const char * from= ha_msg_value(msg, F_ORIG); cl_uuid_t fromuuid; const char * type = ha_msg_value(msg, F_TYPE); const char * cgen = ha_msg_value(msg, F_HBGENERATION); seqno_t seq; seqno_t gen = 0; int IsToUs; int j; int isrestart = 0; int ishealedpartition = 0; int is_status = 0; if ( cl_get_uuid(msg, F_ORIGUUID, &fromuuid) != HA_OK){ cl_uuid_clear(&fromuuid); } if (from && !cl_uuid_is_null(&fromuuid)){ /* We didn't know their uuid before, but now we do... */ if (update_tables(from, &fromuuid)){ G_main_set_trigger(write_hostcachefile); } } if (is_missing_packet == NULL){ cl_log(LOG_ERR, "should_drop_message: " "NULL input is_missing_packet"); return DROPIT; } /* Some packet types shouldn't have sequence numbers */ if (type != NULL && strncmp(type, NOSEQ_PREFIX, sizeof(NOSEQ_PREFIX)-1) == 0) { /* Is this a sequence number rexmit NAK? 
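 * (A NAK means the peer is telling us it can no longer retransmit a
 * sequence number we asked for, so we give up on it: is_lost_packet()
 * below removes it from our missing list rather than waiting forever.)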
*/ if (strcasecmp(type, T_NAKREXMIT) == 0) { const char * cnseq = ha_msg_value(msg, F_FIRSTSEQ); seqno_t nseq; if (cnseq == NULL || sscanf(cnseq, "%lx", &nseq) != 1 || nseq <= 0) { cl_log(LOG_ERR , "should_drop_message: bad nak seq number"); return DROPIT; } if (to == NULL){ cl_log(LOG_WARNING,"should_drop_message: tonodename not found " "heartbeat version not matching?"); } if (to == NULL || strncmp(to, curnode->nodename, HOSTLENG ) == 0){ cl_log(LOG_ERR , "%s: node %s seq %ld", "Irretrievably lost packet", thisnode->nodename, nseq); } is_lost_packet(thisnode, nseq); return DROPIT; }else if (to == NULL || strncmp(to, curnode->nodename, HOSTLENG ) == 0){ return KEEPIT; }else{ return DROPIT; } } if (strcasecmp(type, T_STATUS) == 0) { is_status = 1; } if (cseq == NULL || sscanf(cseq, "%lx", &seq) != 1 || seq <= 0) { cl_log(LOG_ERR, "should_drop_message: bad sequence number"); cl_log_message(LOG_ERR, msg); return DROPIT; } /* Extract the heartbeat generation number */ if (cgen != NULL && sscanf(cgen, "%lx", &gen) <= 0) { cl_log(LOG_ERR, "should_drop_message: bad generation number"); cl_log_message(LOG_ERR, msg); return DROPIT; } if ( cl_get_uuid(msg, F_TOUUID, &touuid) != HA_OK){ cl_uuid_clear(&touuid); } if(!cl_uuid_is_null(&touuid)){ IsToUs = (cl_uuid_compare(&touuid, &config->uuid) == 0); }else{ IsToUs = (to == NULL) || (strcmp(to, curnode->nodename) == 0); } /* * We need to do sequence number processing on every * packet, even those that aren't sent to us. */ /* Does this looks like a replay attack... */ if (gen < t->generation) { cl_log(LOG_ERR , "should_drop_message: attempted replay attack" " [%s]? [gen = %ld, curgen = %ld]" , thisnode->nodename, gen, t->generation); return DROPIT; }else if (is_status) { /* Look for apparent restarts/healed partitions */ if (gen == t->generation && gen > 0) { /* Is this a message from a node that was dead? */ if (strcmp(thisnode->status, DEADSTATUS) == 0) { /* Is this stale data? */ if (seq <= thisnode->status_seqno) { return DROPIT; } /* They're now alive, but were dead. */ /* No restart occured. UhOh. */ cl_log(LOG_CRIT , "Cluster node %s" " returning after partition." , thisnode->nodename); cl_log(LOG_INFO , "For information on cluster" " partitions, See URL: %s" , HAURL("Split_Brain")); cl_log(LOG_WARNING , "Deadtime value may be too small."); cl_log(LOG_INFO , "See FAQ for information" " on tuning deadtime."); cl_log(LOG_INFO , "URL: %s" , HAURL("FAQ#Heavy_Load")); /* THIS IS RESOURCE WORK! FIXME */ /* IS THIS RIGHT??? FIXME ?? */ if (DoManageResources) { guint id; send_local_status(); (void)CauseShutdownRestart; id = Gmain_timeout_add(2000 , CauseShutdownRestart,NULL); G_main_setall_id(id, "shutdown restart", 1000, 50); } ishealedpartition=1; } }else if (gen > t->generation) { isrestart = 1; if (t->generation > 0) { cl_log(LOG_INFO, "Heartbeat restart on node %s" , thisnode->nodename); } reset_seqtrack(thisnode); thisnode->rmt_lastupdate = 0L; thisnode->local_lastupdate = 0L; thisnode->status_seqno = 0L; /* THIS IS RESOURCE WORK! FIXME */ thisnode->has_resources = TRUE; } t->generation = gen; } /* Is this packet in sequence? */ if (t->last_seq == NOSEQUENCE || seq == (t->last_seq+1)) { t->last_seq = seq; t->last_iface = iface; send_ack_if_necessary(msg); return (IsToUs ? KEEPIT : DROPIT); }else if (seq == t->last_seq) { /* Same as last-seen packet -- very common case */ if (DEBUGPKT) { cl_log(LOG_DEBUG , "should_drop_message: Duplicate packet(1)"); } return DUPLICATE; } /* * Not in sequence... Hmmm... * * Is it newer than the last packet we got? 
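 * For illustration: if t->last_seq is 10 and a packet with seq 14
 * arrives, then 3 packets (11-13) were lost; we ask the sender to
 * retransmit 11 through 13, record them in t->seqmissing[], and advance
 * last_seq to 14.  If the apparent gap exceeds FLOWCONTROL_LIMIT we
 * assume something went badly wrong and reset the tracking instead.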
*/ if (seq > t->last_seq) { seqno_t k; seqno_t nlost; nlost = ((seqno_t)(seq - (t->last_seq+1))); cl_log(LOG_WARNING, "%lu lost packet(s) for [%s] [%lu:%lu]" , nlost, thisnode->nodename, t->last_seq, seq); if (nlost > FLOWCONTROL_LIMIT) { /* Something bad happened. Start over */ /* This keeps the loop below from going a long time */ reset_seqtrack(thisnode); t->last_seq = seq; t->last_iface = iface; cl_log(LOG_ERR, "lost a lot of packets!"); return (IsToUs ? KEEPIT : DROPIT); }else { request_msg_rexmit(thisnode, t->last_seq+1L, seq-1L); } /* Try and Record each of the missing sequence numbers */ if (t->first_missing_seq == 0 || t->first_missing_seq > t -> last_seq + 1 ){ t->first_missing_seq = t -> last_seq +1; } for(k = t->last_seq+1; k < seq; ++k) { if (t->nmissing < MAXMISSING-1) { t->seqmissing[t->nmissing] = k; ++t->nmissing; }else{ int minmatch = -1; seqno_t minseq = INT_MAX; /* * Replace the lowest numbered missing seqno * with this one */ for (j=0; j < MAXMISSING; ++j) { if (t->seqmissing[j] == NOSEQUENCE) { minmatch = j; break; } if (minmatch < 0 || t->seqmissing[j] < minseq) { minmatch = j; minseq = t->seqmissing[j]; } } t->seqmissing[minmatch] = k; } } t->last_seq = seq; t->last_iface = iface; return (IsToUs ? KEEPIT : DROPIT); } /* * This packet appears to be older than the last one we got. */ /* * Is it a (recorded) missing packet? */ if ( (*is_missing_packet = is_lost_packet(thisnode, seq))) { return (IsToUs ? KEEPIT : DROPIT); } if (ishealedpartition || isrestart) { const char * sts; TIME_T newts = 0L; send_ack_if_necessary(msg); if ((sts = ha_msg_value(msg, F_TIME)) == NULL || sscanf(sts, TIME_X, &newts) != 1 || newts == 0L) { /* Toss it. No valid timestamp */ cl_log(LOG_ERR, "should_drop_message: bad timestamp"); return DROPIT; } thisnode->rmt_lastupdate = newts; reset_seqtrack(thisnode); t->last_seq = seq; t->last_iface = iface; return (IsToUs ? KEEPIT : DROPIT); } /* This is a DUP packet (or a really old one we lost track of) */ if (DEBUGPKT) { cl_log(LOG_DEBUG, "should_drop_message: Duplicate packet"); cl_log_message(LOG_DEBUG, msg); } return DROPIT; } /* * Control (inbound) packet processing... * This is part of the control_process() processing. * * This is where the reliable multicast protocol is implemented - * through the use of process_rexmit(), and add2_xmit_hist(). * process_rexmit(), and add2_xmit_hist() use msghist to track sent * packets so we can retransmit them if they get lost. * * NOTE: It's our job to dispose of the packet we're given... 
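 * Concretely: messages that carry a sequence number are stored in the
 * transmit history by add2_xmit_hist() and freed only when their history
 * slot is reused; everything else is deleted here once it has been
 * converted to wire format and sent.  Callers must not touch 'msg'
 * after handing it to us.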
*/ static int process_outbound_packet(struct msg_xmit_hist* hist , struct ha_msg * msg) { char * smsg; const char * type; const char * cseq; seqno_t seqno = -1; const char * to; int IsToUs; size_t len; if (DEBUGPKTCONT) { cl_log(LOG_DEBUG, "got msg in process_outbound_packet"); } if ((type = ha_msg_value(msg, F_TYPE)) == NULL) { cl_log(LOG_ERR, "process_outbound_packet: no type in msg."); ha_msg_del(msg); msg = NULL; return HA_FAIL; } if ((cseq = ha_msg_value(msg, F_SEQ)) != NULL) { if (sscanf(cseq, "%lx", &seqno) != 1 || seqno <= 0) { cl_log(LOG_ERR, "process_outbound_packet: " "bad sequence number"); smsg = NULL; ha_msg_del(msg); return HA_FAIL; } } to = ha_msg_value(msg, F_TO); IsToUs = (to != NULL) && (strcmp(to, curnode->nodename) == 0); /* Convert the incoming message to a string */ smsg = msg2wirefmt(msg, &len); /* If it didn't convert, throw original message away */ if (smsg == NULL) { ha_msg_del(msg); return HA_FAIL; } /* Remember Messages with sequence numbers */ if (cseq != NULL) { add2_xmit_hist (hist, msg, seqno); } /* if (DEBUGPKT){ cl_msg_stats_add(time_longclock(), len); } */ /* Direct message to "loopback" processing */ process_clustermsg(msg, NULL); send_to_all_media(smsg, len); free(smsg); /* Throw away "msg" here if it's not saved above */ if (cseq == NULL) { ha_msg_del(msg); } /* That's All Folks... */ return HA_OK; } /* * Is this the sequence number of a lost packet? * If so, clean up after it. */ static int is_lost_packet(struct node_info * thisnode, seqno_t seq) { struct seqtrack * t = &thisnode->track; int j; int ret = 0; for (j=0; j < t->nmissing; ++j) { /* Is this one of our missing packets? */ if (seq == t->seqmissing[j]) { remove_msg_rexmit(thisnode, seq); /* Yes. Delete it from the list */ t->seqmissing[j] = NOSEQUENCE; /* Did we delete the last one on the list */ if (j == (t->nmissing-1)) { t->nmissing --; } /* Swallow up found packets */ while (t->nmissing > 0 && t->seqmissing[t->nmissing-1] == NOSEQUENCE) { t->nmissing --; } if (t->nmissing == 0) { cl_log(LOG_INFO, "No pkts missing from %s!" 
, thisnode->nodename); t->first_missing_seq = 0; } ret = 1; goto out; } } out: if (!enable_flow_control){ return ret; } if (ret && seq == t->first_missing_seq){ /*determine the new first missing seq*/ seqno_t old_missing_seq = t->first_missing_seq; seqno_t lastseq_to_ack; seqno_t x; seqno_t trigger = thisnode->track.ack_trigger; seqno_t ack_seq; t->first_missing_seq = 0; for (j=0; j < t->nmissing; ++j) { if (t->seqmissing[j] != NOSEQUENCE){ if (t->first_missing_seq == 0 || t->seqmissing[j] < t->first_missing_seq){ t->first_missing_seq = t->seqmissing[j]; } } } if (t->first_missing_seq == 0){ lastseq_to_ack = t->last_seq; }else { lastseq_to_ack = t->first_missing_seq - 1 ; } x = lastseq_to_ack % ACK_MSG_DIV; if (x >= trigger ){ ack_seq = lastseq_to_ack/ACK_MSG_DIV*ACK_MSG_DIV + trigger; }else{ ack_seq = (lastseq_to_ack/ACK_MSG_DIV -1)*ACK_MSG_DIV + trigger; } if (ack_seq >= old_missing_seq){ send_ack_if_needed(thisnode, ack_seq); } } return ret; } extern int max_rexmit_delay; #define REXMIT_MS max_rexmit_delay #define ACCEPT_REXMIT_REQ_MS (REXMIT_MS-10) static void dump_missing_pkts_info(void) { int j; for (j = 0; j < config->nodecount; ++j) { struct node_info * hip = &config->nodes[j]; struct seqtrack * t = &hip->track; int seqidx; if (t->nmissing == 0){ continue; }else{ cl_log(LOG_DEBUG, "At max %d pkts missing from %s", t->nmissing, hip->nodename); } for (seqidx = 0; seqidx < t->nmissing; ++seqidx) { if (t->seqmissing[seqidx] != NOSEQUENCE) { cl_log(LOG_DEBUG, "%d: missing pkt: %ld", seqidx, t->seqmissing[seqidx]); } } } } static void check_rexmit_reqs(void) { longclock_t minrexmit = 0L; int gottimeyet = FALSE; int j; for (j=0; j < config->nodecount; ++j) { struct node_info * hip = &config->nodes[j]; struct seqtrack * t = &hip->track; int seqidx; if (t->nmissing <= 0 ) { continue; } /* * We rarely reach this code, so avoid an extra system call */ if (!gottimeyet) { longclock_t rexmitms = msto_longclock(REXMIT_MS); longclock_t now = time_longclock(); gottimeyet = TRUE; if (cmp_longclock(now, rexmitms) < 0) { minrexmit = zero_longclock; }else{ minrexmit = sub_longclock(now, rexmitms); } } if (cmp_longclock(t->last_rexmit_req, minrexmit) > 0) { /* Too soon to ask for retransmission */ continue; } if (t->nmissing > MAX_MISSING_PKTS){ cl_log(LOG_ERR, "too many missing pkts(%d) from node %s", t->nmissing, hip->nodename); } /* Time to ask for some packets again ... */ for (seqidx = 0; seqidx < t->nmissing; ++seqidx) { if (t->seqmissing[seqidx] != NOSEQUENCE) { /* * The code for asking for these by groups * is complicated. This code is not. 
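 * Instead we simply re-request each missing sequence number on its own
 * (first == last in the request), and rely on the per-node rate limit
 * above (last_rexmit_req versus REXMIT_MS, i.e. max_rexmit_delay) to
 * keep these requests from flooding slow links.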
*/ if (ANYDEBUG){ cl_log(LOG_INFO, "calling request_msg_rexmit()" "from %s", __FUNCTION__); } request_msg_rexmit(hip, t->seqmissing[seqidx] , t->seqmissing[seqidx]); } } } } /* Initialize the transmit history */ static void init_xmit_hist (struct msg_xmit_hist * hist) { int j; hist->lastmsg = MAXMSGHIST-1; hist->hiseq = hist->lowseq = 0; hist->ackseq = 0; hist->lowest_acknode = NULL; for (j=0; j < MAXMSGHIST; ++j) { hist->msgq[j] = NULL; hist->seqnos[j] = 0; hist->lastrexmit[j] = zero_longclock; } } #ifdef DO_AUDITXMITHIST void audit_xmit_hist(void) { int slot; for (slot = 0; slot < MAXMSGHIST; ++slot) { struct ha_msg* msg = msghist.msgq[slot]; gboolean doabort = FALSE; if (msg == NULL) { continue; } if (!msg) { cl_log(LOG_CRIT , "Unallocated message in audit_xmit_hist"); doabort=TRUE; } if (msg->nfields <= 0) { cl_log(LOG_CRIT , "Non-positive nfields in audit_xmit_hist"); doabort=TRUE; } if (msg->nalloc <= 0) { cl_log(LOG_CRIT , "Non-positive nalloc in audit_xmit_hist"); doabort=TRUE; } if (msg->stringlen <= 0) { cl_log(LOG_CRIT , "Non-positive stringlen in audit_xmit_hist"); doabort=TRUE; } if (msg->nfields > msg->nalloc) { cl_log(LOG_CRIT , "Improper nfields in audit_xmit_hist"); doabort=TRUE; } if (msg->nfields > 100) { cl_log(LOG_CRIT , "TOO Large nfields in audit_xmit_hist"); doabort=TRUE; } if (get_stringlen(msg) <= msg->nfields*4) { cl_log(LOG_CRIT , "Too small stringlen in audit_xmit_hist"); doabort=TRUE; } if (!msg->names) { cl_log(LOG_CRIT , "Unallocated msg->names in audit_xmit_hist"); doabort=TRUE; } if (!msg->nlens) { cl_log(LOG_CRIT , "Unallocated msg->nlens in audit_xmit_hist"); doabort=TRUE; } if (!msg->values) { cl_log(LOG_CRIT , "Unallocated msg->values in audit_xmit_hist"); doabort=TRUE; } if (!msg->vlens) { cl_log(LOG_CRIT , "Unallocated msg->vallens in audit_xmit_hist"); doabort=TRUE; } if (doabort) { cl_log(LOG_CRIT , "Message slot is %d", slot); abort(); } } } #endif gboolean heartbeat_on_congestion(void) { struct msg_xmit_hist* hist = &msghist; return hist->hiseq - hist->ackseq > FLOWCONTROL_LIMIT; } /* Add a packet to a channel's transmit history */ static void add2_xmit_hist (struct msg_xmit_hist * hist, struct ha_msg* msg , seqno_t seq) { int slot; struct ha_msg* slotmsg; if (!msg) { cl_log(LOG_CRIT, "Unallocated message in add2_xmit_hist"); abort(); } AUDITXMITHIST; /* Figure out which slot to put the message in */ slot = hist->lastmsg+1; if (slot >= MAXMSGHIST) { slot = 0; } hist->hiseq = seq; slotmsg = hist->msgq[slot]; /* Throw away old packet in this slot */ if (slotmsg != NULL) { /* Lowseq is less than the lowest recorded seqno */ hist->lowseq = hist->seqnos[slot]; hist->msgq[slot] = NULL; if (!slotmsg) { cl_log(LOG_CRIT , "Unallocated slotmsg in add2_xmit_hist"); }else{ ha_msg_del(slotmsg); } } hist->msgq[slot] = msg; hist->seqnos[slot] = seq; hist->lastrexmit[slot] = 0L; hist->lastmsg = slot; if (enable_flow_control && live_node_count > 1) { int priority = 0; if ((hist->hiseq - hist->lowseq) > ((MAXMSGHIST*9)/10)) { priority = LOG_ERR; } else if ((hist->hiseq - hist->lowseq) > ((MAXMSGHIST*3)/4)) { priority = LOG_WARNING; } if (priority > 0) { cl_log(priority , "Message hist queue is filling up" " (%d messages in queue)" , (int)(hist->hiseq - hist->lowseq)); hist_display(hist); } } AUDITXMITHIST; if (enable_flow_control && hist->hiseq - hist->ackseq > FLOWCONTROL_LIMIT){ if (live_node_count < 2) { update_ackseq(hist->hiseq - (FLOWCONTROL_LIMIT-1)); all_clients_resume(); }else{ #if 0 cl_log(LOG_INFO, "Flow control engaged with %d live nodes" , 
live_node_count); #endif all_clients_pause(); hist_display(hist); } } } #define MAX_REXMIT_BATCH 50 static void process_rexmit(struct msg_xmit_hist * hist, struct ha_msg* msg) { const char * cfseq; const char * clseq; seqno_t fseq = 0; seqno_t lseq = 0; seqno_t thisseq; int firstslot = hist->lastmsg-1; int rexmit_pkt_count = 0; const char* fromnodename = ha_msg_value(msg, F_ORIG); struct node_info* fromnode = NULL; if (fromnodename == NULL){ cl_log(LOG_ERR, "process_rexmit" ": from node not found in the message"); return; } if (firstslot >= MAXMSGHIST) { cl_log(LOG_ERR, "process_rexmit" ": firstslot out of range [%d]" , firstslot); hist->lastmsg = firstslot = MAXMSGHIST-1; } fromnode = lookup_tables(fromnodename, NULL); if (fromnode == NULL){ cl_log(LOG_ERR, "fromnode not found "); return ; } if ((cfseq = ha_msg_value(msg, F_FIRSTSEQ)) == NULL || (clseq = ha_msg_value(msg, F_LASTSEQ)) == NULL || (fseq=atoi(cfseq)) <= 0 || (lseq=atoi(clseq)) <= 0 || fseq > lseq) { cl_log(LOG_ERR, "Invalid rexmit seqnos"); cl_log_message(LOG_ERR, msg); } if (ANYDEBUG){ cl_log(LOG_DEBUG, "rexmit request from node %s for msg(%ld-%ld)", fromnodename, fseq, lseq); } /* * Retransmit missing packets in proper sequence. */ for (thisseq = fseq; thisseq <= lseq; ++thisseq) { int msgslot; int foundit = 0; if (thisseq <= fromnode->track.ackseq){ /* this seq has been ACKed by fromnode we can saftely ignore this request message*/ continue; } if (thisseq <= hist->lowseq) { /* Lowseq is less than the lowest recorded seqno */ nak_rexmit(hist, thisseq, fromnodename, "seqno too low"); continue; } if (thisseq > hist->hiseq) { /* * Hopefully we just restarted and things are * momentarily a little out of sync... * Since the rexmit request doesn't send out our * generation number, we're just guessing * ... nak_rexmit(thisseq, fromnode, "seqno too high"); ... * * Otherwise it's a bug ;-) */ cl_log(LOG_WARNING , "Rexmit of seq %lu requested. %lu is max." , thisseq, hist->hiseq); continue; } for (msgslot = firstslot ; !foundit && msgslot != (firstslot+1); --msgslot) { char * smsg; longclock_t now = time_longclock(); longclock_t last_rexmit; size_t len; if (msgslot < 0) { /* Time to wrap around */ if (firstslot == MAXMSGHIST-1) { /* We're back where we started */ break; } msgslot = MAXMSGHIST-1; } if (hist->msgq[msgslot] == NULL) { continue; } if (hist->seqnos[msgslot] != thisseq) { continue; } /* * We resend a packet unless it has been re-sent in * the last REXMIT_MS milliseconds. */ last_rexmit = hist->lastrexmit[msgslot]; if (cmp_longclock(last_rexmit, zero_longclock) != 0 && longclockto_ms(sub_longclock(now,last_rexmit)) < (ACCEPT_REXMIT_REQ_MS)) { /* Continue to outer loop */ goto NextReXmit; } /* * Don't send too many packets all at once... * or we could flood serial links... */ ++rexmit_pkt_count; if (rexmit_pkt_count > MAX_REXMIT_BATCH) { return; } /* Found it! Let's send it again! 
*/ firstslot = msgslot -1; foundit=1; if (ANYDEBUG) { cl_log(LOG_INFO, "Retransmitting pkt %lu" , thisseq); cl_log(LOG_INFO, "msg size =%d, type=%s", get_stringlen(hist->msgq[msgslot]), ha_msg_value(hist->msgq[msgslot], F_TYPE)); } smsg = msg2wirefmt(hist->msgq[msgslot], &len); if (DEBUGPKT) { cl_log_message(LOG_INFO, hist->msgq[msgslot]); cl_log(LOG_INFO , "Rexmit STRING conversion: [%s]" , smsg); } /* If it didn't convert, throw original msg away */ if (smsg != NULL) { hist->lastrexmit[msgslot] = now; send_to_all_media(smsg , len); free(smsg); } } if (!foundit) { nak_rexmit(hist, thisseq, fromnodename, "seqno not found"); } NextReXmit:/* Loop again */; } } static void printout_histstruct(struct msg_xmit_hist* hist) { cl_log(LOG_INFO,"hist information:"); cl_log(LOG_INFO, "hiseq =%lu, lowseq=%lu,ackseq=%lu,lastmsg=%d", hist->hiseq, hist->lowseq, hist->ackseq, hist->lastmsg); } static void nak_rexmit(struct msg_xmit_hist * hist, seqno_t seqno, const char* fromnodename, const char * reason) { struct ha_msg* msg; char sseqno[32]; struct node_info* fromnode = NULL; fromnode = lookup_tables(fromnodename, NULL); if (fromnode == NULL){ cl_log(LOG_ERR, "fromnode not found "); return ; } snprintf(sseqno, sizeof(sseqno), "%lx", seqno); cl_log(LOG_ERR, "Cannot rexmit pkt %lu for %s: %s", seqno, fromnodename, reason); cl_log(LOG_INFO, "fromnode =%s, fromnode's ackseq = %ld", fromnode->nodename, fromnode->track.ackseq); printout_histstruct(hist); if ((msg = ha_msg_new(6)) == NULL) { cl_log(LOG_ERR, "no memory for " T_NAKREXMIT); return; } if (ha_msg_add(msg, F_TYPE, T_NAKREXMIT) != HA_OK || ha_msg_add(msg, F_FIRSTSEQ, sseqno) != HA_OK || ha_msg_add(msg, F_TO, fromnodename) !=HA_OK || ha_msg_add(msg, F_COMMENT, reason) != HA_OK) { cl_log(LOG_ERR, "cannot create " T_NAKREXMIT " msg."); ha_msg_del(msg); msg=NULL; return; } send_cluster_msg(msg); } int ParseTestOpts() { const char * openpath = HA_HBCONF_DIR "/OnlyForTesting"; FILE * fp; static struct TestParms p; char name[64]; char value[512]; int something_changed = 0; if ((fp = fopen(openpath, "r")) == NULL) { if (TestOpts) { cl_log(LOG_INFO, "Test Code Now disabled."); something_changed=1; } TestOpts = NULL; return something_changed; } TestOpts = &p; something_changed=1; memset(&p, 0, sizeof(p)); p.send_loss_prob = 0; p.rcv_loss_prob = 0; cl_log(LOG_INFO, "WARNING: Enabling Test Code"); while((fscanf(fp, "%[a-zA-Z_]=%s\n", name, value) == 2)) { if (strcmp(name, "rcvloss") == 0) { p.rcv_loss_prob = atof(value); p.enable_rcv_pkt_loss = 1; cl_log(LOG_INFO, "Receive loss probability = %.3f" , p.rcv_loss_prob); }else if (strcmp(name, "xmitloss") == 0) { p.send_loss_prob = atof(value); p.enable_send_pkt_loss = 1; cl_log(LOG_INFO, "Xmit loss probability = %.3f" , p.send_loss_prob); }else if (strcmp(name, "allownodes") == 0) { strncpy(p.allow_nodes, value, sizeof(p.allow_nodes)-1); cl_log(LOG_INFO, "Allow nodes = %s", p.allow_nodes); }else{ cl_log(LOG_ERR , "Cannot recognize test param [%s] in [%s]" , name, openpath); } } cl_log(LOG_INFO, "WARNING: Above Options Now Enabled."); fclose(fp); return something_changed; } #ifndef HB_VERS_FILE /* * This file needs to be persistent across reboots, but isn't * really a log */ # define HB_VERS_FILE HA_VARLIBHBDIR "/hb_generation" #endif #define GENLEN 16 /* Number of chars on disk for gen # and '\n' */ /* * Increment our generation number * It goes up each time we restart to prevent replay attacks. 
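 * For illustration: if hb_generation last held 41, the next start reads
 * 41, stores 42 back (opened with O_SYNC and fsync'ed for good measure)
 * and the packets we send from then on carry generation 42.  A peer that
 * still has 41 recorded for us treats a status packet carrying 42 as a
 * restart and resets its sequence tracking, while a packet claiming a
 * generation lower than the one recorded is dropped as an apparent
 * replay attack (see should_drop_message()).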
*/ #ifndef O_SYNC # define O_SYNC 0 #endif static int IncrGeneration(seqno_t * generation) { char buf[GENLEN+1]; int fd; int flags = 0; if ((fd = open(HB_VERS_FILE, O_RDONLY)) < 0 || read(fd, buf, sizeof(buf)) < 1) { GetTimeBasedGeneration(generation); cl_log(LOG_WARNING, "No Previous generation - starting at %lu" , (unsigned long)(*generation)+1); snprintf(buf, sizeof(buf), "%*lu", GENLEN, *generation); flags = O_CREAT; } close(fd); buf[GENLEN] = EOS; if (sscanf(buf, "%lu", generation) <= 0) { GetTimeBasedGeneration(generation); cl_log(LOG_WARNING, "BROKEN previous generation - starting at %ld" , (*generation)+1); flags = O_CREAT; *generation = 0; } ++(*generation); snprintf(buf, sizeof(buf), "%*lu\n", GENLEN-1, *generation); if ((fd = open(HB_VERS_FILE, O_WRONLY|O_SYNC|flags, 0644)) < 0) { return HA_FAIL; } if (write(fd, buf, GENLEN) != GENLEN) { close(fd); return HA_FAIL; } /* * Some UNIXes don't implement O_SYNC. * So we do an fsync here for good measure. It can't hurt ;-) */ if (fsync(fd) < 0) { cl_perror("fsync failure on " HB_VERS_FILE); return HA_FAIL; } if (close(fd) < 0) { cl_perror("close failure on " HB_VERS_FILE); return HA_FAIL; } /* * We *really* don't want to lose this data. We won't be able to * join the cluster again without it. */ sync(); #if HAVE_UNRELIABLE_FSYNC sleep(10); #endif return HA_OK; } static int GetTimeBasedGeneration(seqno_t * generation) { *generation = (seqno_t) time(NULL); return HA_OK; } static void get_localnodeinfo(void) { const char * openpath = HA_HBCONF_DIR "/nodeinfo"; static struct utsname u; static char localnode[256]; FILE * fp; if (uname(&u) < 0) { cl_perror("uname(2) call failed"); return; } localnodename = u.nodename; if ((fp = fopen(openpath, "r")) != NULL && fgets(localnode, sizeof(localnode), fp) != NULL && localnode[0] != EOS) { char * nlpos; if ((nlpos = memchr(localnode, '\n', sizeof(localnode))) != NULL) { *nlpos = EOS; localnodename = localnode; } } if (fp) { fclose(fp); } g_strdown(localnodename); } static void hb_add_deadtime(int increment) { longclock_t new_ticks; new_ticks = msto_longclock(config->deadtime_ms + increment); if (curnode->dead_ticks < new_ticks) { curnode->dead_ticks = new_ticks; send_local_status(); } deadtime_tmpadd_count++; } static gboolean hb_pop_deadtime(gpointer p) { deadtime_tmpadd_count--; if (deadtime_tmpadd_count <= 0) { curnode->dead_ticks = msto_longclock(config->deadtime_ms); send_local_status(); deadtime_tmpadd_count = 0; } return FALSE; } Heartbeat-3-0-7e3a82377fa8/heartbeat/heartbeat_private.h0000644000000000000000000000546211576626513023007 0ustar00usergroup00000000000000/* * heartbeat_private.h: definitions for the Linux-HA heartbeat program * that are defined in heartbeat.c and are used by other .c files * that are only compiled into the heartbeat binary * * I evisage that eventually these funtions will be broken out * of heartbeat.c and that this heartbeat_private.h will no longer * be neccessary. * * Copyright (C) 2002 Horms * * This file created from heartbeat.c * Copyright (C) 2000 Alan Robertson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef _HEARTBEAT_PRIVATE_H #define _HEARTBEAT_PRIVATE_H #include #include #include #include #include #include enum comm_state { COMM_STARTING, COMM_LINKSUP }; extern const char * cmdname; extern int nice_failback; extern int WeAreRestarting; extern int shutdown_in_progress; extern longclock_t local_takeover_time; extern enum comm_state heartbeat_comm_state; /* Used by signal handlers */ void hb_init_watchdog(void); void hb_tickle_watchdog(void); void hb_close_watchdog(void); /* Used to register with heartbeat for receiving messages directly */ typedef void (*HBmsgcallback) (const char * type, struct node_info* fromnode , TIME_T msgtime, seqno_t seqno, const char * iface, struct ha_msg * msg); void hb_register_msg_callback(const char * msgtype, HBmsgcallback callback); void hb_register_comm_up_callback(void(*callback)(void)); int hb_send_resources_held(int stable, const char * comment); void hb_setup_child(void); void init_resource_module(void); gboolean hb_send_local_status(gpointer p); gboolean hb_dump_all_proc_stats(gpointer p); void heartbeat_monitor(struct ha_msg * msg, int status, const char * iface); void hb_emergency_shutdown(void); void hb_initiate_shutdown(int quickshutdown); void hb_versioninfo(void); void hb_trigger_restart(int quickrestart); void hb_shutdown_if_needed(void); void hb_giveup_resources(void); void hb_kill_tracked_process(ProcTrack* p, void * data); gboolean hb_mcp_final_shutdown(gpointer p); struct ha_msg * add_control_msg_fields(struct ha_msg* ret); #endif /* _HEARTBEAT_PRIVATE_H */ Heartbeat-3-0-7e3a82377fa8/heartbeat/init.d/Makefile.am0000644000000000000000000000330711576626513022362 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in initddir = @INITDIR@ initd_SCRIPTS = heartbeat EXTRA_DIST = heartbeat.in install-initdSCRIPTS: $(initd_SCRIPTS) @$(NORMAL_INSTALL) $(mkinstalldirs) $(DESTDIR)$(initddir) @list='$(initd_SCRIPTS)'; for p in $$list; do \ f="`echo $$p|sed '$(transform)'`"; \ if test -f $$p; then \ echo " $(INSTALL_SCRIPT) $$p $(DESTDIR)$(initddir)/$$f@INIT_EXT@"; \ $(INSTALL_SCRIPT) $$p $(DESTDIR)$(initddir)/$$f@INIT_EXT@; \ elif test -f $(srcdir)/$$p; then \ echo " $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(initddir)/$$f@INIT_EXT@"; \ $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(initddir)/$$f@INIT_EXT@; \ else :; fi; \ done uninstall-initdSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(initd_SCRIPTS)'; for p in $$list; do \ f="`echo $$p|sed '$(transform)'`"; \ echo " rm -f $(DESTDIR)$(initddir)/$$f@INIT_EXT@"; \ rm -f $(DESTDIR)$(initddir)/$$f;@INIT_EXT@ \ done Heartbeat-3-0-7e3a82377fa8/heartbeat/init.d/heartbeat.in0000755000000000000000000002523011576626513022617 0ustar00usergroup00000000000000#!/bin/sh # # # heartbeat Start high-availability services # # Author: Alan Robertson # License: GNU General Public License (GPL) # # This script works correctly under SuSE, Debian, # Conectiva, Red Hat and a few others. Please let me know if it # doesn't work under your distribution, and we'll fix it. # We don't hate anyone, and like for everyone to use # our software, no matter what OS or distribution you're using. # # chkconfig: - @HB_INITSTARTPRI@ @HB_INITSTOPPRI@ # description: Startup script high-availability services. # processname: heartbeat # pidfile: @localstatedir@/run/heartbeat.pid # config: @sysconfdir@/ha.d/ha.cf # ### BEGIN INIT INFO # Description: @HB_PKG@ is a basic high-availability subsystem. # It will start services at initialization, and when machines go up # or down. This version will also perform IP address takeover using # gratuitous ARPs. It works correctly for a 2-node configuration, # and is extensible to larger configurations. # # It implements the following kinds of heartbeats: # - Bidirectional Serial Rings ("raw" serial ports) # - UDP/IP broadcast (ethernet, etc) # - UDP/IP multicast (ethernet, etc) # - Unicast heartbeats # - "ping" heartbeats (for routers, switches, etc.) # (to be used for breaking ties in 2-node systems # and monitoring networking availability) # # Short-Description: High-availability services. # Provides: heartbeat HA # Required-Start: $remote_fs $network $time $syslog # Should-Start: openhpid # Required-Stop: $remote_fs $network $time $syslog # Should-stop: openhpid # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 ### END INIT INFO [ -r @INITDEFDIR@/heartbeat ] && . @INITDEFDIR@/heartbeat [ -r @INITDEFDIR@/pacemaker ] && . @INITDEFDIR@/pacemaker [ -r @INITDEFDIR@/sbd ] && . @INITDEFDIR@/sbd HA_DIR=@sysconfdir@/ha.d; export HA_DIR CONFIG=$HA_DIR/ha.cf . $HA_DIR/shellfuncs LOCKDIR=@localstatedir@/lock/subsys RUNDIR=@localstatedir@/run LRMADMIN=@sbindir@/lrmadmin SBD_BIN=@sbindir@/sbd StartSBD() { test -x $SBD_BIN || return test -z "$SBD_DEVICE" && return # Would be nice if sbd would # cleanly handle double start internally ps -eo cmd | grep "^$SBD_BIN -d" > /dev/null && return echo -n "Starting SBD - " local ERROR if ! ERROR=`$SBD_BIN -d $SBD_DEVICE -D $SBD_OPTS watch 2>&1` ; then echo "SBD failed to start; aborting." if [ ! -z "$ERROR" ]; then echo echo "$ERROR" fi exit 1 fi } StopSBD() { test -x $SBD_BIN || return test -z "$SBD_DEVICE" && return echo -n "Stopping SBD - " local ERROR if ! 
ERROR=`$SBD_BIN -d $SBD_DEVICE -D $SBD_OPTS message LOCAL exit 2>&1` ; then echo "SBD failed to stop; aborting." if [ ! -z "$ERROR" ]; then echo echo "$ERROR" fi exit 1 fi } echo_failure() { echo " Heartbeat failure [rc=$1]. $rc_failed" return $1 } echo_success() { echo "$rc_done" } if [ -r /etc/SuSE-release ] then # rc.status is new since SuSE 7.0 [ -r /etc/rc.status ] && . /etc/rc.status [ -r /etc/rc.config ] && . /etc/rc.config # Determine the base and follow a runlevel link name. base=${0##*/} link=${base#*[SK][0-9][0-9]} fi if [ -z "$rc_done" ] then rc_done="Done." rc_failed="Failed." rc_skipped="Skipped." fi # exec 2>>/var/log/ha-debug # This should probably be it's own autoconf parameter # because RH has moved it from time to time... # and I suspect Conectiva and Mandrake also supply it. DISTFUNCS=/etc/rc.d/init.d/functions SUBSYS=heartbeat MODPROBE=/sbin/modprobe US=`uname -n` # Set this to a 1 if you want to automatically load kernel modules USE_MODULES=@USE_MODULES@ [ -x $HA_BIN/heartbeat ] || exit 0 # # Some environments like it if we use their functions... # if [ ! -x $DISTFUNCS ] then # Provide our own versions of these functions status() { $HA_BIN/heartbeat -s } echo_failure() { echo " Heartbeat failure [rc=$1]. $rc_failed" return $1 } echo_success() { echo "$rc_done" } else . $DISTFUNCS fi # # See if they've configured things yet... # if [ ! -f $CONFIG ] then echo -n "Heartbeat not configured: $CONFIG not found." echo_failure 1 exit 0 fi CRM_ENABLED_RV="" CrmEnabled() { test -n "$CRM_ENABLED_RV" && return $CRM_ENABLED_RV local value value=`ha_parameter pacemaker | tr '[A-Z]' '[a-z]'` [ -n "$value" ] || value=`ha_parameter crm | tr '[A-Z]' '[a-z]'` case $value in y|yes|enable|on|true|1|manual|respawn) CRM_ENABLED_RV=0 return 0 ;; *) CRM_ENABLED_RV=1 return 1 ;; esac } init_watchdog() { if [ -f /proc/devices -a -x $MODPROBE ] then init_watchdog_linux fi } # # Install the softdog module if we need to # init_watchdog_linux() { # # We need to install it if watchdog is specified in $CONFIG, and # /dev/watchdog refers to a softdog device, or it /dev/watchdog # doesn't exist at all. # # If we need /dev/watchdog, then we'll make it if necessary. # # Whatever the user says we should use for watchdog device, that's # what we'll check for, use and create if necessary. If they misspell # it, or don't put it under /dev, so will we. # Hope they do it right :-) # # insmod=no # What do they think /dev/watchdog is named? MISCDEV=`grep ' misc$' /proc/devices | cut -c1-4` MISCDEV=`echo $MISCDEV` WATCHDEV=`ha_parameter watchdog` WATCHDEV=`echo $WATCHDEV` if [ "X$WATCHDEV" != X ] then : Watchdog requested by $CONFIG file # # We try and modprobe the module if there's no dev or the dev exists # and points to the softdog major device. # if [ ! -c "$WATCHDEV" ] then insmod=yes else case `ls -l "$WATCHDEV" 2>/dev/null` in *$MISCDEV,*) insmod=yes;; *) : "$WATCHDEV isn't a softdog device (wrong major)" ;; esac fi else : No watchdog device specified in $CONFIG file. fi case $insmod in yes) if grep softdog /proc/modules >/dev/null 2>&1 then : softdog already loaded else $MODPROBE softdog nowayout=0 >/dev/null 2>&1 fi;; esac if [ "X$WATCHDEV" != X -a ! -c "$WATCHDEV" -a $insmod = yes ] then minor=`cat /proc/misc | grep watchdog | cut -c1-4` mknod -m 600 $WATCHDEV c $MISCDEV $minor fi } # init_watchdog_linux() wait_for_lrmd() { local maxwait=20 local i=0 echo -n "waiting for lrmd to become responsive " while [ $i -lt $maxwait ]; do $LRMADMIN -C > /dev/null 2>&1 && break sleep 2 echo -n . 
i=$(($i+1)) done if [ $i -lt $maxwait ]; then return 0 else echo "lrmd apparently didn't start" return 1 fi } set_lrmd_options() { CrmEnabled || return test -x $LRMADMIN || return if [ -n "$LRMD_MAX_CHILDREN" ]; then wait_for_lrmd || return $LRMADMIN -p max-children $LRMD_MAX_CHILDREN fi } # # Start the heartbeat daemon... # start_heartbeat() { if ERROR=`$HA_BIN/heartbeat 2>&1` then : OK else return $? fi } mkdir_chmod_chown() { local d=$1 m=$2 o=$3 test -n "$d" || return test -d "$d" || mkdir "$d" || return chmod "$m" "$d" && chown "$o" "$d" } # # Start Linux-HA # StartHA() { echo -n "Starting High-Availability services: " if CrmEnabled then : OK else $HA_NOARCHBIN/ResourceManager verifyallidle fi if [ $USE_MODULES = 1 ] then # Create /dev/watchdog and load module if we should init_watchdog fi ( # $RUNDIR may be a tmpfs on some systems umask 027 # see http://developerbugs.linux-foundation.org/show_bug.cgi?id=2378 # HA_RSCTMP is defined in ocf-directories, sourced from ocf-shellfuncs, # sourced from $HA_DIR/shellfuncs. mkdir_chmod_chown "$HA_RSCTMP" 1755 root:root for d in crm heartbeat heartbeat/ccm heartbeat/crm heartbeat/dopd; do mkdir_chmod_chown "$RUNDIR/$d" 750 @HA_CCMUSER@:@HA_APIGROUP@ done ) if [ -f $HA_DIR/ipresources -a ! -f $HA_DIR/haresources ] then mv $HA_DIR/ipresources $HA_DIR/haresources fi # Start SBD, if enabled. # Start failure will be fatal (exit this script) StartSBD # Start heartbeat daemon if start_heartbeat then set_lrmd_options echo_success return 0 else RC=$? echo_failure $RC if [ ! -z "$ERROR" ]; then echo echo "$ERROR" fi return $RC fi } # # Ask heartbeat to stop. It will give up its resources... # StopHA() { echo -n "Stopping High-Availability services: " if $HA_BIN/heartbeat -k >/dev/null 2>&1 # Kill it then StopSBD echo_success return 0 else RC=$? echo_failure $RC return $RC fi } StatusHA() { $HA_BIN/heartbeat -s } StandbyHA() { auto_failback=`ha_parameter auto_failback | tr '[A-Z]' '[a-z]'` nice_failback=`ha_parameter nice_failback | tr '[A-Z]' '[a-z]'` case "$auto_failback" in *legacy*) echo "auto_failback is set to legacy. Cannot enter standby." exit 1;; esac case "$nice_failback" in *off*) echo "nice_failback is disabled. Cannot enter standby." exit 1;; esac case "${auto_failback}${nice_failback}" in "") echo "auto_failback defaulted to legacy. Cannot enter standby." exit 1;; esac echo "auto_failback: $auto_failback" if StatusHA >/dev/null 2>&1 then echo -n "Attempting to enter standby mode" if $HA_NOARCHBIN/hb_standby then # It's impossible to tell how long this will take. echo_success else echo_failure $? fi else echo "Heartbeat is not currently running." exit 1 fi } # # Ask heartbeat to restart. It will *keep* its resources # ReloadHA() { echo -n "Reloading High-Availability services: " if $HA_BIN/heartbeat -r # Restart, and keep your resources then echo_success return 0 else RC=$? echo_failure $RC return $RC fi } RunStartStop() { # Run pre-startup script if it exists if [ -f $HA_DIR/resource.d/startstop ] then $HA_DIR/resource.d/startstop "$@" fi } RC=0 # See how we were called. case "$1" in start) RunStartStop pre-start StartHA RC=$? echo if [ $RC -eq 0 ] then [ ! -d $LOCKDIR ] && mkdir -p $LOCKDIR touch $LOCKDIR/$SUBSYS fi RunStartStop post-start $RC ;; standby) if CrmEnabled; then echo "use 'crm_stanby -v on' instead" RC=1 else StandbyHA RC=$? fi ;; status) StatusHA RC=$?;; stop) RunStartStop "pre-stop" StopHA RC=$? 
echo if [ $RC -eq 0 ] then rm -f $LOCKDIR/$SUBSYS fi RunStartStop post-stop $RC ;; restart) sleeptime=`ha_parameter deadtime` StopHA if ! CrmEnabled ; then echo echo -n "Waiting to allow resource takeover to complete:" sleep $sleeptime sleep 10 # allow resource takeover to complete (hopefully). echo_success fi echo StartHA echo ;; force-reload|reload) ReloadHA echo RC=$? ;; *) echo "Usage: $0 {start|stop|status|restart|reload|force-reload}" exit 1 esac exit $RC Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/BasicSanityCheck.in0000644000000000000000000005174711576626513023421 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # Basic tests of sanity for a newly-built version of # linux-HA software (heartbeat) # # Conditions for running: # # Heartbeat must be installed. # # Must be root. # # CANNOT have a real heartbeat configuration. # # Must have networking configured with one working # network interface. # # should not have $TESTIP below used for anything ;-) # # should have multicast address $MCASTIP port 694 # available # (you don't need a multicast capable router). # # # Note: you might have to change TESTIP and MCASTIP # DEFAULTINTERFACE="" # Used when all guesses fail (which they shouldnt) TESTIP=10.253.252.251 MCASTIP=225.224.223.222 # IFCONFIG="@IFCONFIG@ @IFCONFIG_A_OPT@" HADIR=@sysconfdir@/ha.d HBSCRIPT=@INITDIR@/heartbeat@INIT_EXT@ STONITH=@sbindir@/stonith OCF_TESTER=@sbindir@/ocf-tester LIBDIR=@libdir@/ HBLIB=@libdir@/heartbeat SCRIPTDIR=@HA_NOARCHDATAHBDIR@ APPHBD=$HBLIB/apphbd CLSTATUS=@bindir@/cl_status APPHBTEST=$HBLIB/apphbtest IPCTEST=$HBLIB/ipctest MGMTDTEST=$SCRIPTDIR/mgmtdtest LRMTEST=$SCRIPTDIR/lrmtest/LRMBasicSanityCheck STONITHDTEST=$SCRIPTDIR/stonithdtest/STONITHDBasicSanityCheck CRMTEST="@PYTHON@ $SCRIPTDIR/cts/CTSlab.py --bsc" SNMPAGENTTEST=$SCRIPTDIR/SNMPAgentSanityCheck BASE64_MD5_TEST=$HBLIB/base64_md5_test MALLOC_CHECK_=2; export MALLOC_CHECK_ TESTPROG=@TEST@ # IDENTSTRING="Linux-HA TEST configuration file - REMOVEME!!" DUMMYNODE=ImAlwaysDead.com LOCALNODE=`uname -n` RSCDIR=$HADIR/resource.d PATH=$PATH:/sbin:/usr/sbin export PATH # Some signals to trap and act on SIGLIST="0 1 2 3 6 15" errcount=0 # Make a safe place to store logs maketempdir() { i=1 while [ $i -gt 0 ]; do tmp="/tmp/lha-dir-$$-$i" if (umask 077 && mkdir "$tmp"); then echo "$tmp" return 0 fi i=$((i+1)) done echo "Could not create tempoary directory to store logs" >& 2 return 1 } LOGDIR=`maketempdir` || exit 1 LOGFILE="$LOGDIR/log" touch "$LOGFILE" # CTSlab.py needs this otherwise it hangs. # Presumably it tries to stat or open the log file after # relinquishing root privileges. It would be nice to get to # the bottom of this and make a cleaner fix. 
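# Illustrative sketch (annotation, not part of the original script): with the
# defaults above, a run whose shell PID is 12345 would end up with
#   /tmp/lha-dir-12345-1        created mode 0700 by maketempdir (umask 077),
#                               then made world-traversable by the o+x below
#   /tmp/lha-dir-12345-1/log    the $LOGFILE that every test appends to
# The PID and "-1" suffix are hypothetical; only the /tmp/lha-dir-$$-$i naming
# and the $LOGDIR/log layout come from the code above.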
chmod o+x "$LOGDIR" cd $HADIR ulimit -c unlimited GetAllIFNames() { if [ "apple" = "@build_vendor@" ]; then # "flags=8863" signifies an active interface that supports tcp $IFCONFIG | grep '^[a-zA-Z]' | grep "flags=8863" | sed 's%:* .*%%' else $IFCONFIG | grep '^[a-zA-Z]' | sed 's%:* .*%%' fi } GuessIFname() { # Machines with multiple nics rarely got this right # using ip and route should prove much more reliable # for those that have it external_host=123.0.0.1 if [ -x /sbin/ip ] then /sbin/ip r g $external_host | head -n 1 | awk '{print $5}' else route get $external_host | grep interface | awk '{print $2}' fi } INTERFACE=`GuessIFname` if [ "x" = "x$INTERFACE" ] then INTERFACE=`GetAllIFNames | grep -v '^lo' | head -n 1` fi if [ "x" = "x$INTERFACE" ] then INTERFACE=$DEFAULTINTERFACE fi if [ "x" = "x$INTERFACE" ] then echo "No valid interface found. Please modify DEFAULTINTERFACE in this script and re-execute" exit 1 fi echo "Using interface: $INTERFACE" # # Is it safe to overwrite this config file? # CheckConfigFile() { if [ ! -f $1 ] then return 0 fi if grep "$IDENTSTRING" $1 >/dev/null 2>&1 then return 0 else return 1 fi } GenerateAuthKeys() { if [ "$AuthTry" = "" ] then AuthTry=1 else AuthTry=`expr "$AuthTry" + 1` fi cat <<-! >$1 # $IDENTSTRING # auth $AuthTry 1 sha1 SuperSecretKey--SHHH!!! 2 md5 Encript-string-for-md5!! 3 crc ! chmod 600 $1 } GenerateHaCf() { cat <<-! >$1 # $IDENTSTRING logfile /dev/null debugfile $LOGFILE keepalive 10ms debug 1 deadtime 5 initdead 5 auto_failback on stonith_host * null $LOCALNODE $DUMMYNODE mcast $INTERFACE $MCASTIP 694 0 0 #bcast $INTERFACE ping 127.0.0.1 ping_group pingus_please 127.0.0.1 node $LOCALNODE $DUMMYNODE use_logd off respawn @HA_CCMUSER@ $HBLIB/ccm respawn @HA_CCMUSER@ $HBLIB/ipfail apiauth stonithd uid=root # Eventually I'd like to be able to enable this and # have things work right... #respawn root $HBLIB/hbagent ! chmod 644 $1 } GenerateCrmdHaCf() { cat <<-! >$1 # $IDENTSTRING syslogmsgfmt true logfile /dev/null debugfile $LOGFILE keepalive 10ms debug 1 deadtime 5 initdead 5 mcast $INTERFACE $MCASTIP 694 0 0 #bcast $INTERFACE ping 127.0.0.1 ping_group pingus_please 127.0.0.1 node $LOCALNODE $DUMMYNODE use_logd off crm yes apiauth cibmon uid=@HA_CCMUSER@ respawn @HA_CCMUSER@ $HBLIB/cibmon -d respawn root $HBLIB/pingd ! chmod 644 $1 # conn_logd_time 60 # use_logd true # respawn root $HBLIB/ha_logd -c $2 # cat <<-! >$2 # # $IDENTSTRING # logfile /dev/null # debugfile $LOGFILE # #logfacility # entity ha_logd # #useapphbd no # ! } GenerateMgmtdCrmdHaCf() { cat <<-! >$1 # $IDENTSTRING logfile /dev/null debugfile $LOGFILE keepalive 10ms debug 1 deadtime 5 initdead 5 mcast $INTERFACE $MCASTIP 694 0 0 #bcast $INTERFACE ping 127.0.0.1 ping_group pingus_please 127.0.0.1 node $LOCALNODE $DUMMYNODE use_logd off crm yes apiauth mgmtd uid=root respawn root $HBLIB/mgmtd -t ! chmod 644 $1 } GenerateHaResources() { cat <<-! >$1 # $IDENTSTRING $DUMMYNODE IPaddr::$TESTIP/30 ! chmod 644 $1 } CONFIGFILES="$HADIR/ha.cf $HADIR/authkeys $HADIR/haresources" CONFIGFILES="$CONFIGFILES @HA_VARLIBDIR@/heartbeat/crm/cib.xml" CONFIGFILES="$CONFIGFILES @HA_VARLIBDIR@/heartbeat/crm/cib.xml.last" CACHEFILES="@HA_VARLIBDIR@/heartbeat/hostcache @HA_VARLIBDIR@/heartbeat/delhostcache" CACHEFILES="$CACHEFILES @HA_VARLIBDIR@/heartbeat/crm/cib.xml.sig" CACHEFILES="$CACHEFILES @HA_VARLIBDIR@/heartbeat/crm/cib.xml.sig.last" SetUpConfigFiles() { if HBStatus then echo "Should not run tests with heartbeat already running." 
fi SaveConfigFiles for j in $CONFIGFILES do if CheckConfigFile $j then : OK else echo "OOPS! $j already exists!" echo "Real configuration already set up." echo "Sorry..." exit 1 fi done GenerateAuthKeys $HADIR/authkeys GenerateHaCf $HADIR/ha.cf GenerateHaResources $HADIR/haresources rm -f $HADIR/core } RemoveConfigFiles() { for j in $CONFIGFILES do if CheckConfigFile $j then rm -f $j else echo "OOPS! Cannot remove real config file $j!" fi done for j in $CACHEFILES do rm -f $j done RestoreConfigFiles } SaveLog() { SAVELOG=/tmp/linux-ha.testlog chmod a+r $LOGFILE mv $LOGFILE $SAVELOG rmdir "$LOGDIR" echo "$errcount errors. Log file is stored in $SAVELOG" } RemoveConfigFilesAndSaveLogAndExit() { trap "" $SIGLIST # tidying: ignore other signals RemoveConfigFiles SaveLog exit } SaveConfigFiles() { cd $HADIR if [ ! -d .cfsave ] then mkdir .cfsave fi mv $CONFIGFILES .cfsave >/dev/null 2>&1 mv $CACHEFILES .cfsave >/dev/null 2>&1 } RestoreConfigFiles() { mv $HADIR/.cfsave/cib.xml* @HA_VARLIBDIR@/heartbeat/crm/ >/dev/null 2>&1 mv $HADIR/.cfsave/hostcache @HA_VARLIBDIR@/heartbeat/ >/dev/null 2>&1 mv $HADIR/.cfsave/delhostcache @HA_VARLIBDIR@/heartbeat/ >/dev/null 2>&1 mv $HADIR/.cfsave/* $HADIR >/dev/null 2>&1 } HBStart() { echo "Starting heartbeat" $HBSCRIPT start } HBStop() { echo "Stopping heartbeat" $HBSCRIPT stop } HBReload() { echo "Reloading heartbeat" $HBSCRIPT reload >/dev/null 2>&1 rc=$? sleep 5 return $rc } HBStatus() { case `$HBSCRIPT status 2>&1` in *running*) true;; *) false;; esac } # # Search the log file for the given grep pattern # LookForString() { count=1 while if grep -i "$1" $LOGFILE then return 0 fi [ $count -lt 60 ] do count=`expr $count + 1` sleep 1 done return 1 } # Check for the given count of the given string # Complain unless the right number are there. CheckPat() { count=`egrep -ic "$1" $LOGFILE` min=$2 if [ $# -gt 2 ] then max=$3 else max=$2 fi if [ $count -lt $min -o $count -gt $max ] then echo "ERROR: Did not find [$2:$3] occurances of $1 in $LOGFILE `date`" 2>&1 | tee -a $LOGFILE echo "ERROR: Found $count instead." | tee -a $LOGFILE errcount=`expr $errcount + 1` fi } TestHeartbeat() { if HBStatus then echo "That's weird. Heartbeat seems to be running..." HBStop fi if $CLSTATUS hbstatus >/dev/null 2>&1 then echo "$CLSTATUS shows heartbeat running" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if HBStart then if HBStatus then : COOL! else echo "Heartbeat did not start." | tee -a $LOGFILE exit 1 fi fi sleep 5 j=0 while [ "$j" -lt 30 ] do if $CLSTATUS hbstatus >/dev/null 2>&1 then break else sleep 1 fi j=`expr "$j" + 1` done if $CLSTATUS hbstatus >/dev/null 2>&1 then : OK else echo "$CLSTATUS shows heartbeat not running ($?)" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi for node in $LOCALNODE 127.0.0.1 pingus_please do if $CLSTATUS nodestatus $node >/dev/null 2>&1 then : $node status is OK else echo "$CLSTATUS shows status of [$node] as dead ($?)" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi done # # Heartbeat seems to be running... 
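#
# Worked example (annotation, not part of the original script): with the
# generated test configuration, the haresources line is effectively
#     ImAlwaysDead.com IPaddr::10.253.252.251/30
# and ha.cf lists ImAlwaysDead.com both as a node and in a "null"
# stonith_host entry.  Since that node never answers, heartbeat should
# declare it dead, "reset" it with the null STONITH plugin, and take over
# its IPaddr resource group locally -- which is what the log checks below
# look for.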
# if LookForString "node $DUMMYNODE.*is dead" >/dev/null then : OK else echo "Does not look like we noticed $DUMMYNODE was dead" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if $CLSTATUS nodestatus $DUMMYNODE >/dev/null 2>&1 then echo "$CLSTATUS shows $DUMMYNODE status as alive(!)" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if LookForString "Resetting node $DUMMYNODE with" >/dev/null && LookForString "node $DUMMYNODE now reset" >/dev/null then : OK else echo "Does not look like we STONITHed $DUMMYNODE" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if LookForString "IPaddr.*$TESTIP" >/dev/null then : Looks good else echo "Does not look like we took over the IP address" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi # Wait until heartbeat thinks things are stable # that is, not in "transition" for j in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 do if STAT=`$CLSTATUS rscstatus 2>/dev/null` then case "$STAT" in transition) sleep 1 ;; all) break ;; *) echo "$CLSTATUS shows resource status as $STAT" | tee -a $LOGFILE break ;; esac else echo "$CLSTATUS rscstatus failed [$?]" | tee -a $LOGFILE break fi done if LookForString ARP >/dev/null then : Looks good else echo "Does not look like we ARPed the address" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 && $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1 then : COOL! else echo "Looks like monitor operation failed" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi # # Reload test -- ha.cf changed # echo "Performing ha.cf reload test" >> $LOGFILE touch $HADIR/ha.cf if HBReload then : OK! reload after touching ha.cf worked! else echo "Heartbeat reload operation returned $?" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if LookForString "restart exec" >/dev/null then : Looks good else echo "Does not look like we did a restart exec." | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if HBStatus then : "OK - reload didn't kill anything ;-)" fi # # Reload test -- authkeys changed # echo "Performing authkeys reload test" >> $LOGFILE GenerateAuthKeys $HADIR/authkeys if HBReload then : OK! reload after changing authkeys worked! else echo "Heartbeat reload operation returned $?" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if CheckPat "Signalling.* to reread config files" 2 >/dev/null then : OK else echo "Heartbeat did not reread config files exactly twice" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi sleep 2 if CheckPat "restart exec" 1 >/dev/null then : Looks good -- did not do another exec else echo "Looks like we did an extra exec" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if HBStatus then : "OK - reload didn't kill anything ;-)" fi echo "Stopping heartbeat." >> $LOGFILE if HBStop then : OK! else echo "Heartbeat stop operation returned $?" | tee -a $LOGFILE errcount=`expr $errcount + 1` fi if HBStatus then echo "Looks like heartbeat did not really stop." | tee -a $LOGFILE echo "You\'ll probably need to kill some processes yourself." errcount=`expr $errcount + 1` fi if $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 && $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1 then echo "Looks like the test IP address is still live..." errcount=`expr $errcount + 1` fi } StonithCheck() { serrcount=0 echo "Checking STONITH basic sanity." 
| tee -a $LOGFILE if $STONITH -h >/dev/null then : OK else echo "$STONITH -h failed" | tee -a $LOGFILE serrcount=`expr $serrcount + 1` fi wc=`$STONITH -h | wc -l` if [ $wc -lt 100 ] then echo "$STONITH -h help message is too short ($wc lines)" | tee -a $LOGFILE serrcount=`expr $serrcount + 1` fi if FOOBARLIST=`$STONITH -t null -p "foo bar" -l` then : FOOBARLIST OK else echo "$STONITH -t null list option failed" | tee -a $LOGFILE serrcount=`expr $serrcount + 1` fi if echo $FOOBARLIST | grep foo >/dev/null && echo $FOOBARLIST | grep bar >/dev/null then : OK null list else echo "$STONITH -t null list option incorrect" | tee -a $LOGFILE serrcount=`expr $serrcount + 1` fi if RESETOUT=`$STONITH -t null -p "foo bar" foo 2>&1` then case $RESETOUT in *"Host null-reset: foo"*) : NULL Stonith output OK;; *) echo "NULL reset failed." serrcount=`expr $serrcount + 1`;; esac else echo "$STONITH -t null reset failed." | tee -a $LOGFILE fi errcount=`expr $errcount + $serrcount` } AppHBCheck() { CFFILE=/var/run/$$.apphbd.cf clientcount=5 cat <<-! >$CFFILE realtime yes debug_level 1 debugfile $LOGFILE logfile /dev/null ! echo "Performing apphbd success case tests" | tee -a $LOGFILE if $APPHBD -s >/dev/null 2>&1 then echo "That's odd, $APPHBD is already running." killcount=2 $APPHBD -k >/dev/null 2>&1 else killcount=1 fi $APPHBD -c $CFFILE sleep 5 $APPHBTEST -i 1000 -p $clientcount -n 5 >> $LOGFILE 2>&1 for pat in "apphb_client_register:" "type=setint" "info:.*apphb_client_remove:" do CheckPat "$pat" $clientcount done CheckPat "failed to heartbeat|resumed heartbeats" 0 echo "Performing apphbd failure case tests" | tee -a $LOGFILE $APPHBTEST -F -i 1000 -p 1 -n 5 >>$CFFILE 2>&1 for pat in "'failtest'.* failed to heartbeat" "'failtest'.* resumed heartbeats" do CheckPat "$pat" 1 2 done sleep 5 CheckPat "WARN:.*hangup" 1 $APPHBD -k $CFFILE CheckPat "info:.*apphbd.*stopped" $killcount if $APPHBD -s >/dev/null 2>&1 then echo "ERROR: $APPHBD is still running!" | tee -a $LOGFILE fi } IPCtest() { echo "Starting IPC tests" | tee -a $LOGFILE $IPCTEST >>$LOGFILE 2>&1 ret=$? if [ $ret -ne 0 ]; then errcount=`expr $errcount + $ret` return $ret fi ### Might here try "transient-test.sh" test. errcount=`expr $errcount + $ret` return $ret } LRMTest() { if [ ! -f $LRMTEST ] then return 0 fi echo "Starting LRM tests" | tee -a $LOGFILE $LRMTEST $HBLIB >> $LOGFILE 2>&1 ret=$? errcount=`expr $errcount + $ret` if [ $ret != 0 ] then echo "LRM tests failed." fi } StonithDaemonTest() { if [ ! -f $STONITHDTEST ] then return 1 fi if HBStart then : else echo "Cannot start heartbeat, so do not start stonith daemon test." | tee -a $LOGFILE return 1 fi i=0 while [ "$i" -lt 10 ] do if $CLSTATUS hbstatus >/dev/null 2>&1 then break else sleep 1 fi i=`expr "$i" + 1` done if $CLSTATUS hbstatus >/dev/null 2>&1 then : else echo "There are internal errors in heartbeat, so do not start stonith daemon test." | tee -a $LOGFILE return 1 fi echo "starting STONITH Daemon tests" | tee -a $LOGFILE $STONITHDTEST >> $LOGFILE 2>&1 ret=$? errcount=`expr $errcount + $ret` if [ $ret != 0 ] then echo "STONITH Daemon tests failed." else echo "STONITH Daemon tests passed." fi HBStop } TestCRM() { GenerateCrmdHaCf $HADIR/ha.cf $HADIR/ha_logd.cf echo "Starting CRM tests" | tee -a $LOGFILE $CRMTEST -L $LOGFILE 2> /dev/null ret=$? if [ $ret != 0 ]; then errcount=`expr $errcount + 1` echo "CRM tests failed (rc=$ret)." | tee -a $LOGFILE return 1 fi echo "CRM tests passed." | tee -a $LOGFILE return 0 } TestMgmtd() { if [ ! 
-f $MGMTDTEST ]; then return 0 fi GenerateMgmtdCrmdHaCf $HADIR/ha.cf $HADIR/ha_logd.cf echo "Starting Mgmtd tests" | tee -a $LOGFILE if HBStart then : else echo "cannot start heartbeat, so do not start mgmt daemon test." | tee -a $LOGFILE return 1 fi i=0 while [ "$i" -lt 10 ] do if $CLSTATUS hbstatus >/dev/null 2>&1 then break else sleep 1 fi i=`expr "$i" + 1` done if $CLSTATUS hbstatus >/dev/null 2>&1 then : else echo "There are internal errors in heartbeat, so do not start mgmt daemon test." | tee -a $LOGFILE $CLSTATUS hbstatus return 1 fi sleep 10 $MGMTDTEST > $LOGFILE 2>&1 ret=$? if [ $ret != 0 ]; then errcount=`expr $errcount + 1` echo "Mgmtd test failed." | tee -a $LOGFILE else echo "Mgmtd test passed." fi HBStop } SNMPAgentTest() { if [ ! -f $SNMPAGENTTEST ] then return 0 fi if HBStart then sleep 6 echo "starting SNMP Agent tests" | tee -a $LOGFILE $SNMPAGENTTEST >> $LOGFILE 2>&1 ret=$? errcount=`expr $errcount + $ret` if [ $ret != 0 ] then echo "SNMP Agent tests failed." else echo "SNMP Agent tests pass." fi HBStop fi } Base64MD5Test() { if [ ! -x $BASE64_MD5_TEST ] then return 0 fi echo "Starting base64 and md5 algorithm tests" | tee -a $LOGFILE $BASE64_MD5_TEST >> $LOGFILE 2>&1 ret=$? errcount=`expr $errcount + $ret` if [ $ret != 0 ] then echo "base64 and md5 algorithm tests failed." else echo "base64 and md5 algorithm tests succeeded." fi } TestRA() { if [ ! -x $OCF_TESTER ]; then return 0 fi OCF_ROOT=@OCF_ROOT_DIR@ export OCF_ROOT RADIR=@OCF_RA_DIR@/@HB_PKG@ echo "Starting Resource Agent tests" | tee -a $LOGFILE echo "Testing RA: Dummy" | tee -a $LOGFILE $OCF_TESTER -n DemoDummy $RADIR/Dummy >>$LOGFILE 2>&1 rc=$? if [ $rc -ne 0 ]; then echo "ERROR: Dummy RA failed" | tee -a $LOGFILE return fi echo "Testing RA: IPaddr" | tee -a $LOGFILE $OCF_TESTER -o ip=127.0.0.20 -o netmask=32 -n DemoIPaddr \ $RADIR/IPaddr >>$LOGFILE 2>&1 rc=$? if [ $rc -ne 0 ]; then echo "ERROR: IPaddr RA failed" | tee -a $LOGFILE return fi if [ -x @IP2UTIL@ ]; then echo "Testing RA: IPaddr2" | tee -a $LOGFILE $OCF_TESTER -o ip=127.0.0.20 -o netmask=32 -n DemoIPaddr2 \ $RADIR/IPaddr2 >>$LOGFILE 2>&1 rc=$? if [ $rc -ne 0 ]; then echo "ERROR: IPaddr2 RA failed" | tee -a $LOGFILE return fi fi if [ `uname -s` = 'Linux' ]; then MNT_DIR="$LOGDIR/mnt" mkdir "$MNT_DIR" echo "Testing RA: Filesystem" | tee -a $LOGFILE $OCF_TESTER -o device=/dev/null -o fstype=proc -o directory=$MNT_DIR \ -n DemoFS $RADIR/Filesystem >>$LOGFILE 2>&1 rc=$? rmdir $MNT_DIR if [ $rc -ne 0 ]; then echo "ERROR: Filesystem RA failed" | tee -a $LOGFILE return fi fi echo "RA tests PASSED" | tee -a $LOGFILE } # # Check our identity. # Set Up Config Files. # Run Tests. # Remove Config Files. # ID=`@WHOAMI@` case $ID in root) : OK;; *) echo "Must be root to run this. Sorry." exit 1;; esac touch $LOGFILE chown @HA_CCMUSER@ $LOGFILE trap 'RemoveConfigFilesAndSaveLogAndExit' $SIGLIST SetUpConfigFiles > $LOGFILE COREPID=/proc/sys/kernel/core_uses_pid if [ -f "$COREPID" ] then echo 1 > $COREPID fi # Create temp rsc dir if it doesn't yet exist, RAs depend on it HARSCTMP=@HA_VARRUNHBRSCDIR@ if [ ! -d "$HARSCTMP" ] then mkdir $HARSCTMP fi Base64MD5Test TestRA IPCtest ret=$? if [ $ret -ne 0 ]; then # If IPC is faulty, then it is pointless to attempt dependent checks. echo "ERROR: IPCtest failed; continuing is pointless" | tee -a $LOGFILE exit $ret fi # Might want to add a longer, more substantial IPC test later. TestHeartbeat StonithCheck AppHBCheck LRMTest StonithDaemonTest #TestCRM SNMPAgentTest #TestMgmtd ls -l $HADIR/core* 2>/dev/null if [ $? 
= 0 ] then errcount=`expr $errcount + 1` echo "OOPS! We generated a core file!" file $HADIR/core* fi if egrep 'CRIT|ERROR' $LOGFILE then echo "OOPS! Looks like we had some errors come up." errcount=`expr $errcount + 1` fi exit $errcount Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/Makefile.am0000644000000000000000000000207211576626513021741 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # subpath for internal scripts halibdir = "@HA_NOARCHDATAHBDIR@" halib_SCRIPTS = mach_down req_resource ResourceManager hb_standby \ BasicSanityCheck TestHeartbeatComm ha_config hb_takeover hb_addnode \ hb_delnode ha_propagate hb_setweight hb_setsite Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/ResourceManager.in0000644000000000000000000002472011576626513023323 0ustar00usergroup00000000000000#!/bin/sh # # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # New haresources format: # # machine resource resource resource resource # # Where a resource can be an IP address or a scriptname, or a scriptname # and single argument. # # When it's a scriptname with an argument, the argument is connected to # the scriptname with "::". Another way of expressing an IP address is # via IPaddr::ip-address, since the script name IPaddr is the one assumed # for resources which are spelled like an IP address. # # As an illustration, the following two lines are identical in effect: # # node1 123.45.67.89 httpd # node1 IPaddr::123.45.67.89 httpd # # One can also pass multiple arguments to a script by separating each # argument with a double colon: # # node1 10.0.0.170 Filesystem::/dev/sda1::/data1::ext2 # # Note: The first resource on the line must be unique in the haresources # file. Maybe I should add a resource type called Unique which can # put on the front of a line to meet this requirement. # # # set -x : : : ==================== Starting ResourceManager $* ========================== unset LC_ALL; export LC_ALL # Make ifconfig work in France for David Jules :-) unset LANGUAGE; export LANGUAGE # Make ifconfig work in France for Fabrice :-) prefix=@prefix@ exec_prefix=@exec_prefix@ HA_DIR=@sysconfdir@/ha.d HA_FUNCS=$HA_DIR/shellfuncs export HA_DIR HA_FUNCS INITDIR=@INITDIR@ STANDBY=@HA_NOARCHDATAHBDIR@/hb_standby USAGE="usage: $0 listkeys machine | takegroup key | givegroup key|status resource"; . $HA_FUNCS : ${HA_STOPRETRYMAX=10} isip() { case $1 in [0-9]*.[0-9]*.[0-9]*.[0-9]*) true;; *) false;; esac } # # Reverseargs doesn't deal with arguments containing white space correctly. # To fix this I think you'd have to change it to put out the arguments # one per line, and then have the caller use them a line at a time using # "read" or something. 
This could be done either using a tmp file or # by using eval on the argument numbers starting from $# and going # down. But, no existing resource script deals with these either, so # I'm not too worried yet... # reverseargs() { L="" for arg in "$@" do case $L in ?*) L="$arg $L";; "") L=$arg;; esac done echo $L } # # Remove comments and extra blanks from haresources # translate all white space into single blanks. Each line ends with # a blank, making parsing it easier for dumb shell scripts. # ipres() { grepflags="" case $# in 0) pat='.' ;; 1) pat=$1 ;; 2) grepflags=$1; pat=$2 ;; esac if [ -f $HA_DIR/haresources ]; then cat $HA_DIR/haresources | # # Explanation of Sed -e expressions below: # # 1) Strip out comments # 2) Repeatedly join lines together when they're ended by '\' # (":a" is a label. "ta" is a conditional # branch to "a:") # 3) Append a blank to the end of the line # 4) Compress multiple blanks/tabs into a single blank # 5) Remove lines that only contain whitespace or are empty # 6) Strip off a leading space (if any) # sed \ -e 's%#.*%%' \ -e :a -e '/\\$/N; s/\\\n//; ta' \ -e 's%$% %' \ -e 's%[ ][ ]*% %g' \ -e '/^[ ]*$/ d' \ -e 's%^ %%' | awk '/.*/ {$1 = tolower($1); print $0, ""}' | egrep $grepflags "$pat" fi } ipresline() { ipres " $1 " } KeyResources() { ipres -i "^$1 " | cut -d' ' -f2 } OurGroupKeys() { ipres -i "^${HA_CURHOST} " | cut -d' ' -f2- } OtherGroupKeys() { ipres -iv "^${HA_CURHOST} " | cut -d' ' -f2- } StandbyGroupType() { if OurGroupKeys | grep "^$1\$" >/dev/null then echo local else echo foreign fi } canonname() { if isip $1 then echo "IPaddr::$1" else echo $1 fi } resource2script() { case `canonname $1` in *::*) echo $1 | sed 's%::.*%%' ;; *) echo $1;; esac } # Return the list of arguments after the script name. # multiple arguments are separated by :: delimiters resource2arg() { case `canonname $1` in *::*) echo $1 | sed 's%[^:]*::%%' | sed 's%::% %g' ;; esac } scriptpath() { script=`canonname $1` script=`resource2script $script` for dir in $HA_RESOURCEDIR $INITDIR do if [ -f $dir/$script -a -x $dir/$script ] then echo $dir/$script; return 0; fi done ha_log "ERROR: Cannot locate resource script $script" false } # # Since we're patterned after the LSB's init scripts, here are # the exit codes we should be returning for status... # # 0 program is running # 1 program is dead and /var/run pid file exists # 2 program is dead and /var/lock lock file exists # 3 program is stopped # 4-100 reserved for future LSB use # 100-149 reserved for distribution use # 150-199 reserved for application use # 200-254 reserved # we_own_resource() { arg=`resource2arg $1` spath=`scriptpath $1`; case `$spath $arg status` in *[Nn][Oo][Tt]\ *[Rr]unning*) return 3;; *[Rr]unning*|*OK*) return 0;; *) return 3;; esac } doscript() { script=`resource2script $1` arg=`resource2arg $1` spath=`scriptpath $script` if [ -f "$spath" -a -x "$spath" ] then ha_log "info: Running $spath $arg $2" ha_debug "debug: Starting $spath $arg $2" $spath $arg "$2" 2>>$HA_DEBUGLOG RC=$? ha_debug "debug: $spath $arg $2 done. RC=$RC" case $RC in 0);; *) ha_log "ERROR: Return code $RC from $spath";; esac return $RC; fi ha_log "ERROR: Cannot locate resource script $script" return 1; } # Arguments are: nodename resource1 resource2 ... giveupresourcegroup() { ha_log "info: Releasing resource group: $*" shift rc=0 for j in `reverseargs "$@"` do retries=0 while doscript $j stop >>$HA_DEBUGLOG 2>&1 rc1=$? 
[ $HA_STOPRETRYMAX -le 0 -o $retries -lt $HA_STOPRETRYMAX ] && [ $rc1 -ne 0 ] do sleep 1 retries=`expr 1 + "$retries"` ha_log "info: Retrying failed stop operation [$j]" done if [ $rc1 -ne 0 ] then if we_own_resource $j then rc=$rc1 RecoverFromFailedStop # Bye Bye! else # Red Hat (and probably others) Kludge! ha_log "ERROR: Resource script for $j probably not LSB-compliant." ha_log "WARN: it ($j) MUST succeed on a stop when already stopped" ha_log "WARN: Machine reboot narrowly avoided!" fi fi done return $rc } # Arguments are: nodename resource1 resource2 ... acquireresourcegroup() { ha_log "info: Acquiring resource group: $*" node="$1" shift rc=0; for j in "$@" do if we_own_resource "$j" || doscript "$j" start then : $j start succeeded else rc=$? ha_log "CRIT: Giving up resources due to failure of $j" giveupresourcegroup "$node" "$@" (sleep 30; $STANDBY `StandbyGroupType "$j"`) & break fi done return $rc } # # We may be given a resource to give up that we don't own... # GiveUpGroup() { # Get the list of resources we've been requested to give up... haresources=`ipresline $1` if [ ! -z "$haresources" ] then giveupresourcegroup $haresources fi } TakeGroup() { # Get the list of resources we've been requested to take... haresources=`ipresline $1` if [ ! -z "$haresources" ] then acquireresourcegroup $haresources fi } # # Determine the status of all the resources in a resource group # # Results are echoed to stdout: # # NONE: None of the resources in the resource group are held # (or there no such resource group) # ALL: All of the resources in the resource group are held # SOME: Some of the resources in the resource group are held # StatGroup() { result="FirstTime" set `ipresline $1` shift for resource do if we_own_resource $resource then case $result in FirstTime) result=ALL;; NONE) echo SOME; return 0;; SOME|ALL) ;; esac else case $result in FirstTime) result=NONE;; ALL) echo SOME; return 0;; SOME|NONE) ;; esac fi done case $result in FirstTime) echo NONE;; *) echo $result;; esac } # # Verify that all resources in the resource group are idle # VerifyAllIdle() { rc=0 for rsc in `KeyResources ".*"` do if we_own_resource $rsc then ha_log "CRITICAL: Resource $rsc is active, and should not be!" ha_log "CRITICAL: Non-idle resources can affect data integrity!" >&2 ha_log "info: If you don't know what this means, then get help!" ha_log "info: Read the docs and/or source to $0 for more details." cat <<-! >&2 CRITICAL: Resource $rsc is active, and should not be! CRITICAL: Non-idle resources can affect data integrity! info: If you don't know what this means, then get help! info: Read the docs and/or the source to $0 for more details. ! # # What this means is that if you have a shared disk and it's already mounted # before you start heartbeat, then you could have it mounted simultaneously # on both sides. If this happens then your disk data is toast! # So, this is sometimes VERY BAD INDEED! # # The most *common* cause for this message is that you told your OS to manage one # of the IP addresses that you asked heartbeat to manage. You can't put # both your OS startup scripts and heartbeat in charge of HA resources. # This particular case is discussed in detail in the docs. # rc=`expr $rc + 1` fi done if [ $rc -ne 0 ] then ha_log "CRITICAL: Non-idle resources will affect resource takeback!" ha_log "CRITICAL: Non-idle resources may affect data integrity!" fi exit $rc } # # If we are unable to stop a resource, then everything is in a # hosed state. The only way out is through a reboot... 
# RecoverFromFailedStop() { ha_log "CRIT: Resource STOP failure. Reboot required!" ha_log "CRIT: Killing heartbeat ungracefully!" for name in heartbeat ipfail ccm do pkill -9 $name done if [ -x @REBOOT@ ] && @REBOOT@ @REBOOT_OPTIONS@ then : OK - reboot succeeded elif [ -x /sbin/init ] && /sbin/init 6 then : OK - init 6 succeeded else ha_log "CRIT: Unable to force reboot." fi } case $1 in listkeys) KeyResources "$2";; allkeys) KeyResources '.*';; ourkeys) OurGroupKeys;; otherkeys) OtherGroupKeys;; status) we_own_resource $2;; givegroup) GiveUpGroup $2 >>$HA_DEBUGLOG 2>&1 ;; takegroup) TakeGroup $2 >>$HA_DEBUGLOG 2>&1 ;; statgroup) StatGroup $2 >>$HA_DEBUGLOG 2>&1 ;; verifyallidle)VerifyAllIdle ;; *) echo "$USAGE" >&2 echo "" >&2 exit 1;; esac Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/TestHeartbeatComm.in0000644000000000000000000000300411576626513023604 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # The purpose of this script is to break or fix the communication in the cluster. # HADIR=@sysconfdir@/ha.d HBSCRIPT=@INITDIR@/heartbeat@INIT_EXT@ LIBDIR=@libdir@/ HBLIB=$LIBDIR/heartbeat TESTFILE=OnlyForTesting USAGE="Usage: 'TestHeartbeatComm break-communication allow-nodes-list|reduce-communication xmit_loss recv_loss allow-nodes-list|fix-communication|delete-testingfile|save-testingfile testingfile|restore-communication testingfile'" if [ $# -lt 1 ] then echo "$USAGE"; exit 1; fi cd @sysconfdir@/ha.d # Create OnlyForTesting File. It is invoked by ParseTestOpts() in heartbeat.c GenerateTestingFile(){ save_IFS=$IFS IFS=';' cat <$TESTFILE xmitloss=$1 rcvloss=$2 EOF shift shift cat <>$TESTFILE allownodes=$*; EOF IFS=$save_IFS } DeleteTestingFile(){ rm -f $TESTFILE echo "DeleteTestFileOK" } RestoreTestingFile(){ if [ -f $1 ] then cp $1 $TESTFILE exit $? fi } SaveTestingFile(){ cp $TESTFILE $1 exit $? } HBReload(){ $HBSCRIPT reload } OPT=$1 case "$OPT" in break-communication) shift GenerateTestingFile 1 1 $@ HBReload ;; reduce-communication) shift GenerateTestingFile $@ HBReload ;; fix-communication) DeleteTestingFile HBReload ;; delete-testingfile) DeleteTestingFile ;; save-testingfile) shift SaveTestingFile $1 ;; restore-communication) shift RestoreTestingFile $1 HBReload ;; *) echo "$USAGE" ;; esac exit $? Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/ha_config.in0000644000000000000000000000207411576626513022154 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # # This script contains some variables for linux-ha which # are set at configure time. # # Created to alow this information to be passed to # debian postinst scripts. Might be useful for other # stuff too. 
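# Illustrative use (annotation, not part of the original script): a consumer
# such as a postinst script would typically source the installed copy and
# read the configure-time values, e.g.
#     . /usr/share/heartbeat/ha_config   # hypothetical path; the real one
#                                        # is @HA_NOARCHDATAHBDIR@/ha_config
#     install -d -o "$HA_CCMUSER" -g "$HA_APIGROUP" "$HA_COREDIR"
# The install(1) invocation is only an example; the variable names are the
# ones defined below.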
# # /var/lib directory HA_VARLIBDIR="@HA_VARLIBDIR@" # directory to drop core files into HA_COREDIR="@HA_COREDIR@" # Group to own API fifos HA_APIGROUP="@HA_APIGROUP@" # GID to own API fifos HA_APIGID="@HA_APIGID@" # User to own CCM fifos HA_CCMUSER="@HA_CCMUSER@" # UID to own CCM fifos HA_CCMUID="@HA_CCMUID@" # Init start priority HB_INITSTARTPRI="@HB_INITSTARTPRI@" # Init stop priority HB_INITSTOPPRI="@HB_INITSTOPPRI@" # Init start priority LOGD_INITSTARTPRI="@LOGD_INITSTARTPRI@" # Init stop priority LOGD_INITSTOPPRI="@LOGD_INITSTOPPRI@" # heartbeat rc script directory # HB_RC_DIR is the old name HB_RC_DIR="@HA_RC_DIR@" HA_RC_DIR="@HA_RC_DIR@" # Version number of package VERSION="@VERSION@" Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/ha_propagate.in0000644000000000000000000000210711576626513022666 0ustar00usergroup00000000000000#!@PYTHON@ # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # This script read the list of nodes from the ha.cf file # and then uses 'scp' to copy the ha.cf file to each node. # import os, sys from stat import * cfgdir = "@sysconfdir@/ha.d/" cfgfile = cfgdir + "ha.cf" authfile = cfgdir + "authkeys" try: os.stat(cfgfile) os.stat(authfile) except: print "HA Linux not configured on this node. Can not propagate." sys.exit() nodes = [] f=open(cfgfile) for line in f: if line.startswith("node"): toks = line.split() if (len(toks) == 2): nodeName = toks[1] nodes.append(nodeName) f.close() thisnode = os.uname()[1] if nodes.count(thisnode) > 0: nodes.remove(thisnode) for i, v in enumerate(nodes): print "Propagating HA configuration files to node " + v + "." res = os.system("scp " + cfgfile + " " + authfile + " root@" + v + ":" + cfgdir) print "Setting HA startup configuration on node " + v + "." res = os.system("ssh " + " root@" + v + " chkconfig `chkconfig heartbeat`") Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_addnode.in0000755000000000000000000000122411576626513022305 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # This script sets the node (where it was called) to *active* # It forces the other node to release all the held resources and lets # this node do a takeover. # # This script will only work for a two machine setup... # More than that and you need to vote, or something... HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs usage() { echo "usage: hb_addnode ..." exit 1 } if [ $# = 0 ] then usage fi nodelist="$@" if [ "$nodelist" = "--help" ]; then usage fi ha_clustermsg <<-!MSG t=addnode nodelist=$nodelist !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_delnode.in0000755000000000000000000000122411576626513022321 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # This script sets the node (where it was called) to *active* # It forces the other node to release all the held resources and lets # this node do a takeover. # # This script will only work for a two machine setup... # More than that and you need to vote, or something... HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs usage() { echo "usage: hb_delnode ..." 
exit 1 } if [ $# = 0 ] then usage fi nodelist="$@" if [ "$nodelist" = "--help" ]; then usage fi ha_clustermsg <<-!MSG t=delnode nodelist=$nodelist !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_setsite.in0000755000000000000000000000060311576626513022367 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # This script sets the site of node HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs usage() { echo "usage: hb_setsite " exit 1 } if [ $# = 0 ] then usage fi if [ "$1" = "--help" ]; then usage fi ha_clustermsg <<-!MSG t=setsite node=$1 site=$2 !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_setweight.in0000755000000000000000000000062111576626513022712 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # This script sets the weight of the node HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs usage() { echo "usage: hb_setweight " exit 1 } if [ $# = 0 ] then usage fi if [ "$1" = "--help" ]; then usage fi ha_clustermsg <<-!MSG t=setweight node=$1 weight=$2 !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_standby.in0000644000000000000000000000216511576626513022355 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # This script sets the node (where it was called) to *standby* # It forces the node to release all the held resources and lets # the other node do a takeover. # This script will only work for a two machine setup... # More than that and you need to vote, or something... HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs : Now running $0: $* usage() { echo "usage:" echo "$0 [all|foreign|local|failback]" >&2 exit 1; } if [ $# -ge 2 ] then echo "$0: too many arguments" usage exit 1 fi if [ $# -ge 1 ] then resources=$1 else resources=all fi if [ $resources != "all" ] && [ $resources != "foreign" ] && [ $resources != "local" ] && [ $resources != "failback" ] && [ $resources != "--help" ]; then echo "$0: wrong resources($resources)." usage exit 1 fi case $resources in all|foreign|local) ;; failback) resources=foreign;; *) usage;; esac ha_log "Going standby [$resources]." ha_clustermsg <<-!MSG t=ask_resources rsc_hold=$resources info=me !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/hb_takeover.in0000755000000000000000000000167711576626513022543 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # This script sets the node (where it was called) to *active* # It forces the other node to release all the held resources and lets # this node do a takeover. # # This script will only work for a two machine setup... # More than that and you need to vote, or something... HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs usage() { echo "usage:" echo "$0 [all|foreign|local|failback]" >&2 exit 1 } if [ $# -ge 2 ] then echo "$0: too many arguments" usage exit 1 fi if [ $# -ge 1 ] then resources=$1 else resources=all fi if [ $resources = "--help" ]; then usage fi if [ $resources != "all" ] && [ $resources != "foreign" ] && [ $resources != "local" ] && [ $resources != "failback" ]; then echo "$0: wrong resources($resources)." 
usage exit 1 fi ha_clustermsg <<-!MSG t=hb_takeover rsc_hold=$resources !MSG Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/mach_down.in0000755000000000000000000000324311576626513022200 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # This script will only work for a two machine setup... # More than that and you need to vote, or something... # # prefix=@prefix@ exec_prefix=@exec_prefix@ . @sysconfdir@/ha.d/shellfuncs : Now running $0: $* mdown=`echo $1 | tr '[A-Z]' '[a-z]'`; # The name of the downed machine... #ha_log "info: DEBUG: $0 -- taking resources for $mdown" case $mdown in $HA_CURHOST) exit 0;; # Oh No! It's us! esac case $HA_info in *ping*) exit 0;; esac # # In the case of nice_failback, we may know that we # already own these resources, but we ignore that fact # and take it over anyway, because getting that information # to us would require another environment variable or something # like that which would be a pain. # for groupkey in `$HA_NOARCHBIN/ResourceManager listkeys $mdown` do ha_log "info: Taking over resource group $groupkey" $HA_NOARCHBIN/ResourceManager takegroup $groupkey done # Be nice_failback compliant :) # case 1 - part 2 # # This code triggers actions inside heartbeat, because we receive our own # resource messages as well as those sent by others... # # This code shouldn't be executed if we aren't running nice_failback... # # The field info=mach_down tells heartbeat that the message is from us # so it can consider the takeover complete. # case $HA_NICEFAILBACK in [Yy]es) ha_log "info: $0: nice_failback: foreign resources acquired" ha_clustermsg <<-!MSG t=resource rsc_hold=foreign info=mach_down !MSG ;; esac # The CTS testing code needs to know when this is really done ;-) ha_log "info: mach_down takeover complete for node $mdown." Heartbeat-3-0-7e3a82377fa8/heartbeat/lib/req_resource.in0000755000000000000000000000463111576626513022741 0ustar00usergroup00000000000000#!/bin/sh # # Support: linux-ha-dev@lists.tummy.com # License: GNU General Public License (GPL) # # This script is called to politely request that a resource be given up # to us. # # At the end of $TIMEOUT, we take it anyway. # # It could be that we already have taken it, in which case it should # do nothing. # # # set -x prefix=@prefix@ exec_prefix=@exec_prefix@ HA_DIR=@sysconfdir@/ha.d; export HA_DIR . $HA_DIR/shellfuncs # # This code is the "ask nicely" case for when we first start up a node. # The case of the other node failing or being dead when we start up # is handled by mach_down which invokes takegroup just like we do # except it never asks for permission. # # The nice_failback = yes ($HA_NICEFAILBACK) case coordinates things # so always know the other guy doesn't have the resources before we # get here, so we don't need to ever wait in that case... # # If no one else is up, then we set HA_DONTASK in the environment. # It is possible that the TIMEOUT case should be removed now... # This is a failsafe case, but it's kind of a dangerous failsafe # case... # TIMEOUT=1200 RESOURCE=$1 # # Do we already have this resource? # $HA_NOARCHBIN/ResourceManager status $RESOURCE && exit 0 # # Now give our ip-request-message... # # "ip-request" Message type # ip-address IP address requested # DENY if we won't/can't # case $HA_debug in [123458789]*) ha_log "debug: in $0 $*" ha_log "debug: dont_ask: $HA_DONTASK nice_failback: $HA_NICEFAILBACK";; esac # # The nice failback case only takes over from dead machines... 
# case $HA_NICEFAILBACK in yes) HA_DONTASK=yes;; esac CMD=ip-request case $HA_DONTASK in yes) # # Pretend the current owner gave it up peacefully # ha_clustermsg <<-!MSG t=$CMD-resp dest=$HA_CURHOST ipaddr=$RESOURCE weown=yes ok=OK !MSG ;; *) : "Ask Nicely ;-)" ha_clustermsg <<-!MSG t=$CMD ipaddr=$RESOURCE !MSG # NOTE! # The original dangerous failsafe takeover of resources # has been disabled!! exit 0 # Normally they will have already replied to us before we get here # and we will have already taken things over. This is to allow # for the case that they somehow never tell us to go ahead... # Heartbeat should kill us if it shuts down before we finish. # For that to work we can't go into the background... sleep $TIMEOUT $HA_NOARCHBIN/ResourceManager status "$RESOURCE" || $HA_NOARCHBIN/ResourceManager takegroup "$RESOURCE" ;; esac Heartbeat-3-0-7e3a82377fa8/heartbeat/logrotate.d/Makefile.am0000644000000000000000000000165611576626513023424 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in logrotateddir = $(sysconfdir)/logrotate.d logrotated_DATA = heartbeat EXTRA_DIST = $(logrotated_DATA) Heartbeat-3-0-7e3a82377fa8/heartbeat/logrotate.d/heartbeat0000644000000000000000000000011011576626513023232 0ustar00usergroup00000000000000/var/log/ha-debug { missingok } /var/log/ha-log { missingok } Heartbeat-3-0-7e3a82377fa8/heartbeat/misc/Makefile.am0000644000000000000000000000162611576626513022132 0ustar00usergroup00000000000000# # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # This instance created by Horms # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = doit start stop syncup Heartbeat-3-0-7e3a82377fa8/heartbeat/misc/doit0000755000000000000000000000106411576626513020757 0ustar00usergroup00000000000000VERS=0.4.3-1 SUDO="" SUDO="sudo" $SUDO /etc/rc.d/init.d/heartbeat stop $SUDO rsh kathy /etc/rc.d/init.d/heartbeat stop & if $SUDO make rpm then $SUDO rpm --force --upgrade /usr/src/redhat/RPMS/i386/heartbeat-$VERS.i386.rpm & $SUDO rsh kathy rpm --force --upgrade /usr/src/redhat/RPMS/i386/heartbeat-$VERS.i386.rpm # $SUDO rdate -s drtime # $SUDO rsh kathy rdate -s drtime $SUDO sh -c ">/var/log/ha-log" $SUDO rsh kathy sh -c '">/var/log/ha-log"' $SUDO rsh kathy /etc/rc.d/init.d/heartbeat start & $SUDO /etc/rc.d/init.d/heartbeat start fi Heartbeat-3-0-7e3a82377fa8/heartbeat/misc/start0000755000000000000000000000004611576626513021154 0ustar00usergroup00000000000000sudo /etc/rc.d/init.d/heartbeat start Heartbeat-3-0-7e3a82377fa8/heartbeat/misc/stop0000755000000000000000000000004511576626513021003 0ustar00usergroup00000000000000sudo /etc/rc.d/init.d/heartbeat stop Heartbeat-3-0-7e3a82377fa8/heartbeat/misc/syncup0000755000000000000000000000045211576626513021341 0ustar00usergroup00000000000000sudo rsh kathy /etc/rc.d/init.d/heartbeat stop & sudo /etc/rc.d/init.d/heartbeat stop sudo rdate -s drtime sudo rsh kathy rdate -s drtime sudo sh -c ">/var/log/ha-log" sudo rsh kathy sh -c '">/var/log/ha-log"' sudo rsh kathy /etc/rc.d/init.d/heartbeat start & sudo /etc/rc.d/init.d/heartbeat start Heartbeat-3-0-7e3a82377fa8/heartbeat/module.c0000644000000000000000000001246511576626513020577 0ustar00usergroup00000000000000/* * module: Dynamic module support code * * Copyright (C) 2000 Alan Robertson * Copyright (C) 2000 Marcelo Tosatti * * Thanks to Conectiva S.A. for sponsoring Marcelo Tosatti work * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #define index FOOindex #define time FOOtime #include #undef index #undef time #include #include #include #include #include #include #include #include #include #include #include #ifndef RTLD_NOW # define RTLD_NOW 0 #endif extern struct hb_media_fns** hbmedia_types; extern int num_hb_media_types; PILPluginUniv* PluginLoadingSystem = NULL; GHashTable* AuthFunctions = NULL; GHashTable* CommFunctions = NULL; GHashTable* StonithFuncs = NULL; static GHashTable* Parameters = NULL; static void RegisterNewMedium(struct hb_media* mp); const char * GetParameterValue(const char * name); static void RegisterCleanup(void(*)(void)); struct hb_media_imports CommImports = { GetParameterValue /* So plugins can get option values */ , RegisterNewMedium , st_ttylock , st_ttyunlock , StringToBaud , RegisterCleanup , hb_signal_process_pending }; extern struct hb_media* sysmedia[]; extern int nummedia; static PILGenericIfMgmtRqst RegistrationRqsts [] = { {"HBauth", &AuthFunctions, NULL, NULL, NULL} , {"HBcomm", &CommFunctions, &CommImports, NULL, NULL} , {"stonith", &StonithFuncs, NULL, NULL, NULL} , {NULL, NULL, NULL, NULL, NULL} }; int module_init(void) { static int initialised = 0; #if 0 int errors = 0; #endif PIL_rc rc; /* Perform the init only once */ if (initialised) { return HA_FAIL; } #ifdef DLPREOPEN /* Initialize libltdl's list of preloaded modules */ LTDL_SET_PRELOADED_SYMBOLS(); #endif #if 0 /* Initialize ltdl */ if ((errors = lt_dlinit())) { return HA_FAIL; } #endif if ((PluginLoadingSystem = NewPILPluginUniv(HA_PLUGIN_D)) == NULL) { return(HA_FAIL); } if (DEBUGDETAILS) { PILSetDebugLevel(PluginLoadingSystem, NULL, NULL, debug_level); } if ((rc = PILLoadPlugin(PluginLoadingSystem, "InterfaceMgr", "generic" , &RegistrationRqsts)) != PIL_OK) { ha_log(LOG_ERR , "ERROR: cannot load generic interface manager plugin" " [%s/%s]: %s" , "InterfaceMgr", "generic" , PIL_strerror(rc)); return HA_FAIL; } PILSetDebugLevel(PluginLoadingSystem, NULL, NULL, debug_level); /* init completed */ ++initialised; return HA_OK; } static void RegisterNewMedium(struct hb_media* mp) { sysmedia[nummedia] = mp; ++nummedia; } /* * SetParameterValue() records a class of options given in the configuration * file so they can be passed to the plugins their use. This avoids coupling * through global variables which is problematic for plugins on some platforms. 
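 *
 * Illustrative example (annotation, not part of the original source): a
 * directive such as "baud 19200" read from ha.cf would be recorded with
 *
 *     SetParameterValue("baud", "19200");
 *
 * after which a media plugin can fetch the value either through the
 * GetParameterValue() hook exported to it in CommImports, or from the
 * environment as HA_baud (see PREFIX below).  The "baud" directive is used
 * here purely as an example value.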
*/ #define PREFIX "HA_" void SetParameterValue(const char * name, const char * value) { char * namedup; char * valdup; void * gname; void * gval; int name_len = strlen(name?name:""); if (Parameters == NULL) { Parameters = g_hash_table_new(g_str_hash, g_str_equal); if (Parameters == NULL) { ha_log(LOG_ERR , "ERROR: cannot create parameter table"); return; } } if (g_hash_table_lookup_extended(Parameters, name, &gname , &gval)) { g_hash_table_remove(Parameters, name); g_free(gval); g_free(gname); } namedup = g_strdup(name); valdup = g_strdup(value); g_hash_table_insert(Parameters, namedup, valdup); if(name_len > 0) { char * env_name = malloc(name_len + STRLEN_CONST(PREFIX)+1); if (env_name == NULL){ cl_log(LOG_ERR, "SetParameterValue():" "setenv() memory allocation failed."); return; } snprintf(env_name, name_len+4, PREFIX "%s", name); env_name[name_len+STRLEN_CONST(PREFIX)] = EOS; /* * It is unclear whether any given version of setenv * makes a copy of the name or value, or both. * Therefore it is UNSAFE to free either one. * Fortunately the size of the resulting potential memory leak * is small for this particular situation. */ setenv(env_name, value, 1); } } /* * GetParameterValue() provides information from the configuration file * for the plugins to use. This avoids coupling through global variables. */ const char * GetParameterValue(const char * name) { if (!Parameters) { return NULL; } return g_hash_table_lookup(Parameters, name); } static void RegisterCleanup(void(*fun)(void)) { localdie = fun; } Heartbeat-3-0-7e3a82377fa8/heartbeat/rc.d/Makefile.am0000644000000000000000000000173211576626513022023 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in harcddir = $(sysconfdir)/ha.d/rc.d harcd_SCRIPTS = ip-request ip-request-resp status \ ask_resources hb_takeover EXTRA_DIST = $(harcd_SCRIPTS) Heartbeat-3-0-7e3a82377fa8/heartbeat/rc.d/ask_resources.in0000644000000000000000000000255711576626513023175 0ustar00usergroup00000000000000#!/bin/sh # License: GNU General Public License (GPL) # This script was based on mach_down # It may be used along with hb_standby # # This script will only work for a two machine setup... # More than that and you need to vote, or something... # # . @sysconfdir@/ha.d/shellfuncs : Now running $0: $* standby_node=$HA_src ha_log "info: XXXX standby node $standby_node" # # In the case of nice_failback, we may know that we # already own these resources, but we ignore that fact # and take it over anyway, because getting that information # to us would require another environment variable or something # like that which would be a pain. 
# for groupkey in `$HA_NOARCHBIN/ResourceManager listkeys $standby_node` do ha_log "info: Taking over resource group $groupkey" $HA_NOARCHBIN/ResourceManager takegroup $groupkey done # Be nice_failback compliant :) # case 1 - part 2 # # This code triggers actions inside heartbeat, because we receive our own # resource messages as well as those sent by others... # # This code shouldn't be executed if we aren't running nice_failback... # # The field info=mach_down tells heartbeat that the message is from us # so it can consider the takeover complete. # case $HA_NICEFAILBACK in [Yy]es) ha_log "info: $0: nice_failback: acquiring foreign resources" ha_clustermsg <<-!MSG t=resource rsc_hold=foreign info=standby !MSG ;; esac Heartbeat-3-0-7e3a82377fa8/heartbeat/rc.d/hb_takeover.in0000644000000000000000000000057511576626513022614 0ustar00usergroup00000000000000#!/bin/sh # License: GNU General Public License (GPL) HA_DIR=@sysconfdir@/ha.d . ${HA_DIR}/shellfuncs ORIGNODE=$HA_src if [ "X$ORIGNODE" = "X$HA_CURHOST" ] then exit 0; # request is from us! fi case $HA_rsc_hold in all) standby_rsc=$HA_rsc_hold;; local) standby_rsc=foreign;; foreign) standby_rsc=local;; esac @HA_NOARCHDATAHBDIR@/hb_standby $standby_rsc Heartbeat-3-0-7e3a82377fa8/heartbeat/rc.d/ip-request0000644000000000000000000000244111576626513022006 0ustar00usergroup00000000000000#!/bin/sh # # License: GNU General Public License (GPL) # # This script is called to "give up" an IP address when requested # # It could be that we don't have it, in which case we ignore the request. # # # Note: this script is called by the heartbeat code, so it gets # most of its arguments through the environment. # . $HA_FUNCS IFCONFIG=/sbin/ifconfig ROUTE=/sbin/route RSCMGR=$HA_NOARCHBIN/ResourceManager # # # Really a resource group name... case "$HA_ipaddr" in ?*) false;; *) cat <<-!EOF $0 will give up the specified IP address if we have it assigned to us. Otherwise it will do nothing. $0 was invoked with these arguments: $* And this is the HA_ environment: !EOF env | grep '^HA_' exit 1;; esac # # HA_t, HA_src, and HA_ipaddr are fields the sender put in the message # CMD=$HA_t ORIGNODE=$HA_src IPADDR=$HA_ipaddr if [ "X$ORIGNODE" = "X$HA_CURHOST" ] then exit 0; # request is from us! fi # # Ignore this request if we don't own this resource # if $RSCMGR status $IPADDR then weown=yes if [ -x $HA_RCDIR/local_giveip ] then $HA_RCDIR/local_giveip $* fi $RSCMGR givegroup $IPADDR else weown=no fi # # Now give our ip-request-response message... # ha_clustermsg < * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef __TEST_H # define __TEST_H 1 #include struct TestParms { int enable_send_pkt_loss; int enable_rcv_pkt_loss; float send_loss_prob; float rcv_loss_prob; char allow_nodes[512]; }; extern struct TestParms * TestOpts; #define TESTSEND (TestOpts && TestOpts->enable_send_pkt_loss) #define TESTRCV (TestOpts && TestOpts->enable_rcv_pkt_loss) #ifdef __GNUC__ #define RandThresh(p) ((1.0*rand()) <= ((((double)RAND_MAX) * ((double)p)))) #else #define RandThresh(p) ((double)(rand()) <= ((((double)RAND_MAX) * ((double)p)))) #endif #define TestRand(field) (TestOpts && RandThresh(TestOpts->field)) int ParseTestOpts(void); #endif /* __TEST_H */ Heartbeat-3-0-7e3a82377fa8/include/HBauth.h0000644000000000000000000000237311576626513020153 0ustar00usergroup00000000000000/* * auth.h: Authentication functions for Linux-HA * * Copyright (C) 2000, 2001 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef HBAUTH_H # define HBAUTH_H 1 struct HBauth_info { struct HBAuthOps * auth; const char * authname; char * key; }; /* Authentication interfaces */ struct HBAuthOps { int (*auth) ( const struct HBauth_info * authinfo, const void *data , size_t data_len, char * result, int resultlen); int (*needskey) (void); }; #define HB_AUTH_TYPE HBauth #define HB_AUTH_TYPE_S "HBauth" #endif /*HBAUTH_H*/ Heartbeat-3-0-7e3a82377fa8/include/HBcomm.h0000644000000000000000000000402211576626513020136 0ustar00usergroup00000000000000/* * HBcomm.h: Communication functions for Linux-HA * * Copyright (C) 2000, 2001 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef HBCOMM_H # define HBCOMM_H 1 #define HB_COMM_TYPE HBcomm #define HB_COMM_TYPE_S "HBcomm" /* * List of functions provided by implementations of the heartbeat media * interface. 
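 *
 * Illustrative sketch only (the udp_* function names are hypothetical): a
 * communication plugin normally exports one statically initialized table of
 * these operations, matching the struct defined just below, for example:
 *
 *	static struct hb_media_fns udpOps = {
 *		udp_new, udp_parse, udp_open, udp_close,
 *		udp_read, udp_write, udp_mtype, udp_descr, udp_isping
 *	};
 *
 * which is registered with the "HBcomm" interface manager when heartbeat
 * loads the plugin (see RegistrationRqsts in the plugin-loading code earlier
 * in this archive).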
*/ struct hb_media_fns { struct hb_media*(*new) (const char * token); int (*parse) (const char * options); int (*mopen) (struct hb_media *mp); int (*close) (struct hb_media *mp); void* (*read) (struct hb_media *mp, int *len ); int (*write) (struct hb_media *mp , void *msg, int len); int (*mtype) (char **buffer); int (*descr) (char **buffer); int (*isping) (void); }; /* Functions imported by heartbeat media plugins */ struct hb_media_imports { const char * (*ParamValue)(const char * ParamName); void (*RegisterNewMedium)(struct hb_media* mp); int (*devlock)(const char *); /* Lock a device */ int (*devunlock)(const char *); /* Unlock a device */ int (*StrToBaud)(const char *); /* Convert baudrate */ void (*RegisterCleanup)(void(*)(void)); void (*CheckForEvents)(void); /* Check for signals */ /* Actually there are lots of other dependencies that ought to * be handled, but this is a start ;-) */ }; #define PKTTRACE 4 #define PKTCONTTRACE 5 #endif /*HBCOMM_H*/ Heartbeat-3-0-7e3a82377fa8/include/Makefile.am0000644000000000000000000000422511576626513020661 0ustar00usergroup00000000000000# # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # This instance created by Horms # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in ha_version.h config.h.in EXTRA_DIST = ha_version.h includedir=$(base_includedir)/heartbeat noinst_HEADERS = hb_api_core.h config.h lha_internal.h ha_version.h include_HEADERS = apphb.h apphb_notify.h HBauth.h HBcomm.h \ heartbeat.h hb_api.h hb_config.h SUBDIRS = ocf saf ## The backtick commands are not executed here, ## but rather as macro-expansions at use within the rules. HG_LIVE_VERSION=`$(HG) -R "$(top_srcdir)" id` ARCHIVE_VERSION="$(top_srcdir)/.hg_archival.txt" HG_TAR_VERSION=`$(EGREP) node: "$(ARCHIVE_VERSION)"` ha_version.h: $(ARCHIVE_VERSION) if [ -r ha_version.h -a ! -w ha_version.h ]; then \ hgv=""; \ echo "Saved Version"; \ elif [ -f $(ARCHIVE_VERSION) ]; then \ hgv="$(HG_TAR_VERSION)"; \ echo "Hg Archived Version: $${hgv}"; \ elif [ -x $(HG) -a -d $(top_srcdir)/.hg ]; then \ hgv="$(HG_LIVE_VERSION)"; \ echo "Hg Live Version: $${hgv}"; \ elif [ -r ha_version.h ]; then \ hgv=""; \ echo "Hg Saved Live Version"; \ cat ha_version.h; \ else \ hgv="Unknown"; \ echo "Unknown Hg Version"; \ fi ; \ if [ X"$${hgv}" != "X" ]; then \ echo "/* $${hgv} */" > ha_version.h; \ echo "#define HA_HG_VERSION \"$${hgv}\"" >> ha_version.h; \ fi .PHONY: $(ARCHIVE_VERSION) Heartbeat-3-0-7e3a82377fa8/include/apphb.h0000644000000000000000000001211511576626513020065 0ustar00usergroup00000000000000#ifndef _APPHB_H #define _APPHB_H /* * Copyright (C) 2002 Alan Robertson * This software licensed under the GNU LGPL. 
* * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * **************************************************************** * Application Heartbeat API * * Application heartbeating declares an expectation between a client * application and a heartbeat monitoring service. The heartbeat * monitoring service is used to monitor the basic sanity of * participating applications. * * To register with the monitoring service, use apphb_register(). * * Once an application has registered, it is expected that it * will make periodic calls to apphb_hb(). If it does not, that * fact will be logged by the heartbeat monitoring service. * * To tell the monitoring service how often to expect apphb_hb(), * calls, use apphb_setinterval(). * * To tell the monitoring service not to expect further apphb_hb() * calls, use apphb_unregister(). * **************************************************************** * * Each of these functions returns a negative value on error * and sets errno to an appropriate value. * * Success is indicated by a non-negative return value. */ /* * apphb_register: register a process for heartbeat monitoring. * * parameters: * appname: name this process is registered as (for notification purposes) * * The heartbeat interval for the current process is initially defaulted * to 10 seconds (10000 ms). * * NOTE: apphb_register() calls are not inherited by child processes. * child processes must register themselves. * * errno values: * EEXIST: current process already registered for monitoring. * EBADF: application heartbeat service not available * EINVAL: NULL 'appname' argument * ENOSPC: too many clients already registered * ENAMETOOLONG: appname or appinstance argument is too long. */ int apphb_register(const char * appname, const char * appinstance); /* * apphb_unregister: unregister a process from heartbeat monitoring. * * After this call, no further heartbeat calls are expected or allowed * from the current process, unless it reregisters. * * errno values: * EBADF: application heartbeat service not available * ESRCH: current process not registered for monitoring. */ int apphb_unregister(void); /* * apphb_setinterval: set heartbeat interval * parameters: * hbms: the expected heartbeat interval in milliseconds. * an hbms of zero temporarily diables heartbeat monitoring * * errno values: * EBADF: application heartbeat service not available * ESRCH: current process not registered for monitoring. * EINVAL: illegal/invalid hbms value * */ int apphb_setinterval(unsigned long hbms); /* * apphb_setwarn: set heartbeat warning time * parameters: * hbms: the heartbeat warning time in milliseconds * an hbms of zero temporarily diables heartbeat monitoring * * errno values: * EBADF: application heartbeat service not available * ESRCH: current process not registered for monitoring. 
* EINVAL: illegal/invalid hbms value * * */ int apphb_setwarn(unsigned long hbms); /* * apphb_setreboot: set auto-reboot on failure * When a process which has autoreboot enabled * exits prematurely doesn't heartbeat, the OS * is immediately rebooted. * parameters: * truefalse: set to a non-zero value to enable auto-reboot, * zero to disable auto-reboot for this process. * * errno values: * EBADF: application heartbeat service not available * ESRCH: current process not registered for monitoring. * EPERM: no permission to set this machine to auto-reboot * on failure. */ int apphb_setreboot(unsigned int truefalse); /* * apphb_hb: application heartbeat call. * * errno values: * EBADF: application heartbeat service not available * ESRCH: current process not registered for monitoring. * * If a registered application does not call apphb_hb() frequently * enough, then when the heartbeat falls out of spec, the * event is logged. Each time it resumes heartbeating afterwards, * this resumption is also logged. * * It is expected that there is a process somewhere watching these events, * and taking recovery actions if an application goes away or * fails to heartbeat either for too long, or heartbeats intermittently * too often. This application is outside the scope of this API, but * in spite of this, recovery is really the whole point of application * heartbeating ;-) */ int apphb_hb(void); #endif Heartbeat-3-0-7e3a82377fa8/include/apphb_notify.h0000644000000000000000000000365311576626513021464 0ustar00usergroup00000000000000/* * Author: Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _APPHB_NOTIFY_H # define _APPHB_NOTIFY_H /* * Definitions for apphb plugins. */ typedef struct AppHBNotifyOps_s AppHBNotifyOps; typedef struct AppHBNotifyImports_s AppHBNotifyImports; /* * Apphb event types */ enum apphb_event { APPHB_HUP = 1, /* Hangup w/o unregister */ APPHB_NOHB = 2, /* Failed to heartbeat as requested */ APPHB_HBAGAIN = 3, /* Heartbeating restarted */ APPHB_HBWARN = 4, /* Heartbeat outside warning interval */ APPHB_HBUNREG = 5 /* Application unregistered */ }; typedef enum apphb_event apphb_event_t; /* * Plugin exported functions. */ struct AppHBNotifyOps_s { int (*cregister)(pid_t pid, const char * appname, const char * appinst , const char * curdir, uid_t uid, gid_t gid, void * handle); int (*status)(const char * appname, const char * appinst , const char * curdir, pid_t pid, uid_t uid, gid_t gid , apphb_event_t event); }; /* * Plugin imported functions. 
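 *
 * Illustrative sketch only (the names "imports", "clienthandle" and
 * "trusted_uids" are hypothetical, and the exact policy is up to the
 * plugin): a notification plugin would typically use the auth import
 * declared just below to check that a client it is about to report on runs
 * under an acceptable uid, e.g. root only:
 *
 *	static uid_t trusted_uids[] = { 0 };
 *
 *	if (!imports->auth(clienthandle, trusted_uids, NULL, 1, 0)) {
 *		return;
 *	}
 *
 * (the early return skips clients matching none of the listed uids/gids).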
*/ struct AppHBNotifyImports_s { /* Boolean return value */ int (*auth) (void * clienthandle , uid_t * uidlist, gid_t* gidlist, int nuid, int ngid); }; #define APPHB_NOTIFY AppHBNotification #define APPHB_NOTIFY_S "AppHBNotification" #endif Heartbeat-3-0-7e3a82377fa8/include/hb_api.h0000644000000000000000000003252511576626513020224 0ustar00usergroup00000000000000/* * Client-side Low-level clustering API for heartbeat. * * Copyright (C) 2000 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * Currently the client-side heartbeat API needs to write in the /var/lock * directory for non-casual (named) clients. This has implications for the * euid, egid that we run as. * * Expect to set make your binaries setgid to uucp, or allow the uid * they run as to join the group uucp (or whatever your local system * has it set up as). * * Additionally, you must belong to the group hbapi. Fortunately, UNIX * group permissions are quite flexible, and you can do both. */ /* * Known deficiencies of this API: * * Each of the various set..callback functions should probably return * the current callback and private data parameter, so the caller can * restore them later. * */ #ifndef __HB_API_H # define __HB_API_H 1 #include #include #define LLC_PROTOCOL_VERSION 2 #include typedef void (*llc_msg_callback_t) (struct ha_msg* msg , void* private_data); typedef void (*llc_nstatus_callback_t) (const char *node, const char * status , void* private_data); typedef void (*llc_ifstatus_callback_t) (const char *node , const char * interface, const char * status , void* private_data); typedef void (*llc_cstatus_callback_t) (const char *node , const char * client, const char * status , void* private_date); typedef struct ll_cluster { void * ll_cluster_private; struct llc_ops* llc_ops; }ll_cluster_t; struct llc_ops { int (*signon) (ll_cluster_t*, const char * clientid); int (*signoff) (ll_cluster_t*, gboolean destroy_channel); int (*delete) (ll_cluster_t*); /* ************************************************************************* * Status Update Callbacks ************************************************************************* */ /* * set_msg_callback: Define callback for the given message type * * msgtype: Type of message being handled. * Messages intercepted by nstatus_callback or * ifstatus_callback functions won't be handled here. * * callback: callback function. * * p: private data - later passed to callback. */ int (*set_msg_callback) (ll_cluster_t*, const char * msgtype , llc_msg_callback_t callback, void * p); /* * set_nstatus_callback: Define callback for node status messages * This is a message of type "status" * * cbf: callback function. * * p: private data - later passed to callback. 
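 *
 * Minimal usage sketch, for illustration only: the client name "mymonitor"
 * and the callback body are hypothetical, error checking is omitted, and
 * "heartbeat" is assumed here as the llctype argument to ll_cluster_new()
 * (declared near the end of this header):
 *
 *	static void node_status_cb(const char *node, const char *status, void *priv)
 *	{
 *		cl_log(LOG_INFO, "node %s is now %s", node, status);
 *	}
 *
 *	ll_cluster_t *hb = ll_cluster_new("heartbeat");
 *	hb->llc_ops->signon(hb, "mymonitor");
 *	hb->llc_ops->set_nstatus_callback(hb, node_status_cb, NULL);
 *	while (hb->llc_ops->rcvmsg(hb, 1))
 *		;
 *	hb->llc_ops->signoff(hb, TRUE);
 *
 * Each rcvmsg() call reads one message and dispatches it to whichever
 * callback (if any) was registered for it.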
*/ int (*set_nstatus_callback) (ll_cluster_t* , llc_nstatus_callback_t cbf, void * p); /* * set_ifstatus_callback: Define callback for interface status messages * This is a message of type "ifstat" * These messages are received whenever an interface goes * dead or becomes active again. * * cbf: callback function. * * p: private data - later passed to callback. */ int (*set_ifstatus_callback) (ll_cluster_t* , llc_ifstatus_callback_t cbf, void * p); /* * set_cstatus_callback: Define callback from client status messages * This is a message of type "hbapi-clstat" * These messages are received whenever an client on * other nodes goes dead or becomes active again. * * cbf callback function. * * p: private data - later passed to callback. */ int (*set_cstatus_callback) (ll_cluster_t* , llc_cstatus_callback_t cbf, void * p); /************************************************************************* * Getting Current Information *************************************************************************/ /* * init_nodewalk: Initialize walk through list of list of known nodes */ int (*init_nodewalk)(ll_cluster_t*); /* * nextnode: Return next node in the list of known nodes */ const char * (*nextnode)(ll_cluster_t*); /* * end_nodewalk: End walk through the list of known nodes */ int (*end_nodewalk)(ll_cluster_t*); /* * node_status: Return most recent heartbeat status of the given node */ const char * (*node_status)(ll_cluster_t*, const char * nodename); /* * node_status: Return the weight of the given node */ int (*node_weight)(ll_cluster_t*, const char * nodename); /* * node_status: Return the site of the given node */ const char * (*node_site)(ll_cluster_t*, const char * nodename); /* * node_type: Return type of the given node */ const char * (*node_type)(ll_cluster_t*, const char * nodename); /* * num_nodes: Return the number of nodes(excluding ping nodes) */ int (*num_nodes)(ll_cluster_t*); /* * init_ifwalk: Initialize walk through list of list of known interfaces */ int (*init_ifwalk)(ll_cluster_t*, const char * node); /* * nextif: Return next node in the list of known interfaces on node */ const char * (*nextif)(ll_cluster_t*); /* * end_ifwalk: End walk through the list of known interfaces */ int (*end_ifwalk)(ll_cluster_t*); /* * if_status: Return current status of the given interface */ const char* (*if_status)(ll_cluster_t*, const char * nodename , const char *iface); /* * client_status: Return current status of the given client */ const char* (*client_status)(ll_cluster_t*, const char *host, const char *clientid, int timeout); /* * get_uuid_by_name: * return the uuid for the node which has the given name */ int (*get_uuid_by_name)(ll_cluster_t*, const char*, cl_uuid_t*); /* * get_name_by_uuid: * return the name for the node which has the given uuid */ int (*get_name_by_uuid)(ll_cluster_t*, cl_uuid_t*, char*, size_t); /************************************************************************* * Intracluster messaging *************************************************************************/ /* * sendclustermsg: Send the given message to all cluster members */ int (*sendclustermsg)(ll_cluster_t* , struct ha_msg* msg); /* * sendnodemsg: Send the given message to the given node in cluster. */ int (*sendnodemsg)(ll_cluster_t* , struct ha_msg* msg , const char * nodename); /* * sendnodemsg_byuuid: * Send the given message to the given node in cluster. 
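 *
 * Illustrative sketch only, continuing the sign-on example a little earlier
 * (the message type "mymsg", the payload field and the node name are
 * hypothetical; ha_msg_new(), ha_msg_add() and ha_msg_del() are assumed to
 * be the ha_msg.h constructors that go with the struct ha_msg used
 * throughout this header):
 *
 *	struct ha_msg *msg = ha_msg_new(0);
 *	ha_msg_add(msg, "t", "mymsg");
 *	ha_msg_add(msg, "payload", "hello");
 *	hb->llc_ops->sendnodemsg(hb, msg, "node1");
 *	ha_msg_del(msg);
 *
 * A client on "node1" that registered set_msg_callback(hb, "mymsg", ...)
 * could then receive it; the "t" field is the message type that callback
 * dispatch keys on (the same field the rc.d scripts see as HA_t).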
*/ int (*sendnodemsg_byuuid)(ll_cluster_t*, struct ha_msg* msg, cl_uuid_t*); /* * send_ordered_clustermsg: Send ordered message to all cluster members. */ int (*send_ordered_clustermsg)(ll_cluster_t* , struct ha_msg* msg); /* * send_ordered_nodemsg: Send ordered message to node. */ int (*send_ordered_nodemsg)(ll_cluster_t* , struct ha_msg* msg , const char* nodename); /* * inputfd: Return fd which can be given to select(2) or poll(2) * for determining when messages are ready to be read. * Only to be used in select() or poll(), please... * Note that due to IPC input buffering, always check * msgready() before going into select() or poll() * or you might hang there forever. */ int (*inputfd)(ll_cluster_t*); /* * ipcchan: Return IPC channel which can be given to * G_main_add_IPC_Channel() for mainloop use. * Please do not use send(), recv() directly. * Feel free to use waitin(), waitout(), * is_message_pending(), is_sending_blocked(), * set_recv_qlen(), set_send_qlen(), resume_io(), * verify_auth(). */ IPC_Channel* (*ipcchan)(ll_cluster_t*); /* * msgready: Returns TRUE (1) when a message is ready to be read. */ int (*msgready)(ll_cluster_t*); /* * setmsgsignal: Associates the given signal with the "message waiting" * condition. */ int (*setmsgsignal)(ll_cluster_t*, int signo); /* * rcvmsg: Cause the next message to be read - activating callbacks for * processing the message. If no callback processes the message * it will be ignored. The message is automatically disposed of. * It returns 1 if a message was received. */ int (*rcvmsg)(ll_cluster_t*, int blocking); /* * Return next message not intercepted by a callback. * NOTE: you must dispose of this message by calling ha_msg_del(). */ struct ha_msg* (*readmsg)(ll_cluster_t*, int blocking); /* ************************************************************************* * Debugging ************************************************************************* * * setfmode: Set filter mode. Analagous to promiscous mode in TCP. * Gotta be root to turn on debugging! * * LLC_FILTER_DEFAULT (default) * In this mode, all messages destined for this pid * are received, along with all that don't go to specific pids. * * LLC_FILTER_PMODE See all messages, but filter heart beats * * that don't tell us anything new. * LLC_FILTER_ALLHB See all heartbeats, including those that * don't change status. * LLC_FILTER_RAW See all packets, from all interfaces, even * dups. Pkts with auth errors are still ignored. * * Set filter mode. Analagous to promiscous mode in TCP. * */ # define LLC_FILTER_DEFAULT 0 # define LLC_FILTER_PMODE 1 # define LLC_FILTER_ALLHB 2 # define LLC_FILTER_RAW 3 int (*setfmode)(ll_cluster_t*, unsigned mode); /* * Return the value of a heartbeat configuration parameter * as a malloc-ed string(). You need to free() the result when * you're done with it. */ char * (*get_parameter)(ll_cluster_t *, const char * paramname); /* * Return heartbeat's deadtime */ long (*get_deadtime)(ll_cluster_t *); /* * Return heartbeat's keepalive time */ long (*get_keepalive)(ll_cluster_t *); /* * Return my node id */ const char * (*get_mynodeid)(ll_cluster_t *); /* * Return a suggested logging facility for cluster things * * < 0 means we're not logging to syslog. */ int (*get_logfacility)(ll_cluster_t *); /* * Return the current resource ownership status. * * NOTE: this call will fail if heartbeat isn't * managing resources. It can return "all", "local" or "foreign", "none" * or "transition". This call will eventually go away when we rewrite * the resource management code. 
"transition" means that things are * currently changing. */ const char * (*get_resources)(ll_cluster_t *); /* * chan_is_connected() * * Return true if the channel is connected */ gboolean (*chan_is_connected)(ll_cluster_t *); /* Set the send queue length in heartbeat side for the channel. This function can be used to set a large send queue if the client will receive slowly */ int (*set_sendq_len)(ll_cluster_t* lcl, int length); /* set the send blocking mode * TRUE indicate blocking, i.e if the send queue is full * the function will block there until there are slots available * or the IPC is disconnected * FALSE indicates the function will return immediately even * if there is no slot available */ int (*set_send_block_mode)(ll_cluster_t*, gboolean); const char * (*errmsg)(ll_cluster_t*); }; /* Parameters we can ask for via get_parameter */ #define KEY_HBVERSION "hbversion" /* Not a configuration parameter */ #define KEY_CLUSTER "cluster" #define KEY_QSERVER "quorum_server" #define KEY_HOST "node" #define KEY_HOPS "hopfudge" #define KEY_KEEPALIVE "keepalive" #define KEY_DEADTIME "deadtime" #define KEY_DEADPING "deadping" #define KEY_WARNTIME "warntime" #define KEY_INITDEAD "initdead" #define KEY_WATCHDOG "watchdog" #define KEY_BAUDRATE "baud" #define KEY_UDPPORT "udpport" #define KEY_FACILITY "logfacility" #define KEY_LOGFILE "logfile" #define KEY_DBGFILE "debugfile" #define KEY_FAILBACK "nice_failback" #define KEY_AUTOFAIL "auto_failback" #define KEY_STONITH "stonith" #define KEY_STONITHHOST "stonith_host" #define KEY_CLIENT_CHILD "respawn" #define KEY_FAILFAST "failfast" #define KEY_COMPRESSION "compression" #define KEY_COMPRESSION_THRESHOLD "compression_threshold" #define KEY_TRADITIONAL_COMPRESSION "traditional_compression" #define KEY_RT_PRIO "rtprio" #define KEY_GEN_METH "hbgenmethod" #define KEY_REALTIME "realtime" #define KEY_DEBUGLEVEL "debug" #define KEY_NORMALPOLL "normalpoll" #define KEY_APIPERM "apiauth" #define KEY_MSGFMT "msgfmt" #define KEY_LOGDAEMON "use_logd" #define KEY_CONNINTVAL "conn_logd_time" #define KEY_BADPACK "log_badpack" #define KEY_REGAPPHBD "use_apphbd" #define KEY_SYSLOGFMT "syslogmsgfmt" #define KEY_COREDUMP "coredumps" #define KEY_COREROOTDIR "coreroot" #define KEY_REL2 "crm" /* historical alias for pacemaker */ #define KEY_PACEMAKER "pacemaker" #define KEY_AUTOJOIN "autojoin" #define KEY_UUIDFROM "uuidfrom" #define KEY_ENV "env" #define KEY_MEMRESERVE "memreserve" #define KEY_MAX_REXMIT_DELAY "max_rexmit_delay" #define KEY_LOG_CONFIG_CHANGES "record_config_changes" #define KEY_LOG_PENGINE_INPUTS "record_pengine_inputs" #define KEY_CONFIG_WRITES_ENABLED "enable_config_writes" ll_cluster_t* ll_cluster_new(const char * llctype); typedef struct GLLclusterSource_s GLLclusterSource; GLLclusterSource* G_main_add_ll_cluster(int priority, ll_cluster_t* api , gboolean can_recurse , gboolean (*dispatch)(ll_cluster_t* source_data,gpointer user_data) , gpointer userdata, GDestroyNotify notify); #endif /* __HB_API_H */ Heartbeat-3-0-7e3a82377fa8/include/hb_api_core.h0000644000000000000000000001306311576626513021230 0ustar00usergroup00000000000000/* * hb_api_core_h: Internal definitions and functions for the heartbeat API * * Copyright (C) 2000 Alan Robertson * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * NOTE: This header NOT intended to be included by anything other than * heartbeat. It is NOT a global header file, and should NOT be installed anywhere * outside the heartbeat tree. */ #ifndef _HB_API_CORE_H # define _HB_API_CORE_H 1 #include #include #include #include /* Dispatch priorities for various kinds of events */ #define PRI_SENDSTATUS (G_PRIORITY_HIGH-5) #define PRI_SENDPKT (PRI_SENDSTATUS+1) #define PRI_READPKT (PRI_SENDPKT+1) #define PRI_FIFOMSG (PRI_READPKT+1) #define PRI_CHECKSIGS (G_PRIORITY_DEFAULT) #define PRI_FREEMSG (PRI_CHECKSIGS+1) #define PRI_CLIENTMSG (PRI_FREEMSG+1) #define PRI_APIREGISTER (G_PRIORITY_LOW) #define PRI_RANDOM (PRI_APIREGISTER+1) #define PRI_AUDITCLIENT (PRI_RANDOM+1) #define PRI_WRITECACHE (PRI_AUDITCLIENT+1) #define PRI_DUMPSTATS (PRI_WRITECACHE+20) void process_registerevent(IPC_Channel* chan, gpointer user_data); /* * Per-client API data structure. */ typedef struct client_process { char client_id[32]; /* Client identification */ pid_t pid; /* PID of client process */ uid_t uid; /* UID of client process */ gid_t gid; /* GID of client process */ int iscasual; /* 1 if this is a "casual" client */ int isindispatch; /* TRUE if we're in dispatch now */ const char* removereason;/* non-NULL if client is being removed */ IPC_Channel*chan; /* client IPC channel */ GCHSource* gsource; /* return from G_main_add_fd() */ int signal; /* What signal to indicate new msgs */ int desired_types; /* A bit mask of desired message types*/ struct client_process* next; GHashTable* seq_snapshot_table; int cligen; }client_proc_t; /* * Types of messages. * DROPIT and/or DUPLICATE are only used when a debugging callback * is registered. */ /* * This next set of defines is for the types of packets that come through * heartbeat. * * Any given packet behaves like an enumeration (should only have one bit * on), but the options from client software treat them more like a set * (bit field), with more than one at a time being on. Normally the * client only requests KEEPIT packets, but for debugging may want to * ask to see the others too. 
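 *
 * For illustration: an ordinary client ends up with desired_types equal to
 * DEFAULTREATMENT (i.e. just KEEPIT), while a debugging client that also
 * wants to observe discarded traffic might request a mask such as
 *
 *	client->desired_types = KEEPIT | DROPIT | DUPLICATE;
 *
 * using the bit values defined just below (desired_types is the per-client
 * mask in client_proc_t above).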
*/ #define KEEPIT 0x01 /* A set of bits */ #define NOCHANGE 0x02 #define DROPIT 0x04 #define DUPLICATE 0x08 #define APICALL 0x10 #define PROTOCOL 0x20 #define DEBUGTREATMENTS (DROPIT|DUPLICATE|APICALL|NOCHANGE|PROTOCOL) #define ALLTREATMENTS (DEBUGTREATMENTS|KEEPIT) #define DEFAULTREATMENT (KEEPIT) #define API_SIGNON "signon" #define API_SIGNOFF "signoff" #define API_SETFILTER "setfilter" # define F_FILTERMASK "fmask" #define API_SETSIGNAL "setsignal" # define F_SIGNAL "signal" #define API_NODELIST "nodelist" # define F_NODENAME "node" #define API_NODELIST_END "nodelist-end" #define API_NODESTATUS "nodestatus" #define API_NODEWEIGHT "nodeweight" #define API_NODESITE "nodesite" #define API_NODETYPE "nodetype" #define API_NUMNODES "numnodes" #define API_IFLIST "iflist" # define F_IFNAME "ifname" #define API_IFLIST_END "iflist-end" #define API_IFSTATUS "ifstatus" #define API_GETPARM "getparm" #define API_GETRESOURCES "getrsc" #define API_GETUUID "getuuid" # define F_QUERYUUID "queryuuid" #define API_GETNAME "getnodename" # define F_QUERYNAME "queryname" #define API_CLIENTSTATUS "clientstatus" #define API_SET_SENDQLEN "set_sendqlen" # define F_SENDQLEN "sendqlen" #define API_OK "OK" #define API_FAILURE "fail" #define API_BADREQ "badreq" #define API_MORE "ok/more" #define API_FIFO_DIR HA_VARLIBHBDIR "/api" #define API_FIFO_LEN (sizeof(API_FIFO_DIR)+32) #define NAMEDCLIENTDIR API_FIFO_DIR #define CASUALCLIENTDIR HA_VARLIBHBDIR "/casual" #define REQ_SUFFIX ".req" #define RSP_SUFFIX ".rsp" #ifndef API_REGSOCK # define API_REGSOCK HA_VARRUNDIR "/heartbeat/register" #endif void api_heartbeat_monitor(struct ha_msg *msg, int msgtype, const char *iface); void api_process_registration(struct ha_msg *msg); void process_api_msgs(fd_set* inputs, fd_set* exceptions); int compute_msp_fdset(fd_set* set, int fd1, int fd2); gboolean api_audit_clients(gpointer p); client_proc_t* find_client(const char * fromid, const char * pid); gboolean all_clients_resume(void); gboolean all_clients_pause(void); /* Return code for API query handlers */ #define I_API_RET 0 /* acknowledge client of successful API query */ #define I_API_IGN 1 /* do nothing */ #define I_API_BADREQ 2 /* send error msg to client with "failreason" as error reason */ /* Handler of API query */ typedef int (*api_query_handler_t) (const struct ha_msg* msg , struct ha_msg *resp, client_proc_t* client , const char **failreason); struct api_query_handler { const char *queryname; api_query_handler_t handler; }; #endif /* _HB_API_CORE_H */ Heartbeat-3-0-7e3a82377fa8/include/hb_config.h.in0000644000000000000000000000230211576626513021313 0ustar00usergroup00000000000000/* * hb_config.h: Definitions from the Linux-HA heartbeat program * for out-of-tree projects * * Copyright (C) 2007 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #ifndef _HB_CONFIG_H #define _HB_CONFIG_H /* have new heartbeat api */ #undef HAVE_NEW_HB_API /* Web site base URL */ #undef HA_URLBASE /* heartbeat rc script directory */ #undef HA_RC_DIR /* Custom name for libdir */ #undef HA_LIBDIR #include #endif /* _HB_CONFIG_H */ Heartbeat-3-0-7e3a82377fa8/include/heartbeat.h0000644000000000000000000003406711576626513020744 0ustar00usergroup00000000000000/* * heartbeat.h: core definitions for the Linux-HA heartbeat program * * Copyright (C) 2000 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #ifndef _HEARTBEAT_H # define _HEARTBEAT_H 1 #ifdef SYSV # include # define TERMIOS termio # define GETATTR(fd, s) ioctl(fd, TCGETA, s) # define SETATTR(fd, s) ioctl(fd, TCSETA, s) # define FLUSH(fd) ioctl(fd, TCFLSH, 2) #else # define TERMIOS termios # include # define GETATTR(fd, s) tcgetattr(fd, s) # define SETATTR(fd, s) tcsetattr(fd, TCSAFLUSH, s) # define FLUSH(fd) tcflush(fd, TCIOFLUSH) #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define index FooIndex #define time FooTime #include #undef index #undef time /* * might not contain LOG_PRI... * So, we define it ourselves, or error out if we can't... */ #ifndef LOG_PRI # ifdef LOG_PRIMASK /* David Lee reports this works on Solaris */ # define LOG_PRI(p) ((p) & LOG_PRIMASK) # else # error "Syslog.h does not define either LOG_PRI or LOG_PRIMASK." 
# endif #endif #define MAXFIELDS 30 /* Max # of fields in a msg */ #define HOSTLENG 100 /* Maximum size of "uname -a" return */ #define STATUSLENG 32 /* Maximum size of status field */ #define MAXIFACELEN 30 /* Maximum interface length */ #define MAXSERIAL 4 #define MAXMEDIA 64 #define MAXNODE 100 #define MAXPROCS ((2*MAXMEDIA)+2) #define FIFOMODE 0600 #define RQSTDELAY 10 #define ACK_MSG_DIV 10 #define RSC_TMPDIR HA_VARRUNDIR "/heartbeat/rsctmp" #define HA_MODULE_D HA_LIBHBDIR "/modules" #define HA_PLUGIN_D HA_LIBHBDIR "/plugins" /* For compatability with older external facing headers * These variables are no longer used internally */ #define HA_D HA_RC_DIR #define HB_RC_DIR HA_RC_DIR #define VAR_RUN_D HA_VARRUNDIR #define VAR_LOG_D HA_VARLOGDIR #define VAR_LIB_D HA_VARLIBHBDIR #define HALIB HA_LIBDIR /* #define HA_debug */ #define DEFAULTLOG HA_VARLOGDIR "/ha-log" #define DEFAULTDEBUG HA_VARLOGDIR "/ha-debug" #define DEVNULL "/dev/null" #define HA_OKEXIT 0 #define HA_FAILEXIT 1 #define WHITESPACE " \t\n\r\f" #define DELIMS ", \t\n\r\f" #define COMMENTCHAR '#' #define CRLF "\r\n" #define STATUS "STATUS" #define INITSTATUS "init" /* Status of a node we've never heard from */ #define UPSTATUS "up" /* Listening (we might not be xmitting) */ #define ACTIVESTATUS "active" /* fully functional, and all links are up */ #define DEADSTATUS "dead" /* Status of non-working link or machine */ #define PINGSTATUS "ping" /* Status of a working ping node */ #define JOINSTATUS "join" /* Status when an api client joins */ #define LEAVESTATUS "leave" /* Status when an api client leaves */ #define ONLINESTATUS "online" /* Status of an online client */ #define OFFLINESTATUS "offline" /* Status of an offline client */ #define LINKUP "up" /* The status assigned to a working link */ #define LOADAVG "/proc/loadavg" #define PIDFILE HA_VARRUNDIR "/heartbeat.pid" #define KEYFILE HA_HBCONF_DIR "/authkeys" #define HA_SERVICENAME "ha-cluster" /* Our official reg'd service name */ #define UDPPORT 694 /* Our official reg'd port number */ /* Environment variables we pass to our scripts... 
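 *
 * For illustration, the rc.d scripts earlier in this archive consume these
 * variables straight from their environment; ip-request, for instance,
 * decides whether a request originated locally with essentially
 *
 *	if [ "X$HA_src" = "X$HA_CURHOST" ]; then
 *		exit 0	# request is from us
 *	fi
 *
 * (HA_CURHOST is set from CURHOSTENV below; HA_src is a field of the
 * triggering message exported into the script's environment).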
*/ #define CURHOSTENV "HA_CURHOST" #define OLDSTATUS "HA_OSTATUS" #define DATEFMT "HA_DATEFMT" /* Format string for date(1) */ #define LOGFENV "HA_LOGFILE" /* well-formed log file :-) */ #define DEBUGFENV "HA_DEBUGLOG" /* Debug log file */ #define LOGFACILITY "HA_LOGFACILITY"/* Facility to use for logger */ #define HADIRENV "HA_DIR" /* The base HA directory */ #define HAFUNCENV "HA_FUNCS" /* Location of ha shell functions */ #define HANICEFAILBACK "HA_NICEFAILBACK" /* "yes" when nice_failback is on */ #define HADONTASK "HA_DONTASK" /* "yes" when no other nodes "active" ...*/ #define HADEBUGVAL "HA_debug" /* current debug value (if nonzero) */ #define HALOGD "HA_LOGD" /* whether we use logging daemon or not */ #define DEFAULTBAUD B19200 /* Default serial link speed */ #define DEFAULTBAUDRATE 19200 /* Default serial link speed as int */ #define DEFAULTBAUDSTR "19200" /* Default serial link speed as string */ /* multicast defaults */ #define DEFAULT_MCAST_IPADDR "225.0.0.1" /* Default multicast group */ #define DEFAULT_MCAST_TTL 1 /* Default multicast TTL */ #define DEFAULT_MCAST_LOOP 0 /* Default mulitcast loopback option */ #define HB_STATIC_PRIO 1 /* Used with soft realtime scheduling */ #define PPP_D HA_VARRUNDIR "/ppp.d" #define FIFONAME HA_VARLIBHBDIR "/fifo" #define HOSTUUIDCACHEFILE HA_VARLIBHBDIR "/hostcache" #define DELHOSTCACHEFILE HA_VARLIBHBDIR "/delhostcache" #define HOSTUUIDCACHEFILETMP HOSTUUIDCACHEFILE ".tmp" #define DELHOSTCACHEFILETMP DELHOSTCACHEFILE ".tmp" #define RCSCRIPT HA_HBCONF_DIR "/harc" #define CONFIG_NAME HA_HBCONF_DIR "/ha.cf" #define RESOURCE_CFG HA_HBCONF_DIR "/haresources" /* dynamic module directories */ #define COMM_MODULE_DIR HA_MODULE_D "/comm" #define AUTH_MODULE_DIR HA_MODULE_D "/auth" #define STATIC /* static */ #define HA_DATEFMT "%Y/%m/%d_%T\t" #define HA_FUNCS HA_HBCONF_DIR "/shellfuncs" #define RC_ARG0 "harc" #define ENV_PREFIX "HA_" /* Which side of a pipe is which? 
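 *
 * That is, with the definitions below, code that creates a pipe with
 * pipe(fds) reads from fds[P_READFD] and writes to fds[P_WRITEFD], matching
 * the usual pipe(2) convention that element 0 is the read end.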
*/ #define P_READFD 0 #define P_WRITEFD 1 #define FD_STDIN 0 #define FD_STDOUT 1 #define FD_STDERR 2 #define PROTOCOL_VERSION 1 typedef unsigned long seqno_t; #define MAXMSGHIST 500 #define MAXMISSING MAXMSGHIST #define NOSEQUENCE 0xffffffffUL struct seqtrack { longclock_t last_rexmit_req; int nmissing; seqno_t generation; /* Heartbeat generation # */ seqno_t last_seq; seqno_t first_missing_seq; /* the smallest missing seq number*/ GList* client_status_msg_queue; /*client status message queue*/ seqno_t seqmissing[MAXMISSING]; const char * last_iface; seqno_t ack_trigger; /*whenever a message received *with seq % ACK_MSG_DIV == ack_trigger *we send back an ACK */ seqno_t ackseq; /* ACKed seq*/ }; struct link { longclock_t lastupdate; const char * name; int isping; char status[STATUSLENG]; /* up or down */ TIME_T rmt_lastupdate; /* node's idea of last update time for this link */ }; #define NORMALNODE_I 0 #define PINGNODE_I 1 #define NORMALNODE "normal" #define PINGNODE "ping" #define UNKNOWNNODE "unknown" struct node_info { int nodetype; char nodename[HOSTLENG]; /* Host name from config file */ cl_uuid_t uuid; char site[HOSTLENG]; int weight; char status[STATUSLENG]; /* Status from heartbeat */ gboolean status_suppressed; /* Status reports suppressed for now */ struct ha_msg* saved_status_msg; /* Last status (ignored) */ struct link links[MAXMEDIA]; int nlinks; TIME_T rmt_lastupdate; /* node's idea of last update time */ seqno_t status_seqno; /* Seqno of last status update */ longclock_t dead_ticks; /* # ticks to declare dead */ longclock_t local_lastupdate;/* Date of last update in clock_t time*/ int anypacketsyet; /* True after reception of 1st pkt */ struct seqtrack track; int has_resources; /* TRUE if node may have resources */ }; typedef enum { HB_JOIN_NONE = 0, /* Don't allow runtime joins of unknown nodes */ HB_JOIN_OTHER = 1, /* Allow runtime joins of other nodes */ HB_JOIN_ANY = 2, /* Don't even require _us_ to be in ha.cf */ }hbjointype_t; #define MAXAUTH 16 struct sys_config { TIME_T cfg_time; /* Timestamp of config file */ TIME_T auth_time; /* Timestamp of authorization file */ TIME_T rsc_time; /* Timestamp of haresources file */ int format_vers; /* Version of this info */ int nodecount; /* Number of nodes in cluster */ long heartbeat_ms; /* Milliseconds between heartbeats */ long deadtime_ms; /* Ticks before declaring dead */ long deadping_ms; /* Ticks before declaring ping nodes */ long initial_deadtime_ms; /* Ticks before saying dead 1st time*/ long warntime_ms; /* Ticks before issuing warning */ int hopfudge; /* hops beyond nodecount allowed */ int log_facility; /* syslog facility, if any */ char facilityname[PATH_MAX]; /* syslog facility name (if any) */ char logfile[PATH_MAX]; /* path to log file, if any */ int use_logfile; /* Flag to use the log file*/ char dbgfile[PATH_MAX]; /* path to debug file, if any */ int use_dbgfile; /* Flag to use the debug file*/ int memreserve; /* number of kbytes to preallocate in heartbeat */ int rereadauth; /* 1 if we need to reread auth file */ seqno_t generation; /* Heartbeat generation # */ cl_uuid_t uuid; /* uuid for this node*/ int uuidfromname; /* do we get uuid from nodename?*/ char cluster[PATH_MAX]; /* the name of cluster*/ char quorum_server[PATH_MAX];/* the quorum_server*/ hbjointype_t rtjoinconfig; /* Runtime join behavior */ int authnum; Stonith* stonith; /* Stonith method - r1-style cluster only */ struct HBauth_info* authmethod; /* auth_config[authnum] */ struct node_info nodes[MAXNODE]; struct HBauth_info 
auth_config[MAXAUTH]; GList* client_list; /* List data: struct client_child */ GList* last_client;/* Last in client_list */ }; typedef enum { MEDIA_OK = 0, MEDIA_INRECOVERY=1, MEDIA_DELAYEDRECOVERY=2 }media_recov_t; struct hb_media { void * pd; /* Private Data */ const char * name; /* Unique medium name */ char* type; /* Medium type */ char* description; /* Medium description */ const struct hb_media_fns*vf; /* Virtual Functions */ media_recov_t recovery_state; /* What's up with media? */ gboolean suppresserrs; /* TRUE if errors shouldn't be logged */ int ourproc; /* Value of ourproc for 1st process */ IPC_Channel* wchan[2]; /* Read by the write child processes. */ IPC_Channel* rchan[2]; /* Written to by the read child processes. */ GCHSource* readsource; GCHSource* writesource; }; int parse_authfile(void); struct msg_xmit_hist { struct ha_msg* msgq[MAXMSGHIST]; seqno_t seqnos[MAXMSGHIST]; longclock_t lastrexmit[MAXMSGHIST]; int lastmsg; seqno_t hiseq; seqno_t lowseq; /* one less than min actually present */ seqno_t ackseq; struct node_info* lowest_acknode; }; /* * client_child: information on clients that we spawn and keep track of * They don't strictly have to use the client API, but most probably do. * We start them when we start up, and shut them down when we shut down. * Normally, if they they die, we restart them. */ struct client_child { pid_t pid; /* Process id of child process */ ProcTrack* proctrack; /* Process tracking structure */ gboolean respawn; /* Respawn it if it dies? */ gboolean rebootifitdies; /* Reboot system it if it dies? */ uid_t u_runas; /* Which user to run as? */ gid_t g_runas; /* Which group id to run as? */ int respawncount; /* Last time we respawned */ int shortrcount; /* Count of fast respawns */ char* command; /* What command to run? */ char* path; /* Path (argv[0])? */ }; int api_remove_client_pid(pid_t c_pid, const char * reason); extern struct sys_config * config; extern int debug_level; extern int udpport; extern int RestartRequested; extern char * localnodename; #define ha_log cl_log #define ha_perror cl_perror /* Generally useful exportable HA heartbeat routines... 
*/ extern void ha_assert(const char *s, int line, const char * file); gboolean heartbeat_on_congestion(void); extern int send_cluster_msg(struct ha_msg*msg); extern void cleanexit(int exitcode); extern void check_auth_change(struct sys_config *); extern void (*localdie)(void); extern int should_ring_copy_msg(struct ha_msg* m); extern int controlipc2msg(IPC_Channel * channel , struct ha_msg **); extern int add_msg_auth(struct ha_msg * msg); extern unsigned char * calc_cksum(const char * authmethod, const char * key, const char * value); struct node_info * lookup_node(const char *); struct link * lookup_iface(struct node_info * hip, const char *iface); struct link * iface_lookup_node(const char *); int add_node(const char * value, int nodetype); int set_node_weight(const char * value, int weight); int set_node_site(const char * value, const char * site); int remove_node(const char * value, int); void SetParameterValue(const char * name, const char * value); gint uuid_equal(gconstpointer v, gconstpointer v2); guint uuid_hash(gconstpointer key); int write_cache_file(struct sys_config * cfg); int read_cache_file(struct sys_config * cfg); int write_delnode_file(struct sys_config * cfg); void add_nametable(const char* nodename, struct node_info* value); void add_uuidtable(cl_uuid_t*, struct node_info* value); const char * uuid2nodename(cl_uuid_t* uuid); int nodename2uuid(const char* nodename, cl_uuid_t*); int inittable(void); gboolean update_tables(const char* nodename, cl_uuid_t* uuid); struct node_info* lookup_tables(const char* nodename, cl_uuid_t* uuid); void cleanuptable(void); int tables_remove(const char* nodename, cl_uuid_t* uuid); int GetUUID(struct sys_config*, const char*, cl_uuid_t* uuid); void remove_from_dellist( const char* nodename); void append_to_dellist(struct node_info* hip); void request_msg_rexmit(struct node_info *node, seqno_t lowseq, seqno_t hiseq); int remove_msg_rexmit(struct node_info *node, seqno_t seq); int init_rexmit_hash_table(void); int destroy_rexmit_hash_table(void); #endif /* _HEARTBEAT_H */ Heartbeat-3-0-7e3a82377fa8/include/lha_internal.h0000644000000000000000000001176611576626513021446 0ustar00usergroup00000000000000/* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #ifndef LHA_INTERNAL_H # define LHA_INTERNAL_H #define EOS '\0' #define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) #define STRLEN_CONST(conststr) ((size_t)((sizeof(conststr)/sizeof(char))-1)) #define STRNCMP_CONST(varstr, conststr) strncmp((varstr), conststr, STRLEN_CONST(conststr)+1) #define STRLEN(c) STRLEN_CONST(c) #define MALLOCT(t) ((t *) malloc(sizeof(t))) /* Needs to be defined before any other includes, otherwise some system * headers do not behave as expected! Major black magic... 
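 *
 * In practice this means a translation unit in this tree is expected to
 * include this header (and therefore config.h and _GNU_SOURCE) before any
 * system header; a typical .c file is assumed to start roughly like
 *
 *	#include <lha_internal.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 * (illustrative only; the actual system headers vary per file).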
*/ #undef _GNU_SOURCE /* in case it was defined on the command line */ #define _GNU_SOURCE /* Please leave this as the first #include - Solaris needs it there */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #ifdef BSD # define SCANSEL_CAST (void *) #else # define SCANSEL_CAST /* Nothing */ #endif #if defined(ANSI_ONLY) && !defined(inline) # define inline /* nothing */ # undef NETSNMP_ENABLE_INLINE # define NETSNMP_NO_INLINE 1 #endif #ifndef HAVE_DAEMON /* We supply a replacement function, but need a prototype */ int daemon(int nochdir, int noclose); #endif /* HAVE_DAEMON */ #ifndef HAVE_SETENV /* We supply a replacement function, but need a prototype */ int setenv(const char *name, const char * value, int why); #endif /* HAVE_SETENV */ #ifndef HAVE_UNSETENV /* We supply a replacement function, but need a prototype */ int unsetenv(const char *name); #endif /* HAVE_UNSETENV */ #ifndef HAVE_STRERROR /* We supply a replacement function, but need a prototype */ char * strerror(int errnum); #endif /* HAVE_STRERROR */ #ifndef HAVE_SCANDIR /* We supply a replacement function, but need a prototype */ # include int scandir (const char *directory_name, struct dirent ***array_pointer, int (*select_function) (const struct dirent *), #ifdef USE_SCANDIR_COMPARE_STRUCT_DIRENT /* This is what the Linux man page says */ int (*compare_function) (const struct dirent**, const struct dirent**) #else /* This is what the Linux header file says ... */ int (*compare_function) (const void *, const void *) #endif ); #endif /* HAVE_SCANDIR */ #ifndef HAVE_ALPHASORT # include int alphasort(const void *dirent1, const void *dirent2); #endif /* HAVE_ALPHASORT */ #ifndef HAVE_INET_PTON /* We supply a replacement function, but need a prototype */ int inet_pton(int af, const char *src, void *dst); #endif /* HAVE_INET_PTON */ #ifndef HAVE_STRNLEN size_t strnlen(const char *s, size_t maxlen); #else # define USE_GNU #endif #ifndef HAVE_STRNDUP char *strndup(const char *str, size_t len); #else # define USE_GNU #endif #ifndef HAVE_STRLCPY size_t strlcpy(char * dest, const char *source, size_t len); #endif #ifndef HAVE_STRLCAT size_t strlcat(char * dest, const char *source, size_t len); #endif #ifndef HAVE_NFDS_T typedef unsigned int nfds_t; #endif #ifdef HAVE_STRUCT_UCRED_DARWIN # include # ifndef SYS_NMLN # define SYS_NMLN _SYS_NAMELEN # endif /* SYS_NMLN */ #endif #define POINTER_TO_SIZE_T(p) ((size_t)(p)) /*pointer cast as size_t*/ #define POINTER_TO_SSIZE_T(p) ((ssize_t)(p)) /*pointer cast as ssize_t*/ #define POINTER_TO_ULONG(p) ((unsigned long)(p)) /*pointer cast as unsigned long*/ /* Sometimes we get a const g_something *, but need to pass it internally * to other functions taking a non-const g_something *, which results * with gcc and -Wcast-qual in a compile time warning, and with -Werror * even to a compile time error. * Workarounds have been to e.g. memcpy(&list, _list); or similar, * the reason of which is non-obvious to the casual reader. * This macro achieves the same, and annotates why it is done. */ #define UNCONST_CAST_POINTER(t, p) ((t)(unsigned long)(p)) #define HAURL(url) HA_URLBASE url /* * Some compilers may not have defined __FUNCTION__. */ #ifndef __FUNCTION__ /* Sun studio compiler */ # ifdef __SUNPRO_C # define __FUNCTION__ __func__ # endif /* Similarly add your compiler here ... 
*/ #endif /* You may need to change this for your compiler */ #ifdef HAVE_STRINGIZE # define ASSERT(X) {if(!(X)) ha_assert(#X, __LINE__, __FILE__);} #else # define ASSERT(X) {if(!(X)) ha_assert("X", __LINE__, __FILE__);} #endif #endif /* LHA_INTERNAL_H */ Heartbeat-3-0-7e3a82377fa8/include/ocf/Makefile.am0000644000000000000000000000171411576626513021430 0ustar00usergroup00000000000000# # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2002 International Business Machines. # Author: Alan Robertson # # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # MAINTAINERCLEANFILES = Makefile.in includedir=$(base_includedir)/ocf include_HEADERS = \ oc_event.h \ oc_membership.h Heartbeat-3-0-7e3a82377fa8/include/ocf/oc_event.h0000644000000000000000000001655611576626513021361 0ustar00usergroup00000000000000/* * oc_event.h * * Definition of the Open Cluster Framework event notification API * * Copyright (C) 2002 Mark Haverkamp, Joe DiMartino * 2002 Open Source Development Lab * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #ifndef OC_EVENT_H #define OC_EVENT_H #include #include /* * An opaque token into the membership service is * defined as an int for portability. */ typedef int oc_ev_t; /* * oc_ed_t is the event descriptor for a callback event. An event * descriptor is unique for all events across all event classes. */ typedef uint32_t oc_ed_t; /* * Event descriptors: * upper 10 bits for Class * lower 22 bits for Event */ #define OC_EV_CLASS_SHIFT 22 #define OC_EV_EVENT_SHIFT 10 #define OC_EV_EVENT_MASK (~ (~((uint)0) << OC_EV_CLASS_SHIFT)) #define OC_EV_GET_CLASS(ed) ((uint)(ed) >> OC_EV_CLASS_SHIFT) #define OC_EV_GET_EVENT(ed) ((uint)(ed) & OC_EV_EVENT_MASK) #define OC_EV_SET_CLASS(cl,ev) (cl << OC_EV_CLASS_SHIFT | \ (ev & OC_EV_EVENT_MASK)) /* * The following event classes are defined: */ typedef enum { OC_EV_CONN_CLASS = 1, /* Connectivity Event Class */ OC_EV_MEMB_CLASS, /* Node Membership Event Class */ OC_EV_GROUP_CLASS /* Group Messaging Event Class */ } oc_ev_class_t; /* * Within each event class, event types are defined. 
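 *
 * Worked example, derived from the macros and the class enum above:
 * OC_EV_MEMB_CLASS has the value 2, so a membership event descriptor
 * carries 2 in its upper 10 bits and the event number in its lower 22 bits:
 *
 *	OC_EV_SET_CLASS(OC_EV_MEMB_CLASS, 1)  ==  (2 << 22) | 1  ==  0x00800001
 *	OC_EV_GET_CLASS(0x00800001)  ==  2   (OC_EV_MEMB_CLASS)
 *	OC_EV_GET_EVENT(0x00800001)  ==  1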
*/ /* * Connectivity Events */ typedef enum { OC_EV_CS_INVALID = OC_EV_SET_CLASS(OC_EV_CONN_CLASS, 0), OC_EV_CS_INTERFACE, OC_EV_CS_ELIGIBLE, OC_EV_CS_CONNECT } oc_conn_event_t; /* Node Membership Events * *OC_EV_MS_NEW_MEMBERSHIP * CCM: membership with quorum * CRM/CIB quorum: true * CRM/CIB actions: update membership instance & contents * CRM actions: none. * We wait for the CRMd on that node to become active before any CRM * action is taken. The TE monitors CIB updates and detects and nodes that * unexpectedly left - starting/restarting a transition if required * *OC_EV_MS_MS_INVALID * CCM: membership without quorum * CRM/CIB quorum: false * CRM/CIB actions: update membership instance & contents * DC actions: Invoke the PolicyEngine, * to ensure the "No Quorum Policy" is observed * *OC_EV_MS_NOT_PRIMARY * CCM: old membership (not valid any longer) * CRM/CIB quorum: no change * DC actions: cancel the transition if one is in progress * *OC_EV_MS_PRIMARY_RESTORED * This event mean the cluster restores to a stable state that has the * same membership as before. It also implies it has the same quorum as * before. * CCM: old membership restored (same membership as before) * CRM/CIB quorum: no change * CRM/CIB actions: update membership instance * DC actions: Start/restart a transition now that everything is now * stable. In theory we would have gotten a OC_EV_MS_NOT_PRIMARY before * this which would have cancelled the transition. * *OC_EV_MS_EVICTED * CCM: the client is evicted from ccm. * CRM/CIB quorum: false * CRM/CIB actions: update membership instance & contents, shut down * This should not happen if correct startup/shutdown order is observed. * */ typedef enum { OC_EV_MS_INVALID = OC_EV_SET_CLASS(OC_EV_MEMB_CLASS, 0), OC_EV_MS_NEW_MEMBERSHIP, OC_EV_MS_NOT_PRIMARY, OC_EV_MS_PRIMARY_RESTORED, OC_EV_MS_EVICTED } oc_memb_event_t; /* * For events OC_EV_MS_NEW_MEMBERSHIP, OC_EV_MS_NOT_PRIMARY, and * OC_EV_MS_PRIMARY_RESTORED, the event handlers 'data' member points * to an oc_ev_mebership_t structure. For OC_EV_MS_EVICTED, 'data' is * NULL. */ /* * member node information */ typedef struct oc_node_s { char *node_uname; /* unique */ uint node_id; /* unique */ uint node_born_on; /* membership instance number */ } oc_node_t; /* * membership event information */ typedef struct oc_ev_membership_s { uint m_instance; /* instance # of current membership */ uint m_n_member; /* # of current members */ uint m_memb_idx; /* index into m_array for members */ uint m_n_out; /* # of previous members lost */ uint m_out_idx; /* index into m_array for lost */ uint m_n_in; /* # of new members in this instance */ uint m_in_idx; /* index into m_array for new */ oc_node_t m_array[1]; /* array of members (see above) */ } oc_ev_membership_t; /* * Group Events */ typedef enum { OC_EV_GS_INVALID = OC_EV_SET_CLASS(OC_EV_GROUP_CLASS, 0), OC_EV_GS_JOIN, OC_EV_GS_LEAVE, OC_EV_GS_CAST, OC_EV_GS_REPLY } oc_group_event_t; /* * This is the initial call to register for cluster event * notification service. Callers receive an opaque token. * Implementations define the contents of the opaque token. * Failure returns an appropriate value. */ int oc_ev_register(oc_ev_t **token); /* * Event service will terminate after calling oc_ev_unregister(). * This routine can be safely called from a callback routine. * Pending events may be dropped at the discression of the cluster * implementation. 
*/ int oc_ev_unregister(oc_ev_t *token); /* * callback function definition */ typedef void oc_ev_callback_t(oc_ed_t event, void *cookie, size_t size, const void *data); /* * Event notification is performed through callbacks. Events are * delivered only for those event classes in which a callback has * been registered. The callback function is registered using * oc_ev_set_callback(). A callback is delivered when an event in * the corresponding event class occurs. */ int oc_ev_set_callback(const oc_ev_t *token, oc_ev_class_t class, oc_ev_callback_t *fn, oc_ev_callback_t **prev_fn); /* * For calls within the kernel only the event service token is * used and all other arguments are ignored. After activation, * kernel callbacks may be delivered immediately. All kernel * callbacks will be performed in a process context supplied by the * kernel compliant event notification service. */ int oc_ev_activate(const oc_ev_t *token, int *fd); /* * A user-level process determines that an event is pending using * select/poll on the file descriptor returned by oc_ev_activate(). * A callback will deliver the event in the context of this process * after calling oc_ev_handle_event(). */ int oc_ev_handle_event(const oc_ev_t *token); /* * It is necessary to inform the notification service that callback * processing is complete. Any data associated with this completed * callback is no longer valid upon successful return. */ int oc_ev_callback_done(void *cookie); /* * This is a synchronous call to return the event notification * service version number. It is safe to call anytime. int oc_ev_get_version(const oc_ev_t *token, oc_ver_t *ver); */ /* * This is a synchronous call to determine the local node identifier. */ int oc_ev_is_my_nodeid(const oc_ev_t *token, const oc_node_t *node); #endif /* OC_EVENT_H */ Heartbeat-3-0-7e3a82377fa8/include/ocf/oc_membership.h0000644000000000000000000001773111576626513022367 0ustar00usergroup00000000000000#ifndef OCF_OC_MEMBERSHIP_H # define OCF_OC_MEMBERSHIP_H /* * - membership APIs (version 0.1) * * The structures and functions in this header file work closely with * the oc_event.h event infrastructure. All (edata, esize) parameters * to functions in this header file refer to membership event bodies. * It is expected that all such are received by this mechanism. * * * There are a few things in this header file which don't really belong here * but are needed and they aren't in any other header file. * * These are: * definition of oc_node_id_t * oc_cluster_handle_t * * Maybe we ought to put common types into an * * The oc_cmp_node_id() and oc_localnodeid() functions also belong in * some more global header file. * * oc_member_eventttype_t and * oc_member_uniqueid_t are membership-unique * and don't belong in a set of ocf-common header files (IMHO) * * Copyright (C) 2002 Alan Robertson * * This copyright will be assigned to the Free Standards Group * in the future. * * This library is free software; you can redistribute it and/or * modify it under the terms of version 2.1 of the GNU Lesser General Public * License as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include /* * This enumeration is used both to indicated the type of an event * received, and to request the types of events one wants delivered. * (see oc_member_request_events() and oc_member_etype() for more * details on how this is used). */ enum oc_member_eventtype_e { OC_NOT_MEMBERSHIP, /* Not a (valid) membership event */ OC_FULL_MEMBERSHIP, /* full membership update */ OC_INCR_MEMBERSHIP /* incremental membership update */ }; typedef enum oc_member_eventtype_e oc_member_eventtype_t; /* controversial? */ typedef void * oc_cluster_handle_t; typedef cl_uuid_t oc_node_id_t; typedef struct oc_member_uniqueid_s oc_member_uniqueid_t; /* * A few words about the oc_node_id_t: * * An oc_node_id_t is assigned to a node no later than when it first * joins a cluster, and it will not change while that node is active * in some partition in the cluster. It is normally expected to * be assigned to a node, and not changed afterwards except by * adminstrative intervention. * * The mechanism for assigning oc_node_id_t's to nodes is outside the * scope of this specification. The only basic operation which * can be performed on these objects is comparison. * * See oc_cmp_node_id() for comparisons between them. */ /* * oc_member_uniqueid_t * The values of these fields are guaranteed to be the same across * all nodes within a given partition, and guaranteed to be different * between all active partitions in the cluster. * * In other words, if you exchange current oc_member_uniqueid_t objects * with another cluster node, you can tell with certainty, whether or not * you and the other node are currently members of the same partition. * * The m_instance field is guaranteed to be unique to a particular * membership instance while that node is active in the cluster. * If a node is shut down and restarts, then the m_instance might * repeat a value it had in the past. * * See oc_cmp_uniqueid() for comparing them. * * The meaning of the uniqueid field is not defined by this specification. * It may be the node_id of a node in the cluster or it may be a unique * checksum or it may be some other value. All that is specified is that * it and the m_instance are unique when taken as a whole. */ typedef unsigned char oc_mbr_uniqueid[16]; struct oc_member_uniqueid_s { unsigned m_instance; oc_mbr_uniqueid uniqueid; }; #ifdef __cplusplus extern "C" { #endif /* * Returns 0 for equal node_ids, * negative for node id l less than node id r * positive for node id l greater than node id r * * No meaning may be ascribed to the fact that a particular * node id is greater or less than some other node id. * The comparison operator is provided primarily for * equality comparisons, and secondarily for use in * sorting them into a canonical order. */ int oc_cmp_node_id(const oc_node_id_t* l, const oc_node_id_t* r); /* Return our local node id */ int oc_localnodeid(oc_node_id_t* us, oc_cluster_handle_t handle); /* * On failure these functions return -1: * The following errno values are defined: * EINVAL invalid handle argument * EL2HLT cluster software not currently running */ /* What kind of event did we get? 
*/ /* (see oc_member_request_events() for more details) */ oc_member_eventtype_t oc_member_etype(const void* edata, size_t esize); /* * oc_member_uniqueid() returns the unique identifier associated * with this membership event. See the description in the typedef * for more details. */ int oc_member_uniqueid(const void* edata, size_t esize, oc_member_uniqueid_t* u); /* * Failure of these functions return -1. * The following errno values are defined: * EL2HLT cluster software not currently running * EINVAL edata does not refer to a membership event */ /* How many nodes of each category do we have? */ int oc_member_n_nodesjoined(const void* edata, size_t esize); int oc_member_n_nodesgone(void* edata, size_t esize); int oc_member_n_nodesconst(void* edata, size_t esize); /* * Failure of these functions return -1. * The following errno values are defined: * EL2HLT cluster software not currently running * EINVAL edata does not refer to a membership event * ENOSYS edata refers to an OC_INCR_MEMBERSHIP update, and * oc_member_n_nodesconst() was called. */ /* What nodes of each category do we have? */ oc_node_id_t* oc_member_nodesjoined(const void* edata, size_t esize); oc_node_id_t* oc_member_nodesgone(void* edata, size_t esize); oc_node_id_t* oc_member_nodesconst(void* edata, size_t esize); /* * Failure of these functions return NULL. * The following errno values are defined: * EL2HLT cluster software not currently running * EINVAL edata does not refer to a membership event * ENOSYS edata refers to an OC_INCR_MEMBERSHIP update, and * oc_member_nodesconst() was called. */ /* * * OC_NO_MEMBERSHIP * No membership events will be delivered. This is the default on opening * a membership event connection. * * OC_FULL_MEMBERSHIP * Deliver all membership information including information on * members that didn't change. In this mode, the oc_member_nodesconst() * call is supported. * * OC_INCR_MEMBERSHIP * Deliver only changed membership events. In this mode, calls to * oc_member_nodesconst(), et al. are not supported. * * Setting OC_FULL_MEMBERSHIP or OC_INCR_MEMBERSHIP will result in the * delivery of a single OC_FULL_MEMBERSHIP event soon after making * this call. Subsequent events will be delivered as received in the * requested style (incremental or full). Because events may already * be pending when this operation is issued, no guarantee can be made * regarding when this triggered event will be delivered. * */ int oc_member_request_events(oc_member_eventtype_t etype, oc_ev_t token); /* * On failure this function returns -1: * The following errno values are defined: * EINVAL invalid etype or handle argument * EL2HLT cluster software not currently running * EBADF invalid oc_ev_t token parameter */ /* * if l.m_instance < r.m_instance then return -1 * if r.m_instance > r.m_instance then return 1 * if l.m_instance == r.m_instance and l.uniqueid == r.uniqueid * then return 0 * otherwise return 2 */ int oc_cmp_uniqueid(const oc_member_uniqueid_t l, const oc_member_uniqueid_t r); #ifdef __cplusplus } #endif #endif Heartbeat-3-0-7e3a82377fa8/include/saf/Makefile.am0000644000000000000000000000200311576626513021422 0ustar00usergroup00000000000000# # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # This instance created by alanr # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in includedir=@saf_includedir@ include_HEADERS = ais.h ais_amf.h ais_base.h ais_checkpoint.h ais_event.h ais_lock.h ais_membership.h ais_message.h Heartbeat-3-0-7e3a82377fa8/include/saf/ais.h0000644000000000000000000000077411576626513020330 0ustar00usergroup00000000000000/* --- ais.h Header file of SA Forum AIS APIs Version 1.0 In order to compile, all opaque types which appear as <...> in the spec have been defined as OPAQUE_TYPE (which is an integer). */ #ifndef _AIS_H_ #define _AIS_H_ #define AIS_VERSION_RELEASE_CODE 'A' #define AIS_VERSION_MAJOR 0x01 #define AIS_VERSION_MINOR 0x01 #include "ais_base.h" #include "ais_amf.h" #include "ais_membership.h" #include "ais_checkpoint.h" #include "ais_event.h" #include "ais_lock.h" #include "ais_message.h" #endif Heartbeat-3-0-7e3a82377fa8/include/saf/ais_amf.h0000644000000000000000000002352511576626513021152 0ustar00usergroup00000000000000#ifndef _AIS_AMF_H_ #define _AIS_AMF_H_ #include "ais_base.h" /* Chapter 5 */ typedef OPAQUE_TYPE SaAmfHandleT; typedef enum { SA_AMF_HEARTBEAT = 1, SA_AMF_HEALTHCHECK_LEVEL1 = 2, SA_AMF_HEALTHCHECK_LEVEL2 = 3, SA_AMF_HEALTHCHECK_LEVEL3 = 4 } SaAmfHealthcheckT; typedef enum { SA_AMF_OUT_OF_SERVICE = 1, SA_AMF_IN_SERVICE = 2, SA_AMF_STOPPING = 3 } SaAmfReadinessStateT; typedef enum { SA_AMF_ACTIVE = 1, SA_AMF_STANDBY = 2, SA_AMF_QUIESCED = 3 } SaAmfHAStateT; typedef enum { SA_AMF_COMPONENT_CAPABILITY_X_ACTIVE_AND_Y_STANDBY= 1, SA_AMF_COMPONENT_CAPABILITY_X_ACTIVE_OR_X_STANDBY = 2, SA_AMF_COMPONENT_CAPABILITY_1_ACTIVE_OR_Y_STANDBY = 3, SA_AMF_COMPONENT_CAPABILITY_1_ACTIVE_OR_1_STANDBY = 4, SA_AMF_COMPONENT_CAPABILITY_X_ACTIVE = 5, SA_AMF_COMPONENT_CAPABILITY_1_ACTIVE = 6, SA_AMF_COMPONENT_CAPABILITY_NO_STATE = 7 } SaAmfComponentCapabilityModelT; #define SA_AMF_CSI_ADD_NEW_INSTANCE 0X1 #define SA_AMF_CSI_ALL_INSTANCES 0X2 typedef SaUint32T SaAmfCSIFlagsT; typedef enum { SA_AMF_CSI_NEW_ASSIGN = 1, SA_AMF_CSI_QUIESCED = 2, SA_AMF_CSI_NOT_QUIESCED = 3, SA_AMF_CSI_STILL_ACTIVE = 4 } SaAmfCSITransitionDescriptorT; typedef enum { SA_AMF_RESET = 1, SA_AMF_REBOOT = 2, SA_AMF_POWER_ON = 3, SA_AMF_POWER_OFF = 4 } SaAmfExternalComponentActionT; #define SA_AMF_SWITCHOVER_OPERATION 0X1 #define SA_AMF_SHUTDOWN_OPERATION 0X2 typedef SaUint32T SaAmfPendingOperationFlagsT; typedef struct { SaNameT compName; SaAmfReadinessStateT readinessState; SaAmfHAStateT haState; } SaAmfProtectionGroupMemberT; typedef enum { SA_AMF_PROTECTION_GROUP_NO_CHANGE = 1, SA_AMF_PROTECTION_GROUP_ADDED = 2, SA_AMF_PROTECTION_GROUP_REMOVED = 3, SA_AMF_PROTECTION_GROUP_STATE_CHANGE = 4 } SaAmfProtectionGroupChangesT; typedef struct { SaAmfProtectionGroupMemberT member; SaAmfProtectionGroupChangesT change; } SaAmfProtectionGroupNotificationT; typedef enum { SA_AMF_COMMUNICATION_ALARM_TYPE = 1, SA_AMF_QUALITY_OF_SERVICE_ALARM_TYPE = 2, SA_AMF_PROCESSING_ERROR_ALARM_TYPE = 3, SA_AMF_EQUIPMENT_ALARM_TYPE = 4, SA_AMF_ENVIRONMENTAL_ALARM_TYPE = 5 } SaAmfErrorReportTypeT; typedef enum { SA_AMF_APPLICATION_SUBSYSTEM_FAILURE = 1, SA_AMF_BANDWIDTH_REDUCED = 2, 
SA_AMF_CALL_ESTABLISHMENT_ERROR = 3, SA_AMF_COMMUNICATION_PROTOCOL_ERROR = 4, SA_AMF_COMMUNICATION_SUBSYSTEM_FAILURE = 5, SA_AMF_CONFIGURATION_ERROR = 6, SA_AMF_CONGESTION = 7, SA_AMF_CORRUPT_DATA = 8, SA_AMF_CPU_CYCLES_LIMIT_EXCEEDED = 9, SA_AMF_EQUIPMENT_MALFUNCTION = 10, SA_AMF_FILE_ERROR = 11, SA_AMF_IO_DEVICE_ERROR = 12, SA_AMF_LAN_ERROR, SA_AMF_OUT_OF_MEMORY = 13, SA_AMF_PERFORMANCE_DEGRADED = 14, SA_AMF_PROCESSOR_PROBLEM = 15, SA_AMF_RECEIVE_FAILURE = 16, SA_AMF_REMOTE_NODE_TRANSMISSION_ERROR = 17, SA_AMF_RESOURCE_AT_OR_NEARING_CAPACITY = 18, SA_AMF_RESPONSE_TIME_EXCESSIVE = 19, SA_AMF_RETRANSMISSION_RATE_EXCESSIVE = 20, SA_AMF_SOFTWARE_ERROR = 21, SA_AMF_SOFTWARE_PROGRAM_ABNORMALLY_TERMINATED = 22, SA_AMF_SOFTWARE_PROGRAM_ERROR = 23, SA_AMF_STORAGE_CAPACITY_PROBLEM = 24, SA_AMF_TIMING_PROBLEM = 25, SA_AMF_UNDERLYING_RESOURCE_UNAVAILABLE = 26, SA_AMF_INTERNAL_ERROR = 27, SA_AMF_NO_SERVICE_ERROR = 28, SA_AMF_SOFTWARE_LIBRARY_ERROR = 29 } SaAmfProbableCauseT; typedef enum { SA_AMF_CLEARED = 1, SA_AMF_NO_IMPACT = 2, SA_AMF_INDETERMINATE = 3, SA_AMF_CRITICAL = 4, SA_AMF_MAJOR = 5, SA_AMF_WEDGED_COMPONENT_FAILURE = 6, SA_AMF_COMPONENT_TERMINATED_FAILURE= 7, SA_AMF_NODE_FAILURE = 8, SA_AMF_MINOR = 9, SA_AMF_WARNING = 10 } SaAmfErrorImpactAndSeverityT; typedef enum { SA_AMF_NO_RECOMMENDATION = 1, SA_AMF_INTERNALLY_RECOVERED = 2, SA_AMF_COMPONENT_RESTART = 3, SA_AMF_COMPONENT_FAILOVER = 4, SA_AMF_NODE_SWITCHOVER = 5, SA_AMF_NODE_FAILOVER = 6, SA_AMF_NODE_FAILFAST = 7, SA_AMF_CLUSTER_RESET = 8 } SaAmfRecommendedRecoveryT; #define SA_AMF_OPAQUE_BUFFER_SIZE_MAX 256 typedef struct { char *buffer; SaSizeT size; } SaAmfErrorBufferT; typedef struct { SaAmfErrorBufferT *specificProblem; SaAmfErrorBufferT *additionalText; SaAmfErrorBufferT *additionalInformation; } SaAmfAdditionalDataT; typedef struct { SaAmfErrorReportTypeT errorReportType; SaAmfProbableCauseT probableCause; SaAmfErrorImpactAndSeverityT errorImpactAndSeverity; SaAmfRecommendedRecoveryT recommendedRecovery; } SaAmfErrorDescriptorT; typedef void (*SaAmfHealthcheckCallbackT)(SaInvocationT invocation, const SaNameT *compName, SaAmfHealthcheckT checkType); typedef void (*SaAmfReadinessStateSetCallbackT)(SaInvocationT invocation, const SaNameT *compName, SaAmfReadinessStateT readinessState); typedef void (*SaAmfComponentTerminateCallbackT)(SaInvocationT invocation, const SaNameT *compName); typedef void (*SaAmfCSISetCallbackT)(SaInvocationT invocation, const SaNameT *compName, const SaNameT *csiName, SaAmfCSIFlagsT csiFlags, SaAmfHAStateT *haState, SaNameT *activeCompName, SaAmfCSITransitionDescriptorT transitionDescriptor); typedef void (*SaAmfCSIRemoveCallbackT)(SaInvocationT invocation, const SaNameT *compName, const SaNameT *csiName, const SaAmfCSIFlagsT *csiFlags); typedef void (*SaAmfProtectionGroupTrackCallbackT)(const SaNameT *csiName, SaAmfProtectionGroupNotificationT *notificationBuffer, SaUint32T numberOfItems, SaUint32T numberOfMembers, SaErrorT error); typedef void (*SaAmfExternalComponentRestartCallbackT)(SaInvocationT invocation, const SaNameT *externalCompName); typedef void (*SaAmfExternalComponentControlCallbackT)(const SaInvocationT invocation, const SaNameT *externalCompName, SaAmfExternalComponentActionT controlAction); typedef void (*SaAmfPendingOperationConfirmCallbackT)(const SaInvocationT invocation, const SaNameT *compName, SaAmfPendingOperationFlagsT pendingOperationFlags); typedef struct { SaAmfHealthcheckCallbackT saAmfHealthcheckCallback; SaAmfReadinessStateSetCallbackT saAmfReadinessStateSetCallback; 
SaAmfComponentTerminateCallbackT saAmfComponentTerminateCallback; SaAmfCSISetCallbackT saAmfCSISetCallback; SaAmfCSIRemoveCallbackT saAmfCSIRemoveCallback; SaAmfProtectionGroupTrackCallbackT saAmfProtectionGroupTrackCallback; SaAmfExternalComponentRestartCallbackT saAmfExternalComponentRestartCallback; SaAmfExternalComponentControlCallbackT saAmfExternalComponentControlCallback; SaAmfPendingOperationConfirmCallbackT saAmfPendingOperationConfirmCallback; } SaAmfCallbacksT; SaErrorT saAmfInitialize(SaAmfHandleT *amfHandle, const SaAmfCallbacksT *amfCallbacks, const SaVersionT *version); SaErrorT saAmfSelectionObjectGet(const SaAmfHandleT *amfHandle, SaSelectionObjectT *selectionObject); SaErrorT saAmfDispatch(const SaAmfHandleT *amfHandle, SaDispatchFlagsT dispatchFlags); SaErrorT saAmfFinalize(const SaAmfHandleT *amfHandle); SaErrorT saAmfComponentRegister( const SaAmfHandleT *amfHandle, const SaNameT *compName, const SaNameT *proxyCompName); SaErrorT saAmfComponentUnregister(const SaAmfHandleT *amfHandle, const SaNameT *compName, const SaNameT *proxyCompName); SaErrorT saAmfCompNameGet(const SaAmfHandleT *amfHandle, SaNameT *compName); SaErrorT saAmfReadinessStateGet(const SaNameT *compName, SaAmfReadinessStateT *readinessState); SaErrorT saAmfStoppingComplete(SaInvocationT invocation, SaErrorT error); SaErrorT saAmfHAStateGet(const SaNameT *compName, const SaNameT *csiName, SaAmfHAStateT *haState); SaErrorT saAmfProtectionGroupTrackStart(const SaAmfHandleT *amfHandle, const SaNameT *csiName, SaUint8T trackFlags, const SaAmfProtectionGroupNotificationT *notificationBufffer, SaUint32T numberOfItems); SaErrorT saAmfProtectionGroupTrackStop(const SaAmfHandleT *amfHandle, const SaNameT *csiName); SaErrorT saAmfErrorReport(const SaNameT *reportingComponent, const SaNameT *erroneousComponent, SaTimeT errorDetectionTime, const SaAmfErrorDescriptorT *errorDescriptor, const SaAmfAdditionalDataT *additionalData); SaErrorT saAmfErrorCancelAll(const SaNameT *compName); SaErrorT saAmfComponentCapabilityModelGet(const SaNameT *compName, SaAmfComponentCapabilityModelT *componentCapabilityModel); SaErrorT saAmfPendingOperationGet(const SaNameT *compName, SaAmfPendingOperationFlagsT *pendingOperationFlags); SaErrorT saAmfResponse(SaInvocationT invocation, SaErrorT error); #endif /* _AIS_AMF_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_base.h0000644000000000000000000000571111576626513021316 0ustar00usergroup00000000000000/* --- ais.h Header file of SA Forum AIS APIs Version 1.0 In order to compile, all opaque types which appear as <...> in the spec have been defined as OPAQUE_TYPE (which is an integer). 
*/ #ifndef _AIS_BASE_H_ #define _AIS_BASE_H_ /* typedef OPAQUE_TYPE SaInvocationT; typedef OPAQUE_TYPE SaSizeT; typedef OPAQUE_TYPE SaOffsetT; typedef OPAQUE_TYPE SaSelectionObjectT; typedef OPAQUE_TYPE SaAmfHandleT; typedef OPAQUE_TYPE SaClmHandleT; typedef OPAQUE_TYPE SaCkptHandleT; typedef OPAQUE_TYPE SaCkptCheckpointHandleT; typedef OPAQUE_TYPE SaCkptSectionIteratorT; typedef OPAQUE_TYPE SaEvtHandleT; typedef OPAQUE_TYPE SaEvtEventHandleT; typedef OPAQUE_TYPE SaEvtChannelHandleT; typedef OPAQUE_TYPE SaMsgHandleT; typedef OPAQUE_TYPE SaMsgMessageHandleT; typedef OPAQUE_TYPE SaMsgQueueHandleT; typedef OPAQUE_TYPE SaMsgSenderIdT; typedef OPAQUE_TYPE SaLckHandleT; typedef OPAQUE_TYPE SaLckLockIdT; typedef OPAQUE_TYPE SaLckResourceIdT; */ /* Chapter 3 */ #define OPAQUE_TYPE int typedef OPAQUE_TYPE SaInvocationT; typedef OPAQUE_TYPE SaSizeT; typedef OPAQUE_TYPE SaOffsetT; typedef OPAQUE_TYPE SaSelectionObjectT; typedef enum { SA_FALSE = 0, SA_TRUE = 1 } SaBoolT; typedef char SaInt8T; typedef short SaInt16T; typedef long SaInt32T; typedef long long SaInt64T; typedef unsigned char SaUint8T; typedef unsigned short SaUint16T; typedef unsigned long SaUint32T; typedef unsigned long long SaUint64T; typedef SaInt64T SaTimeT; /* * the largest timestamp value: * Fri Apr 11 23:47:16.854775807 UTC 2262 */ #define SA_TIME_END ((SaTimeT)0x7FFFFFFFFFFFFFFFLL) /* * the smallest timestamp value: * Tue Sep 21 00:12:43.145224193 UTC 1667 */ #define SA_TIME_BEGIN ((SaTimeT)0x8000000000000001LL) #define SA_MAX_NAME_LENGTH 32 #define SA_MAX_ID_LENGTH 128 typedef struct { SaUint16T length; char value[SA_MAX_NAME_LENGTH]; } SaNameT; typedef struct { char releaseCode; unsigned char major; unsigned char minor; } SaVersionT; #define SA_TRACK_CURRENT 0x01 #define SA_TRACK_CHANGES 0x02 #define SA_TRACK_CHANGES_ONLY 0x04 typedef enum { SA_DISPATCH_ONE = 1, SA_DISPATCH_ALL = 2, SA_DISPATCH_BLOCKING = 3 } SaDispatchFlagsT; typedef enum { SA_OK = 1, SA_ERR_LIBRARY = 2, SA_ERR_VERSION = 3, SA_ERR_INIT = 4, SA_ERR_TIMEOUT = 5, SA_ERR_TRY_AGAIN = 6, SA_ERR_INVALID_PARAM = 7, SA_ERR_NO_MEMORY = 8, SA_ERR_BAD_HANDLE = 9, SA_ERR_BUSY = 10, SA_ERR_ACCESS = 11, SA_ERR_NOT_EXIST = 12, SA_ERR_NAME_TOO_LONG = 13, SA_ERR_EXIST = 14, SA_ERR_NO_SPACE = 15, SA_ERR_INTERRUPT =16, SA_ERR_SYSTEM = 17, SA_ERR_NAME_NOT_FOUND = 18, SA_ERR_NO_RESOURCES = 19, SA_ERR_NOT_SUPPORTED = 20, SA_ERR_BAD_OPERATION = 21, SA_ERR_FAILED_OPERATION = 22, SA_ERR_MESSAGE_ERROR = 23, SA_ERR_NO_MESSAGE = 24, SA_ERR_QUEUE_FULL = 25, SA_ERR_QUEUE_NOT_AVAILABLE = 26, SA_ERR_BAD_CHECKPOINT = 27, SA_ERR_BAD_FLAGS = 28 } SaErrorT; #endif /* _AIS_BASE_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_checkpoint.h0000644000000000000000000001535211576626513022535 0ustar00usergroup00000000000000#ifndef _AIS_CHECKPOINT_H_ #define _AIS_CHECKPOINT_H_ #include "ais_base.h" /* Chapter 7 */ #ifdef __CPLUSPLUS extern "C"{ #endif typedef OPAQUE_TYPE SaCkptHandleT; typedef OPAQUE_TYPE SaCkptCheckpointHandleT; typedef OPAQUE_TYPE SaCkptSectionIteratorT; #define SA_CKPT_WR_ALL_REPLICAS 0X1 #define SA_CKPT_WR_ACTIVE_REPLICA 0X2 #define SA_CKPT_WR_ACTIVE_REPLICA_WEAK 0X4 typedef SaUint32T SaCkptCheckpointCreationFlagsT; typedef struct { SaCkptCheckpointCreationFlagsT creationFlags; SaSizeT checkpointSize; SaTimeT retentionDuration; SaUint32T maxSections; SaSizeT maxSectionSize; SaUint32T maxSectionIdSize; } SaCkptCheckpointCreationAttributesT; #define SA_CKPT_CHECKPOINT_READ 0X1 #define SA_CKPT_CHECKPOINT_WRITE 0X2 #define SA_CKPT_CHECKPOINT_COLOCATED 0X4 typedef SaUint32T 
SaCkptCheckpointOpenFlagsT; #define SA_CKPT_DEFAULT_SECTION_ID {NULL, 0} #define SA_CKPT_GENERATED_SECTION_ID {NULL, 0} typedef struct { SaUint8T *id; SaUint32T idLen; } SaCkptSectionIdT; typedef struct { SaCkptSectionIdT *sectionId; SaTimeT expirationTime; } SaCkptSectionCreationAttributesT; typedef enum { SA_CKPT_SECTION_VALID = 1, SA_CKPT_SECTION_CORRUPTED = 2 } SaCkptSectionStateT; typedef struct { SaCkptSectionIdT sectionId; SaTimeT expirationTime; SaSizeT sectionSize; SaCkptSectionStateT sectionState; SaTimeT lastUpdate; } SaCkptSectionDescriptorT; typedef enum { SA_CKPT_SECTIONS_FOREVER = 1, SA_CKPT_SECTIONS_LEQ_EXPIRATION_TIME = 2, SA_CKPT_SECTIONS_GEQ_EXPIRATION_TIME = 3, SA_CKPT_SECTIONS_CORRUPTED = 4, SA_CKPT_SECTIONS_ANY = 5 } SaCkptSectionsChosenT; typedef struct { SaCkptSectionIdT sectionId; void *dataBuffer; SaSizeT dataSize; SaOffsetT dataOffset; SaSizeT readSize; /*[out] */ } SaCkptIOVectorElementT; typedef struct { SaCkptCheckpointCreationAttributesT checkpointCreationAttributes; SaUint32T numberOfSections; SaUint32T memoryUsed; } SaCkptCheckpointStatusT; typedef void (*SaCkptCheckpointOpenCallbackT)(SaInvocationT invocation, const SaCkptCheckpointHandleT *checkpointHandle, SaErrorT error); typedef void (*SaCkptCheckpointSynchronizeCallbackT)(SaInvocationT invocation, SaErrorT error); typedef struct { SaCkptCheckpointOpenCallbackT saCkptCheckpointOpenCallback; SaCkptCheckpointSynchronizeCallbackT saCkptCheckpointSynchronizeCallback; } SaCkptCallbacksT; SaErrorT saCkptInitialize(SaCkptHandleT *ckptHandle, const SaCkptCallbacksT *callbacks, const SaVersionT *version); SaErrorT saCkptSelectionObjectGet(const SaCkptHandleT *ckptHandle, SaSelectionObjectT *selectionObject); SaErrorT saCkptDispatch(const SaCkptHandleT *ckptHandle, SaDispatchFlagsT dispatchFlags); SaErrorT saCkptFinalize(const SaCkptHandleT *ckptHandle); SaErrorT saCkptCheckpointOpen( const SaCkptHandleT *ckptHandle, const SaNameT *ckeckpointName, const SaCkptCheckpointCreationAttributesT *checkpointCreationAttributes, SaCkptCheckpointOpenFlagsT checkpointOpenFlags, SaTimeT timeout, SaCkptCheckpointHandleT *checkpointHandle); SaErrorT saCkptCheckpointOpenAsync(const SaCkptHandleT *ckptHandle, SaInvocationT invocation, const SaNameT *ckeckpointName, const SaCkptCheckpointCreationAttributesT *checkpointCreationAttributes, SaCkptCheckpointOpenFlagsT checkpointOpenFlags); SaErrorT saCkptCheckpointClose(const SaCkptCheckpointHandleT *checkpointHandle); SaErrorT saCkptCheckpointUnlink( const SaCkptHandleT *ckptHandle, const SaNameT *checkpointName); SaErrorT saCkptCheckpointRetentionDurationSet(const SaCkptCheckpointHandleT *checkpointHandle, SaTimeT retentionDuration); SaErrorT saCkptActiveCheckpointSet(const SaCkptCheckpointHandleT *checkpointHandle); SaErrorT saCkptCheckpointStatusGet(const SaCkptCheckpointHandleT *checkpointHandle, SaCkptCheckpointStatusT *checkpointStatus); SaErrorT saCkptSectionCreate(const SaCkptCheckpointHandleT *checkpointHandle, SaCkptSectionCreationAttributesT *sectionCreationAttributes, const void *initialData, SaUint32T initialDataSize); SaErrorT saCkptSectionDelete(const SaCkptCheckpointHandleT *checkpointHandle, const SaCkptSectionIdT *sectionId); SaErrorT saCkptSectionExpirationTimeSet(const SaCkptCheckpointHandleT *checkpointHandle, const SaCkptSectionIdT* sectionId, SaTimeT expirationTime); SaErrorT saCkptSectionIteratorInitialize(const SaCkptCheckpointHandleT *checkpointHandle, SaCkptSectionsChosenT sectionsChosen, SaTimeT expirationTime, SaCkptSectionIteratorT *sectionIterator); 
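/*
 * A minimal usage sketch of the section iterator (illustrative only;
 * "ckpt" is assumed to have been obtained from saCkptCheckpointOpen()):
 *
 *	SaCkptSectionIteratorT   iter;
 *	SaCkptSectionDescriptorT desc;
 *
 *	if (saCkptSectionIteratorInitialize(&ckpt, SA_CKPT_SECTIONS_ANY,
 *	    SA_TIME_END, &iter) == SA_OK) {
 *		while (saCkptSectionIteratorNext(&iter, &desc) == SA_OK)
 *			... examine desc.sectionId, desc.sectionSize ...
 *		saCkptSectionIteratorFinalize(&iter);
 *	}
 */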
SaErrorT saCkptSectionIteratorNext(SaCkptSectionIteratorT *sectionIterator, SaCkptSectionDescriptorT *sectionDescriptor); SaErrorT saCkptSectionIteratorFinalize(SaCkptSectionIteratorT *sectionIterator); SaErrorT saCkptCheckpointWrite(const SaCkptCheckpointHandleT *checkpointHandle, const SaCkptIOVectorElementT *ioVector, SaUint32T numberOfElements, SaUint32T *erroneousVectorIndex); SaErrorT saCkptSectionOverwrite(const SaCkptCheckpointHandleT *checkpointHandle, const SaCkptSectionIdT *sectionId, SaUint8T *dataBuffer, SaSizeT dataSize); SaErrorT saCkptCheckpointRead(const SaCkptCheckpointHandleT *checkpointHandle, SaCkptIOVectorElementT *ioVector, SaUint32T numberOfElements, SaUint32T *erroneousVectorIndex); SaErrorT saCkptCheckpointSynchronize(const SaCkptCheckpointHandleT *ckeckpointHandle, SaTimeT timeout); SaErrorT saCkptCheckpointSynchronizeAsync(const SaCkptHandleT *ckptHandle, SaInvocationT invocation, const SaCkptCheckpointHandleT *checkpointHandle); #ifdef __CPLUSPLUS } #endif #endif /* _AIS_CHECKPOINT_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_event.h0000644000000000000000000001032011576626513021515 0ustar00usergroup00000000000000#ifndef _AIS_EVENT_H_ #define _AIS_EVENT_H_ #include "ais_base.h" /* Chapter 8 */ #ifdef __CPLUSPLUS extern "C"{ #endif typedef SaUint32T SaEvtHandleT; typedef SaUint32T SaEvtEventHandleT; typedef SaUint32T SaEvtChannelHandleT; typedef SaUint32T SaEvtSubscriptionIdT; typedef void (*SaEvtEventDeliverCallbackT)(SaEvtSubscriptionIdT subscriptionId, const SaEvtEventHandleT eventHandle, const SaSizeT eventDataSize); typedef void (*SaEvtChannelOpenCallbackT)(SaInvocationT invocation, SaEvtChannelHandleT channelHandle, SaErrorT error); typedef struct{ SaEvtEventDeliverCallbackT saEvtEventDeliverCallback; SaEvtChannelOpenCallbackT saEvtChannelOpenCallback; } SaEvtCallbacksT; #define SA_EVT_CHANNEL_PUBLISHER 0X1 #define SA_EVT_CHANNEL_SUBSCRIBER 0X2 #define SA_EVT_CHANNEL_CREATE 0X4 typedef SaUint8T SaEvtChannelOpenFlagsT; typedef struct { SaUint8T *pattern; SaSizeT patternSize; } SaEvtEventPatternT; #define SA_EVT_HIGHEST_PRIORITY 0 #define SA_EVT_LOWEST_PRIORITY 3 #define SA_EVT_LOST_EVENT "SA_EVT_LOST_EVENT_PATTERN" typedef struct { SaEvtEventPatternT *patterns; SaSizeT patternsNumber; } SaEvtEventPatternArrayT; typedef SaUint8T SaEvtEventPriorityT; typedef SaUint64T SaEvtEventIdT; typedef enum { SA_EVT_PREFIX_FILTER = 1, SA_EVT_SUFFIX_FILTER = 2, SA_EVT_EXACT_FILTER = 3, SA_EVT_PASS_ALL_FILTER = 4 } SaEvtEventFilterTypeT; typedef struct { SaEvtEventFilterTypeT filterType; SaEvtEventPatternT filter; } SaEvtEventFilterT; typedef struct { SaEvtEventFilterT *filters; SaSizeT filtersNumber; } SaEvtEventFilterArrayT; SaErrorT saEvtInitialize(SaEvtHandleT *evtHandle, const SaEvtCallbacksT *callbacks, SaVersionT *version); SaErrorT saEvtSelectionObjectGet(SaEvtHandleT evtHandle, SaSelectionObjectT *selectionObject); SaErrorT saEvtDispatch(const SaEvtHandleT evtHandle, SaDispatchFlagsT dispatchFlags); SaErrorT saEvtFinalize(SaEvtHandleT evtHandle); SaErrorT saEvtChannelOpen(const SaEvtHandleT evtHandle, const SaNameT *channelName, SaEvtChannelOpenFlagsT channelOpenFlags, SaTimeT timeout, SaEvtChannelHandleT *channelHandle); SaErrorT saEvtChannelOpenAsync( SaEvtHandleT evtHandle, SaInvocationT invocation, const SaNameT *channelName, SaEvtChannelOpenFlagsT channelOpenFlags); SaErrorT saEvtChannelClose(SaEvtChannelHandleT channelHandle); SaErrorT saEvtEventAllocate(const SaEvtChannelHandleT channelHandle, SaEvtEventHandleT *eventHandle); SaErrorT 
saEvtEventFree(SaEvtEventHandleT eventHandle); SaErrorT saEvtEventAttributesSet(const SaEvtEventHandleT eventHandle, const SaEvtEventPatternArrayT *patternArray, SaUint8T priority, SaTimeT retentionTime, const SaNameT *publisherName); SaErrorT saEvtEventAttributesGet(const SaEvtEventHandleT eventHandle, SaEvtEventPatternArrayT *patternArray, SaUint8T *priority, SaTimeT *retentionTime, SaNameT *publisherName, SaTimeT *publishTime, SaEvtEventIdT *eventId); SaErrorT saEvtEventDataGet(const SaEvtEventHandleT eventHandle, void *eventData, SaSizeT *eventDataSize); SaErrorT saEvtEventPublish(const SaEvtEventHandleT eventHandle, const void *eventData, SaSizeT eventDataSize, SaEvtEventIdT *eventId); SaErrorT saEvtEventSubscribe(const SaEvtChannelHandleT channelHandle, const SaEvtEventFilterArrayT *filters, SaEvtSubscriptionIdT subscriptionId); SaErrorT saEvtEventUnsubscribe(const SaEvtChannelHandleT channelHandle, SaEvtSubscriptionIdT subscriptionId); SaErrorT saEvtChannelUnlink(SaEvtHandleT evtHandle, const SaNameT *channelName); SaErrorT saEvtEventRetentionTimeClear(SaEvtChannelHandleT channelHandle, const SaEvtEventIdT eventHandle); #ifdef __CPLUSPLUS } #endif #endif /* _AIS_EVENT_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_lock.h0000644000000000000000000000713211576626513021333 0ustar00usergroup00000000000000#ifndef _AIS_LOCK_H_ #define _AIS_LOCK_H_ #include "ais_base.h" /* Chapter 10 */ typedef OPAQUE_TYPE SaLckHandleT; typedef OPAQUE_TYPE SaLckLockIdT; typedef OPAQUE_TYPE SaLckResourceIdT; #define SA_LCK_LOCK_NO_QUEUE 0x1 #define SA_LCK_LOCK_ORPHAN 0x2 #define SA_LCK_LOCK_TIMEOUT 0X4 typedef SaUint32T SaLckLockFlagsT; typedef enum { SA_LCK_LOCK_GRANTED = 1, SA_LCK_LOCK_RELEASED = 2, SA_LCK_LOCK_DEADLOCK = 3, SA_LCK_LOCK_NOT_QUEUED = 4, SA_LCK_LOCK_TIMED_OUT = 5, SA_LCK_LOCK_ORPHANED = 6, SA_LCK_LOCK_NO_MORE = 7 } SaLckLockStatusT; typedef enum { SA_LCK_PR_LOCK_MODE = 1, SA_LCK_EX_LOCK_MODE = 2 } SaLckLockModeT; typedef void (*SaLckLockGrantCallbackT)(SaInvocationT invocation, const SaLckResourceIdT *resourceId, const SaLckLockIdT *lockId, SaLckLockModeT lockMode, SaLckLockStatusT lockStatus, SaErrorT error); typedef void (*SaLckLockWaiterCallbackT)(SaInvocationT invocation, const SaLckResourceIdT *resourceId, const SaLckLockIdT *lockId, SaLckLockModeT modeHeld, SaLckLockModeT modeRequested); typedef void (*SaLckResourceUnlockCallbackT)(SaInvocationT invocation, const SaLckResourceIdT *resourceId, const SaLckLockIdT *lockId, SaLckLockStatusT lockStatus, SaErrorT error); typedef struct{ SaLckLockGrantCallbackT saLckLockGrantCallback; SaLckLockWaiterCallbackT saLckLockWaiterCallback; SaLckResourceUnlockCallbackT saLckResourceUnlockCallback; } SaLckCallbacksT; SaErrorT saLckInitialize(SaLckHandleT *lckHandle, const SaLckCallbacksT *lckCallbacks, const SaVersionT *version); SaErrorT saLckSelectionObjectGet(const SaLckHandleT *lckHandle, SaSelectionObjectT *selectionObject); SaErrorT saLckDispatch(const SaLckHandleT *lckHandle, const SaDispatchFlagsT dispatchFlags); SaErrorT saLckFinalize(SaLckHandleT *lckHandle); SaErrorT saLckResourceOpen(const SaLckHandleT *lckHandle, const SaNameT *lockName, SaLckResourceIdT *resourceId); SaErrorT saLckResourceClose(SaLckHandleT *lckHandle, SaLckResourceIdT *resourceId); SaErrorT saLckResourceLock(const SaLckHandleT *lckHandle, SaInvocationT invocation, const SaLckResourceIdT *resourceId, SaLckLockIdT *lockId, SaLckLockModeT lockMode, SaLckLockFlagsT lockFlags, SaTimeT timeout, SaLckLockStatusT *lockStatus); SaErrorT SaLckResourceLockAsync(const 
SaLckHandleT *lckHandle, SaInvocationT invocation, const SaLckResourceIdT *resourceId, SaLckLockIdT *lockId, SaLckLockModeT lockMode, SaLckLockFlagsT lockFlags, SaTimeT timeout); SaErrorT saLckResourceUnlock(const SaLckHandleT *lckHandle, SaLckLockIdT *lockId, SaTimeT timeout); SaErrorT saLckResourceUnlockAsync(const SaLckHandleT *lckHandle, SaInvocationT invocation, const SaLckLockIdT *lockId); SaErrorT saLckLockPurge(const SaLckHandleT *lckHandle, const SaLckResourceIdT *resourceId); #endif /* _AIS_LOCK_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_membership.h0000644000000000000000000000501511576626513022534 0ustar00usergroup00000000000000#ifndef _AIS_MEMBERSHIP_H_ #define _AIS_MEMBERSHIP_H_ #include "ais_base.h" /* Chapter 6 */ #ifdef __CPLUSPLUS extern "C"{ #endif typedef SaUint32T SaClmHandleT; typedef SaUint32T SaClmNodeIdT; #define SA_CLM_MAX_ADDRESS_LENGTH 64 typedef struct { SaUint8T length; char value[SA_CLM_MAX_ADDRESS_LENGTH]; } SaClmNodeAddressT; typedef struct { SaClmNodeIdT nodeId; SaClmNodeAddressT nodeAddress; SaNameT nodeName; SaNameT clusterName; SaBoolT member; SaTimeT bootTimestamp; } SaClmClusterNodeT; typedef enum { SA_CLM_NODE_NO_CHANGE = 1, SA_CLM_NODE_JOINED = 2, SA_CLM_NODE_LEFT = 3 } SaClmClusterChangesT; typedef struct { SaClmClusterNodeT clusterNode; SaClmClusterChangesT clusterChanges; } SaClmClusterNotificationT; typedef void (*SaClmClusterTrackCallbackT) (SaClmClusterNotificationT *notificationBuffer, SaUint32T numberOfItems, SaUint32T numberOfMembers, SaUint64T viewNumber, SaErrorT error); typedef void (*SaClmClusterNodeGetCallbackT)(SaInvocationT invocation, SaClmClusterNodeT *clusterNode, SaErrorT error); typedef struct { SaClmClusterNodeGetCallbackT saClmClusterNodeGetCallback; SaClmClusterTrackCallbackT saClmClusterTrackCallback; } SaClmCallbacksT; SaErrorT saClmInitialize(SaClmHandleT *clmHandle, const SaClmCallbacksT *clmCallbacks, const SaVersionT *version); SaErrorT saClmSelectionObjectGet(const SaClmHandleT *clmHandle, SaSelectionObjectT *selectionObject); SaErrorT saClmDispatch(const SaClmHandleT *clmHandle, SaDispatchFlagsT dispatchFlags); SaErrorT saClmFinalize(SaClmHandleT *clmHandle); SaErrorT saClmClusterTrackStart(const SaClmHandleT *clmHandle, SaUint8T trackFlags, SaClmClusterNotificationT *notificationBuffer, SaUint32T numberOfItems); SaErrorT saClmClusterTrackStop(const SaClmHandleT *clmHandle); SaErrorT saClmClusterNodeGet(SaClmNodeIdT nodeId, SaTimeT timeout, SaClmClusterNodeT *clusterNode); SaErrorT saClmClusterNodeGetAsync(const SaClmHandleT *clmHandle, SaInvocationT invocation, SaClmNodeIdT nodeId, SaClmClusterNodeT *clusterNode); #ifdef __CPLUSPLUS } #endif #endif /* _AIS_MEMBERSHIP_H_ */ Heartbeat-3-0-7e3a82377fa8/include/saf/ais_message.h0000644000000000000000000001653211576626513022033 0ustar00usergroup00000000000000#ifndef _AIS_MESSAGE_H_ #define _AIS_MESSAGE_H_ #include "ais_base.h" /* Chapter 9 */ #ifdef __CPLUSPLUS extern "C" { #endif typedef OPAQUE_TYPE SaMsgHandleT; typedef OPAQUE_TYPE SaMsgMessageHandleT; typedef OPAQUE_TYPE SaMsgQueueHandleT; typedef OPAQUE_TYPE SaMsgSenderIdT; #define SA_MSG_MESSAGE_DELIVERED_ACK 0x1 typedef SaUint32T SaMsgAckFlagsT; #define SA_MSG_QUEUE_PERSISTENT 0x1 #define SA_MSG_QUEUE_MIGRATABLE 0x2 typedef SaUint32T SaMsgQueueCreationFlagsT; #define SA_MSG_MESSAGE_HIGHEST_PRIORITY 0 #define SA_MSG_MESSAGE_LOWEST_PRIORITY 3 typedef struct { SaMsgQueueCreationFlagsT creationFlags; SaSizeT size[SA_MSG_MESSAGE_LOWEST_PRIORITY + 1]; SaTimeT retentionTime; } SaMsgQueueCreationAttributesT; #define 
SA_MSG_QUEUE_CREATE 0x1 #define SA_MSG_QUEUE_RECEIVE_CALLBACK 0x2 #define SA_MSG_QUEUE_SELECTION_OBJECT_SET 0x4 #define SA_MSG_QUEUE_EMPTY 0x8 typedef SaUint32T SaMsgQueueOpenFlagsT; typedef enum { SA_MSG_QUEUE_UNAVAILABLE = 1, SA_MSG_QUEUE_AVAILABLE = 2 } SaMsgQueueSendingStateT; typedef struct { SaUint32T queueSize; SaSizeT queueUsed; SaUint32T numberOfMessages; } SaMsgQueueUsageT; typedef struct { SaMsgQueueSendingStateT sendingState; SaMsgQueueCreationFlagsT creationFlags; SaMsgQueueOpenFlagsT openFlags; SaTimeT retentionTime; SaTimeT closeTime; SaSizeT headerLength; SaMsgQueueUsageT saMsgQueueUsage[SA_MSG_MESSAGE_LOWEST_PRIORITY + 1]; } SaMsgQueueStatusT; typedef enum { SA_MSG_QUEUE_GROUP_ROUND_ROBIN = 1 } SaMsgQueueGroupPolicyT; typedef enum { SA_MSG_QUEUE_GROUP_NO_CHANGE = 1, SA_MSG_QUEUE_GROUP_ADDED = 2, SA_MSG_QUEUE_GROUP_REMOVED = 3, SA_MSG_QUEUE_GROUP_STATE_CHANGED = 4 } SaMsgQueueGroupChangesT; typedef struct { SaNameT queueName; SaMsgQueueStatusT queueStatus; } SaMsgQueueGroupMemberT; typedef struct { SaMsgQueueGroupChangesT change; SaMsgQueueGroupMemberT member; } SaMsgQueueGroupNotificationT; typedef struct { SaUint32T numberOfItems; SaMsgQueueGroupNotificationT *notification; } SaMsgQueueGroupNotificationBufferT; typedef struct { SaSizeT type; SaSizeT version; SaSizeT size; void *data; SaUint8T priority; } SaMsgMessageT; typedef struct { SaTimeT sendTime; SaNameT senderName; SaBoolT sendReceive; SaMsgSenderIdT senderId; } SaMsgMessageInfoT; typedef void (*SaMsgQueueOpenCallbackT)(SaInvocationT invocation, const SaMsgQueueHandleT *queueHandle, SaErrorT error); typedef void (*SaMsgQueueGroupTrackCallbackT)(const SaNameT *queueGroupName, const SaMsgQueueGroupNotificationBufferT *notificationBuffer, SaMsgQueueGroupPolicyT queueGroupPolicy, SaUint32T numberOfMembers, SaErrorT error); typedef void (*SaMsgMessageDeliveredCallbackT)(SaInvocationT invocation, SaErrorT error); typedef void (*SaMsgMessageReceivedCallbackT)(const SaMsgQueueHandleT *queueHandle); typedef struct { const SaMsgQueueOpenCallbackT saMsgQueueOpenCallback; const SaMsgQueueGroupTrackCallbackT saMsgQueueGroupTrackCallback; const SaMsgMessageDeliveredCallbackT saMsgMessageDeliveredCallback; const SaMsgMessageReceivedCallbackT saMsgMessageReceivedCallback; } SaMsgCallbacksT; SaErrorT saMsgInitialize(SaMsgHandleT *msgHandle, const SaMsgCallbacksT *msgCallbacks, const SaVersionT *version); SaErrorT saMsgSelectionObjectGet(const SaMsgHandleT *msgHandle, SaSelectionObjectT *selectionObject); SaErrorT saMsgDispatch(const SaMsgHandleT *msgHandle, SaDispatchFlagsT dispatchFlags); SaErrorT saMsgFinalize(SaMsgHandleT *msgHandle); SaErrorT saMsgQueueOpen(const SaMsgHandleT *msgHandle, const SaNameT *queueName, const SaMsgQueueCreationAttributesT *creationAttributes, SaMsgQueueOpenFlagsT openFlags, SaTimeT timeout, SaMsgQueueHandleT *queueHandle); SaErrorT saMsgQueueOpenAsync(const SaMsgHandleT *msgHandle, SaInvocationT invocation, const SaNameT *queueName, const SaMsgQueueCreationAttributesT *creationAttributes, SaMsgQueueOpenFlagsT openFlags); SaErrorT saMsgQueueClose(SaMsgQueueHandleT *queueHandle); SaErrorT saMsgQueueStatusGet(SaMsgHandleT *msgHandle, const SaNameT *queueName, SaMsgQueueStatusT *queueStatus); SaErrorT saMsgQueueUnlink(SaMsgHandleT *msgHandle, const SaNameT *queueName); SaErrorT saMsgQueueGroupCreate(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, SaMsgQueueGroupPolicyT queueGroupPolicy); SaErrorT saMsgQueueGroupDelete(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName); SaErrorT 
saMsgQueueGroupInsert(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, const SaNameT *queueName); SaErrorT saMsgQueueGroupRemove(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, const SaNameT *queueName); SaErrorT saMsgQueueGroupTrack(const SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, SaUint8T trackFlags, SaMsgQueueGroupNotificationBufferT *notificationBuffer); SaErrorT saMsgQueueGroupTrackStop(const SaMsgHandleT *msgHandle, const SaNameT *queueGroupName); SaErrorT saMsgMessageSend(const SaMsgHandleT *msgHandle, const SaNameT *destination, const SaMsgMessageT *message, SaMsgAckFlagsT ackFlags, SaTimeT timeout); SaErrorT saMsgMessageSendAsync(const SaMsgHandleT *msgHandle, SaInvocationT invocation, const SaNameT *destination, const SaMsgMessageT *message, SaMsgAckFlagsT ackFlags); SaErrorT saMsgMessageGet(const SaMsgQueueHandleT *queueHandle, SaMsgMessageT *message, SaMsgMessageInfoT *messageInfo, SaTimeT timeout); SaErrorT saMsgMessageReceivedGet(const SaMsgQueueHandleT *queueHandle, const SaMsgMessageHandleT *messageHandle, SaMsgMessageT *message, SaMsgMessageInfoT *messageInfo); SaErrorT saMsgMessageCancel(const SaMsgQueueHandleT *queueHandle); SaErrorT saMsgMessageSendReceive(SaMsgHandleT msgHandle, const SaNameT *destination, const SaMsgMessageT *sendMessage, SaMsgMessageT *receiveMessage, SaTimeT *replySendTime, SaTimeT timeout); SaErrorT saMsgMessageReply(SaMsgHandleT msgHandle, const SaMsgMessageT *replyMessage, const SaMsgSenderIdT *senderId, SaTimeT timeout); SaErrorT saMsgMessageReplyAsync(SaMsgHandleT msgHandle, SaInvocationT invocation, const SaMsgMessageT *replyMessage, const SaMsgSenderIdT *senderId, SaMsgAckFlagsT ackFlags); #ifdef __CPLUSPLUS } #endif #endif /* _AIS_MESSAGE_H_ */ Heartbeat-3-0-7e3a82377fa8/lib/Makefile.am0000644000000000000000000000214511576626513020003 0ustar00usergroup00000000000000# # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # # PILS is supposed to be an independent library # Stonith should rely only on PILS # # The rest can use clplumbing, and the plugins should be able # to use any of the libraries (nothing can be linked against them) # # ## Subdirectories... SUBDIRS = \ hbclient \ apphb \ plugins Heartbeat-3-0-7e3a82377fa8/lib/apphb/Makefile.am0000644000000000000000000000243711576626513021101 0ustar00usergroup00000000000000# # apphb: OCF application heartbeat library # # Copyright (C) 2002 Alan Robertson, International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl AM_CFLAGS = @CFLAGS@ ## libraries lib_LTLIBRARIES = libapphb.la libapphb_la_SOURCES = apphb.c if ENABLE_THREAD_SAFE libapphb_la_CFLAGS = -DTHREAD_SAFE -D_REENTRANT else libapphb_la_CFLAGS = -lpthread endif libapphb_la_LDFLAGS = -version-info 2:0:0 Heartbeat-3-0-7e3a82377fa8/lib/apphb/apphb.c0000644000000000000000000002104211576626513020274 0ustar00usergroup00000000000000/* * apphb.c: application heartbeat library code. * * Copyright (C) 2002 Alan Robertson * * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define time footime #define index fooindex #include #undef time #undef index #include #include #include #ifdef THREAD_SAFE #define G_STATIC_MUTEX_LOCK(lock) g_static_mutex_lock(lock) #define G_STATIC_MUTEX_UNLOCK(lock) g_static_mutex_unlock(lock) /* * G_DECLARE_STATIC_MUTEX(): * An empty ";" declaration in code breaks some compilers. * So this non-empty macro definition supplies its trailing ";". * Uses of this macro should not include the ";". 
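 *
 *	Typical use (this is the pattern the functions below follow):
 *
 *		G_DECLARE_STATIC_MUTEX(lock)	(note: no trailing ";")
 *		...
 *		G_STATIC_MUTEX_LOCK(&lock);
 *		...
 *		G_STATIC_MUTEX_UNLOCK(&lock);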
*/ #define G_DECLARE_STATIC_MUTEX(var) \ static GStaticMutex var = G_STATIC_MUTEX_INIT; #define G_THREAD_INIT(vtable) \ if ( !g_thread_supported() ) { \ g_thread_init(vtable); \ } #else #define G_STATIC_MUTEX_LOCK(lock) #define G_STATIC_MUTEX_UNLOCK(lock) #define G_DECLARE_STATIC_MUTEX(var) #define G_THREAD_INIT(vtable) #endif static struct IPC_CHANNEL* hbcomm = NULL; static GHashTable * hbattrs; static int hbstatus = -1; static int apphb_getrc(void); /* Get return code from last operation */ static int apphb_getrc(void) { G_DECLARE_STATIC_MUTEX(lock) struct apphb_rc * rcs; int rc; struct IPC_MESSAGE * msg; G_STATIC_MUTEX_LOCK(&lock); hbcomm->ops->waitin(hbcomm); if (hbcomm->ops->recv(hbcomm, &msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); perror("Receive failure:"); return errno; } G_STATIC_MUTEX_UNLOCK(&lock); rcs = msg->msg_body; rc = rcs->rc; msg->msg_done(msg); return rc; } /* Register for application heartbeat services */ int apphb_register(const char * appname, const char * appinstance) { G_DECLARE_STATIC_MUTEX(lock) int err; struct IPC_MESSAGE Msg; struct apphb_signupmsg msg; static char path [] = IPC_PATH_ATTR; static char sockpath [] = APPHBSOCKPATH; G_THREAD_INIT(NULL); if (appname == NULL || appinstance == NULL) { errno = EINVAL; return -1; } if (strlen(appname) >= APPHB_OLEN || strlen(appinstance) >= APPHB_OLEN) { errno = ENAMETOOLONG; return -1; } G_STATIC_MUTEX_LOCK(&lock); if (hbcomm != NULL) { errno = EEXIST; G_STATIC_MUTEX_UNLOCK(&lock); return -1; } /* Create communication channel with server... */ hbattrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(hbattrs, path, sockpath); hbcomm = ipc_channel_constructor(IPC_ANYTYPE, hbattrs); if (hbcomm == NULL || (hbstatus = hbcomm->ops->initiate_connection(hbcomm) != IPC_OK)) { G_STATIC_MUTEX_UNLOCK(&lock); apphb_unregister(); errno = EBADF; return -1; } /* Send registration message ... */ strncpy(msg.msgtype, REGISTER, sizeof(msg.msgtype)); strncpy(msg.appname, appname, sizeof(msg.appname)); strncpy(msg.appinstance, appinstance, sizeof(msg.appinstance)); /* Maybe we need current starting directory instead of * current work directory. */ if ( getcwd(msg.curdir, APPHB_OLEN) == NULL) { apphb_unregister(); G_STATIC_MUTEX_UNLOCK(&lock); return -1; } msg.pid = getpid(); msg.uid = getuid(); msg.gid = getgid(); memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); apphb_unregister(); errno = EBADF; return -1; } G_STATIC_MUTEX_UNLOCK(&lock); if ((err = apphb_getrc()) != 0) { hbstatus = -1; errno = err; return -1; } return 0; } /* Unregister from application heartbeat services */ int apphb_unregister(void) { G_DECLARE_STATIC_MUTEX(lock) int rc = 0; int err; struct apphb_msg msg; struct IPC_MESSAGE Msg; G_THREAD_INIT(NULL); G_STATIC_MUTEX_LOCK(&lock); if (hbcomm == NULL || hbstatus != IPC_OK) { errno = ESRCH; rc = -1; } /* Send an unregister message to the server... 
*/ if (hbcomm != NULL && hbstatus == IPC_OK) { strncpy(msg.msgtype, UNREGISTER, sizeof(msg.msgtype)); memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { rc = -1; rc = EBADF; }else { if ((err = apphb_getrc()) != 0) { errno = err; rc = -1; } } } /* Destroy and NULL out hbcomm */ if (hbcomm) { hbcomm->ops->destroy(hbcomm); hbcomm = NULL; }else{ errno = ESRCH; rc = -1; } /* Destroy and NULL out hbattrs */ if (hbattrs) { g_hash_table_destroy(hbattrs); hbattrs = NULL; } G_STATIC_MUTEX_LOCK(&lock); return rc; } /* Set application heartbeat interval (in milliseconds) */ int apphb_setinterval(unsigned long hbms) { G_DECLARE_STATIC_MUTEX(lock) struct apphb_msmsg msg; struct IPC_MESSAGE Msg; int err; G_THREAD_INIT(NULL); G_STATIC_MUTEX_LOCK(&lock); if (hbcomm == NULL || hbstatus != IPC_OK) { errno = ESRCH; G_STATIC_MUTEX_UNLOCK(&lock); return -1; } strncpy(msg.msgtype, SETINTERVAL, sizeof(msg.msgtype)); msg.ms = hbms; memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); errno = EBADF; return -1; } G_STATIC_MUTEX_UNLOCK(&lock); if ((err = apphb_getrc()) != 0) { errno = err; return -1; } return 0; } /* Set application heartbeat warning time (in milliseconds) */ int apphb_setwarn(unsigned long hbms) { G_DECLARE_STATIC_MUTEX(lock) struct apphb_msmsg msg; struct IPC_MESSAGE Msg; int err; if (hbms <= 0) { errno = EINVAL; return -1; } G_THREAD_INIT(NULL); G_STATIC_MUTEX_LOCK(&lock); if (hbcomm == NULL || hbstatus != IPC_OK) { errno = ESRCH; G_STATIC_MUTEX_UNLOCK(&lock); return -1; } strncpy(msg.msgtype, SETWARNTIME, sizeof(msg.msgtype)); msg.ms = hbms; memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); errno = EBADF; return -1; } G_STATIC_MUTEX_UNLOCK(&lock); if ((err = apphb_getrc()) != 0) { errno = err; return -1; } return 0; } int apphb_setreboot(unsigned int truefalse) { G_DECLARE_STATIC_MUTEX(lock) struct apphb_msmsg msg; struct IPC_MESSAGE Msg; int err; G_THREAD_INIT(NULL); G_STATIC_MUTEX_LOCK(&lock); if (hbcomm == NULL || hbstatus != IPC_OK) { errno = ESRCH; G_STATIC_MUTEX_UNLOCK(&lock); return -1; } strncpy(msg.msgtype, SETREBOOT, sizeof(msg.msgtype)); msg.ms = truefalse ? 
1UL : 0UL; memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); errno = EBADF; return -1; } G_STATIC_MUTEX_UNLOCK(&lock); if ((err = apphb_getrc()) != 0) { errno = err; return -1; } return 0; } /* Perform application heartbeat */ int apphb_hb(void) { G_DECLARE_STATIC_MUTEX(lock) struct apphb_msg msg; struct IPC_MESSAGE Msg; G_THREAD_INIT(NULL); G_STATIC_MUTEX_LOCK(&lock); if (hbcomm == NULL || hbstatus != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); errno = ESRCH; return -1; } strncpy(msg.msgtype, HEARTBEAT, sizeof(msg.msgtype)); memset(&Msg, 0, sizeof(Msg)); Msg.msg_buf = NULL; Msg.msg_body = &msg; Msg.msg_len = sizeof(msg); Msg.msg_done = NULL; Msg.msg_private = NULL; Msg.msg_ch = hbcomm; if (hbcomm->ops->send(hbcomm, &Msg) != IPC_OK) { G_STATIC_MUTEX_UNLOCK(&lock); errno = EBADF; return -1; } G_STATIC_MUTEX_UNLOCK(&lock); /* NOTE: we do not expect a return code from server */ return 0; } Heartbeat-3-0-7e3a82377fa8/lib/cmsclient/Makefile.am0000644000000000000000000000376311576626513021773 0ustar00usergroup00000000000000# # Linux-HA telecom code: ais message service client library # # Copyright (C) 2004 Intel Corp. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include \ -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl \ -I$(top_srcdir)/libltdl \ -I$(top_builddir)/linux-ha \ -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/heartbeat \ -I$(top_srcdir)/heartbeat \ -I$(top_builddir)/include/clplumbing \ -I$(top_srcdir)/include/clplumbing \ -I$(top_builddir)/telecom/cms \ -I$(top_srcdir)/telecom/cms hadir = $(sysconfdir)/ha.d halibdir = $(libdir)/@HB_PKG@ commmoddir = $(halibdir)/modules/comm havarlibdir = $(localstatedir)/lib/@HB_PKG@ cmsvarlibdir = $(havarlibdir)/cms havarrundir = $(localstatedir)/run/ AM_CFLAGS = @CFLAGS@ \ -DCMSVARLIBDIR='"$(cmsvarlibdir)"' noinst_HEADERS = ## libraries lib_LTLIBRARIES = libcms.la ## binary progs halib_PROGRAMS = libcmsdir = $(libdir)/@HB_PKG@ libcms_PROGRAMS = libcms_la_SOURCES = cmslib_client.c cmslib_client.h libcms_la_CFLAGS = -D$(DEBUG)_LIBRARY libcms_la_LDFLAGS = install-exec-local: $(mkinstalldirs) $(DESTDIR)$(cmsvarlibdir) chmod 770 $(DESTDIR)/$(cmsvarlibdir) -test -p $(DESTDIR)$(cmsvarlibdir) || mkdir -p $(DESTDIR)$(cmsvarlibdir) uninstall-local: -rm -fr $(DESTDIR)$(cmsvarlibdir) Heartbeat-3-0-7e3a82377fa8/lib/cmsclient/cmsclient.h0000644000000000000000000000331011576626513022055 0ustar00usergroup00000000000000/* * cmslib_client.h: SAForum AIS Message Service client library header * * Copyright (c) 2004 Intel Corp. 
* * Author: Zou Yixiong (yixiong.zou@intel.com) * Author: Zhu Yi (yi.zhu@intel.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * */ #ifndef __CMSLIB_CLIENT_H__ #define __CMSLIB_CLIENT_H__ #include #define CMS_LIBRARY_TRACE() dprintf("TRACE: %s\n", __FUNCTION__) typedef struct { IPC_Channel * ch; /* client daemon channel */ int active_fd; /* make select always returns */ int backup_fd; /* backup fd vs. active_fd */ SaMsgHandleT service_handle; SaMsgCallbacksT callbacks; /* client's callback func */ GList * dispatch_queue; /* client's dispatch queue */ GHashTable * queue_handle_hash; } __cms_handle_t; typedef struct { SaMsgQueueHandleT queue_handle; SaNameT queue_name; __cms_handle_t * cms_handle; } __cms_queue_handle_t; typedef struct { const SaNameT * name; SaUint8T flag; SaMsgQueueGroupPolicyT policy; } __mqgroup_track_t; #endif /* __CMSLIB_CLIENT_H__ */ Heartbeat-3-0-7e3a82377fa8/lib/cmsclient/cmslib_client.c0000644000000000000000000012350511576626513022707 0ustar00usergroup00000000000000/* * cmslib_client.c: SAForum AIS Message Service client library * * Copyright (c) 2004 Intel Corp. * * Author: Zou Yixiong (yixiong.zou@intel.com) * Author: Zhu Yi (yi.zhu@intel.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * */ #include #include #include /* dup, dup2 */ #include #include #include #include #include #include #include "cmslib_client.h" #include "cms_client_types.h" #define PIPETRICK_DEBUG 0 #ifdef DEBUG_LIBRARY #define dprintf(arg...) fprintf(stderr, ##arg) #else #define dprintf(arg...) {} #endif #define GET_CMS_HANDLE(x) ((x == NULL) ? NULL : \ (__cms_handle_t *)g_hash_table_lookup( \ __cmshandle_hash, x)) #define GET_MQ_HANDLE(x) ((x == NULL) ? 
NULL : \ (__cms_handle_t *)g_hash_table_lookup( \ __mqhandle_hash, x)) static GHashTable * __cmshandle_hash; static GHashTable * __mqhandle_hash; static GHashTable * __group_tracking_hash; static guint __cmshandle_counter = 0; static gboolean __cmsclient_init_flag = FALSE; static gboolean __notify_acked = TRUE; void cmsclient_hash_init(void); IPC_Channel *cms_channel_conn(void); int enqueue_dispatch_msg(__cms_handle_t * hd, client_header_t * msg); client_header_t * dequeue_dispatch_msg(GList ** queue); int read_and_queue_ipc_msg(__cms_handle_t * handle); int dispatch_msg(__cms_handle_t * handle, client_header_t * msg); int wait_for_msg(__cms_handle_t * handle, size_t msgtype, const SaNameT * name, client_header_t ** msg, SaTimeT timeout); int get_timeout_value(SaTimeT timeout, struct timeval * tv); static int saname_cmp(const SaNameT s1, const SaNameT s2) { SaUint16T len1, len2; /* dprintf("Length of s1: %d, s2: %d\n", s1.length, s2.length); */ len1 = s1.value[s1.length - 1] ? s1.length : s1.length - 1; len2 = s2.value[s2.length - 1] ? s2.length : s2.length - 1; if (len1 != len2) return len2 - len1; return strncmp(s1.value, s2.value, len1); } static int bad_saname(const SaNameT * name) { int i; if (!name || name->length <= 0 || name->length > SA_MAX_NAME_LENGTH - 1) return TRUE; /* * We don't support '\0' inside a SaNameT.value. */ for (i = 0; i < name->length; i++) if (name->value[i] == '\0') return TRUE; return FALSE; } static char * saname2str(SaNameT name) { char * str; if (name.length <= 0) return NULL; if (name.length > SA_MAX_NAME_LENGTH - 1) name.length = SA_MAX_NAME_LENGTH - 1; if ((str = (char *)malloc(name.length + 1)) == NULL) return NULL; strncpy(str, name.value, name.length); str[name.length] = '\0'; return str; } static int active_poll(__cms_handle_t * hd) { int fd; if (hd->backup_fd >= 0) { cl_log(LOG_WARNING, "%s: recursion detected", __FUNCTION__); return 1; } if ((fd = hd->ch->ops->get_recv_select_fd(hd->ch)) < 0) { cl_log(LOG_ERR, "%s: get_recv_select_fd failed", __FUNCTION__); return 1; } if ((hd->backup_fd = dup(fd)) == -1) { cl_log(LOG_ERR, "%s: dup2 failed", __FUNCTION__); perror("dup2"); return 1; } close(fd); if (dup2(hd->active_fd, fd) == -1) { cl_log(LOG_ERR, "%s: dup2 failed", __FUNCTION__); perror("dup2"); return 1; } #if PIPETRICK_DEBUG dprintf("acitve_poll for <%p>\n", hd); #endif return 0; } static int restore_poll(__cms_handle_t * hd) { int fd; if (hd->backup_fd < 0) { cl_log(LOG_WARNING, "%s: recursion detected", __FUNCTION__); return 1; } if ((fd = hd->ch->ops->get_recv_select_fd(hd->ch)) < 0) { cl_log(LOG_ERR, "%s: get_recv_select_fd failed", __FUNCTION__); return 1; } if (dup2(hd->backup_fd, fd) == -1) { cl_log(LOG_ERR, "%s: dup2 failed", __FUNCTION__); return 1; } hd->backup_fd = -1; /* mark as unused */ #if PIPETRICK_DEBUG dprintf("restore_poll for <%p>\n", hd); #endif return 0; } static int cmsclient_message_recv(__cms_handle_t * hd, client_header_t ** data) { int ret; IPC_Message * ipc_msg; if (hd->backup_fd >= 0) restore_poll(hd); ret = hd->ch->ops->recv(hd->ch, &ipc_msg); if (ret != IPC_OK) return ret; *data = malloc(ipc_msg->msg_len); memcpy(*data, ipc_msg->msg_body, ipc_msg->msg_len); ipc_msg->msg_done(ipc_msg); return ret; } static void cmsclient_message_done(IPC_Message * msg) { char * name; client_header_t * message; message = msg->msg_body; name = saname2str(message->name); free(msg->msg_private); free(name); } static int cmsclient_message_send(__cms_handle_t * hd, size_t len, gpointer data) { IPC_Message * msg; if ((msg = 
malloc(sizeof(IPC_Message) + len)) == NULL) { cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); return FALSE; } if (hd->backup_fd >= 0) restore_poll(hd); msg->msg_body = msg + 1; memcpy(msg->msg_body, data, len); msg->msg_len = len; msg->msg_private = msg; msg->msg_done = cmsclient_message_done; msg->msg_buf = 0; return hd->ch->ops->send(hd->ch, msg); } static gboolean msgqueue_remove(gpointer key, gpointer value, gpointer user_data) { __cms_queue_handle_t * qhd = (__cms_queue_handle_t *) value; client_mqueue_close_t cmg; SaNameT * qname; CMS_LIBRARY_TRACE(); qname = &(qhd->queue_name); cmg.header.type = CMS_QUEUE_CLOSE; cmg.header.name = *qname; cmg.handle = qhd->queue_handle; cmg.silent = TRUE; cmsclient_message_send(qhd->cms_handle, sizeof(cmg), &cmg); g_hash_table_remove(__mqhandle_hash, key); free((__cms_queue_handle_t *) qhd); return TRUE; } static gboolean library_initialized(void) { return __cmsclient_init_flag; } void cmsclient_hash_init() { if (library_initialized()) return; __cmshandle_hash = g_hash_table_new(g_int_hash, g_int_equal); __mqhandle_hash = g_hash_table_new(g_int_hash, g_int_equal); __group_tracking_hash = g_hash_table_new(g_str_hash, g_str_equal); __cmsclient_init_flag = TRUE; } /* * This is a blocking wait for a particular type of msg on a particular queue. * Note: memory allocated in this function. caller needs to free(). */ int wait_for_msg(__cms_handle_t * handle, size_t msgtype, const SaNameT * queueName, client_header_t ** msg, SaTimeT timeout) { int fd; client_header_t * cms_msg; longclock_t t_start = 0, t_end = 0; if (timeout < 0) return SA_ERR_INVALID_PARAM; if (timeout != SA_TIME_END) { t_start = time_longclock(); t_end = t_start + msto_longclock(timeout/1000); } if (handle->backup_fd >= 0) restore_poll(handle); fd = handle->ch->ops->get_recv_select_fd(handle->ch); dprintf("In %s for message type 0x%x\n", __FUNCTION__, msgtype); while (1) { int ret = -1; struct timeval * tv, to; fd_set rset; FD_ZERO(&rset); FD_SET(fd, &rset); tv = NULL; if (timeout != SA_TIME_END) { to.tv_sec = longclockto_ms((t_end - t_start))/1000; to.tv_usec = (((t_end - t_start) - secsto_longclock(to.tv_sec)))/1000; tv = &to; } if (!handle->ch->ops->is_message_pending(handle->ch) && (ret = select(fd + 1, &rset, NULL, NULL, tv)) == -1) { cl_log(LOG_ERR, "%s: select error", __FUNCTION__); return SA_ERR_LIBRARY; } else if (ret == 0) { cl_log(LOG_WARNING, "%s: timeout!", __FUNCTION__); return SA_ERR_TIMEOUT; } if ((ret = cmsclient_message_recv(handle, &cms_msg))!= IPC_OK) { if (ret == IPC_FAIL) { cl_shortsleep(); continue; } cl_log(LOG_ERR, "%s: cmsclient_message_recv failed, " "rc = %d", __FUNCTION__, ret); return SA_ERR_LIBRARY; } if (cms_msg->type & msgtype) { if (!queueName || (queueName && (saname_cmp(cms_msg->name, *queueName) == 0))) { *msg = cms_msg; if (g_list_length(handle->dispatch_queue)) active_poll(handle); return SA_OK; } } enqueue_dispatch_msg(handle, cms_msg); t_start = time_longclock(); } } IPC_Channel * cms_channel_conn(void) { IPC_Channel * ch; GHashTable * attrs; char path[] = IPC_PATH_ATTR; char cms_socket[] = CMS_DOMAIN_SOCKET; int ret; attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, cms_socket); ch = ipc_channel_constructor(IPC_DOMAIN_SOCKET, attrs); g_hash_table_destroy(attrs); if (ch) { ret = ch->ops->initiate_connection(ch); if (ret != IPC_OK) { cl_log(LOG_ERR, "cms_channel_conn failed, maybe " "you don't have cms server running..."); return NULL; } /* Disable input buffering. 
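 */

/*
 * Illustrative sketch (not part of the original file) of the connection
 * pattern cms_channel_conn() uses: put the server's Unix-domain socket path
 * into an attribute table under IPC_PATH_ATTR, construct the channel, and
 * connect.  The <clplumbing/ipc.h> header name is an assumption; the calls
 * themselves mirror the surrounding code.
 */
#include <glib.h>
#include <clplumbing/ipc.h>

static IPC_Channel *
example_ipc_connect(const char *sockpath)
{
	IPC_Channel *	ch;
	GHashTable *	attrs;
	char		path[] = IPC_PATH_ATTR;

	attrs = g_hash_table_new(g_str_hash, g_str_equal);
	g_hash_table_insert(attrs, path, (gpointer)sockpath);

	ch = ipc_channel_constructor(IPC_DOMAIN_SOCKET, attrs);
	g_hash_table_destroy(attrs);

	if (ch == NULL) {
		return NULL;		/* no such transport available */
	}
	if (ch->ops->initiate_connection(ch) != IPC_OK) {
		ch->ops->destroy(ch);	/* server not running or wrong path */
		return NULL;
	}
	/* disable input buffering so select() on the fd sees every message */
	ch->ops->set_recv_qlen(ch, 0);
	return ch;
}

/*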
* Otherwise the buffering will interfere with * waiting for input based on the fd alone */ ch->ops->set_recv_qlen(ch, 0); return ch; } else return NULL; } static int enqueue_dispatch_item(GList **queue, client_header_t * item) { *queue = g_list_append(*queue, item); return SA_OK; } int enqueue_dispatch_msg(__cms_handle_t * hd, client_header_t * msg) { client_message_t * fmsg = (client_message_t *)msg; dprintf("calling enqueue_dispatch_msg ..... \n"); /* * If it is a message, then add it to the msg queue. */ if (msg->type == CMS_MSG_NOTIFY) { dprintf("got a CMS_MSG_NOTIFY msg\n"); __notify_acked = FALSE; } return enqueue_dispatch_item(&(hd->dispatch_queue), (client_header_t *) fmsg); } client_header_t * dequeue_dispatch_msg(GList ** queue) { client_header_t * msg = NULL; GList * head; if (!g_list_length(*queue)) return NULL; head = g_list_first(*queue); *queue = g_list_remove_link(*queue, head); msg = head->data; g_list_free_1(head); return msg; } /** * Read all the ipc msg in the buffer and queue them to * the msg queue or the dispatch queue. */ int read_and_queue_ipc_msg(__cms_handle_t * handle) { int ret, count = 0; client_header_t *rcmg; __mqgroup_track_t * track; client_mqgroup_notify_t *nsg, *m; dprintf("b4 the do loop of the read_and_queue_ipc_msg ...\n"); if (handle->backup_fd >= 0) restore_poll(handle); while (handle->ch->ops->is_message_pending(handle->ch)) { ret = cmsclient_message_recv(handle, &rcmg); if (ret == IPC_FAIL) { cl_shortsleep(); cl_log(LOG_WARNING, "%s: cmsclient_message_recv " "failed, rc = %d", __FUNCTION__, ret); break; } switch (rcmg->type) { case CMS_QUEUEGROUP_NOTIFY: /* * prepare the notify buffer */ m = (client_mqgroup_notify_t *)rcmg; m->data = (char *)rcmg + sizeof(client_mqgroup_notify_t); track = g_hash_table_lookup(__group_tracking_hash, (m->group_name).value); if (track == NULL) { /* * This is possible, because TrackStop * may be called before we get here. */ cl_log(LOG_INFO, "No one tracks the group" " [%s] membership now!" 
, m->group_name.value); return TRUE; } track->policy = m->policy; track->buf.numberOfItems = m->number; track->buf.notification = (SaMsgQueueGroupNotificationT *) malloc(m->number * sizeof(SaMsgQueueGroupNotificationT)); memcpy(track->buf.notification, m->data, m->number * sizeof(SaMsgQueueGroupNotificationT)); /* * only enqueue head is enough for us */ dprintf("enqueue group notify msg head\n"); nsg = (client_mqgroup_notify_t *) malloc(sizeof(client_mqgroup_notify_t)); memcpy(nsg, m, sizeof(client_mqgroup_notify_t)); enqueue_dispatch_msg(handle, (client_header_t *)nsg); free(rcmg); break; default: enqueue_dispatch_msg(handle, rcmg); /* TODO: we have a memory leak here need to call the msg_done() */ break; } } return count; } int dispatch_msg(__cms_handle_t * handle, client_header_t * msg) { client_mqueue_open_t * omsg; client_mqgroup_notify_t * nmsg; client_message_ack_t * amsg; __mqgroup_track_t * track; client_message_t * gmsg; char * name; __cms_queue_handle_t * qhd; dprintf("In Function %s..\n", __FUNCTION__); dprintf("handle=<%p> msg->type=<%d>\n", handle, msg->type); if (handle == NULL || msg == NULL) return HA_FAIL; switch (msg->type) { case CMS_QUEUE_OPEN_ASYNC: omsg = (client_mqueue_open_t *) msg; if ((handle->callbacks).saMsgQueueOpenCallback) { if (omsg->header.flag != SA_OK) { omsg->handle = 0; } (handle->callbacks).saMsgQueueOpenCallback( omsg->invocation, &(omsg->handle), omsg->header.flag); } free(omsg); break; case CMS_MSG_NOTIFY: gmsg = (client_message_t *) msg; qhd = g_hash_table_lookup(handle->queue_handle_hash, &(gmsg->handle)); if (handle->callbacks.saMsgMessageReceivedCallback) handle->callbacks.saMsgMessageReceivedCallback( &(qhd->queue_handle)); free(gmsg); break; case CMS_MSG_ACK: amsg = (client_message_ack_t *) msg; if ((handle->callbacks).saMsgMessageDeliveredCallback) { (handle->callbacks).saMsgMessageDeliveredCallback( amsg->invocation, msg->flag); } free(amsg); break; case CMS_QUEUEGROUP_NOTIFY: nmsg = (client_mqgroup_notify_t *)msg; name = (char *) malloc(nmsg->group_name.length + 1); if (name == NULL) { cl_log(LOG_ERR, "%s: malloc failed", __FUNCTION__); return FALSE; } dprintf("group name [%s], length [%d]\n" , nmsg->group_name.value, nmsg->group_name.length); strncpy(name, nmsg->group_name.value, nmsg->group_name.length); name[nmsg->group_name.length] = '\0'; dprintf("name = [%s]\n", name); track = g_hash_table_lookup(__group_tracking_hash, name); if (track == NULL) { cl_log(LOG_ERR, "Cannot find track buffer"); return FALSE; } if ((handle->callbacks).saMsgQueueGroupTrackCallback == NULL) return FALSE; (handle->callbacks).saMsgQueueGroupTrackCallback( track->name, &(track->buf), track->policy, track->buf.numberOfItems, SA_OK); free(name); free(nmsg); break; default: return HA_FAIL; } return HA_OK; } SaErrorT saMsgInitialize(SaMsgHandleT *msgHandle, const SaMsgCallbacksT *msgCallbacks, const SaVersionT *version) { IPC_Channel *ch; __cms_handle_t *hd; SaMsgHandleT * key; int pipefd[2]; cl_log_set_entity("libcms"); cl_log_set_facility(HA_LOG_FACILITY); #ifdef DEBUG_LIBRARY cl_log_enable_stderr(TRUE); #endif if ((!version) || version->releaseCode < 'A' || version->releaseCode > 'Z' || (version->releaseCode << 8) + (version->major << 4) + version->minor > (AIS_VERSION_RELEASE_CODE << 8) + (AIS_VERSION_MAJOR << 4) + AIS_VERSION_MINOR) { cl_log(LOG_ERR, "AIS library version is lower then required"); return SA_ERR_VERSION; } if (!msgHandle) return SA_ERR_INVALID_PARAM; if (!(ch = cms_channel_conn())) { cl_log(LOG_ERR, "cms_channel_conn failed."); return 
SA_ERR_LIBRARY; } if (pipe(pipefd) == -1) { cl_log(LOG_ERR, "create pipe failed"); return SA_ERR_LIBRARY; } /* * Write something to the pipe but we never read so that * select to this fd will always return immediately. */ if (write(pipefd[1], "ACTIVE", 6) < 0) { cl_log(LOG_ERR, "write pipe failed"); return SA_ERR_LIBRARY; } cmsclient_hash_init(); dprintf("ch_status = %d\n", ch->ch_status); dprintf("farside_pid = %d\n", ch->farside_pid); hd = (__cms_handle_t *)malloc(sizeof(__cms_handle_t)); memset(hd, 0, sizeof(__cms_handle_t)); hd->queue_handle_hash = g_hash_table_new(g_int_hash, g_int_equal); hd->ch = ch; if (msgCallbacks) { memcpy(&(hd->callbacks), msgCallbacks, sizeof(SaMsgCallbacksT)); } else { memset(&(hd->callbacks), 0, sizeof(SaMsgCallbacksT)); } *msgHandle = __cmshandle_counter++; hd->service_handle = *msgHandle; hd->active_fd = pipefd[0]; hd->backup_fd = -1; key = (SaMsgHandleT *) malloc(sizeof(SaMsgHandleT)); key = msgHandle; g_hash_table_insert(__cmshandle_hash, key, hd); return SA_OK; } SaErrorT saMsgFinalize(SaMsgHandleT *msgHandle) { __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } g_hash_table_foreach_remove(hd->queue_handle_hash, msgqueue_remove, hd); g_hash_table_remove(__cmshandle_hash, msgHandle); if (hd->backup_fd >= 0) restore_poll(hd); hd->ch->ops->destroy(hd->ch); close(hd->active_fd); /* TODO: need to free the glist on the dispatch queue */ free(hd); return SA_OK; } SaErrorT saMsgQueueOpen(const SaMsgHandleT *msgHandle, const SaNameT *queueName, const SaMsgQueueCreationAttributesT *creationAttributes, SaMsgQueueOpenFlagsT openFlags, SaTimeT timeout, SaMsgQueueHandleT *queueHandle) { int ret; client_mqueue_open_t cmg; client_mqueue_open_t *rcmg; client_header_t * reply; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (!openFlags) return SA_ERR_BAD_FLAGS; if (bad_saname(queueName) || !queueHandle || (!creationAttributes && (openFlags & SA_MSG_QUEUE_CREATE)) || (creationAttributes && !(openFlags & SA_MSG_QUEUE_CREATE))) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUE_OPEN; cmg.header.name = *queueName; if (creationAttributes) { if ((creationAttributes->creationFlags != 0) && creationAttributes->creationFlags != SA_MSG_QUEUE_PERSISTENT) return SA_ERR_BAD_FLAGS; cmg.attr = *creationAttributes; } else { /* * else set to -1, so that daemon knows client didn't * provide a creationAttributes */ cmg.attr.creationFlags = -1; } cmg.openflag = openFlags; cmg.invocation = 0; cmg.policy = 0; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } if (openFlags & SA_MSG_QUEUE_RECEIVE_CALLBACK && !(hd->callbacks).saMsgMessageReceivedCallback) return SA_ERR_INIT; ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); dprintf("%s: cmsclient_message_send returns %d\n", __FUNCTION__, ret); /* * We should only have one client blocking for it. 
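 */

/*
 * Illustrative sketch (not part of the original file) of the "pipe trick"
 * that saMsgInitialize() and active_poll()/restore_poll() above rely on:
 * an fd that always holds one unread byte is spliced over the channel's
 * receive fd, so a caller select()ing on the selection object wakes up
 * immediately while messages are still queued inside the library.
 */
#include <unistd.h>

/* Make watched_fd look permanently readable; returns a dup of the real fd
 * so it can be put back later with dup2(backup, watched_fd). */
static int
example_make_fd_always_ready(int watched_fd)
{
	int	pipefd[2];
	int	backup;

	if (pipe(pipefd) == -1) {
		return -1;
	}
	/* one byte that is written but never read keeps the read end readable */
	if (write(pipefd[1], "A", 1) != 1) {
		return -1;
	}
	if ((backup = dup(watched_fd)) == -1) {		/* remember the real fd */
		return -1;
	}
	if (dup2(pipefd[0], watched_fd) == -1) {	/* splice in the pipe */
		return -1;
	}
	return backup;
}

/*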
*/ ret = wait_for_msg(hd, CMS_QUEUE_OPEN, queueName, &reply, timeout); if (ret != SA_OK) { return ret; } rcmg = (client_mqueue_open_t *) reply; if ((ret = (rcmg->header).flag) == SA_OK) { SaMsgQueueHandleT *key; __cms_queue_handle_t *qhd; key = (SaMsgQueueHandleT *) malloc(sizeof(SaMsgQueueHandleT)); qhd = (__cms_queue_handle_t *) malloc(sizeof(__cms_queue_handle_t)); memset(qhd, 0, sizeof(__cms_queue_handle_t)); qhd->queue_handle = rcmg->handle; qhd->queue_name = *queueName; qhd->cms_handle = hd; *key = qhd->queue_handle; g_hash_table_insert(hd->queue_handle_hash, key, qhd); g_hash_table_insert(__mqhandle_hash, key, hd); *queueHandle = *key; } free(rcmg); return ret; } SaErrorT saMsgQueueClose(SaMsgQueueHandleT *queueHandle) { int ret; client_mqueue_close_t cmg; client_header_t *rcmg; __cms_handle_t *hd = NULL; __cms_queue_handle_t *qhd; SaNameT * qname; gpointer origkey, orighd; if (g_hash_table_lookup_extended(__mqhandle_hash, queueHandle, &origkey, &orighd)) { hd = (__cms_handle_t *) orighd; }; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, queueHandle ? *queueHandle : -1); return SA_ERR_BAD_HANDLE; } qhd = g_hash_table_lookup(hd->queue_handle_hash, queueHandle); if (!qhd) { cl_log(LOG_ERR, "%s: Cannot find handlle [%d]" , __FUNCTION__, queueHandle ? *queueHandle : -1); return SA_ERR_BAD_HANDLE; } qname = &(qhd->queue_name); cmg.header.type = CMS_QUEUE_CLOSE; cmg.header.name = *qname; cmg.handle = *queueHandle; cmg.silent = FALSE; ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); ret = wait_for_msg(hd, CMS_QUEUE_CLOSE, qname, &rcmg, SA_TIME_END); if (ret != SA_OK) return ret; if ((ret = rcmg->flag) == SA_OK) { g_hash_table_remove(hd->queue_handle_hash, queueHandle); g_hash_table_remove(__mqhandle_hash, queueHandle); free(origkey); free(qhd); /* TODO: free the queue msgs. */ } free((client_mqueue_close_t *) rcmg); return ret; } static void lookup_queuehandle(gpointer key, gpointer value, gpointer user_data) { char * qname; __cms_queue_handle_t *qhd = (__cms_queue_handle_t *)value; char * name = (char *)user_data; SaMsgQueueHandleT *queueHandle = (SaMsgQueueHandleT *)key; qname = saname2str(qhd->queue_name); if (!strcmp(qname, name)) { g_hash_table_remove(qhd->cms_handle->queue_handle_hash, key); } g_hash_table_remove(__mqhandle_hash, queueHandle); } SaErrorT saMsgQueueUnlink(SaMsgHandleT *msgHandle, const SaNameT *queueName) { int ret; char * name; client_mqueue_unlink_t cmg; client_header_t *rcmg; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (bad_saname(queueName)) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUE_UNLINK; cmg.header.name = *queueName; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? 
*msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); ret = wait_for_msg(hd, CMS_QUEUE_UNLINK, queueName, &rcmg, SA_TIME_END); if (ret != SA_OK) return ret; /* * remove from the mq from queue_handle_hash if possible */ name = saname2str(*queueName); g_hash_table_foreach(hd->queue_handle_hash, lookup_queuehandle, name); ret = rcmg->flag; free((client_mqueue_unlink_t *) rcmg); free(name); return ret; } SaErrorT saMsgQueueStatusGet(SaMsgHandleT *msgHandle, const SaNameT *queueName , SaMsgQueueStatusT *queueStatus) { int ret; client_mqueue_status_t cmg; client_mqueue_status_t *rcmg; client_header_t * reply; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (bad_saname(queueName)) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUE_STATUS; cmg.header.name = *queueName; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); ret = wait_for_msg(hd, CMS_QUEUE_STATUS, queueName, &reply, SA_TIME_END); if (ret != SA_OK) return ret; rcmg = (client_mqueue_status_t *) reply; ret = reply->flag; if (ret == SA_OK) *queueStatus = rcmg->qstatus; free((client_mqueue_status_t *) reply); return ret; } SaErrorT saMsgMessageSend(const SaMsgHandleT *msgHandle, const SaNameT *destination, const SaMsgMessageT *message, SaMsgAckFlagsT ackFlags, SaTimeT timeout) { client_message_t *cmg; client_header_t *rcmg; client_message_ack_t * ack; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (message->priority > SA_MSG_MESSAGE_LOWEST_PRIORITY || !(ackFlags & SA_MSG_MESSAGE_DELIVERED_ACK)) return SA_ERR_INVALID_PARAM; if (ackFlags & ~SA_MSG_MESSAGE_DELIVERED_ACK) return SA_ERR_BAD_FLAGS; cmg = (client_message_t *) malloc(sizeof(client_message_t) + message->size); cmg->header.type = CMS_MSG_SEND; cmg->header.name = *destination; cmg->msg = *message; cmg->invocation = 0; cmg->data = cmg + 1; memcpy(cmg->data, message->data, message->size); cmg->ack = SA_MSG_MESSAGE_DELIVERED_ACK; /* according to the spec */ if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } if (ackFlags & SA_MSG_MESSAGE_DELIVERED_ACK && !(hd->callbacks).saMsgMessageDeliveredCallback) return SA_ERR_INIT; ret = cmsclient_message_send(hd, sizeof(client_message_t) + message->size, cmg); free(cmg); while (1) { ret = wait_for_msg(hd, CMS_MSG_ACK, destination, &rcmg, timeout); if (ret != SA_OK) return ret; ret = rcmg->flag; ack = (client_message_ack_t *) rcmg; /* * CMS_MSG_SEND is a blocking call, so we can only * have one client waiting for it. Thus when we get * an ACK that is for the request type CMS_MSG_SEND, * we know this is the ACK we are waiting for. 
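 */

/*
 * Illustrative sketch (not part of the original file): opening a message
 * queue and sending to it with the calls implemented in this file.  The SAF
 * header name, the SaVersionT level ('A', 1, 0) and the delivered-callback
 * signature are assumptions; the call sequence and flags follow the checks
 * in saMsgQueueOpen() and saMsgMessageSend() above.
 */
#include <string.h>
#include <saf/ais_message.h>	/* assumed header declaring the SaMsg types */

static void
example_delivered_cb(SaInvocationT invocation, SaErrorT error)
{
	(void)invocation;
	(void)error;
}

static SaErrorT
example_open_and_send(void)
{
	SaMsgHandleT			handle;
	SaMsgQueueHandleT		qhandle;
	SaMsgCallbacksT			callbacks;
	SaVersionT			version;
	SaMsgQueueCreationAttributesT	attrs;
	SaNameT				qname;
	SaMsgMessageT			msg;
	SaErrorT			rc;

	memset(&callbacks, 0, sizeof(callbacks));
	callbacks.saMsgMessageDeliveredCallback = example_delivered_cb;
	version.releaseCode = 'A';	/* assumed supported release level */
	version.major = 1;
	version.minor = 0;

	if ((rc = saMsgInitialize(&handle, &callbacks, &version)) != SA_OK) {
		return rc;
	}
	memset(&qname, 0, sizeof(qname));
	qname.length = strlen("example_q");
	memcpy(qname.value, "example_q", qname.length);

	memset(&attrs, 0, sizeof(attrs));	/* creationFlags 0: not persistent */
	rc = saMsgQueueOpen(&handle, &qname, &attrs, SA_MSG_QUEUE_CREATE,
			SA_TIME_END, &qhandle);
	if (rc != SA_OK) {
		goto out;
	}
	memset(&msg, 0, sizeof(msg));		/* priority 0 is acceptable */
	msg.data = (void *)"hello";
	msg.size = strlen("hello") + 1;
	rc = saMsgMessageSend(&handle, &qname, &msg,
			SA_MSG_MESSAGE_DELIVERED_ACK, SA_TIME_END);
out:
	saMsgFinalize(&handle);
	return rc;
}

/*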
*/ dprintf("type is %d\n", ack->send_type); if (ack->send_type == CMS_MSG_SEND) { free((client_message_t *) rcmg); return ret; } else { enqueue_dispatch_msg(hd, rcmg); } } } SaErrorT saMsgMessageSendAsync(const SaMsgHandleT *msgHandle, SaInvocationT invocation, const SaNameT *destination, const SaMsgMessageT *message, SaMsgAckFlagsT ackFlags) { client_message_t *cmg; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); cmg = (client_message_t *) malloc(sizeof(client_message_t) + message->size); cmg->header.type = CMS_MSG_SEND_ASYNC; cmg->header.name = *destination; cmg->msg = *message; cmg->invocation = invocation; cmg->data = (char *)cmg + sizeof(client_message_t); memcpy(cmg->data, message->data, message->size); cmg->ack = ackFlags; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(client_message_t) + message->size, cmg); free(cmg); return ret == IPC_OK ? SA_OK : SA_ERR_LIBRARY; } static int request_for_message(__cms_handle_t * hd, const SaNameT * name) { client_header_t request_msg; request_msg.type = CMS_MSG_REQUEST; request_msg.name = *name; return cmsclient_message_send(hd, sizeof(request_msg), &request_msg); } SaErrorT saMsgMessageGet(const SaMsgQueueHandleT *queueHandle, SaMsgMessageT *message, SaMsgMessageInfoT *messageInfo, SaTimeT timeout) { int ret; SaErrorT error = SA_OK; client_message_t * cmg; client_header_t *rcmg; __cms_handle_t *hd = GET_MQ_HANDLE(queueHandle); __cms_queue_handle_t *qhd; SaNameT * qname; int freecmg = 0; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by queueHandle [%d]" , __FUNCTION__, queueHandle ? *queueHandle : -1); return SA_ERR_BAD_HANDLE; } if (!messageInfo || !message) return SA_ERR_INVALID_PARAM; memset(messageInfo, 0, sizeof(SaMsgMessageInfoT)); qhd = g_hash_table_lookup(hd->queue_handle_hash, queueHandle); assert(qhd != NULL); qname = &(qhd->queue_name); /* * request a message from daemon */ while (1) { request_for_message(hd, qname); ret = wait_for_msg(hd, CMS_MSG_GET | CMS_MSG_NOTIFY, qname, &rcmg, timeout); if (ret != SA_OK) { cl_log(LOG_ERR, "wait_for_msg error [%d]", ret); return ret; } if (rcmg->type == CMS_MSG_NOTIFY) { dprintf("Received CMS_MSG_NOTIFY\n"); continue; } else break; } cmg = (client_message_t *)rcmg; cmg->data = (void *)((char *)cmg + sizeof(client_message_t)); dprintf("message.data is [%s]\n", (char *)cmg->data); if (cmg->senderId) { messageInfo->senderId = cmg->senderId; } if (message->size < cmg->msg.size) error = SA_ERR_NO_SPACE; message->size = (cmg->msg).size; if (message->data) { memcpy(message->data, cmg->data, (cmg->msg.size > message->size ? message->size : cmg->msg.size)); freecmg = 1; } else { message->data = cmg->data; } message->type = cmg->msg.type; message->version = cmg->msg.version; message->priority = cmg->msg.priority; if (freecmg) free(cmg); /* TODO: message info */ return error; } SaErrorT saMsgMessageReceivedGet(const SaMsgQueueHandleT *queueHandle, const SaMsgMessageHandleT *messageHandle, SaMsgMessageT *message, SaMsgMessageInfoT *messageInfo) { return SA_ERR_NOT_SUPPORTED; } SaErrorT saMsgMessageCancel(const SaMsgQueueHandleT *queueHandle) { return SA_ERR_NOT_SUPPORTED; } SaErrorT saMsgSelectionObjectGet(const SaMsgHandleT *msgHandle, SaSelectionObjectT *selectionObject) { int ret; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? 
*msgHandle : -1); return SA_ERR_BAD_HANDLE; } dprintf("hd->backup_fd is [%d]\n", hd->backup_fd); ret = hd->backup_fd >= 0 ? hd->active_fd : hd->ch->ops->get_recv_select_fd(hd->ch); if (ret < 0) return SA_ERR_LIBRARY; *selectionObject = ret; return SA_OK; } SaErrorT saMsgDispatch(const SaMsgHandleT *msgHandle, SaDispatchFlagsT dispatchFlags) { int ret; client_header_t *msg; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } switch (dispatchFlags) { case SA_DISPATCH_ONE: read_and_queue_ipc_msg(hd); if ((msg = dequeue_dispatch_msg(&hd->dispatch_queue)) == NULL) { cl_log(LOG_ERR, "%s: dequeue_dispatch_msg got NULL" , __FUNCTION__); return SA_OK; } ret = dispatch_msg(hd, msg); if (g_list_length(hd->dispatch_queue)) active_poll(hd); if (ret != HA_OK) return SA_ERR_LIBRARY; break; case SA_DISPATCH_ALL: read_and_queue_ipc_msg(hd); do { if ((msg = dequeue_dispatch_msg(&hd->dispatch_queue)) != NULL) { dispatch_msg(hd, msg); } } while (g_list_length(hd->dispatch_queue)); break; case SA_DISPATCH_BLOCKING: break; default: cl_log(LOG_ERR, "%s: wrong dispatchFlags [%d]", __FUNCTION__, dispatchFlags); return SA_ERR_INVALID_PARAM; } return SA_OK; } SaErrorT saMsgQueueOpenAsync(const SaMsgHandleT *msgHandle, SaInvocationT invocation, const SaNameT *queueName, const SaMsgQueueCreationAttributesT *creationAttributes, SaMsgQueueOpenFlagsT openFlags) { client_mqueue_open_t cmg; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (!openFlags) return SA_ERR_BAD_FLAGS; if (bad_saname(queueName) || !msgHandle || (!creationAttributes && openFlags & SA_MSG_QUEUE_CREATE) || (creationAttributes && !(openFlags & SA_MSG_QUEUE_CREATE))) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUE_OPEN_ASYNC; cmg.header.name = *queueName; if (creationAttributes) { if ((creationAttributes->creationFlags != 0) && creationAttributes->creationFlags != SA_MSG_QUEUE_PERSISTENT) return SA_ERR_BAD_FLAGS; cmg.attr = *creationAttributes; } cmg.openflag = openFlags; cmg.invocation = invocation; cmg.policy = 0; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } if (openFlags & SA_MSG_QUEUE_RECEIVE_CALLBACK && !(hd->callbacks).saMsgQueueOpenCallback && !(hd->callbacks).saMsgMessageReceivedCallback) return SA_ERR_INIT; ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); if (ret != IPC_OK) cl_log(LOG_ERR, "%s: cmsclient_message_send failed, rc = %d" , __FUNCTION__, ret); return ret == IPC_OK ? SA_OK : SA_ERR_LIBRARY; } SaErrorT saMsgQueueGroupCreate(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, SaMsgQueueGroupPolicyT queueGroupPolicy) { int ret; client_mqueue_open_t cmg; client_mqueue_open_t *rcmg; client_header_t * reply; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); cmg.header.type = CMS_QUEUEGROUP_CREATE; cmg.header.name = *queueGroupName; cmg.invocation = 0; if (queueGroupPolicy != SA_MSG_QUEUE_GROUP_ROUND_ROBIN) return SA_ERR_INVALID_PARAM; cmg.policy = queueGroupPolicy; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? 
*msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); if (ret != IPC_OK) { cl_log(LOG_ERR, "%s: cmsclient_message_send returns %d" , __FUNCTION__,ret); return SA_ERR_LIBRARY; } ret = wait_for_msg(hd, CMS_QUEUEGROUP_CREATE, queueGroupName, &reply, SA_TIME_END); if (ret != SA_OK) return ret; rcmg = (client_mqueue_open_t *) reply; if ((rcmg->header).flag == SA_OK) { SaMsgQueueHandleT *key; key = (SaMsgQueueHandleT *)malloc(sizeof(SaMsgQueueHandleT)); *key = rcmg->handle; g_hash_table_insert(__mqhandle_hash, key, hd); } ret = (rcmg->header).flag; free(rcmg); return ret; } SaErrorT saMsgQueueGroupDelete(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName) { /* TODO: we should remove the key that's in the __mqhandle_hash * as well */ return saMsgQueueUnlink(msgHandle, queueGroupName); } SaErrorT saMsgQueueGroupInsert(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, const SaNameT *queueName) { int ret; client_header_t * rcmg; client_mqgroup_ops_t cmg; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (bad_saname(queueName)) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUEGROUP_INSERT; cmg.header.name = *queueName; cmg.qgname = *queueGroupName; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); if (ret != IPC_OK) { cl_log(LOG_ERR, "%s: cmsclient_message_send returns %d" , __FUNCTION__, ret); return SA_ERR_LIBRARY; } ret = wait_for_msg(hd, CMS_QUEUEGROUP_INSERT, queueName, &rcmg, SA_TIME_END); if (ret != SA_OK) return ret; ret = rcmg->flag; free(rcmg); return ret; } SaErrorT saMsgQueueGroupRemove(SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, const SaNameT *queueName) { int ret; client_header_t * rcmg; client_mqgroup_ops_t cmg; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (bad_saname(queueName)) return SA_ERR_INVALID_PARAM; cmg.header.type = CMS_QUEUEGROUP_REMOVE; cmg.header.name = *queueName; cmg.qgname = *queueGroupName; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? *msgHandle : -1); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); if (ret != IPC_OK) { cl_log(LOG_ERR, "%s: cmsclient_message_send returns %d" , __FUNCTION__, ret); return SA_ERR_LIBRARY; } ret = wait_for_msg(hd, CMS_QUEUEGROUP_REMOVE, queueName, &rcmg, SA_TIME_END); if (ret != SA_OK) return ret; ret = rcmg->flag; free((client_mqueue_unlink_t *) rcmg); return ret; } SaErrorT saMsgQueueGroupTrack(const SaMsgHandleT *msgHandle, const SaNameT *queueGroupName, SaUint8T trackFlags, SaMsgQueueGroupNotificationBufferT *notificationBuffer) { int ret; client_mqgroup_mem_t cmg; client_header_t * rcmg; client_mqgroup_notify_t * rmsg; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? 
*msgHandle : -1); return SA_ERR_BAD_HANDLE; } if (!(hd->callbacks).saMsgQueueGroupTrackCallback) return SA_ERR_INIT; if ((trackFlags & SA_TRACK_CHANGES) && (trackFlags & SA_TRACK_CHANGES_ONLY)) return SA_ERR_BAD_FLAGS; /* tell server we care about the membership information */ cmg.header.type = CMS_QUEUEGROUP_TRACK_START; cmg.header.name = *queueGroupName; cmg.group_name = *queueGroupName; cmg.flag = trackFlags; ret = cmsclient_message_send(hd, sizeof(cmg), &cmg); if (ret != IPC_OK) { cl_log(LOG_ERR, "%s: cmsclient_message_send returns %d" , __FUNCTION__, ret); return SA_ERR_LIBRARY; } ret = wait_for_msg(hd, CMS_QUEUEGROUP_TRACK_START, queueGroupName, &rcmg, SA_TIME_END); if (ret != SA_OK) return ret; if ((ret = rcmg->flag) != SA_OK) return ret; rmsg = (client_mqgroup_notify_t *)rcmg; if ((trackFlags & SA_TRACK_CHANGES) || (trackFlags & SA_TRACK_CHANGES_ONLY)) { /* * Track membership changes with callbacks. */ __mqgroup_track_t * track; char * name; name = (char *) malloc(queueGroupName->length + 1); strncpy(name, queueGroupName->value, queueGroupName->length); name[queueGroupName->length] = '\0'; track = (__mqgroup_track_t *) malloc(sizeof(__mqgroup_track_t)); track->name = queueGroupName; track->flag = trackFlags & ~SA_TRACK_CURRENT; g_hash_table_insert(__group_tracking_hash, name, track); } if (trackFlags & SA_TRACK_CURRENT) { rmsg->data = (char *)rmsg + sizeof(client_mqgroup_notify_t); if (!notificationBuffer) { /* * Client wants saMsgQueueGroupTrackCallback. */ goto exit; } dprintf("numberOfItems %lu, real number %lu\n" , notificationBuffer->numberOfItems, rmsg->number); if (!notificationBuffer->notification) { notificationBuffer->notification = (SaMsgQueueGroupNotificationT *) malloc(rmsg->number * sizeof(SaMsgQueueGroupNotificationT)); if (!notificationBuffer->notification) { ret = SA_ERR_NO_MEMORY; goto exit; } } else if (notificationBuffer->numberOfItems < rmsg->number) { ret = SA_ERR_NO_SPACE; goto exit; } notificationBuffer->numberOfItems = rmsg->number; memcpy(notificationBuffer->notification, rmsg->data , rmsg->number * sizeof(SaMsgQueueGroupNotificationT)); } exit: free(rcmg); return ret; } SaErrorT saMsgQueueGroupTrackStop(const SaMsgHandleT *msgHandle, const SaNameT *queueGroupName) { char * name; gpointer key, track; __cms_handle_t *hd = GET_CMS_HANDLE(msgHandle); if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle ? 
*msgHandle : -1); return SA_ERR_BAD_HANDLE; } if (bad_saname(queueGroupName)) return SA_ERR_INVALID_PARAM; name = (char *) malloc(queueGroupName->length + 1); strncpy(name, queueGroupName->value, queueGroupName->length); name[queueGroupName->length] = '\0'; if (g_hash_table_lookup_extended(__group_tracking_hash, name, &key , &track) == TRUE) { g_hash_table_remove(__group_tracking_hash, key); free(key); } else return SA_ERR_NOT_EXIST; free(name); return SA_OK; } SaErrorT saMsgMessageSendReceive(SaMsgHandleT msgHandle, const SaNameT *destination, const SaMsgMessageT *sendMessage, SaMsgMessageT *receiveMessage, SaTimeT *replySendTime, SaTimeT timeout) { SaErrorT error = SA_OK; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(&msgHandle); const SaMsgMessageT * message; client_message_t *cmg; client_header_t *rcmg; client_message_t * ack; int freeack = 0; message = sendMessage; if (message->priority > SA_MSG_MESSAGE_LOWEST_PRIORITY) return SA_ERR_INVALID_PARAM; if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle); return SA_ERR_BAD_HANDLE; } cmg = (client_message_t *) malloc(sizeof(client_message_t) + message->size); cmg->header.type = CMS_MSG_SEND_RECEIVE; cmg->header.name = *destination; cmg->msg = *message; cmg->invocation = 0; cmg->data = cmg + 1; cmg->sendreceive = 1; memcpy(cmg->data, message->data, message->size); cmg->ack = SA_MSG_MESSAGE_DELIVERED_ACK; /* according to the spec */ ret = cmsclient_message_send(hd, sizeof(client_message_t) + message->size, cmg); /* TODO: fix needed. this can only be called after the msg_done */ free(cmg); while (1) { ret = wait_for_msg(hd, CMS_MSG_RECEIVE, NULL, &rcmg, timeout); if (ret != SA_OK) return ret; else break; } ret = rcmg->flag; ack = (client_message_t *) rcmg; ack->data = (void *)((char *)cmg + sizeof(client_message_t)); if (ack->msg.size > receiveMessage->size) { error = SA_ERR_NO_SPACE; } receiveMessage->size = ack->msg.size; if (receiveMessage->data) { memcpy(receiveMessage->data, ack->data, (ack->msg.size > receiveMessage->size ? receiveMessage->size : ack->msg.size)); freeack = 1; } else { receiveMessage->data = ack->data; } receiveMessage->type = ack->msg.type; receiveMessage->version = ack->msg.version; receiveMessage->priority = 0; if (freeack) free(ack); dprintf("type is %d\n", ack->send_type); return error; } SaErrorT saMsgMessageReply(SaMsgHandleT msgHandle, const SaMsgMessageT *replyMessage, const SaMsgSenderIdT *senderId, SaTimeT timeout) { client_message_t *cmg; client_header_t *rcmg; client_message_ack_t * ack; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(&msgHandle); cmg = (client_message_t *) malloc(sizeof(client_message_t) + replyMessage->size); cmg->header.type = CMS_MSG_REPLY; cmg->header.name.length = 0; cmg->msg = *replyMessage; cmg->invocation = 0; cmg->data = cmg + 1; memcpy(cmg->data, replyMessage->data, replyMessage->size); cmg->ack = SA_MSG_MESSAGE_DELIVERED_ACK; /* according to the spec */ if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(client_message_t) + replyMessage->size, cmg); free(cmg); while (1) { ret = wait_for_msg(hd, CMS_MSG_ACK, NULL, &rcmg, timeout); if (ret != SA_OK) return ret; ret = rcmg->flag; ack = (client_message_ack_t *) rcmg; /* * CMS_MSG_SEND is a blocking call, so we can only * have one client waiting for it. Thus when we get * an ACK that is for the request type CMS_MSG_SEND, * we know this is the ACK we are waiting for. 
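 */

/*
 * Illustrative sketch (not part of the original file) of the asynchronous
 * side of this API: fetch the selection object, select() on it, and let
 * saMsgDispatch() run the registered callbacks.  Because of the pipe trick
 * above, the fd also reads as ready while undelivered messages still sit in
 * the client's dispatch queue.  Header name assumed, as in the earlier sketch.
 */
#include <sys/select.h>
#include <saf/ais_message.h>	/* assumed header declaring the SaMsg types */

static SaErrorT
example_dispatch_loop(SaMsgHandleT *handle)
{
	SaSelectionObjectT	so;
	SaErrorT		rc;
	fd_set			rset;
	int			fd;

	if ((rc = saMsgSelectionObjectGet(handle, &so)) != SA_OK) {
		return rc;
	}
	fd = (int)so;	/* in this library the selection object is a file descriptor */

	for (;;) {
		FD_ZERO(&rset);
		FD_SET(fd, &rset);
		if (select(fd + 1, &rset, NULL, NULL, NULL) < 0) {
			return SA_ERR_LIBRARY;
		}
		/* run every callback that has a message queued for it */
		if ((rc = saMsgDispatch(handle, SA_DISPATCH_ALL)) != SA_OK) {
			return rc;
		}
	}
}

/*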
*/ dprintf("type is %d\n", ack->send_type); if (ack->send_type == CMS_MSG_REPLY) { free((client_message_t *) rcmg); return ret; } else { enqueue_dispatch_msg(hd, rcmg); } } return SA_ERR_NOT_SUPPORTED; } SaErrorT saMsgMessageReplyAsync(SaMsgHandleT msgHandle, SaInvocationT invocation, const SaMsgMessageT *replyMessage, const SaMsgSenderIdT *senderId, SaMsgAckFlagsT ackFlags) { client_message_t *cmg; int ret; __cms_handle_t *hd = GET_CMS_HANDLE(&msgHandle); cmg = (client_message_t *) malloc(sizeof(client_message_t) + replyMessage->size); cmg->header.type = CMS_MSG_REPLY_ASYNC; cmg->header.name.length = 0; cmg->msg = *replyMessage; cmg->invocation = 0; cmg->data = cmg + 1; memcpy(cmg->data, replyMessage->data, replyMessage->size); cmg->ack = SA_MSG_MESSAGE_DELIVERED_ACK; /* according to the spec */ if (hd == NULL) { cl_log(LOG_ERR, "%s: Cannot find hd by handlle [%d]" , __FUNCTION__, msgHandle); return SA_ERR_BAD_HANDLE; } ret = cmsclient_message_send(hd, sizeof(client_message_t) + replyMessage->size, cmg); free(cmg); return ret == IPC_OK ? SA_OK : SA_ERR_LIBRARY; } Heartbeat-3-0-7e3a82377fa8/lib/cmsclient/cmslib_client.h0000644000000000000000000000336111576626513022711 0ustar00usergroup00000000000000/* * cmslib_client.h: SAForum AIS Message Service client library header * * Copyright (c) 2004 Intel Corp. * * Author: Zou Yixiong (yixiong.zou@intel.com) * Author: Zhu Yi (yi.zhu@intel.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * */ #ifndef __CMSLIB_CLIENT_H__ #define __CMSLIB_CLIENT_H__ #include #define CMS_LIBRARY_TRACE() dprintf("TRACE: %s\n", __FUNCTION__) typedef struct { IPC_Channel * ch; /* client daemon channel */ int active_fd; /* make select always returns */ int backup_fd; /* backup fd vs. active_fd */ SaMsgHandleT service_handle; SaMsgCallbacksT callbacks; /* client's callback func */ GList * dispatch_queue; /* client's dispatch queue */ GHashTable * queue_handle_hash; } __cms_handle_t; typedef struct { SaMsgQueueHandleT queue_handle; SaNameT queue_name; __cms_handle_t * cms_handle; } __cms_queue_handle_t; typedef struct { const SaNameT * name; SaUint8T flag; SaMsgQueueGroupPolicyT policy; SaMsgQueueGroupNotificationBufferT buf; } __mqgroup_track_t; #endif /* __CMSLIB_CLIENT_H__ */ Heartbeat-3-0-7e3a82377fa8/lib/hbclient/Makefile.am0000644000000000000000000000342011576626513021570 0ustar00usergroup00000000000000# # hbclient library: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # Copyright (C) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl hadir = $(sysconfdir)/ha.d halibdir = $(libdir)/@HB_PKG@ commmoddir = $(halibdir)/modules/comm havarlibdir = $(localstatedir)/lib/@HB_PKG@ havarrundir = $(localstatedir)/run/ apidir = $(havarlibdir)/api apicasualdir = $(havarlibdir)/casual # fifos with path hafifo = $(havarlibdir)/fifo apigid = @HA_APIGID@ gliblib = @GLIBLIB@ LIBRT = @LIBRT@ AM_CFLAGS = @CFLAGS@ ## libraries lib_LTLIBRARIES = libhbclient.la libhbclient_la_SOURCES = client_lib.c libhbclient_la_LDFLAGS = -version-info 1:0:0 libhbclient_la_LIBADD = $(top_builddir)/replace/libreplace.la ## binary progs halib_PROGRAMS = api_test api_test_SOURCES = api_test.c api_test_LDADD = -lplumb \ libhbclient.la $(gliblib) \ -lpils Heartbeat-3-0-7e3a82377fa8/lib/hbclient/api_test.c0000644000000000000000000002051611576626513021515 0ustar00usergroup00000000000000/* * api_test: Test program for testing the heartbeat API * * Copyright (C) 2000 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * A heartbeat API test program... 
*/ void NodeStatus(const char * node, const char * status, void * private); void LinkStatus(const char * node, const char *, const char *, void*); void ClientStatus(const char * node, const char *, const char *, void*); void gotsig(int nsig); void NodeStatus(const char * node, const char * status, void * private) { cl_log(LOG_NOTICE, "Status update: Node %s now has status %s" , node, status); } void LinkStatus(const char * node, const char * lnk, const char * status , void * private) { cl_log(LOG_NOTICE, "Link Status update: Link %s/%s now has status %s" , node, lnk, status); } void ClientStatus(const char * node, const char * client, const char * status , void * private) { cl_log(LOG_NOTICE, "Status update: Client %s/%s now has status [%s]" , node, client, status); } static int quitnow; void gotsig(int nsig) { (void)nsig; quitnow = 1; } static const char *mandparms[] = { KEY_HBVERSION , KEY_HOPS , KEY_KEEPALIVE , KEY_DEADTIME , KEY_DEADPING , KEY_WARNTIME , KEY_INITDEAD , KEY_BAUDRATE , KEY_UDPPORT , KEY_AUTOFAIL , KEY_GEN_METH , KEY_REALTIME , KEY_DEBUGLEVEL , KEY_NORMALPOLL}; static const char *optparms[] = { KEY_LOGFILE , KEY_DBGFILE , KEY_FACILITY , KEY_RT_PRIO , KEY_WATCHDOG}; int main(int argc, char ** argv) { struct ha_msg* reply; struct ha_msg* pingreq = NULL; unsigned fmask; ll_cluster_t* hb; const char * node; const char * intf; int msgcount=0; char * ctmp; const char * cval; int j; const char * cstatus; int timeout = 100; /* milliseconds */ cl_log_set_entity(argv[0]); cl_log_enable_stderr(TRUE); cl_log_set_facility(LOG_USER); hb = ll_cluster_new("heartbeat"); cl_log(LOG_INFO, "PID=%ld", (long)getpid()); cl_log(LOG_INFO, "Signing in with heartbeat"); if (hb->llc_ops->signon(hb, "ping")!= HA_OK) { cl_log(LOG_ERR, "Cannot sign on with heartbeat"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(1); } if (hb->llc_ops->set_nstatus_callback(hb, NodeStatus, NULL) !=HA_OK){ cl_log(LOG_ERR, "Cannot set node status callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(2); } if (hb->llc_ops->set_ifstatus_callback(hb, LinkStatus, NULL)!=HA_OK){ cl_log(LOG_ERR, "Cannot set if status callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(3); } if (hb->llc_ops->set_cstatus_callback(hb, ClientStatus, NULL)!=HA_OK){ cl_log(LOG_ERR, "Cannot set client status callback"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(4); } /* Async get client status information in the cluster */ hb->llc_ops->client_status(hb, NULL, NULL, -1); #if 0 fmask = LLC_FILTER_RAW; #else fmask = LLC_FILTER_DEFAULT; #endif /* This isn't necessary -- you don't need this call - it's just for testing... */ cl_log(LOG_INFO, "Setting message filter mode"); if (hb->llc_ops->setfmode(hb, fmask) != HA_OK) { cl_log(LOG_ERR, "Cannot set filter mode"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(4); } for (j=0; j < DIMOF(mandparms); ++j) { if ((ctmp = hb->llc_ops->get_parameter(hb, mandparms[j])) != NULL) { cl_log(LOG_INFO, "Parameter %s is [%s]" , mandparms[j] , ctmp); free(ctmp); ctmp = NULL; }else{ cl_log(LOG_ERR, "Mandantory Parameter %s is not available!" 
, mandparms[j]); } } for (j=0; j < DIMOF(optparms); ++j) { if ((ctmp = hb->llc_ops->get_parameter(hb, optparms[j])) != NULL) { cl_log(LOG_INFO, "Optional Parameter %s is [%s]" , optparms[j] , ctmp); free(ctmp); ctmp = NULL; } } if ((cval = hb->llc_ops->get_resources(hb)) == NULL) { cl_perror("Cannot get resource status"); cl_log(LOG_ERR, "REASON: %s" , hb->llc_ops->errmsg(hb)); }else{ cl_log(LOG_INFO, "Current resource status: %s", cval); } cl_log(LOG_INFO, "Starting node walk"); if (hb->llc_ops->init_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot start node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(5); } while((node = hb->llc_ops->nextnode(hb))!= NULL) { cl_log(LOG_INFO, "Cluster node: %s: status: %s", node , hb->llc_ops->node_status(hb, node)); if (hb->llc_ops->init_ifwalk(hb, node) != HA_OK) { cl_log(LOG_ERR, "Cannot start if walk"); cl_log(LOG_ERR, "REASON: %s" , hb->llc_ops->errmsg(hb)); exit(6); } while ((intf = hb->llc_ops->nextif(hb))) { cl_log(LOG_INFO, "\tnode %s: intf: %s ifstatus: %s" , node, intf , hb->llc_ops->if_status(hb, node, intf)); } if (hb->llc_ops->end_ifwalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end if walk"); cl_log(LOG_ERR, "REASON: %s" , hb->llc_ops->errmsg(hb)); exit(7); } cstatus = hb->llc_ops->client_status(hb, node, "ping", timeout); cl_log(LOG_INFO, "%s/api_test status: [%s]", node , cstatus == NULL ? "timeout" : cstatus); } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(8); } CL_SIGINTERRUPT(SIGINT, 1); CL_SIGNAL(SIGINT, gotsig); #if 0 /* This is not necessary either ;-) */ cl_log(LOG_INFO, "Setting message signal"); if (hb->llc_ops->setmsgsignal(hb, 0) != HA_OK) { cl_log(LOG_ERR, "Cannot set message signal"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(9); } #endif pingreq = ha_msg_new(0); ha_msg_add(pingreq, F_TYPE, "ping"); cl_log(LOG_INFO, "Sleeping..."); sleep(5); if (hb->llc_ops->sendclustermsg(hb, pingreq) == HA_OK) { cl_log(LOG_INFO, "Sent ping request to cluster"); }else{ cl_log(LOG_ERR, "PING request FAIL to cluster"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); } cl_log(LOG_INFO, "Waiting for messages..."); errno = 0; for(; !quitnow && (reply=hb->llc_ops->readmsg(hb, 1)) != NULL;) { const char * type; const char * orig; ++msgcount; if ((type = ha_msg_value(reply, F_TYPE)) == NULL) { type = "?"; } if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { orig = "?"; } cl_log(LOG_NOTICE, "Got message %d of type [%s] from [%s]" , msgcount, type, orig); if (strcasecmp(type, T_APICLISTAT) == 0) { cl_log_message(LOG_NOTICE, reply); cl_log(LOG_NOTICE, "%s", hb->llc_ops->errmsg(hb)); } #if 0 else { cl_log_message(LOG_NOTICE, reply); cl_log(LOG_NOTICE, "%s", hb->llc_ops->errmsg(hb)); } #endif if (strcmp(type, "ping") ==0) { struct ha_msg* pingreply = ha_msg_new(4); int count; ha_msg_add(pingreply, F_TYPE, "pingreply"); for (count=0; count < 10; ++count) { if (hb->llc_ops->sendnodemsg(hb, pingreply, orig) == HA_OK) { cl_log(LOG_INFO , "Sent ping reply(%d) to [%s]" , count, orig); }else{ cl_log(LOG_ERR, "PING %d FAIL to [%s]" , count, orig); } } ha_msg_del(pingreply); pingreply=NULL; } ha_msg_del(reply); reply=NULL; } if (!quitnow) { cl_log(LOG_ERR, "read_hb_msg returned NULL"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); } if (hb->llc_ops->signoff(hb, TRUE) != HA_OK) { cl_log(LOG_ERR, "Cannot sign off from heartbeat."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(10); } if 
(hb->llc_ops->delete(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot delete API object."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(11); } return 0; } Heartbeat-3-0-7e3a82377fa8/lib/hbclient/client_lib.c0000644000000000000000000022544211576626513022016 0ustar00usergroup00000000000000/* * client_lib: heartbeat API client side code * * Copyright (C) 2000 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Here's our approach: * * Have each application connect to heartbeat via our IPC layer. * This IPC layer currently uses sockets and provides * a suitable authorization API. * * We can validate permissions for "sniffing" using the builtin * IPC authorization API. * * This code thankfully no longer uses FIFOs. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define CLIENTID_MAXLEN 36 struct sys_config * config = NULL; int netstring_format = TRUE; struct stringlist { char * value; struct stringlist * next; }; /* * Queue of messages to be read later... */ struct MsgQueue { struct ha_msg * value; struct MsgQueue * next; struct MsgQueue * prev; }; typedef struct gen_callback { char * msgtype; llc_msg_callback_t cf; void * pd; struct gen_callback* next; }gen_callback_t; #define MXFIFOPATH 128 #define HBPREFIX "LCK..hbapi:" /* Order sequence */ typedef struct order_seq { char to_node[HOSTLENG]; seqno_t seqno; struct order_seq * next; }order_seq_t; /* Order Queue */ struct orderQ { struct ha_msg * orderQ[MAXMSGHIST]; int curr_index; seqno_t curr_oseqno; seqno_t curr_gen; seqno_t curr_client_gen; seqno_t first_msg_seq; seqno_t first_msg_gen; seqno_t first_msg_client_gen; struct orderQ *backupQ; }; typedef struct order_queue { char from_node[HOSTLENG]; struct orderQ node; struct orderQ cluster; struct order_queue* next; struct ha_msg* leave_msg; int client_leaving; }order_queue_t; /* * Our heartbeat private data */ typedef struct llc_private { const char * PrivateId; /* A "magic cookie */ llc_nstatus_callback_t node_callback; /* Node status callback fcn */ void* node_private; /* node status callback data*/ llc_ifstatus_callback_t if_callback; /* IF status callback fcn */ void* if_private; /* IF status callback data */ llc_cstatus_callback_t cstatus_callback;/*Client status callback fcn */ void* client_private; /* client status callback data*/ struct gen_callback* genlist; /* List of general callbacks*/ IPC_Channel* chan; /* IPC communication channel*/ struct stringlist * nodelist; /* List of nodes from query */ struct stringlist * iflist; /* List of IFs from query */ int SignedOn; /* 1 if we're signed on */ int iscasual; /* 1 if casual client */ long deadtime_ms; /* heartbeat's deadtime */ long keepalive_ms; /* HB's keepalive time*/ int logfacility; /* HB's logging facility */ struct stringlist* 
nextnode; /* Next node for walknode */ struct stringlist* nextif; /* Next interface for walkif*/ /* Messages to be read after current call completes */ struct MsgQueue * firstQdmsg; struct MsgQueue * lastQdmsg; /* The next two items are for ordered message delivery */ order_seq_t order_seq_head; /* head of order_seq list */ order_queue_t* order_queue_head;/* head of order queue */ }llc_private_t; static const char * OurID = "Heartbeat private data"; /* "Magic cookie" */ #define ISOURS(l) (l && l->ll_cluster_private && \ (((llc_private_t*)(l->ll_cluster_private))->PrivateId) == OurID) #define DEBUGORDER 0 static void ClearLog(void); /* Common code for request messages */ static struct ha_msg* hb_api_boilerplate(const char * apitype); static int hb_api_signon(struct ll_cluster*, const char * clientid); static int hb_api_signoff(struct ll_cluster*, gboolean); static int hb_api_setfilter(struct ll_cluster*, unsigned); static void destroy_stringlist(struct stringlist *); static struct stringlist* new_stringlist(const char *); static int get_nodelist(llc_private_t*); static void zap_nodelist(llc_private_t*); static int get_iflist(llc_private_t*, const char *host); static void zap_iflist(llc_private_t*); static void zap_order_seq(llc_private_t* pi); static void zap_order_queue(llc_private_t* pi); static void zap_msg_queue(llc_private_t* pi); static int enqueue_msg(llc_private_t*,struct ha_msg*); static struct ha_msg* dequeue_msg(llc_private_t*); static gen_callback_t* search_gen_callback(const char * type, llc_private_t*); static int add_gen_callback(const char * msgtype , llc_private_t*, llc_msg_callback_t, void*); static int del_gen_callback(llc_private_t*, const char * msgtype); static struct ha_msg* read_api_msg(llc_private_t*); static struct ha_msg* read_cstatus_respond_msg(llc_private_t*pi, int timeout); static struct ha_msg* read_hb_msg(ll_cluster_t*, int blocking); static int hb_api_setsignal(ll_cluster_t*, int nsig); static int set_msg_callback (ll_cluster_t*, const char * msgtype , llc_msg_callback_t callback, void * p); static int set_nstatus_callback (ll_cluster_t* , llc_nstatus_callback_t cbf, void * p); static int set_cstatus_callback (ll_cluster_t* , llc_cstatus_callback_t cbf, void * p); static int set_ifstatus_callback (ll_cluster_t* ci , llc_ifstatus_callback_t cbf, void * p); static int init_nodewalk (ll_cluster_t*); static const char * nextnode (ll_cluster_t* ci); static int init_ifwalk (ll_cluster_t* ci, const char * host); static const char * get_nodestatus(ll_cluster_t*, const char *host); static int get_nodeweight(ll_cluster_t*, const char *host); static const char * get_nodesite(ll_cluster_t*, const char *host); static const char * get_clientstatus(ll_cluster_t*, const char *host, const char *clientid , int timeout); static const char * get_nodetype(ll_cluster_t*, const char *host); static const char * get_ifstatus(ll_cluster_t*, const char *host , const char * intf); static char * get_parameter(ll_cluster_t*, const char* pname); static const char * get_resources(ll_cluster_t*); static int get_inputfd(ll_cluster_t*); static IPC_Channel* get_ipcchan(ll_cluster_t*); static int msgready(ll_cluster_t*); static int setfmode(ll_cluster_t*, unsigned mode); static int sendclustermsg(ll_cluster_t*, struct ha_msg* msg); static int sendnodemsg(ll_cluster_t*, struct ha_msg* msg , const char * nodename); STATIC order_seq_t* add_order_seq(llc_private_t*, struct ha_msg* msg); static int send_ordered_clustermsg(ll_cluster_t* lcl, struct ha_msg* msg); static int 
send_ordered_nodemsg(ll_cluster_t* lcl, struct ha_msg* msg , const char * nodename); static const char * APIError(ll_cluster_t*); static int CallbackCall(llc_private_t* p, struct ha_msg * msg); static struct ha_msg * read_msg_w_callbacks(ll_cluster_t* llc, int blocking); static int rcvmsg(ll_cluster_t* llc, int blocking); volatile struct process_info * curproc = NULL; static char OurPid[16]; static char OurClientID[CLIENTID_MAXLEN]; static char OurNode[SYS_NMLN]; static ll_cluster_t* hb_cluster_new(void); static void ha_api_perror(const char * fmt, ...) G_GNUC_PRINTF(1,2); static void ha_api_log(int priority, const char * fmt, ...) G_GNUC_PRINTF(2,3); static int get_num_nodes(ll_cluster_t* lcl); #define ZAPMSG(m) {ha_msg_del(m); (m) = NULL;} /* * All the boilerplate common to creating heartbeat API request * messages. */ static struct ha_msg* hb_api_boilerplate(const char * apitype) { struct ha_msg* msg; if ((msg = ha_msg_new(4)) == NULL) { ha_api_log(LOG_ERR, "boilerplate: out of memory"); return msg; } /* Message type: API request */ if (ha_msg_add(msg, F_TYPE, T_APIREQ) != HA_OK) { ha_api_log(LOG_ERR, "boilerplate: cannot add F_TYPE field"); ZAPMSG(msg); return msg; } /* Add field for API request type */ if (ha_msg_add(msg, F_APIREQ, apitype) != HA_OK) { ha_api_log(LOG_ERR, "boilerplate: cannot add F_APIREQ field"); ZAPMSG(msg); return msg; } /* Add field for destination */ if (ha_msg_add(msg, F_TO, OurNode) != HA_OK) { ha_api_log(LOG_ERR, "boilerplate: cannot add F_TO field"); ZAPMSG(msg); return msg; } /* Add our PID to the message */ if (ha_msg_add(msg, F_PID, OurPid) != HA_OK) { ha_api_log(LOG_ERR, "boilerplate: cannot add F_PID field"); ZAPMSG(msg); return msg; } /* Add our client ID to the message */ if (ha_msg_add(msg, F_FROMID, OurClientID) != HA_OK) { ha_api_log(LOG_ERR, "boilerplate: cannot add F_FROMID field"); ZAPMSG(msg); return msg; } return(msg); } /* * Sign ourselves on as a heartbeat client process. 
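 *
 * A typical client first obtains the API object with
 * ll_cluster_new("heartbeat") and then signs on through the llc_ops
 * vector before making any other call.  The sketch below is purely
 * illustrative; the exact llc_ops member names come from the public
 * header and are assumed here rather than quoted from it:
 *
 *	ll_cluster_t *hb = ll_cluster_new("heartbeat");
 *	if (hb == NULL || hb->llc_ops->signon(hb, "myclient") != HA_OK) {
 *		cl_log(LOG_ERR, "cannot sign on to heartbeat");
 *	}
 *
 * On failure the accumulated error text is available through
 * llc_ops->errmsg(), as the test client above does.  Passing a NULL
 * client id signs us on as a "casual" (anonymous) client, which may
 * query status but is refused by the send routines further down.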
*/ static int hb_api_signon(struct ll_cluster* cinfo, const char * clientid) { struct ha_msg* request; struct ha_msg* reply; struct utsname un; int rc; const char * result; int iscasual; llc_private_t* pi; const char *tmpstr; char regpath[] = API_REGSOCK; char path[] = IPC_PATH_ATTR; GHashTable* wchanattrs; char cuid[20]; char cgid[20]; if (!ISOURS(cinfo)) { ha_api_log(LOG_ERR, "hb_api_signon: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)cinfo->ll_cluster_private; /* Re-sign ourselves back on */ if (pi->SignedOn) { hb_api_signoff(cinfo, FALSE); } snprintf(OurPid, sizeof(OurPid), "%d", getpid()); /* Create our client id */ if (clientid != NULL) { strncpy(OurClientID, clientid, CLIENTID_MAXLEN); iscasual = 0; }else{ strncpy(OurClientID, OurPid, CLIENTID_MAXLEN); iscasual = 1; } pi->iscasual = iscasual; if (uname(&un) < 0) { ha_api_perror("uname failure"); return HA_FAIL; } memset(OurNode, 0, sizeof(OurNode)); strncpy(OurNode, un.nodename, sizeof(OurNode)-1); g_strdown(OurNode); /* Initialize order_seq_head */ pi->order_seq_head.seqno = 1; pi->order_seq_head.to_node[0] = '\0'; pi->order_seq_head.next = NULL; /* Initialize order_queue_head */ pi->order_queue_head = NULL; /* Crank out the boilerplate */ if ((request = hb_api_boilerplate(API_SIGNON)) == NULL) { return HA_FAIL; } snprintf(cuid, sizeof(cuid)-1, "%ld", (long)geteuid()); /* Add our UID to the message */ if (ha_msg_add(request, F_UID, cuid) != HA_OK) { ha_api_log(LOG_ERR, "hb_api_signon: cannot add F_UID field"); ZAPMSG(request); return HA_FAIL; } snprintf(cgid, sizeof(cgid)-1, "%ld", (long)getegid()); /* Add our GID to the message */ if (ha_msg_add(request, F_GID, cgid) != HA_OK) { ha_api_log(LOG_ERR, "hb_api_signon: cannot add F_GID field"); ZAPMSG(request); return HA_FAIL; } wchanattrs = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_insert(wchanattrs, path, regpath); /* Connect to the heartbeat API server */ pi->chan = ipc_channel_constructor(IPC_ANYTYPE, wchanattrs); g_hash_table_destroy(wchanattrs); if (pi->chan == NULL) { ha_api_log(LOG_ERR, "hb_api_signon: Can't connect" " to heartbeat"); ZAPMSG(request); return HA_FAIL; } pi->chan->should_send_block = TRUE; pi->chan->refcount++; if (pi->chan->ops->initiate_connection(pi->chan) != IPC_OK) { ha_api_log(LOG_ERR, "hb_api_signon: Can't initiate" " connection to heartbeat"); ZAPMSG(request); return HA_FAIL; } /* Send the registration request message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { pi->chan->ops->destroy(pi->chan); pi->chan = NULL; ha_api_perror("can't send message to IPC"); ZAPMSG(request); return HA_FAIL; } ZAPMSG(request); pi->chan->ops->waitout(pi->chan); /* Read the reply... 
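 * A successful reply carries F_APIRESULT == API_OK together with the
 * cluster's deadtime (F_DEADTIME) and keepalive (F_KEEPALIVE) values in
 * hex milliseconds, our canonical node name (F_NODENAME) and, when
 * syslog logging is configured, the suggested facility (F_LOGFACILITY);
 * all of these are parsed into the llc_private_t below.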
*/ if ((reply=read_api_msg(pi)) == NULL) { return HA_FAIL; } /* Get the return code */ if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0) { rc = HA_OK; pi->SignedOn = TRUE; if ((tmpstr = ha_msg_value(reply, F_DEADTIME)) == NULL || sscanf(tmpstr, "%lx", (unsigned long*)&(pi->deadtime_ms)) != 1) { ha_api_log(LOG_ERR , "hb_api_signon: Can't get deadtime "); ZAPMSG(reply); return HA_FAIL; } if ((tmpstr = ha_msg_value(reply, F_KEEPALIVE)) == NULL || sscanf(tmpstr, "%lx", (unsigned long*)&(pi->keepalive_ms)) != 1) { ha_api_log(LOG_ERR , "hb_api_signon: Can't get keepalive time "); ZAPMSG(reply); return HA_FAIL; } if ((tmpstr = ha_msg_value(reply, F_NODENAME)) == NULL || strlen(tmpstr) >= sizeof(OurNode)) { ha_api_log(LOG_ERR , "hb_api_signon: Can't get local node name"); ZAPMSG(reply); return HA_FAIL; }else{ strncpy(OurNode, tmpstr, sizeof(OurNode)-1); OurNode[sizeof(OurNode)-1] = EOS; } /* Sometimes they don't use syslog logging... */ tmpstr = ha_msg_value(reply, F_LOGFACILITY); if (tmpstr == NULL || sscanf(tmpstr, "%d", &(pi->logfacility)) != 1) { pi->logfacility = -1; } }else{ rc = HA_FAIL; } ZAPMSG(reply); return rc; } /* * Sign off (disconnect) as a heartbeat client process. */ static int hb_api_signoff(struct ll_cluster* cinfo,gboolean need_destroy_chan) { struct ha_msg* request; llc_private_t* pi; if (!ISOURS(cinfo)) { ha_api_log(LOG_ERR, "hb_api_signoff: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)cinfo->ll_cluster_private; if (debug_level > 1) { cl_log(LOG_DEBUG, "%s(%d){" , __FUNCTION__, need_destroy_chan); } if (pi->SignedOn && pi->chan && IPC_ISWCONN(pi->chan)) { if ((request = hb_api_boilerplate(API_SIGNOFF)) == NULL) { ha_api_log(LOG_ERR, "hb_api_signoff: can't create msg"); return HA_FAIL; } /* Send the message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("can't send message to IPC"); return HA_FAIL; } pi->chan->ops->waitout(pi->chan); ZAPMSG(request); } OurClientID[0] = EOS; if(pi->chan) { if (need_destroy_chan) { pi->chan->ops->destroy(pi->chan); pi->chan = NULL; } else if (IPC_ISRCONN(pi->chan)) { pi->chan->ops->disconnect(pi->chan); } } pi->SignedOn = FALSE; zap_order_seq(pi); zap_order_queue(pi); if (debug_level > 1) { cl_log(LOG_DEBUG, "}/*%s(%d)*/", __FUNCTION__ , need_destroy_chan); } return HA_OK; } /* * delete: destroy the heartbeat API object */ static int hb_api_delete(struct ll_cluster* ci) { llc_private_t* pi; if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "hb_api_delete: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; /* Sign off */ hb_api_signoff(ci, TRUE); /* Free up interface and node lists */ zap_iflist(pi); zap_nodelist(pi); /* Free up the message queue */ zap_msg_queue(pi); /* Free up the private information */ memset(pi, 0, sizeof(*pi)); free(pi); /* Free up the generic (llc) information */ memset(ci, 0, sizeof(*ci)); free(ci); return HA_OK; } /* * Set message filter mode. 
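 *
 * The mask is passed to heartbeat as a hex string in the F_FILTERMASK
 * field; see setfmode() further down for the mapping from the public
 * LLC_FILTER_* modes to the raw treatment bits used here.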
*/ static int hb_api_setfilter(struct ll_cluster* ci, unsigned fmask) { struct ha_msg* request; struct ha_msg* reply; int rc; const char * result; char filtermask[32]; llc_private_t* pi; if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "hb_api_setfilter: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_SETFILTER)) == NULL) { ha_api_log(LOG_ERR, "hb_api_setfilter: can't create msg"); return HA_FAIL; } /* Format the filtermask information in hex */ snprintf(filtermask, sizeof(filtermask), "%x", fmask); if (ha_msg_add(request, F_FILTERMASK, filtermask) != HA_OK) { ha_api_log(LOG_ERR, "hb_api_setfilter: cannot add field/2"); ZAPMSG(request); return HA_FAIL; } /* Send the message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("can't send message to IPC"); return HA_FAIL; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return HA_FAIL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0) { rc = HA_OK; }else{ rc = HA_FAIL; } ZAPMSG(reply); return rc; } /* * Set signal for message notification. * This is not believed to be a security hole :-) */ static int hb_api_setsignal(ll_cluster_t* lcl, int nsig) { struct ha_msg* request; struct ha_msg* reply; int rc; const char * result; char csignal[32]; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "hb_api_setsignal: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_SETSIGNAL)) == NULL) { ha_api_log(LOG_ERR, "hb_api_setsignal: can't create msg"); return HA_FAIL; } snprintf(csignal, sizeof(csignal), "%d", nsig); if (ha_msg_add(request, F_SIGNAL, csignal) != HA_OK) { ha_api_log(LOG_ERR, "hb_api_setsignal: cannot add field/2"); ZAPMSG(request); return HA_FAIL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ha_api_perror("can't send message to IPC Channel"); ZAPMSG(request); return HA_FAIL; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return HA_FAIL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0) { rc = HA_OK; }else{ rc = HA_FAIL; } ZAPMSG(reply); return rc; } /* * Retrieve the list of nodes in the cluster. 
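 *
 * The server streams one node name per reply: intermediate replies
 * carry an F_APIRESULT of API_MORE and the final one carries API_OK,
 * so the loop below keeps collecting F_NODENAME values until it sees
 * API_OK (or something goes wrong).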
*/ static int get_nodelist(llc_private_t* pi) { struct ha_msg* request; struct ha_msg* reply; const char * result = NULL; struct stringlist* sl; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_NODELIST)) == NULL) { ha_api_log(LOG_ERR, "get_nodelist: can't create msg"); return HA_FAIL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("can't send message to IPC Channel"); return HA_FAIL; } ZAPMSG(request); /* Loop as long as we get an API_MORE result */ /* The final node will (hopefully) have an API_OK result type */ while ((reply=read_api_msg(pi)) != NULL && (result = ha_msg_value(reply, F_APIRESULT)) != NULL && (strcmp(result, API_MORE) == 0 || strcmp(result, API_OK) == 0) && (sl = new_stringlist(ha_msg_value(reply, F_NODENAME))) != NULL){ sl->next = pi->nodelist; pi->nodelist = sl; if (strcmp(result, API_OK) == 0) { pi->nextnode = pi->nodelist; ZAPMSG(reply); return(HA_OK); } ZAPMSG(reply); } if (reply == NULL) { ha_api_log(LOG_ERR, "General read_api_msg() failure"); }else if (result == NULL) { ha_api_log(LOG_ERR, "API reply missing " F_APIRESULT " field."); }else if (strcmp(result, API_MORE) != 0 && strcmp(result, API_OK) != 0) { ha_api_log(LOG_ERR, "Unexpected API result value: [%s]", result); }else if (ha_msg_value(reply, F_NODENAME) == NULL) { ha_api_log(LOG_ERR, "No nodename in API reply"); }else{ ha_api_log(LOG_ERR, "new_stringlist() failure."); } if (reply != NULL) { zap_nodelist(pi); ZAPMSG(reply); } return HA_FAIL; } /* * Retrieve the list of interfaces for the given host. */ static int get_iflist(llc_private_t* pi, const char *host) { struct ha_msg* request; struct ha_msg* reply; const char * result; struct stringlist* sl; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_IFLIST)) == NULL) { ha_api_log(LOG_ERR, "get_iflist: can't create msg"); return HA_FAIL; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_iflist: cannot add field"); ZAPMSG(request); return HA_FAIL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return HA_FAIL; } ZAPMSG(request); /* Loop as long as we get an API_MORE result */ /* The final interface will (hopefully) have an API_OK result type */ while ((reply=read_api_msg(pi)) != NULL && (result = ha_msg_value(reply, F_APIRESULT)) != NULL && (strcmp(result, API_MORE) == 0 || strcmp(result, API_OK) == 0) && (sl = new_stringlist(ha_msg_value(reply, F_IFNAME))) != NULL){ sl->next = pi->iflist; pi->iflist = sl; if (strcmp(result, API_OK) == 0) { pi->nextif = pi->iflist; ZAPMSG(reply); return(HA_OK); } ZAPMSG(reply); } if (reply != NULL) { zap_iflist(pi); ZAPMSG(reply); } return HA_FAIL; } /* * Return the status of the given node. 
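 *
 * Together with the node walk this is the usual way to survey the
 * cluster.  A sketch, given an ll_cluster_t *hb that is already signed
 * on (llc_ops member names assumed from the public header):
 *
 *	const char *node;
 *	hb->llc_ops->init_nodewalk(hb);
 *	while ((node = hb->llc_ops->nextnode(hb)) != NULL) {
 *		cl_log(LOG_INFO, "node %s: status %s"
 *		,	node, hb->llc_ops->node_status(hb, node));
 *	}
 *	hb->llc_ops->end_nodewalk(hb);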
*/ static const char * get_nodestatus(ll_cluster_t* lcl, const char *host) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * status; static char statbuf[128]; const char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_nodestatus: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_NODESTATUS)) == NULL) { return NULL; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_nodestatus: cannot add field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (status = ha_msg_value(reply, F_STATUS)) != NULL) { memset(statbuf, 0, sizeof(statbuf)); strncpy(statbuf, status, sizeof(statbuf) - 1); ret = statbuf; }else{ ret = NULL; } ZAPMSG(reply); return ret; } /* * Return the weight of the given node. */ static int get_nodeweight(ll_cluster_t* lcl, const char *host) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * weight_s; int ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_nodeweight: bad cinfo"); return -1; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return -1; } if ((request = hb_api_boilerplate(API_NODEWEIGHT)) == NULL) { return -1; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_nodeweight: cannot add field"); ZAPMSG(request); return -1; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return -1; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return -1; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (weight_s = ha_msg_value(reply, F_WEIGHT)) != NULL) { ret = atoi(weight_s); }else{ ret = -1; } ZAPMSG(reply); return ret; } /* * Return the site of the given node. */ static const char * get_nodesite(ll_cluster_t* lcl, const char *host) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * site; static char sitebuf[HOSTLENG]; const char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_nodesite: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_NODESITE)) == NULL) { return NULL; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_nodesite: cannot add field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... 
*/ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (site = ha_msg_value(reply, F_SITE)) != NULL) { memset(sitebuf, 0, sizeof(sitebuf)); strncpy(sitebuf, site, sizeof(sitebuf) - 1); ret = sitebuf; }else{ ret = NULL; } ZAPMSG(reply); return ret; } /* * Return the status of the given client. */ static const char * get_clientstatus(ll_cluster_t* lcl, const char *host , const char *clientid, int timeout) { struct ha_msg* request; struct ha_msg* reply; const char * result; llc_private_t* pi; static char statbuf[128]; const char * clientname; const char * ret; ClearLog(); if (!ISOURS(lcl)){ ha_api_log(LOG_ERR,"get_clientstatus: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn){ ha_api_log(LOG_ERR,"not signed on"); return NULL; } clientname = (clientid == NULL) ? OurClientID : clientid; /* If host is NULL, user choose the callback method to * get the result. This also implies timeout is useless */ if (host == NULL) { int max_delay; struct ha_msg * m = NULL; int delay; int num_nodes; if ((m = ha_msg_new(0)) == NULL || ha_msg_add(m, F_TYPE, T_QCSTATUS) != HA_OK || ha_msg_add(m, F_CLIENTNAME, clientname) != HA_OK || ha_msg_add(m, F_FROMID, OurClientID) != HA_OK) { if (m){ ha_msg_del(m); } ha_log(LOG_ERR, "%s: cannot add field", __FUNCTION__); return NULL; } /* We delay random time here to distribute requests from different nodes * across time in a big cluster. Scale max delay as 50ms per node, * in a 100-node cluster, the max delay is 5 seconds */ num_nodes = get_num_nodes(lcl); max_delay = num_nodes * 50000; /* in microsecond*/ srand(cl_randseed()); delay = (1.0* rand()/RAND_MAX)*max_delay; if (ANYDEBUG){ cl_log(LOG_DEBUG, "Delaying cstatus request for %d ms", delay/1000); } usleep(delay); if (sendclustermsg(lcl, m) != HA_OK) { ha_log(LOG_ERR, "%s: sendclustermsg fail",__FUNCTION__); } ha_msg_del(m); return NULL; } if (*host == EOS) { ha_api_log(LOG_ERR, "client status : bad nodename"); return NULL; } if ((request = hb_api_boilerplate(API_CLIENTSTATUS)) == NULL) { ha_api_log(LOG_ERR, "hb_api_boilerplate failed"); return NULL; } if (ha_msg_add(request, F_NODENAME, host)!= HA_OK || ha_msg_add(request, F_CLIENTNAME, clientname)!= HA_OK) { ha_api_log(LOG_ERR, "get_clientstatus: cannot add message field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... */ if ((reply = read_cstatus_respond_msg(pi, timeout)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (result = ha_msg_value(reply, F_CLIENTSTATUS)) != NULL) { memset(statbuf, 0, sizeof(statbuf)); strncpy(statbuf, result, sizeof(statbuf) - 1); ret = statbuf; } else { ha_api_perror("received wrong type of msg"); ret = NULL; } ZAPMSG(reply); return ret; } /* * Return the type of the given node. 
*/ static const char * get_nodetype(ll_cluster_t* lcl, const char *host) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * status; static char statbuf[128]; const char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_nodetype: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_NODETYPE)) == NULL) { return NULL; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_nodetype: cannot add field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (status = ha_msg_value(reply, F_NODETYPE)) != NULL) { memset(statbuf, 0, sizeof(statbuf)); strncpy(statbuf, status, sizeof(statbuf) - 1); ret = statbuf; }else{ ret = NULL; } ZAPMSG(reply); return ret; } static int get_num_nodes(ll_cluster_t* lcl) { struct ha_msg* request; struct ha_msg* reply; const char * result; llc_private_t* pi; const char* num_s; int num; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "%s: bad cinfo", __FUNCTION__); return -1; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return -1; } if ((request = hb_api_boilerplate(API_NUMNODES)) == NULL) { return -1; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return -1; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return -1; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (num_s = ha_msg_value(reply, F_NUMNODES)) != NULL && (num = atoi(num_s)) > 0){ /*everything is good, do nothing*/ }else{ cl_log(LOG_ERR, "Wrong reply message"); cl_log_message(LOG_ERR, reply); num = -1; } ZAPMSG(reply); return num; } static char * get_parameter(ll_cluster_t* lcl, const char* pname) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * pvalue; char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_parameter: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_GETPARM)) == NULL) { return NULL; } if (ha_msg_add(request, F_PNAME, pname) != HA_OK) { ha_api_log(LOG_ERR, "get_parameter: cannot add field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... 
*/ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (pvalue = ha_msg_value(reply, F_PVALUE)) != NULL) { ret = strdup(pvalue); }else{ ret = NULL; } ZAPMSG(reply); return ret; } static const char * get_resources(ll_cluster_t* lcl) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * rvalue; char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_resources: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_GETRESOURCES)) == NULL) { return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... */ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (rvalue = ha_msg_value(reply, F_RESOURCES)) != NULL) { static char retvalue[64]; strncpy(retvalue, rvalue, sizeof(retvalue)-1); retvalue[DIMOF(retvalue)-1] = EOS; ret = retvalue; }else{ const char* failreason = ha_msg_value(reply, F_COMMENT); if (failreason){ ha_api_log(LOG_ERR, "%s", failreason); } ret = NULL; } ZAPMSG(reply); return ret; } /* * Return heartbeat's keepalive time */ static long get_keepalive(ll_cluster_t* lcl) { llc_private_t* pi; if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_keepalive: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; return (pi->keepalive_ms); } /* * Return heartbeat's dead time */ static long get_deadtime(ll_cluster_t* lcl) { llc_private_t* pi; if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_deadtime: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; return (pi->deadtime_ms); } /* * Return suggested logging facility */ static int get_logfacility(ll_cluster_t* lcl) { llc_private_t* pi; if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_logfacility: bad cinfo"); return -1; } pi = (llc_private_t*)lcl->ll_cluster_private; return (pi->logfacility); } /* * Return my nodeid. */ static const char * get_mynodeid(ll_cluster_t* lcl) { if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_mynodeid: bad cinfo"); return NULL; } return (OurNode); } /* * Return the status of the given interface for the given machine. */ static const char * get_ifstatus(ll_cluster_t* lcl, const char *host, const char * ifname) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * status; static char statbuf[128]; const char * ret; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "get_ifstatus: bad cinfo"); return NULL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if ((request = hb_api_boilerplate(API_IFSTATUS)) == NULL) { return NULL; } if (ha_msg_add(request, F_NODENAME, host) != HA_OK) { ha_api_log(LOG_ERR, "get_ifstatus: cannot add field"); ZAPMSG(request); return NULL; } if (ha_msg_add(request, F_IFNAME, ifname) != HA_OK) { ha_api_log(LOG_ERR, "get_ifstatus: cannot add field"); ZAPMSG(request); return NULL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return NULL; } ZAPMSG(request); /* Read reply... 
*/ if ((reply=read_api_msg(pi)) == NULL) { return NULL; } if ((result = ha_msg_value(reply, F_APIRESULT)) != NULL && strcmp(result, API_OK) == 0 && (status = ha_msg_value(reply,F_STATUS)) != NULL) { memset(statbuf, 0, sizeof(statbuf)); strncpy(statbuf, status, sizeof(statbuf) - 1); ret = statbuf; }else{ ret = NULL; } ZAPMSG(reply); return ret; } /* * Zap our list of nodes */ static void zap_nodelist(llc_private_t* pi) { destroy_stringlist(pi->nodelist); pi->nodelist=NULL; pi->nextnode = NULL; } /* * Zap our list of interfaces. */ static void zap_iflist(llc_private_t* pi) { destroy_stringlist(pi->iflist); pi->iflist=NULL; pi->nextif = NULL; } static void zap_order_seq(llc_private_t* pi) { order_seq_t * order_seq = pi->order_seq_head.next; order_seq_t * next; while (order_seq != NULL){ next = order_seq->next; free(order_seq); order_seq = next; } pi->order_seq_head.next = NULL; } static void zap_order_queue(llc_private_t* pi) { order_queue_t * oq = pi->order_queue_head; order_queue_t * next; int i; while (oq != NULL) { next = oq->next; for (i = 0; i < MAXMSGHIST; i++){ if (oq->node.orderQ[i]){ ZAPMSG(oq->node.orderQ[i]); oq->node.orderQ[i] = NULL; } if (oq->cluster.orderQ[i]){ ZAPMSG(oq->cluster.orderQ[i]); oq->cluster.orderQ[i] = NULL; } } free(oq); oq = next; } pi->order_queue_head = NULL; } static void zap_msg_queue(llc_private_t* pi) { struct MsgQueue* qelem = pi->firstQdmsg; struct MsgQueue* next; while (qelem != NULL){ next = qelem->next; ZAPMSG(qelem->value); free(qelem); qelem = next; } pi->firstQdmsg = NULL; pi->lastQdmsg = NULL; } /* * Create a new stringlist. */ static struct stringlist* new_stringlist(const char *s) { struct stringlist* ret; char * cp; if (s == NULL) { return(NULL); } if ((cp = strdup(s)) == NULL) { return(NULL); } if ((ret = MALLOCT(struct stringlist)) == NULL) { free(cp); return(NULL); } ret->next = NULL; ret->value = cp; return(ret); } /* * Destroy (free) a stringlist. */ static void destroy_stringlist(struct stringlist * s) { struct stringlist * this; struct stringlist * next; for (this=s; this; this=next) { next = this->next; free(this->value); memset(this, 0, sizeof(*this)); free(this); } } /* * Enqueue a message to be read later. */ static int enqueue_msg(llc_private_t* pi, struct ha_msg* msg) { struct MsgQueue* newQelem; if (msg == NULL) { return(HA_FAIL); } if ((newQelem = MALLOCT(struct MsgQueue)) == NULL) { return(HA_FAIL); } newQelem->value = msg; newQelem->prev = pi->lastQdmsg; newQelem->next = NULL; if (pi->lastQdmsg != NULL) { pi->lastQdmsg->next = newQelem; } pi->lastQdmsg = newQelem; if (pi->firstQdmsg == NULL) { pi->firstQdmsg = newQelem; } return HA_OK; } /* * Dequeue a message. */ static struct ha_msg * dequeue_msg(llc_private_t* pi) { struct MsgQueue* qret; struct ha_msg* ret = NULL; qret = pi->firstQdmsg; if (qret != NULL) { ret = qret->value; pi->firstQdmsg=qret->next; if (pi->firstQdmsg) { pi->firstQdmsg->prev = NULL; } memset(qret, 0, sizeof(*qret)); /* * The only two pointers to this element are the first pointer, * and the prev pointer of the next element in the queue. * (or possibly lastQdmsg... 
See below) */ free(qret); } if (pi->firstQdmsg == NULL) { /* Zap lastQdmsg if it pointed at this Q element */ pi->lastQdmsg=NULL; } return(ret); } /* * Search the general callback list for the given message type */ static gen_callback_t* search_gen_callback(const char * type, llc_private_t* lcp) { struct gen_callback* gcb; for (gcb=lcp->genlist; gcb != NULL; gcb=gcb->next) { if (strcmp(type, gcb->msgtype) == 0) { return(gcb); } } return(NULL); } /* * Add a general callback to the list of general callbacks. */ static int add_gen_callback(const char * msgtype, llc_private_t* lcp , llc_msg_callback_t funp, void* pd) { struct gen_callback* gcb; char * type; if ((gcb = search_gen_callback(msgtype, lcp)) == NULL) { gcb = MALLOCT(struct gen_callback); if (gcb == NULL) { return(HA_FAIL); } type = strdup(msgtype); if (type == NULL) { free(gcb); return(HA_FAIL); } gcb->msgtype = type; gcb->next = lcp->genlist; lcp->genlist = gcb; }else if (funp == NULL) { return(del_gen_callback(lcp, msgtype)); } gcb->cf = funp; gcb->pd = pd; return(HA_OK); } /* * Delete a general callback from the list of general callbacks. */ static int del_gen_callback(llc_private_t* lcp, const char * msgtype) { struct gen_callback* gcb; struct gen_callback* prev = NULL; for (gcb=lcp->genlist; gcb != NULL; gcb=gcb->next) { if (strcmp(msgtype, gcb->msgtype) == 0) { if (prev) { prev->next = gcb->next; }else{ lcp->genlist = gcb->next; } free(gcb->msgtype); gcb->msgtype = NULL; free(gcb); return(HA_OK); } prev = gcb; } return(HA_FAIL); } /* * Read an API message. All other messages are enqueued to be read later. */ static struct ha_msg * read_api_msg(llc_private_t* pi) { for (;;) { struct ha_msg* msg; const char * type; pi->chan->ops->waitin(pi->chan); if (pi->chan->ch_status == IPC_DISCONNECT){ break; } if ((msg=msgfromIPC(pi->chan, 0)) == NULL) { ha_api_perror("read_api_msg: " "Cannot read reply from IPC channel"); continue; } if ((type=ha_msg_value(msg, F_TYPE)) != NULL && strcmp(type, T_APIRESP) == 0) { return(msg); } /* Got an unexpected non-api message */ /* Queue it up for reading later */ enqueue_msg(pi, msg); } /*NOTREACHED*/ return(NULL); } /* * Read a client status respond message either from local node or from * a remote node. All other messages are enqueued to be read later. */ static struct ha_msg * read_cstatus_respond_msg(llc_private_t* pi, int timeout) { struct ha_msg* msg; const char * type; struct pollfd pfd; pfd.fd = pi->chan->ops->get_recv_select_fd(pi->chan); pfd.events = POLLIN; while ((pi->chan->ops->is_message_pending(pi->chan)) || (poll(&pfd, 1, timeout) > 0 && pfd.revents == POLLIN)) { while (pi->chan->ops->is_message_pending(pi->chan)) { if ((msg=msgfromIPC(pi->chan, 0)) == NULL) { ha_api_perror("read_api_msg: " "Cannot read reply from IPC channel"); continue; } if (((type=ha_msg_value(msg, F_TYPE)) != NULL && strcmp(type, T_RCSTATUS) == 0) || ((type=ha_msg_value(msg, F_SUBTYPE)) != NULL && strcmp(type, T_RCSTATUS) == 0)) { return(msg); } /* Got an unexpected non-api message */ /* Queue it up for reading later */ enqueue_msg(pi, msg); } } /* Timeout or caught a signal */ return NULL; } /* This is the place to handle out of order messages from a restarted * client. If we receive messages from a restarted client yet no leave * message has been received for the previous client, we need to * save the restarted client's messages in backup queue. When the leave * message is received, we then call moveup_backupQ() so that the backup * queue is promoted to our current queue, not backup any more. 
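 *
 * Put differently, for a given peer client the delivery order presented
 * to the user stays: the old client instance's remaining ordered
 * messages, then its leave notification, then (after moveup_backupQ()
 * promotes the backup queue) whatever the restarted instance sent while
 * the old instance's messages were still outstanding.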
*/ static void moveup_backupQ(struct orderQ* q) { int i; if (q == NULL){ return; } if (q->backupQ){ struct orderQ* backup_q = q->backupQ; memcpy(q, backup_q, sizeof(struct orderQ)); if (backup_q->backupQ != NULL){ cl_log(LOG_ERR, "moveup_backupQ:" "backupQ in backupQ is not NULL"); } free(backup_q); q->backupQ = NULL; }else { /*the queue must be empty*/ for (i = 0; i < MAXMSGHIST; i++) { if (q->orderQ[i]){ cl_log(LOG_ERR, "moveup_backupQ:" "queue is not empty" " possible memory leak"); cl_log_message(LOG_ERR, q->orderQ[i]); } } q->curr_oseqno = 0; } return ; } /* * Pop up orderQ. */ static struct ha_msg * pop_orderQ(struct orderQ * q) { struct ha_msg * msg; if (q->orderQ[q->curr_index]){ msg = q->orderQ[q->curr_index]; q->orderQ[q->curr_index] = NULL; q->curr_index = (q->curr_index + 1) % MAXMSGHIST; q->curr_oseqno++; return msg; } return NULL; } static int msg_oseq_compare(seqno_t oseq1, seqno_t gen1, seqno_t oseq2, seqno_t gen2) { int ret; if ( gen1 > gen2){ ret = 1; } else if (gen1 < gen2){ ret = -1; } else { if (oseq1 > oseq2){ ret = 1; } else if (oseq1 < oseq2){ ret = -1; } else{ ret = 0; } } return ret; } static void reset_orderQ(struct orderQ* q) { int i; for (i =0 ;i < MAXMSGHIST; i++){ if (q->orderQ[i]){ ha_msg_del(q->orderQ[i]); q->orderQ[i] = 0; } } if (q->backupQ != NULL){ reset_orderQ(q->backupQ); free(q->backupQ); q->backupQ = NULL; } memset(q, 0, sizeof(struct orderQ)); return; } /* * Process ordered message */ void display_orderQ(struct orderQ* q); void display_orderQ(struct orderQ* q){ if(!q){ return; } cl_log(LOG_INFO, "curr_index=%x, curr_oseqno=%lx, " "curr_gen=%lx, curr_client_gen=%lx", q->curr_index, q->curr_oseqno, q->curr_gen, q->curr_client_gen); cl_log(LOG_INFO, "first_msg_seq =%lx, first_msg_gen = %lx," "first_msg_client_gen =%lx", q->first_msg_seq, q->first_msg_gen, q->first_msg_client_gen); if (q->backupQ == NULL){ cl_log(LOG_INFO, "q->backupQ is NULL"); }else{ display_orderQ(q->backupQ); } } static struct ha_msg * process_ordered_msg(struct orderQ* q, struct ha_msg* msg, seqno_t gen, seqno_t cligen, seqno_t seq, seqno_t oseq, int popmsg) { int i; /* display_orderQ(q); */ /*if this is the first packet, pop it*/ if ( q->first_msg_seq == 0){ q->first_msg_seq = seq; q->first_msg_client_gen = cligen; q->first_msg_gen = gen; q->curr_gen = gen; q->curr_client_gen = cligen; q->curr_oseqno = oseq -1 ; goto out; } /*any message with lower sequence than q->first_msg_seq will be dropped*/ if (q->first_msg_seq != 0 && msg_oseq_compare(q->first_msg_seq, q->first_msg_gen, seq, gen) > 0 ) { return NULL; } if ( q->curr_oseqno == 0){ q->curr_gen = gen; q->curr_client_gen = cligen; goto out; } if ( gen > q->curr_gen ){ /*heartbeat restart, clean everything up*/ reset_orderQ(q); q->first_msg_seq = seq; q->first_msg_client_gen = cligen; q->first_msg_gen = gen; q->curr_gen = gen; q->curr_client_gen = cligen; q->curr_oseqno = oseq - 1; goto out; } else if (gen < q->curr_gen){ /* * message from previous heartbeat generation, * drop the message */ return NULL; } else if(cligen > q->curr_client_gen ){ /*client restarted*/ if (q->backupQ == NULL){ if ( (q->backupQ = malloc(sizeof(struct orderQ))) ==NULL ){ cl_log(LOG_ERR, "process_ordered_msg: " "allocating memory for backupQ failed"); return NULL; } memset(q->backupQ, 0, sizeof(struct orderQ)); } process_ordered_msg(q->backupQ, msg, gen, cligen, seq, oseq, 0); return NULL; } else if (cligen < q->curr_client_gen){ /*Message from a previous client*/ /*this should never happend*/ cl_log(LOG_ERR, "process_ordered_msg: Received message" " 
from previous client. This should never happen"); cl_log_message(LOG_ERR, msg); return NULL; }else if (oseq - q->curr_oseqno >= MAXMSGHIST){ /* * receives a very big sequence number, the * message is not reliable at this point */ if (DEBUGORDER) { cl_log(LOG_DEBUG , "lost at least one unretrievable " "packet! [%lx:%lx], force reset" , q->curr_oseqno , oseq); } q->curr_oseqno = oseq - 1; for (i = 0; i < MAXMSGHIST; i++) { /* Clear order queue, msg obsoleted */ if (q->orderQ[i]){ ha_msg_del(q->orderQ[i]); q->orderQ[i] = NULL; } } q->curr_index = 0; } out: /* Put the new received packet in queue */ q->orderQ[(q->curr_index + oseq - q->curr_oseqno -1 ) % MAXMSGHIST] = msg; /* if this is the packet we are expecting, pop it*/ if (popmsg && msg_oseq_compare(q->curr_oseqno + 1, q->curr_gen,oseq, gen) == 0){ return pop_orderQ(q); } return NULL; } static struct ha_msg* process_client_status_msg(llc_private_t* pi, struct ha_msg* msg, const char* from_node) { const char* status = ha_msg_value(msg, F_STATUS); order_queue_t * oq; struct ha_msg* retmsg; if (status && (strcmp(status, LEAVESTATUS) == 0 || strcmp(status, JOINSTATUS) == 0) ){ for (oq = pi->order_queue_head; oq != NULL; oq = oq->next){ if (strcmp(oq->from_node, from_node) == 0){ break; } } if (oq == NULL){ /*no ordered queue found, good, *simply return the message */ return msg; } if (strcmp(status, LEAVESTATUS) == 0 ){ if (oq->leave_msg != NULL){ cl_log(LOG_ERR, "process_client_status_msg: " " the previous leave msg " "is not delivered yet"); cl_log_message(LOG_ERR, oq->leave_msg); cl_log_message(LOG_ERR, msg); return NULL; } oq->leave_msg = msg; if ((retmsg = pop_orderQ(&oq->node))){ return retmsg; } if ((retmsg = pop_orderQ(&oq->cluster))){ return retmsg; } oq->leave_msg = NULL; moveup_backupQ(&oq->node); moveup_backupQ(&oq->cluster); return msg; }else { /*join message*/ return msg; } }else{ cl_log(LOG_ERR, "process_client_status_msg: " "no status found in client status msg"); cl_log_message(LOG_ERR, msg); return NULL; } return msg; } /* * Process msg gotten from IPC or msgQ. 
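 *
 * Ordering is driven by four fields stamped on each message: heartbeat's
 * own sequence number (F_SEQ) and generation (F_HBGENERATION), the
 * sending client's generation (F_CLIENT_GENERATION), and the per-client
 * order sequence number (F_ORDERSEQ).  Messages without F_ORDERSEQ are
 * returned to the caller as-is, apart from client join/leave
 * notifications, which go through process_client_status_msg() above.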
*/ static struct ha_msg * process_hb_msg(llc_private_t* pi, struct ha_msg* msg) { const char * from_node; const char * to_node; order_queue_t * oq; const char * coseq; seqno_t oseq; const char * cgen; seqno_t gen; const char* cseq; seqno_t seq; const char* ccligen; seqno_t cligen; if ((cseq = ha_msg_value(msg, F_SEQ)) == NULL || sscanf(cseq, "%lx", &seq) != 1){ return msg; } if ((cgen = ha_msg_value(msg, F_HBGENERATION)) == NULL || sscanf(cgen, "%lx", &gen) != 1){ return msg; } if ((ccligen = ha_msg_value(msg, F_CLIENT_GENERATION)) == NULL || sscanf(ccligen, "%lx", &cligen) != 1){ return msg; } if ((from_node = ha_msg_value(msg, F_ORIG)) == NULL){ ha_api_log(LOG_ERR , "%s: extract F_ORIG failed", __FUNCTION__); ZAPMSG(msg); return NULL; } if ((coseq = ha_msg_value(msg, F_ORDERSEQ)) != NULL && sscanf(coseq, "%lx", &oseq) == 1){ /* find the order queue by from_node */ for (oq = pi->order_queue_head; oq != NULL; oq = oq->next){ if (strcmp(oq->from_node, from_node) == 0) break; } if (oq == NULL){ oq = (order_queue_t *) malloc(sizeof(order_queue_t)); if (oq == NULL){ ha_api_log(LOG_ERR , "%s: order_queue_t malloc failed" , __FUNCTION__); ZAPMSG(msg); return NULL; } memset(oq, 0, sizeof(*oq)); strncpy(oq->from_node, from_node, HOSTLENG); oq->next = pi->order_queue_head; pi->order_queue_head = oq; } if ((to_node = ha_msg_value(msg, F_TO)) == NULL) return process_ordered_msg(&oq->cluster, msg, gen, cligen, seq, oseq, 1); else return process_ordered_msg(&oq->node, msg, gen, cligen, seq, oseq, 1); }else { const char* type = ha_msg_value(msg, F_TYPE); if ( type && strcmp(type, T_APICLISTAT) == 0){ return process_client_status_msg(pi, msg, from_node); } /* Simply return no order required msg */ return msg; } } /* * Read a heartbeat message. Read from the queue first. */ static struct ha_msg * read_hb_msg(ll_cluster_t* llc, int blocking) { llc_private_t* pi; struct ha_msg* msg; struct ha_msg* retmsg; order_queue_t* oq; if (!ISOURS(llc)) { ha_api_log(LOG_ERR, "read_hb_msg: bad cinfo"); return NULL; } pi = (llc_private_t*)llc->ll_cluster_private; if (!pi->SignedOn) { return NULL; } /* Process msg from msgQ */ while ((msg = dequeue_msg(pi))){ if ((retmsg = process_hb_msg(pi, msg))) return retmsg; } for (oq = pi->order_queue_head; oq != NULL; oq = oq->next){ process_oq: if ((retmsg = pop_orderQ(&oq->node))){ return retmsg; } if ((retmsg = pop_orderQ(&oq->cluster))){ return retmsg; } if (oq->leave_msg != NULL){ retmsg = oq->leave_msg; oq->leave_msg = NULL; oq->client_leaving = 1; return retmsg; } if (oq->client_leaving){ moveup_backupQ(&oq->node); moveup_backupQ(&oq->cluster); oq->client_leaving = 0; goto process_oq; } } /* Process msg from channel */ while (msgready(llc)){ msg = msgfromIPC(pi->chan, 0); if (msg == NULL) { if (pi->chan->ch_status != IPC_CONNECT) { pi->SignedOn = FALSE; return NULL; } }else if ((retmsg = process_hb_msg(pi, msg))) { return retmsg; } } /* Process msg from orderQ */ if (!blocking) return NULL; /* If this is a blocking call, we keep on reading from channel, so * that we can finally return a non-NULL msg to user. */ for(;;) { pi->chan->ops->waitin(pi->chan); msg = msgfromIPC(pi->chan, 0); if (msg == NULL) { if (pi->chan->ch_status != IPC_CONNECT) { pi->SignedOn = FALSE; } return NULL; } if ((retmsg = process_hb_msg(pi, msg))) { return retmsg; } } } /* * Add a callback for the given message type. 
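 *
 * Registering a callback routes matching messages through CallbackCall()
 * below instead of handing them back from the read routines.  A sketch,
 * with the llc_ops member name and callback prototype assumed from the
 * public header:
 *
 *	static void on_foo(struct ha_msg *msg, void *private) {
 *		cl_log(LOG_INFO, "got a foo message");
 *	}
 *	...
 *	hb->llc_ops->set_msg_callback(hb, "foo", on_foo, NULL);
 *
 * Registering a NULL callback for an already-registered type removes it
 * again (see add_gen_callback()).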
*/ static int set_msg_callback(ll_cluster_t* ci, const char * msgtype , llc_msg_callback_t callback, void * p) { ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "set_msg_callback: bad cinfo"); return HA_FAIL; } return(add_gen_callback(msgtype, (llc_private_t*)ci->ll_cluster_private, callback, p)); } /* * Set the node status change callback. */ static int set_nstatus_callback (ll_cluster_t* ci , llc_nstatus_callback_t cbf, void * p) { llc_private_t* pi = ci->ll_cluster_private; pi->node_callback = cbf; pi->node_private = p; return(HA_OK); } /* * Set the interface status change callback. */ static int set_ifstatus_callback (ll_cluster_t* ci , llc_ifstatus_callback_t cbf, void * p) { llc_private_t* pi = ci->ll_cluster_private; pi->if_callback = cbf; pi->if_private = p; return(HA_OK); } /* * Set the client status change callback. */ static int set_cstatus_callback (ll_cluster_t* ci , llc_cstatus_callback_t cbf, void * p) { llc_private_t* pi = ci->ll_cluster_private; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "%s: bad cinfo", __FUNCTION__); return HA_FAIL; } if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } pi->cstatus_callback = cbf; pi->client_private = p; return HA_OK; } /* * Call the callback associated with this message (if any) * Return TRUE if a callback was called. */ static int CallbackCall(llc_private_t* p, struct ha_msg * msg) { const char * mtype= ha_msg_value(msg, F_TYPE); struct gen_callback* gcb; if (mtype == NULL) { return(0); } /* Special case: node status (change) */ if ((strcasecmp(mtype, T_STATUS) == 0 || strcasecmp(mtype, T_NS_STATUS) == 0)) { /* If DEADSTATUS, cleanup order queue for the node */ const char *mstatus = ha_msg_value(msg, F_STATUS); if (mstatus && (strcmp(mstatus, DEADSTATUS) == 0)) { order_queue_t * oq = p->order_queue_head; order_queue_t * prev; order_queue_t * next; int i; for (prev = NULL; oq != NULL; prev = oq, oq = oq->next){ const char *morig = ha_msg_value(msg, F_ORIG); if (morig && (strcmp(oq->from_node, morig) == 0)) { break; } } if (oq){ next = oq->next; for (i = 0; i < MAXMSGHIST; i++){ if (oq->node.orderQ[i]) { ZAPMSG(oq->node.orderQ[i]); } if (oq->cluster.orderQ[i]) { ZAPMSG(oq->cluster.orderQ[i]); } } free(oq); if (prev) { prev->next = next; } else { p->order_queue_head = next; } } } if (p->node_callback) { p->node_callback(ha_msg_value(msg, F_ORIG) , ha_msg_value(msg, F_STATUS), p->node_private); return(1); } } /* Special case: interface status (change) */ if (p->if_callback && strcasecmp(mtype, T_IFSTATUS) == 0) { p->if_callback(ha_msg_value(msg, F_NODE) , ha_msg_value(msg, F_IFNAME) , ha_msg_value(msg, F_STATUS) , p->if_private); return(1); } /* Special case: client status (change) */ if (p->cstatus_callback && strcasecmp(mtype, T_APICLISTAT) == 0) { p->cstatus_callback(ha_msg_value(msg, F_ORIG) , ha_msg_value(msg, F_FROMID) , ha_msg_value(msg, F_STATUS) , p->client_private); return(1); } if (p->cstatus_callback && strcasecmp(mtype, T_RCSTATUS) == 0) { p->cstatus_callback(ha_msg_value(msg, F_ORIG) , ha_msg_value(msg, F_CLIENTNAME) , ha_msg_value(msg, F_CLIENTSTATUS) , p->client_private); return(1); } /* The general case: Any other message type */ if ((gcb = search_gen_callback(mtype, p)) != NULL) { gcb->cf(msg, gcb->pd); return 1; } return(0); } /* * Return the next message not handled by a callback. * Invoke callbacks for messages encountered along the way. 
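 *
 * A blocking call can therefore service any number of callback-handled
 * messages before it returns; rcvmsg() below is simply this routine with
 * the returned message discarded.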
*/ static struct ha_msg * read_msg_w_callbacks(ll_cluster_t* llc, int blocking) { struct ha_msg* msg = NULL; llc_private_t* pi; if (!ISOURS(llc)) { ha_api_log(LOG_ERR, "read_msg_w_callbacks: bad cinfo"); return NULL; } pi = (llc_private_t*) llc->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "read_msg_w_callbacks: Not signed on"); return NULL; } do { if (msg) { ZAPMSG(msg); } msg = read_hb_msg(llc, blocking); }while (msg && CallbackCall(pi, msg)); return(msg); } /* * Receive messages. Activate callbacks. Messages without callbacks * are ignored. Potentially several messages could be acted on. */ static int rcvmsg(ll_cluster_t* llc, int blocking) { struct ha_msg* msg = NULL; msg=read_msg_w_callbacks(llc, blocking); if (msg) { ZAPMSG(msg); return(1); } return(0); } /* * Initialize nodewalk. (mainly retrieve list of nodes) */ static int init_nodewalk (ll_cluster_t* ci) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "init_nodewalk: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } zap_nodelist(pi); return(get_nodelist(pi)); } /* * Return the next node in the list, or NULL if none. */ static const char * nextnode (ll_cluster_t* ci) { llc_private_t* pi; const char * ret; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "nextnode: bad cinfo"); return NULL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } if (pi->nextnode == NULL) { return(NULL); } ret = pi->nextnode->value; pi->nextnode = pi->nextnode->next; return(ret); } /* * Clean up after a nodewalk (throw away node list) */ static int end_nodewalk(ll_cluster_t* ci) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "end_nodewalk: bad cinfo"); return HA_FAIL; } pi = ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } zap_nodelist(pi); return(HA_OK); } /* * Initialize interface walk. (mainly retrieve list of interfaces) */ static int init_ifwalk (ll_cluster_t* ci, const char * host) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "init_ifwalk: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } zap_iflist(pi); return(get_iflist(pi, host)); } /* * Return the next interface in the iflist, or NULL if none. */ static const char * nextif (ll_cluster_t* ci) { llc_private_t* pi = ci->ll_cluster_private; const char * ret; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "nextif: bad cinfo"); return HA_FAIL; } if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->nextif == NULL) { return(NULL); } ret = pi->nextif->value; pi->nextif = pi->nextif->next; return(ret); } /* * Clean up after a ifwalk (throw away interface list) */ static int end_ifwalk(ll_cluster_t* ci) { llc_private_t* pi = ci->ll_cluster_private; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "end_ifwalk: bad cinfo"); return HA_FAIL; } if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } zap_iflist(pi); return HA_OK; } /* * Return the input file descriptor associated with this object. 
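 *
 * The descriptor is intended for select()/poll() readiness checks in the
 * caller's own main loop; messages must still be read through the API
 * (msgready(), rcvmsg() and friends).  G_main_add_ll_cluster() at the
 * bottom of this file wraps the same idea for GLib main loops.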
*/ static int get_inputfd(ll_cluster_t* ci) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "get_inputfd: bad cinfo"); return(-1); } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return -1; } return pi->chan->ops->get_recv_select_fd(pi->chan); } /* * Return the IPC channel associated with this object. */ static IPC_Channel* get_ipcchan(ll_cluster_t* ci) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "get_ipcchan: bad cinfo"); return NULL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return NULL; } return pi->chan; } /* * Return TRUE (1) if there is a message ready to read. */ static int msgready(ll_cluster_t*ci ) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "msgready: bad cinfo"); return 0; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return 0; } if (pi->firstQdmsg) { return 1; } return pi->chan->ops->is_message_pending(pi->chan); } /* * Set message filter mode */ static int setfmode(ll_cluster_t* lcl, unsigned mode) { unsigned filtermask; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "setfmode: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } switch(mode) { case LLC_FILTER_DEFAULT: filtermask = DEFAULTREATMENT; break; case LLC_FILTER_PMODE: filtermask = (KEEPIT|DUPLICATE|DROPIT); break; case LLC_FILTER_ALLHB: filtermask = (KEEPIT|DUPLICATE|DROPIT|NOCHANGE); break; case LLC_FILTER_RAW: filtermask = ALLTREATMENTS; break; default: return(HA_FAIL); } return(hb_api_setfilter(lcl, filtermask)); } /* This function set the send queue length in heartbeat for the channel connected heartbeat and the client Usually a client should set the length to a longer value if it will receives messages slowly */ static int set_sendq_len(ll_cluster_t* lcl, int length) { struct ha_msg* request; llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "set_sendq_len: bad cinfo"); return HA_FAIL; } if (length <= 0){ ha_api_log(LOG_ERR, "invalid argument, length =%d", length); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "set_sendq_len: not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_SET_SENDQLEN)) == NULL) { ha_api_log(LOG_ERR, "set_sendq_len: can't create msg"); return HA_FAIL; } if (ha_msg_add_int(request, F_SENDQLEN, length) != HA_OK){ ha_api_log(LOG_ERR, "set_sendq_length: adding field failed"); return HA_FAIL; } if (msg2ipcchan(request, pi->chan) != HA_OK){ ZAPMSG(request); ha_api_perror("set_sendq_len: can't send message to IPC"); return HA_FAIL; } return HA_OK; } static int socket_set_send_block_mode(ll_cluster_t* lcl, gboolean truefalse) { llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "sendnodemsg: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (pi->chan){ pi->chan->should_send_block = truefalse; return HA_OK; }else{ return HA_FAIL; } } /* * Send a message to the cluster. 
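 *
 * Only named (non-casual) clients may send.  A minimal sketch using the
 * message primitives already used throughout this file (the llc_ops
 * member name is assumed; "myapp-hello" is just a hypothetical message
 * type):
 *
 *	struct ha_msg *msg = ha_msg_new(0);
 *	if (msg == NULL
 *	||	ha_msg_add(msg, F_TYPE, "myapp-hello") != HA_OK
 *	||	hb->llc_ops->sendclustermsg(hb, msg) != HA_OK) {
 *		cl_log(LOG_ERR, "cannot send cluster message");
 *	}
 *	if (msg) {
 *		ha_msg_del(msg);
 *	}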
*/ static int sendclustermsg(ll_cluster_t* lcl, struct ha_msg* msg) { llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "sendclustermsg: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->iscasual) { ha_api_log(LOG_ERR, "sendclustermsg: casual client"); return HA_FAIL; } return(msg2ipcchan(msg, pi->chan)); } /* * Send a message to a specific node in the cluster. */ static int sendnodemsg(ll_cluster_t* lcl, struct ha_msg* msg , const char * nodename) { llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "sendnodemsg: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->iscasual) { ha_api_log(LOG_ERR, "sendnodemsg: casual client"); return HA_FAIL; } if (*nodename == EOS) { ha_api_log(LOG_ERR, "sendnodemsg: bad nodename"); return HA_FAIL; } if (ha_msg_mod(msg, F_TO, nodename) != HA_OK) { ha_api_log(LOG_ERR, "sendnodemsg: cannot set F_TO field"); return(HA_FAIL); } return(msg2ipcchan(msg, pi->chan)); } static int sendnodemsg_byuuid(ll_cluster_t* lcl, struct ha_msg* msg, cl_uuid_t* uuid) { llc_private_t* pi; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "sendnodemsg_byuuid: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->iscasual) { ha_api_log(LOG_ERR, "sendnodemsg_byuuid: casual client"); return HA_FAIL; } if (!uuid){ ha_api_log(LOG_ERR, "uuid is NULL"); return HA_FAIL; } if (cl_msg_moduuid(msg, F_TOUUID, uuid) != HA_OK) { ha_api_log(LOG_ERR, "sendnodemsg_byuuid: " "cannot set F_TOUUID field"); return(HA_FAIL); } return(msg2ipcchan(msg, pi->chan)); } static int get_uuid(llc_private_t* pi, const char* nodename, cl_uuid_t* uuid) { struct ha_msg* request; struct ha_msg* reply; const char * result; cl_uuid_t tmp; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if ((request = hb_api_boilerplate(API_GETUUID)) == NULL) { ha_api_log(LOG_ERR, "get_uuid: can't create msg"); return HA_FAIL; } if (ha_msg_add(request, F_QUERYNAME, nodename) != HA_OK) { ha_api_log(LOG_ERR, "get_uuid: cannot add field"); ZAPMSG(request); return HA_FAIL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return HA_FAIL; } ZAPMSG(request); if ((reply=read_api_msg(pi)) != NULL && (result = ha_msg_value(reply, F_APIRESULT)) != NULL && (strcmp(result, API_OK) == 0) && (cl_get_uuid(reply, F_QUERYUUID, &tmp)) == HA_OK){ cl_uuid_copy(uuid, &tmp); ZAPMSG(reply); return HA_OK; } if (reply != NULL) { ZAPMSG(reply); } return HA_FAIL; } static int get_uuid_by_name(ll_cluster_t* ci, const char* nodename, cl_uuid_t* uuid) { llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "get_nodeID_from_name: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if(!uuid || !nodename){ ha_api_log(LOG_ERR, "get_uuid_by_name: uuid or nodename is NULL"); return HA_FAIL; } return get_uuid(pi, nodename, uuid); } static int get_name(llc_private_t* pi, const cl_uuid_t* uuid, char* name, int maxnamlen) { struct ha_msg* request; struct ha_msg* reply; const char * result; const char * tmp; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return 
HA_FAIL; } if ((request = hb_api_boilerplate(API_GETNAME)) == NULL) { ha_api_log(LOG_ERR, "get_name: can't create msg"); return HA_FAIL; } if (ha_msg_adduuid(request, F_QUERYUUID, uuid) != HA_OK) { ha_api_log(LOG_ERR, "get_uuid: cannot add field"); ZAPMSG(request); return HA_FAIL; } /* Send message */ if (msg2ipcchan(request, pi->chan) != HA_OK) { ZAPMSG(request); ha_api_perror("Can't send message to IPC Channel"); return HA_FAIL; } ZAPMSG(request); if ((reply=read_api_msg(pi)) != NULL && (result = ha_msg_value(reply, F_APIRESULT)) != NULL && (strcmp(result, API_OK) == 0) && (tmp = ha_msg_value(reply, F_QUERYNAME)) != NULL){ strncpy(name, tmp, maxnamlen -1 ); name[maxnamlen-1] = 0; ZAPMSG(reply); return HA_OK; } if (reply != NULL) { ZAPMSG(reply); } return HA_FAIL; } static int get_name_by_uuid(ll_cluster_t* ci, cl_uuid_t* uuid, char* nodename, size_t maxnamlen){ llc_private_t* pi; ClearLog(); if (!ISOURS(ci)) { ha_api_log(LOG_ERR, "get_nodeID_from_name: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)ci->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if(!uuid || !nodename || maxnamlen <= 0){ ha_api_log(LOG_ERR, "get_name_by_uuid: bad paramter"); return HA_FAIL; } return get_name(pi, uuid, nodename, maxnamlen); } /* Add order sequence number field */ STATIC order_seq_t* add_order_seq(llc_private_t* pi, struct ha_msg* msg) { order_seq_t * order_seq = &pi->order_seq_head; const char * to_node; char seq[32]; to_node = ha_msg_value(msg, F_TO); if (to_node != NULL){ for (order_seq = pi->order_seq_head.next; order_seq != NULL ; order_seq = order_seq->next){ if (strcmp(order_seq->to_node, to_node) == 0) break; } } if (order_seq == NULL && to_node != NULL){ order_seq = (order_seq_t *) malloc(sizeof(order_seq_t)); if (order_seq == NULL){ ha_api_log(LOG_ERR , "add_order_seq: order_seq_t malloc failed!"); return NULL; } strncpy(order_seq->to_node, to_node, HOSTLENG); order_seq->seqno = 1; order_seq->next = pi->order_seq_head.next; pi->order_seq_head.next = order_seq; } sprintf(seq, "%lx", order_seq->seqno); ha_msg_mod(msg, F_ORDERSEQ, seq); return order_seq; } /* * Send an ordered message to the cluster. 
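 *
 * Ordered sends differ from sendclustermsg()/sendnodemsg() only in that
 * add_order_seq() stamps a per-destination F_ORDERSEQ sequence number on
 * the message (and bumps it once the send succeeds), which is what lets
 * the receiving side's order queues deliver messages in sending order.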
*/ static int send_ordered_clustermsg(ll_cluster_t* lcl, struct ha_msg* msg) { llc_private_t* pi; order_seq_t* order_seq; int ret; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "%s: bad cinfo", __FUNCTION__); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->iscasual) { ha_api_log(LOG_ERR, "%s: casual client", __FUNCTION__); return HA_FAIL; } order_seq = add_order_seq(pi, msg); if (order_seq == NULL){ ha_api_log(LOG_ERR, "add_order_seq failed"); return HA_FAIL; } ret = msg2ipcchan(msg, pi->chan); if (ret == HA_OK){ order_seq->seqno++; } return ret; } static int send_ordered_nodemsg(ll_cluster_t* lcl, struct ha_msg* msg , const char * nodename) { llc_private_t* pi; order_seq_t* order_seq; int ret; ClearLog(); if (!ISOURS(lcl)) { ha_api_log(LOG_ERR, "sendnodemsg: bad cinfo"); return HA_FAIL; } pi = (llc_private_t*)lcl->ll_cluster_private; if (!pi->SignedOn) { ha_api_log(LOG_ERR, "not signed on"); return HA_FAIL; } if (pi->iscasual) { ha_api_log(LOG_ERR, "sendnodemsg: casual client"); return HA_FAIL; } if (*nodename == EOS) { ha_api_log(LOG_ERR, "sendnodemsg: bad nodename"); return HA_FAIL; } if (ha_msg_mod(msg, F_TO, nodename) != HA_OK) { ha_api_log(LOG_ERR, "sendnodemsg: cannot set F_TO field"); return(HA_FAIL); } order_seq = add_order_seq(pi, msg); if (order_seq == NULL){ ha_api_log(LOG_ERR, "add_order_seq failed"); return HA_FAIL; } ret = msg2ipcchan(msg, pi->chan); if (ret == HA_OK){ order_seq->seqno++; } return ret; } static char APILogBuf[MAXLINE] = ""; size_t BufLen = 0; static void ClearLog(void) { memset(APILogBuf, 0, sizeof(APILogBuf)); APILogBuf[0] = EOS; BufLen = 1; } static gboolean chan_is_connected(ll_cluster_t* lcl) { llc_private_t* pi; if (lcl == NULL){ cl_log(LOG_ERR, "Invalid argument, " "lcl is NULL"); return FALSE; } if(lcl->ll_cluster_private == NULL){ cl_log(LOG_ERR, "Invalid argument, " "lcl->llc_cluster_private is NULL"); return FALSE; } pi = (llc_private_t*) lcl->ll_cluster_private; if (pi->chan == NULL){ cl_log(LOG_ERR, "Invalid argument: chan is NULL"); return FALSE; } return (pi->chan->ch_status == IPC_CONNECT); } static const char * APIError(ll_cluster_t* lcl) { return(APILogBuf); } static void ha_api_log(int priority, const char * fmt, ...) { size_t len; va_list ap; char buf[MAXLINE]; va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); len = strlen(buf); if ((BufLen + len) >= sizeof(APILogBuf)) { ClearLog(); } if (APILogBuf[0] != EOS && APILogBuf[BufLen-1] != '\n') { strncat(APILogBuf, "\n", sizeof(APILogBuf)-BufLen-1); BufLen++; } strncat(APILogBuf, buf, sizeof(APILogBuf)-BufLen-1); BufLen += len; } static void ha_api_perror(const char * fmt, ...) { const char * err; va_list ap; char buf[MAXLINE]; err = strerror(errno); va_start(ap, fmt); vsnprintf(buf, MAXLINE, fmt, ap); va_end(ap); ha_api_log(LOG_ERR, "%s: %s", buf, err); } /* * Our vector of member functions... 
*/ static struct llc_ops heartbeat_ops = { hb_api_signon, hb_api_signoff, hb_api_delete, set_msg_callback, set_nstatus_callback, set_ifstatus_callback, set_cstatus_callback, init_nodewalk, nextnode, end_nodewalk, get_nodestatus, get_nodeweight, get_nodesite, get_nodetype, get_num_nodes, init_ifwalk, nextif, end_ifwalk, get_ifstatus, get_clientstatus, get_uuid_by_name, get_name_by_uuid, sendclustermsg, sendnodemsg, sendnodemsg_byuuid, send_ordered_clustermsg, send_ordered_nodemsg, get_inputfd, get_ipcchan, msgready, hb_api_setsignal, rcvmsg, read_msg_w_callbacks, setfmode, get_parameter, get_deadtime, get_keepalive, get_mynodeid, get_logfacility, get_resources, chan_is_connected, set_sendq_len, socket_set_send_block_mode, APIError, }; /* * Create a new heartbeat API object */ static ll_cluster_t* hb_cluster_new() { ll_cluster_t* ret; struct llc_private* hb; if ((hb = MALLOCT(struct llc_private)) == NULL) { return(NULL); } memset(hb, 0, sizeof(*hb)); if ((ret = MALLOCT(ll_cluster_t)) == NULL) { free(hb); hb = NULL; return(NULL); } memset(ret, 0, sizeof(*ret)); hb->PrivateId = OurID; ret->ll_cluster_private = hb; ret->llc_ops = &heartbeat_ops; return ret; } /* * Create a new low-level cluster object of the specified type. */ ll_cluster_t* ll_cluster_new(const char * llctype) { if (strcmp(llctype, "heartbeat") == 0) { return hb_cluster_new(); } return NULL; } #include #define OURMAGIC 0xbeef1234 struct GLLclusterSource_s { GCHSource chsrc; unsigned long magic2; ll_cluster_t* hbchan; gboolean(*dispatch)(ll_cluster_t* llc, gpointer udata); }; static gboolean G_llc_prepare_int(GSource* source, gint* timeout); static gboolean G_llc_check_int(GSource* source); static gboolean G_llc_dispatch_int(GSource* source, GSourceFunc callback , gpointer user_data); static void G_llc_destroy_int(GSource* source); static GSourceFuncs G_llc_SourceFuncs = { G_llc_prepare_int, G_llc_check_int, G_llc_dispatch_int, G_llc_destroy_int, }; #define CHECKMAGIC(s,value) {if ((s)->magic2 != OURMAGIC) { \ cl_log(LOG_ERR \ , "%s: invalid magic number"\ , __FUNCTION__); \ return value; \ }} static gboolean G_llc_prepare_int(GSource* source, gint* timeout) { GLLclusterSource* s = (GLLclusterSource*)source; CHECKMAGIC(s, FALSE); (void)G_CH_prepare_int(source, timeout); return s->hbchan->llc_ops->msgready(s->hbchan); } static gboolean G_llc_check_int(GSource* source) { GLLclusterSource* s = (GLLclusterSource*)source; CHECKMAGIC(s, FALSE); (void)G_CH_check_int(source); return s->hbchan->llc_ops->msgready(s->hbchan); } static gboolean G_llc_dispatch_int(GSource* source, GSourceFunc callback , gpointer user_data) { gboolean ret1 = TRUE; gboolean ret2 = TRUE; GLLclusterSource* s = (GLLclusterSource*)source; CHECKMAGIC(s, FALSE); ret1 = G_CH_dispatch_int(source, callback, user_data); if (s->hbchan->llc_ops->msgready(s->hbchan) && s->dispatch) { ret2 = s->dispatch(s->hbchan, s->chsrc.udata); } return ret1 && ret2; } static void G_llc_destroy_int(GSource* source) { GLLclusterSource* s = (GLLclusterSource*)source; llc_private_t* pi; pi = (llc_private_t*)s->hbchan->ll_cluster_private; CHECKMAGIC(s, ); s->magic2 = 0; G_CH_destroy_int(source); pi->chan = NULL; s->hbchan->llc_ops->delete(s->hbchan); } GLLclusterSource* G_main_add_ll_cluster(int priority, ll_cluster_t* api , gboolean can_recurse , gboolean (*dispatch)(ll_cluster_t* source_data,gpointer user_data) , gpointer userdata, GDestroyNotify notify) { GSource * source; IPC_Channel* ch; GLLclusterSource* s; source = g_source_new(&G_llc_SourceFuncs , sizeof(GLLclusterSource)); if 
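/*
 * Illustrative sketch (not from the original source): hooking a signed-on
 * cluster object into a GLib main loop with G_main_add_ll_cluster(), whose
 * signature is visible just below.  The dispatch callback fires whenever a
 * message is ready; returning FALSE removes the source, which in turn (via
 * G_llc_destroy_int) drops the channel and deletes the cluster object.
 *
 *      static gboolean on_hb_msg(ll_cluster_t *hb, gpointer user_data)
 *      {
 *              ... read and dispatch pending messages here ...
 *              return TRUE;    keep the source installed
 *      }
 *
 *      GLLclusterSource *src;
 *      src = G_main_add_ll_cluster(G_PRIORITY_HIGH, hb, FALSE,
 *                                  on_hb_msg, NULL, NULL);
 *      if (src == NULL) {
 *              ... signon must already have succeeded so that ipcchan() works ...
 *      }
 */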
(source == NULL || api == NULL || api->llc_ops == NULL || (ch = api->llc_ops->ipcchan(api)) == NULL) { return NULL; } s = (GLLclusterSource*)source; s->magic2 = OURMAGIC; s->hbchan = api; s->dispatch = dispatch; (void)G_main_IPC_Channel_constructor(source, ch, userdata, notify); g_source_set_priority(source, priority); g_source_set_can_recurse(source, can_recurse); s->chsrc.description = "Heartbeat API channel"; s->chsrc.gsourceid = g_source_attach(source, NULL); if (s->chsrc.gsourceid == 0) { g_source_remove_poll(source, &s->chsrc.infd); if (!s->chsrc.fd_fdx) { g_source_remove_poll(source, &s->chsrc.outfd); } g_source_unref(source); source = NULL; s = NULL; } return s; } Heartbeat-3-0-7e3a82377fa8/lib/lrm/Makefile.am0000644000000000000000000000312011576626513020567 0ustar00usergroup00000000000000# # Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(libdir)/@HB_PKG@ havarlibdir = $(localstatedir)/lib/@HB_PKG@ COMMONLIBS = -lplumb \ $(GLIBLIB) LRM_DIR = lrm lrmdir = $(HA_VARLIBDIR)/$(HB_PKG)/$(LRM_DIR) apigid = @HA_APIGID@ lib_LTLIBRARIES = liblrm.la liblrm_la_SOURCES = lrm_msg.c clientlib.c racommon.c liblrm_la_LDFLAGS = -version-info 2:0:0 $(COMMONLIBS) liblrm_la_CFLAGS = $(INCLUDES) install-exec-local: $(mkinstalldirs) $(DESTDIR)$(lrmdir) -chgrp $(apigid) $(DESTDIR)/$(lrmdir) chmod 770 $(DESTDIR)/$(lrmdir) Heartbeat-3-0-7e3a82377fa8/lib/lrm/clientlib.c0000644000000000000000000011572011576626513020656 0ustar00usergroup00000000000000/* * Client Library for Local Resource Manager API. * * Author: Huang Zhen * Copyright (c) 2004 International Business Machines * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include /* FIXME: Notice: this define should be replaced when merge to the whole pkg*/ #define LRM_MAXPIDLEN 256 #define LRM_ID "lrm" #define LOG_FAIL_create_lrm_msg(msg_type) \ cl_log(LOG_ERR, "%s(%d): failed to create a %s message with " \ "function create_lrm_msg." \ , __FUNCTION__, __LINE__, msg_type) #define LOG_FAIL_create_lrm_rsc_msg(msg_type) \ cl_log(LOG_ERR, "%s(%d): failed to create a %s message with " \ "function create_lrm_rsc_msg." \ , __FUNCTION__, __LINE__, msg_type) #define LOG_FAIL_receive_reply(msg_type) \ cl_log(LOG_ERR, "%s(%d): failed to receive a reply message of %s." \ , __FUNCTION__, __LINE__, msg_type) #define LOG_FAIL_SEND_MSG(msg_type, chan_name) \ cl_log(LOG_ERR, "%s(%d): failed to send a %s message to lrmd " \ "via %s channel." \ , __FUNCTION__, __LINE__, msg_type, chan_name) #define LOG_GOT_FAIL_RET(priority, msg_type) \ cl_log(priority, "%s(%d): got a return code HA_FAIL from " \ "a reply message of %s with function get_ret_from_msg." \ , __FUNCTION__, __LINE__, msg_type) #define LOG_BASIC_ERROR(apiname) \ cl_log(LOG_ERR, "%s(%d): %s failed." \ , __FUNCTION__, __LINE__, apiname) #define LOG_FAIL_GET_MSG_FIELD(priority, field_name, msg) \ {cl_log(priority, "%s(%d): failed to get the value " \ "of field %s from a ha_msg" \ , __FUNCTION__, __LINE__, field_name); \ cl_log(LOG_INFO, "%s: Message follows:", __FUNCTION__); \ cl_log_message(LOG_INFO, (msg)); \ } /* declare the functions used by the lrm_ops structure*/ static int lrm_signon (ll_lrm_t* lrm, const char * app_name); static int lrm_signoff (ll_lrm_t*); static int lrm_delete (ll_lrm_t*); static int lrm_set_lrm_callback (ll_lrm_t* lrm, lrm_op_done_callback_t op_done_callback_func); static GList* lrm_get_rsc_class_supported (ll_lrm_t* lrm); static GList* lrm_get_rsc_type_supported (ll_lrm_t* lrm, const char* class); static GList* lrm_get_rsc_provider_supported (ll_lrm_t* lrm ,const char* class, const char* type); static char* lrm_get_rsc_type_metadata(ll_lrm_t* lrm, const char* class ,const char* type, const char* provider); static GHashTable* lrm_get_all_type_metadata(ll_lrm_t*, const char* class); static GList* lrm_get_all_rscs (ll_lrm_t* lrm); static lrm_rsc_t* lrm_get_rsc (ll_lrm_t* lrm, const char* rsc_id); static int lrm_add_rsc (ll_lrm_t*, const char* id, const char* class ,const char* type, const char* provider ,GHashTable* parameter); static int lrm_delete_rsc (ll_lrm_t*, const char* id); static int lrm_fail_rsc (ll_lrm_t* lrm, const char* rsc_id, const int fail_rc ,const char* fail_reason); static int lrm_set_lrmd_param (ll_lrm_t* lrm, const char* name, const char *value); static char* lrm_get_lrmd_param (ll_lrm_t* lrm, const char* name); static IPC_Channel* lrm_ipcchan (ll_lrm_t*); static int lrm_msgready (ll_lrm_t*); static int lrm_rcvmsg (ll_lrm_t*, int blocking); static struct lrm_ops lrm_ops_instance = { lrm_signon, lrm_signoff, lrm_delete, lrm_set_lrm_callback, lrm_set_lrmd_param, lrm_get_lrmd_param, lrm_get_rsc_class_supported, lrm_get_rsc_type_supported, lrm_get_rsc_provider_supported, lrm_get_rsc_type_metadata, lrm_get_all_type_metadata, lrm_get_all_rscs, lrm_get_rsc, lrm_add_rsc, lrm_delete_rsc, lrm_fail_rsc, lrm_ipcchan, lrm_msgready, lrm_rcvmsg }; /* declare the functions 
used by the lrm_rsc_ops structure*/ static int rsc_perform_op (lrm_rsc_t*, lrm_op_t* op); static int rsc_cancel_op (lrm_rsc_t*, int call_id); static int rsc_flush_ops (lrm_rsc_t*); static GList* rsc_get_cur_state (lrm_rsc_t*, state_flag_t* cur_state); static lrm_op_t* rsc_get_last_result (lrm_rsc_t*, const char* op_type); static gint compare_call_id(gconstpointer a, gconstpointer b); static struct rsc_ops rsc_ops_instance = { rsc_perform_op, rsc_cancel_op, rsc_flush_ops, rsc_get_cur_state, rsc_get_last_result }; /* define the internal data used by the client library*/ static int is_signed_on = FALSE; static IPC_Channel* ch_cmd = NULL; static IPC_Channel* ch_cbk = NULL; static lrm_op_done_callback_t op_done_callback = NULL; /* define some utility functions*/ static int get_ret_from_ch(IPC_Channel* ch); static int get_ret_from_msg(struct ha_msg* msg); static struct ha_msg* op_to_msg (lrm_op_t* op); static lrm_op_t* msg_to_op(struct ha_msg* msg); static void free_op (lrm_op_t* op); /* define of the api functions*/ ll_lrm_t* ll_lrm_new (const char * llctype) { ll_lrm_t* lrm; /* check the parameter*/ if (0 != STRNCMP_CONST(llctype, LRM_ID)) { cl_log(LOG_ERR, "ll_lrm_new: wrong parameter"); return NULL; } /* alloc memory for lrm*/ if (NULL == (lrm = (ll_lrm_t*) g_new(ll_lrm_t,1))) { cl_log(LOG_ERR, "ll_lrm_new: can not allocate memory"); return NULL; } /* assign the ops*/ lrm->lrm_ops = &lrm_ops_instance; return lrm; } static int lrm_signon (ll_lrm_t* lrm, const char * app_name) { GHashTable* ch_cmd_attrs; GHashTable* ch_cbk_attrs; struct ha_msg* msg; char path[] = IPC_PATH_ATTR; char cmd_path[] = LRM_CMDPATH; char callback_path[] = LRM_CALLBACKPATH; /* check parameters*/ if (NULL == lrm || NULL == app_name) { cl_log(LOG_ERR, "lrm_signon: wrong parameter"); return HA_FAIL; } /* if already signed on, sign off first*/ if (is_signed_on) { cl_log(LOG_WARNING, "lrm_signon: the client is alreay signed on, re-sign"); lrm_signoff(lrm); } /* create the command ipc channel to lrmd*/ ch_cmd_attrs = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_insert(ch_cmd_attrs, path, cmd_path); ch_cmd = ipc_channel_constructor(IPC_ANYTYPE, ch_cmd_attrs); g_hash_table_destroy(ch_cmd_attrs); if (NULL == ch_cmd){ lrm_signoff(lrm); cl_log(LOG_WARNING, "lrm_signon: can not connect to lrmd for cmd channel"); return HA_FAIL; } if (IPC_OK != ch_cmd->ops->initiate_connection(ch_cmd)) { lrm_signoff(lrm); cl_log(LOG_WARNING, "lrm_signon: can not initiate connection"); return HA_FAIL; } /* construct the reg msg*/ if (NULL == (msg = create_lrm_reg_msg(app_name))) { lrm_signoff(lrm); cl_log(LOG_ERR,"lrm_signon: failed to create a register message"); return HA_FAIL; } /* send the msg*/ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { lrm_signoff(lrm); ha_msg_del(msg); LOG_FAIL_SEND_MSG(REGISTER, "ch_cmd"); return HA_FAIL; } /* parse the return msg*/ if (HA_OK != get_ret_from_ch(ch_cmd)) { ha_msg_del(msg); lrm_signoff(lrm); LOG_FAIL_receive_reply(REGISTER); return HA_FAIL; } /* create the callback ipc channel to lrmd*/ ch_cbk_attrs = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_insert(ch_cbk_attrs, path, callback_path); ch_cbk = ipc_channel_constructor(IPC_ANYTYPE,ch_cbk_attrs); g_hash_table_destroy(ch_cbk_attrs); if (NULL == ch_cbk) { ha_msg_del(msg); lrm_signoff(lrm); cl_log(LOG_ERR, "lrm_signon: failed to construct a callback " "channel to lrmd"); return HA_FAIL; } if (IPC_OK != ch_cbk->ops->initiate_connection(ch_cbk)) { ha_msg_del(msg); lrm_signoff(lrm); cl_log(LOG_ERR, "lrm_signon: failed to initiate the callback 
channel."); return HA_FAIL; } /* send the msg*/ if (HA_OK != msg2ipcchan(msg,ch_cbk)) { lrm_signoff(lrm); ha_msg_del(msg); LOG_FAIL_SEND_MSG(REGISTER, "ch_cbk"); return HA_FAIL; } ha_msg_del(msg); /* parse the return msg*/ if (HA_OK != get_ret_from_ch(ch_cbk)) { lrm_signoff(lrm); LOG_FAIL_receive_reply(REGISTER); return HA_FAIL; } /* ok, we sign on sucessfully now*/ is_signed_on = TRUE; return HA_OK; } static int lrm_signoff (ll_lrm_t* lrm) { /* close channels */ if (NULL != ch_cmd) { if (IPC_ISWCONN(ch_cmd)) { ch_cmd->ops->destroy(ch_cmd); } ch_cmd = NULL; } if (NULL != ch_cbk) { if (IPC_ISWCONN(ch_cbk)) { ch_cbk->ops->destroy(ch_cbk); } ch_cbk = NULL; } is_signed_on = FALSE; return HA_OK; } static int lrm_delete (ll_lrm_t* lrm) { /* check the parameter */ if (NULL == lrm) { cl_log(LOG_ERR,"lrm_delete: the parameter is a null pointer."); return HA_FAIL; } g_free(lrm); return HA_OK; } static int lrm_set_lrm_callback (ll_lrm_t* lrm, lrm_op_done_callback_t op_done_callback_func) { op_done_callback = op_done_callback_func; return HA_OK; } static GList* lrm_get_rsc_class_supported (ll_lrm_t* lrm) { struct ha_msg* msg; struct ha_msg* ret; GList* class_list = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_rsc_class_supported: ch_cmd is a null pointer."); return NULL; } /* create the get ra type message */ msg = create_lrm_msg(GETRSCCLASSES); if ( NULL == msg) { LOG_FAIL_create_lrm_msg(GETRSCCLASSES); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETRSCCLASSES, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return message */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETRSCCLASSES); return NULL; } /* get the return code of the message */ if (HA_OK != get_ret_from_msg(ret)) { LOG_GOT_FAIL_RET(LOG_WARNING, GETRSCCLASSES); ha_msg_del(ret); return NULL; } /* get the ra type list from message */ class_list = ha_msg_value_str_list(ret,F_LRM_RCLASS); ha_msg_del(ret); return class_list; } static GList* lrm_get_rsc_type_supported (ll_lrm_t* lrm, const char* rclass) { struct ha_msg* msg; struct ha_msg* ret; GList* type_list = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "%s(%d): ch_cmd is null." 
, __FUNCTION__, __LINE__); return NULL; } /* create the get ra type message */ msg = create_lrm_msg(GETRSCTYPES); if ( NULL == msg) { LOG_FAIL_create_lrm_msg(GETRSCTYPES); return NULL; } if ( HA_OK != ha_msg_add(msg, F_LRM_RCLASS, rclass)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETRSCTYPES, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return message */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETRSCTYPES); return NULL; } /* get the return code of the message */ if (HA_OK != get_ret_from_msg(ret)) { LOG_GOT_FAIL_RET(LOG_ERR, GETRSCTYPES); ha_msg_del(ret); return NULL; } /* get the ra type list from message */ type_list = ha_msg_value_str_list(ret,F_LRM_RTYPES); ha_msg_del(ret); return type_list; } static GList* lrm_get_rsc_provider_supported (ll_lrm_t* lrm, const char* class, const char* type) { struct ha_msg* msg; struct ha_msg* ret; GList* provider_list = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_rsc_provider_supported: ch_mod is null."); return NULL; } /* create the get ra providers message */ msg = create_lrm_msg(GETPROVIDERS); if ( NULL == msg) { LOG_FAIL_create_lrm_msg(GETPROVIDERS); return NULL; } if (HA_OK != ha_msg_add(msg, F_LRM_RCLASS, class) || HA_OK != ha_msg_add(msg, F_LRM_RTYPE, type)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETPROVIDERS, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return message */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETPROVIDERS); return NULL; } /* get the return code of the message */ if (HA_OK != get_ret_from_msg(ret)) { LOG_GOT_FAIL_RET(LOG_ERR, GETPROVIDERS); ha_msg_del(ret); return NULL; } /* get the ra provider list from message */ provider_list = ha_msg_value_str_list(ret,F_LRM_RPROVIDERS); ha_msg_del(ret); return provider_list; } /* * lrm_get_all_type_metadatas(): * The key of the hash table is in the format "type:provider" * The value of the hash table is the metadata. 
*/ static GHashTable* lrm_get_all_type_metadata (ll_lrm_t* lrm, const char* rclass) { GHashTable* metas = g_hash_table_new_full(g_str_hash, g_str_equal , g_free, g_free); GList* types = lrm_get_rsc_type_supported (lrm, rclass); GList* providers = NULL; GList* cur_type = NULL; GList* cur_provider = NULL; cur_type = g_list_first(types); while (cur_type != NULL) { const char* type; char key[MAXLENGTH]; type = (const char*) cur_type->data; providers = lrm_get_rsc_provider_supported(lrm, rclass, type); cur_provider = g_list_first(providers); while (cur_provider != NULL) { const char* meta; const char* provider; provider = (const char*) cur_provider->data; meta = lrm_get_rsc_type_metadata(lrm,rclass,type,provider); if (NULL == meta) { cur_provider = g_list_next(cur_provider); continue; } snprintf(key,MAXLENGTH, "%s:%s",type,provider); key[MAXLENGTH-1]='\0'; g_hash_table_insert(metas,g_strdup(key),g_strdup(meta)); cur_provider = g_list_next(cur_provider); } lrm_free_str_list(providers); cur_type=g_list_next(cur_type); } lrm_free_str_list(types); return metas; } static char* lrm_get_rsc_type_metadata (ll_lrm_t* lrm, const char* rclass, const char* rtype, const char* provider) { struct ha_msg* msg; struct ha_msg* ret; const char* tmp = NULL; char* metadata = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_rsc_type_metadata: ch_mod is null."); return NULL; } /* create the get ra type message */ msg = create_lrm_msg(GETRSCMETA); if (NULL == msg ) { LOG_FAIL_create_lrm_msg(GETRSCMETA); return NULL; } if (HA_OK != ha_msg_add(msg, F_LRM_RCLASS, rclass) || HA_OK != ha_msg_add(msg, F_LRM_RTYPE, rtype)){ ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } if( provider ) { if (HA_OK != ha_msg_add(msg, F_LRM_RPROVIDER, provider)) { LOG_BASIC_ERROR("ha_msg_add"); ha_msg_del(msg); return NULL; } } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETRSCMETA, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return message */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETRSCMETA); return NULL; } /* get the return code of the message */ if (HA_OK != get_ret_from_msg(ret)) { LOG_GOT_FAIL_RET(LOG_ERR, GETRSCMETA); ha_msg_del(ret); return NULL; } /* get the metadata from message */ tmp = cl_get_string(ret, F_LRM_METADATA); if (NULL!=tmp) { metadata = g_strdup(tmp); } ha_msg_del(ret); return metadata; } static GList* lrm_get_all_rscs (ll_lrm_t* lrm) { struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; GList* rid_list = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_all_rscs: ch_mod is null."); return NULL; } /* create the msg of get all resource */ msg = create_lrm_msg(GETALLRCSES); if ( NULL == msg) { LOG_FAIL_create_lrm_msg(GETALLRCSES); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETALLRCSES, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return msg */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETALLRCSES); return NULL; } /* get the return code of msg */ if (HA_OK != get_ret_from_msg(ret)) { LOG_GOT_FAIL_RET(LOG_ERR, GETALLRCSES); ha_msg_del(ret); return NULL; } /* get the rsc_id list from msg */ rid_list = ha_msg_value_str_list(ret,F_LRM_RID); ha_msg_del(ret); /* return the id list */ return rid_list; } static lrm_rsc_t* lrm_get_rsc (ll_lrm_t* lrm, const char* rsc_id) { 
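/*
 * Illustrative sketch (not from the original source): consuming the table
 * built by the function above (assumed to be exported as the
 * get_all_type_metadata member of lrm_ops).  The table is created with
 * g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free), so a
 * single g_hash_table_destroy() releases keys and values.
 *
 *      static void print_meta(gpointer key, gpointer value, gpointer unused)
 *      {
 *              cl_log(LOG_INFO, "%s: %u bytes of metadata",
 *                     (const char *)key,
 *                     (unsigned)strlen((const char *)value));
 *      }
 *
 *      GHashTable *metas = lrm->lrm_ops->get_all_type_metadata(lrm, "ocf");
 *      if (metas != NULL) {
 *              g_hash_table_foreach(metas, print_meta, NULL);
 *              g_hash_table_destroy(metas);
 *      }
 */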
struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; lrm_rsc_t* rsc = NULL; /* check whether the rsc_id is available */ if (strlen(rsc_id) >= RID_LEN) { cl_log(LOG_ERR, "lrm_get_rsc: rsc_id is too long."); return NULL; } /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_rsc: ch_mod is null."); return NULL; } /* create the msg of get resource */ msg = create_lrm_rsc_msg(rsc_id, GETRSC); if ( NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(GETRSC); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETRSC, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return msg from lrmd */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETRSC); return NULL; } /* get the return code of return message */ if (HA_OK != get_ret_from_msg(ret)) { ha_msg_del(ret); return NULL; } /* create a new resource structure */ rsc = g_new(lrm_rsc_t, 1); /* fill the field of resource with the data from msg */ rsc->id = g_strdup(ha_msg_value(ret, F_LRM_RID)); rsc->type = g_strdup(ha_msg_value(ret, F_LRM_RTYPE)); rsc->class = g_strdup(ha_msg_value(ret, F_LRM_RCLASS)); rsc->provider = g_strdup(ha_msg_value(ret, F_LRM_RPROVIDER)); rsc->params = ha_msg_value_str_table(ret,F_LRM_PARAM); rsc->ops = &rsc_ops_instance; ha_msg_del(ret); /* return the new resource */ return rsc; } static int lrm_fail_rsc (ll_lrm_t* lrm, const char* rsc_id, const int fail_rc , const char* fail_reason) { struct ha_msg* msg; /* check whether the rsc_id is available */ if (NULL == rsc_id || RID_LEN <= strlen(rsc_id)) { cl_log(LOG_ERR, "%s: wrong parameter rsc_id.", __FUNCTION__); return HA_FAIL; } /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "%s: ch_mod is null.", __FUNCTION__); return HA_FAIL; } /* create the message */ msg = create_lrm_rsc_msg(rsc_id,FAILRSC); if (NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(FAILRSC); return HA_FAIL; } if ((fail_reason && HA_OK != ha_msg_add(msg,F_LRM_FAIL_REASON,fail_reason)) || HA_OK != ha_msg_add_int(msg, F_LRM_ASYNCMON_RC, fail_rc) ) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return HA_FAIL; } /* send to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(FAILRSC, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); /* check the result */ if (HA_OK != get_ret_from_ch(ch_cmd)) { LOG_GOT_FAIL_RET(LOG_ERR, FAILRSC); return HA_FAIL; } return HA_OK; } static int lrm_set_lrmd_param(ll_lrm_t* lrm, const char* name, const char *value) { struct ha_msg* msg; if (!name || !value) { cl_log(LOG_ERR, "%s: no parameter name or value", __FUNCTION__); return HA_FAIL; } /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "%s: ch_mod is null.", __FUNCTION__); return HA_FAIL; } /* create the message */ msg = create_lrm_msg(SETLRMDPARAM); if (NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(SETLRMDPARAM); return HA_FAIL; } if (HA_OK != ha_msg_add(msg,F_LRM_LRMD_PARAM_NAME,name) || HA_OK != ha_msg_add(msg,F_LRM_LRMD_PARAM_VAL,value)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return HA_FAIL; } /* send to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(FAILRSC, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); /* check the result */ if (HA_OK != get_ret_from_ch(ch_cmd)) { LOG_GOT_FAIL_RET(LOG_ERR, FAILRSC); return HA_FAIL; } return HA_OK; } static char* lrm_get_lrmd_param (ll_lrm_t* lrm, const char *name) { struct ha_msg* msg = NULL; struct 
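/*
 * Illustrative sketch (not from the original source): fetching a resource
 * handle, assuming the lrm_ops member is named get_rsc like the handler
 * above.  The id, class, type, provider and params members are filled in
 * from the lrmd reply; the handle is released with lrm_free_rsc().
 *
 *      lrm_rsc_t *rsc = lrm->lrm_ops->get_rsc(lrm, "ip_alias_1");
 *
 *      if (rsc != NULL) {
 *              cl_log(LOG_INFO, "resource %s is %s::%s (provider %s)",
 *                     rsc->id, rsc->class, rsc->type,
 *                     rsc->provider ? rsc->provider : "none");
 *              lrm_free_rsc(rsc);
 *      }
 */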
ha_msg* ret = NULL; const char* value = NULL; char* v2; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_get_rsc: ch_mod is null."); return NULL; } /* create the msg of get resource */ msg = create_lrm_msg(GETLRMDPARAM); if ( NULL == msg) { LOG_FAIL_create_lrm_msg(GETLRMDPARAM); return NULL; } if (HA_OK != ha_msg_add(msg,F_LRM_LRMD_PARAM_NAME,name)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETLRMDPARAM, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return msg from lrmd */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETLRMDPARAM); return NULL; } /* get the return code of return message */ if (HA_OK != get_ret_from_msg(ret)) { ha_msg_del(ret); return NULL; } value = ha_msg_value(ret,F_LRM_LRMD_PARAM_VAL); if (!value) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_LRMD_PARAM_VAL, ret); ha_msg_del(ret); return NULL; } v2 = g_strdup(value); ha_msg_del(ret); return v2; } static int lrm_add_rsc (ll_lrm_t* lrm, const char* rsc_id, const char* class , const char* type, const char* provider, GHashTable* parameter) { struct ha_msg* msg; /* check whether the rsc_id is available */ if (NULL == rsc_id || RID_LEN <= strlen(rsc_id)) { cl_log(LOG_ERR, "lrm_add_rsc: wrong parameter rsc_id."); return HA_FAIL; } /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_add_rsc: ch_mod is null."); return HA_FAIL; } /* create the message of add resource */ msg = create_lrm_addrsc_msg(rsc_id, class, type, provider, parameter); if ( NULL == msg) { cl_log(LOG_ERR, "%s(%d): failed to create a ADDSRC message " "with function create_lrm_addrsc_msg" , __FUNCTION__, __LINE__); return HA_FAIL; } /* send to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(ADDRSC, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); /* check the result */ if (HA_OK != get_ret_from_ch(ch_cmd)) { LOG_GOT_FAIL_RET(LOG_ERR, ADDRSC); return HA_FAIL; } return HA_OK; } static int lrm_delete_rsc (ll_lrm_t* lrm, const char* rsc_id) { struct ha_msg* msg = NULL; /* check whether the rsc_id is available */ if (NULL == rsc_id || RID_LEN <= strlen(rsc_id)) { cl_log(LOG_ERR, "lrm_delete_rsc: wrong parameter rsc_id."); return HA_FAIL; } /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "lrm_delete_rsc: ch_mod is null."); return HA_FAIL; } /* create the msg of del resource */ msg = create_lrm_rsc_msg(rsc_id, DELRSC); if ( NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(DELRSC); return HA_FAIL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(DELRSC, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); /* check the response of the msg */ if (HA_OK != get_ret_from_ch(ch_cmd)) { LOG_GOT_FAIL_RET(LOG_ERR, DELRSC); return HA_FAIL; } return HA_OK; } static IPC_Channel* lrm_ipcchan (ll_lrm_t* lrm) { if (NULL == ch_cbk) { cl_log(LOG_ERR, "lrm_inputfd: callback channel is null."); return NULL; } return ch_cbk; } static gboolean lrm_msgready (ll_lrm_t* lrm) { if (NULL == ch_cbk) { cl_log(LOG_ERR, "lrm_msgready: callback channel is null."); return FALSE; } return ch_cbk->ops->is_message_pending(ch_cbk); } static int lrm_rcvmsg (ll_lrm_t* lrm, int blocking) { struct ha_msg* msg = NULL; lrm_op_t* op = NULL; int msg_count = 0; /* if it is not blocking mode and no message in the channel, return */ if 
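/*
 * Illustrative sketch (not from the original source): registering a resource
 * with one instance parameter, assuming the lrm_ops member is named add_rsc
 * like the handler above.  Keys and values are g_strdup()'d so the table can
 * be released with lrm_free_str_table(), which g_free()s both; the resource
 * id must be shorter than RID_LEN.
 *
 *      GHashTable *params = g_hash_table_new(g_str_hash, g_str_equal);
 *
 *      g_hash_table_insert(params, g_strdup("ip"), g_strdup("10.0.0.10"));
 *      if (lrm->lrm_ops->add_rsc(lrm, "ip_alias_1", "ocf", "IPaddr",
 *                                "heartbeat", params) != HA_OK) {
 *              cl_log(LOG_ERR, "add_rsc failed");
 *      }
 *      lrm_free_str_table(params);
 */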
((!lrm_msgready(lrm)) && (!blocking)) { cl_log(LOG_DEBUG, "lrm_rcvmsg: no message and non-block."); return msg_count; } /* wait until message ready */ if (!lrm_msgready(lrm)) { ch_cbk->ops->waitin(ch_cbk); } while (lrm_msgready(lrm)) { if (ch_cbk->ch_status == IPC_DISCONNECT) { return msg_count; } /* get the message */ msg = msgfromIPC(ch_cbk, MSG_ALLOWINTR); if (msg == NULL) { cl_log(LOG_WARNING, "%s(%d): receive a null message with msgfromIPC." , __FUNCTION__, __LINE__); return msg_count; } msg_count++; op = msg_to_op(msg); if (NULL!=op && NULL!=op_done_callback) { (*op_done_callback)(op); } free_op(op); ha_msg_del(msg); } return msg_count; } /* following are the functions for rsc_ops */ static int rsc_perform_op (lrm_rsc_t* rsc, lrm_op_t* op) { int rc = 0; struct ha_msg* msg = NULL; char* rsc_id; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd || NULL == rsc || NULL == rsc->id || NULL == op || NULL == op->op_type) { cl_log(LOG_ERR, "rsc_perform_op: wrong parameters."); return HA_FAIL; } /* create the msg of perform op */ rsc_id = op->rsc_id; op->rsc_id = rsc->id; msg = op_to_msg(op); op->rsc_id = rsc_id; if ( NULL == msg) { cl_log(LOG_ERR, "rsc_perform_op: failed to create a message " "with function op_to_msg"); return HA_FAIL; } /* send it to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(PERFORMOP, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); /* check return code, the return code is the call_id of the op */ rc = get_ret_from_ch(ch_cmd); return rc; } static int rsc_cancel_op (lrm_rsc_t* rsc, int call_id) { int rc; struct ha_msg* msg = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "rsc_cancel_op: ch_mod is null."); return HA_FAIL; } /* check parameter */ if (NULL == rsc) { cl_log(LOG_ERR, "rsc_cancel_op: parameter rsc is null."); return HA_FAIL; } /* create the msg of flush ops */ msg = create_lrm_rsc_msg(rsc->id,CANCELOP); if (NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(CANCELOP); return HA_FAIL; } if (HA_OK != ha_msg_add_int(msg, F_LRM_CALLID, call_id)) { LOG_BASIC_ERROR("ha_msg_add_int"); ha_msg_del(msg); return HA_FAIL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(CANCELOP, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); rc = get_ret_from_ch(ch_cmd); return rc; } static int rsc_flush_ops (lrm_rsc_t* rsc) { int rc; struct ha_msg* msg = NULL; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "rsc_flush_ops: ch_mod is null."); return HA_FAIL; } /* check parameter */ if (NULL == rsc) { cl_log(LOG_ERR, "rsc_flush_ops: parameter rsc is null."); return HA_FAIL; } /* create the msg of flush ops */ msg = create_lrm_rsc_msg(rsc->id,FLUSHOPS); if ( NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(CANCELOP); return HA_FAIL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(FLUSHOPS, "ch_cmd"); return HA_FAIL; } ha_msg_del(msg); rc = get_ret_from_ch(ch_cmd); return rc>0?HA_OK:HA_FAIL; } static gint compare_call_id(gconstpointer a, gconstpointer b) { const lrm_op_t* opa = (const lrm_op_t*)a; const lrm_op_t* opb = (const lrm_op_t*)b; return opa->call_id - opb->call_id; } static GList* rsc_get_cur_state (lrm_rsc_t* rsc, state_flag_t* cur_state) { GList* op_list = NULL, * tmplist = NULL; struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; struct ha_msg* op_msg = NULL; lrm_op_t* op = NULL; int state; int op_count, i; /* check whether the channel 
to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "rsc_get_cur_state: ch_mod is null."); return NULL; } /* check paramter */ if (NULL == rsc) { cl_log(LOG_ERR, "rsc_get_cur_state: parameter rsc is null."); return NULL; } /* create the msg of get current state of resource */ msg = create_lrm_rsc_msg(rsc->id,GETRSCSTATE); if ( NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(GETRSCSTATE); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETRSCSTATE, "ch_cmd"); return NULL; } ha_msg_del(msg); /* get the return msg */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETRSCSTATE); return NULL; } /* get the state of the resource from the message */ if (HA_OK != ha_msg_value_int(ret, F_LRM_STATE, &state)) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_STATE, ret); ha_msg_del(ret); return NULL; } *cur_state = (state_flag_t)state; /* the first msg includes the count of pending ops. */ if (HA_OK != ha_msg_value_int(ret, F_LRM_OPCNT, &op_count)) { LOG_FAIL_GET_MSG_FIELD(LOG_WARNING, F_LRM_OPCNT, ret); ha_msg_del(ret); return NULL; } ha_msg_del(ret); for (i = 0; i < op_count; i++) { /* one msg for one op */ op_msg = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == op_msg) { cl_log(LOG_WARNING, "%s(%d): failed to receive a " "(pending operation) message from lrmd." , __FUNCTION__, __LINE__); continue; } op = msg_to_op(op_msg); /* add msg to the return list */ if (NULL != op) { op_list = g_list_append(op_list, op); } else { cl_log(LOG_WARNING, "%s(%d): failed to make a operation " "from a message with function msg_to_op" , __FUNCTION__, __LINE__); } ha_msg_del(op_msg); } op_list = g_list_sort(op_list, compare_call_id); /* Delete the duplicate op for call_id */ #if 0 cl_log(LOG_WARNING, "Before uniquing"); tmplist = g_list_first(op_list); while (tmplist != NULL) { cl_log(LOG_WARNING, "call_id=%d", ((lrm_op_t*)(tmplist->data))->call_id); tmplist = g_list_next(tmplist); } #endif tmplist = g_list_first(op_list); while (tmplist != NULL) { if (NULL != g_list_previous(tmplist)) { if (((lrm_op_t*)(g_list_previous(tmplist)->data))->call_id == ((lrm_op_t*)(tmplist->data))->call_id) { op_list = g_list_remove_link (op_list, tmplist); free_op((lrm_op_t *)tmplist->data); g_list_free_1(tmplist); tmplist = g_list_first(op_list); } } tmplist = g_list_next(tmplist); } #if 0 cl_log(LOG_WARNING, "After uniquing"); while (tmplist != NULL) { cl_log(LOG_WARNING, "call_id=%d", ((lrm_op_t*)(tmplist->data))->call_id); tmplist = g_list_next(tmplist); } #endif return op_list; } static lrm_op_t* rsc_get_last_result (lrm_rsc_t* rsc, const char* op_type) { struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; lrm_op_t* op = NULL; int opcount = 0; /* check whether the channel to lrmd is available */ if (NULL == ch_cmd) { cl_log(LOG_ERR, "rsc_get_last_result: ch_mod is null."); return NULL; } /* check parameter */ if (NULL == rsc) { cl_log(LOG_ERR, "rsc_get_last_result: parameter rsc is null."); return NULL; } /* create the msg of get last op */ msg = create_lrm_rsc_msg(rsc->id,GETLASTOP); if (NULL == msg) { LOG_FAIL_create_lrm_rsc_msg(GETLASTOP); return NULL; } if (HA_OK != ha_msg_add(msg, F_LRM_RID, rsc->id)) { LOG_BASIC_ERROR("ha_msg_add"); ha_msg_del(msg); return NULL; } if (HA_OK != ha_msg_add(msg, F_LRM_OP, op_type)) { LOG_BASIC_ERROR("ha_msg_add"); ha_msg_del(msg); return NULL; } /* send the msg to lrmd */ if (HA_OK != msg2ipcchan(msg,ch_cmd)) { ha_msg_del(msg); LOG_FAIL_SEND_MSG(GETLASTOP, "ch_cmd"); return NULL; } /* get the 
return msg */ ret = msgfromIPC(ch_cmd, MSG_ALLOWINTR); if (NULL == ret) { LOG_FAIL_receive_reply(GETLASTOP); ha_msg_del(msg); return NULL; } if (HA_OK != ha_msg_value_int(ret,F_LRM_OPCNT, &opcount)) { op = NULL; } else if ( 1 == opcount ) { op = msg_to_op(ret); } ha_msg_del(msg); ha_msg_del(ret); return op; } /* * following are the implements of the utility functions */ lrm_op_t* lrm_op_new(void) { lrm_op_t* op; op = g_new0(lrm_op_t, 1); op->op_status = LRM_OP_PENDING; return op; } static lrm_op_t* msg_to_op(struct ha_msg* msg) { lrm_op_t* op; const char* op_type; const char* app_name; const char* rsc_id; const char* fail_reason; const char* output; const void* user_data; op = lrm_op_new(); /* op->timeout, op->interval, op->target_rc, op->call_id*/ if (HA_OK != ha_msg_value_int(msg,F_LRM_TIMEOUT, &op->timeout) || HA_OK != ha_msg_value_int(msg,F_LRM_INTERVAL, &op->interval) || HA_OK != ha_msg_value_int(msg,F_LRM_TARGETRC, &op->target_rc) || HA_OK != ha_msg_value_int(msg,F_LRM_DELAY, &op->start_delay) || HA_OK != ha_msg_value_int(msg,F_LRM_CALLID, &op->call_id)) { LOG_BASIC_ERROR("ha_msg_value_int"); free_op(op); return NULL; } /* op->op_status */ if (HA_OK != ha_msg_value_int(msg, F_LRM_OPSTATUS, (int*)&op->op_status)) { LOG_FAIL_GET_MSG_FIELD(LOG_WARNING, F_LRM_OPSTATUS, msg); op->op_status = LRM_OP_PENDING; } /* if it finished successfully */ if (LRM_OP_DONE == op->op_status ) { /* op->rc */ if (HA_OK != ha_msg_value_int(msg, F_LRM_RC, &op->rc)) { free_op(op); LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_RC, msg); return NULL; } /* op->output */ output = cl_get_string(msg, F_LRM_DATA); if (NULL != output){ op->output = g_strdup(output); } else { op->output = NULL; } } else if(op->op_status == LRM_OP_PENDING) { op->rc = EXECRA_STATUS_UNKNOWN; } else { op->rc = EXECRA_EXEC_UNKNOWN_ERROR; } /* op->app_name */ app_name = ha_msg_value(msg, F_LRM_APP); if (NULL == app_name) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_APP, msg); free_op(op); return NULL; } op->app_name = g_strdup(app_name); /* op->op_type */ op_type = ha_msg_value(msg, F_LRM_OP); if (NULL == op_type) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_OP, msg); free_op(op); return NULL; } op->op_type = g_strdup(op_type); /* op->rsc_id */ rsc_id = ha_msg_value(msg, F_LRM_RID); if (NULL == rsc_id) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_RID, msg); free_op(op); return NULL; } op->rsc_id = g_strdup(rsc_id); /* op->fail_reason present only on async failures */ fail_reason = ha_msg_value(msg, F_LRM_FAIL_REASON); if (fail_reason) { op->fail_reason = g_strdup(fail_reason); } /* op->user_data */ user_data = cl_get_string(msg, F_LRM_USERDATA); if (NULL != user_data) { op->user_data = g_strdup(user_data); } /* time_stamps */ if (ha_msg_value_ul(msg, F_LRM_T_RUN, &op->t_run) != HA_OK || ha_msg_value_ul(msg, F_LRM_T_RCCHANGE, &op->t_rcchange) != HA_OK || ha_msg_value_ul(msg, F_LRM_EXEC_TIME, &op->exec_time) != HA_OK || ha_msg_value_ul(msg, F_LRM_QUEUE_TIME, &op->queue_time) != HA_OK) { /* cl_log(LOG_WARNING , "%s:%d: failed to get the timing information" , __FUNCTION__, __LINE__); */ } /* op->params */ op->params = ha_msg_value_str_table(msg, F_LRM_PARAM); return op; } static struct ha_msg* op_to_msg (lrm_op_t* op) { struct ha_msg* msg = ha_msg_new(15); if (!msg) { LOG_BASIC_ERROR("ha_msg_new"); return NULL; } if (HA_OK != ha_msg_add(msg, F_LRM_TYPE, PERFORMOP) || HA_OK != ha_msg_add(msg, F_LRM_RID, op->rsc_id) || HA_OK != ha_msg_add(msg, F_LRM_OP, op->op_type) || HA_OK != ha_msg_add_int(msg, F_LRM_TIMEOUT, op->timeout) || HA_OK != ha_msg_add_int(msg, 
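/*
 * Illustrative sketch (not from the original source): building an operation
 * with lrm_op_new() and handing it to lrmd, assuming the rsc_ops member is
 * named perform_op like the handler above and that a positive call id is
 * returned on success.  String members must be g_strdup()'d because
 * lrm_free_op() releases them with g_free(); the caller keeps ownership of
 * the op after perform_op() returns.
 *
 *      lrm_op_t *op = lrm_op_new();    zeroed, op_status = LRM_OP_PENDING
 *      int       call_id;
 *
 *      op->op_type   = g_strdup("monitor");
 *      op->timeout   = 20000;
 *      op->interval  = 10000;
 *      op->target_rc = EXECRA_OK;
 *      call_id = rsc->ops->perform_op(rsc, op);
 *      if (call_id <= 0) {
 *              cl_log(LOG_ERR, "perform_op failed");
 *      }
 *      lrm_free_op(op);
 */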
F_LRM_INTERVAL, op->interval) || HA_OK != ha_msg_add_int(msg, F_LRM_DELAY, op->start_delay) || HA_OK != ha_msg_add_int(msg, F_LRM_COPYPARAMS, op->copyparams) || HA_OK != ha_msg_add_ul(msg, F_LRM_T_RUN,op->t_run) || HA_OK != ha_msg_add_ul(msg, F_LRM_T_RCCHANGE, op->t_rcchange) || HA_OK != ha_msg_add_ul(msg, F_LRM_EXEC_TIME, op->exec_time) || HA_OK != ha_msg_add_ul(msg, F_LRM_QUEUE_TIME, op->queue_time) || HA_OK != ha_msg_add_int(msg, F_LRM_TARGETRC, op->target_rc) || ( op->app_name && (HA_OK != ha_msg_add(msg, F_LRM_APP, op->app_name))) || ( op->user_data && (HA_OK != ha_msg_add(msg,F_LRM_USERDATA,op->user_data))) || ( op->params && (HA_OK != ha_msg_add_str_table(msg,F_LRM_PARAM,op->params)))) { LOG_BASIC_ERROR("op_to_msg conversion failed"); ha_msg_del(msg); return NULL; } return msg; } static int get_ret_from_ch(IPC_Channel* ch) { int ret; struct ha_msg* msg; msg = msgfromIPC(ch, MSG_ALLOWINTR); if (NULL == msg) { cl_log(LOG_ERR , "%s(%d): failed to receive message with function msgfromIPC" , __FUNCTION__, __LINE__); return HA_FAIL; } if (HA_OK != ha_msg_value_int(msg, F_LRM_RET, &ret)) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_RET, msg); ha_msg_del(msg); return HA_FAIL; } ha_msg_del(msg); return ret; } static int get_ret_from_msg(struct ha_msg* msg) { int ret; if (NULL == msg) { cl_log(LOG_ERR, "%s(%d): the parameter is a NULL pointer." , __FUNCTION__, __LINE__); return HA_FAIL; } if (HA_OK != ha_msg_value_int(msg, F_LRM_RET, &ret)) { LOG_FAIL_GET_MSG_FIELD(LOG_ERR, F_LRM_RET, msg); return HA_FAIL; } return ret; } static void free_op (lrm_op_t* op) { if (NULL == op) { return; } if (NULL != op->op_type) { g_free(op->op_type); } if (NULL != op->output) { g_free(op->output); } if (NULL != op->rsc_id) { g_free(op->rsc_id); } if (NULL != op->app_name) { g_free(op->app_name); } if (NULL != op->user_data) { g_free(op->user_data); } if (NULL != op->params) { free_str_table(op->params); } g_free(op); } void lrm_free_op(lrm_op_t* op) { free_op(op); } void lrm_free_rsc(lrm_rsc_t* rsc) { if (NULL == rsc) { return; } if (NULL != rsc->id) { g_free(rsc->id); } if (NULL != rsc->type) { g_free(rsc->type); } if (NULL != rsc->class) { g_free(rsc->class); } if (NULL != rsc->provider) { g_free(rsc->provider); } if (NULL != rsc->params) { free_str_table(rsc->params); } g_free(rsc); } void lrm_free_str_list(GList* list) { GList* item; if (NULL == list) { return; } item = g_list_first(list); while (NULL != item) { if (NULL != item->data) { g_free(item->data); } list = g_list_delete_link(list, item); item = g_list_first(list); } } void lrm_free_op_list(GList* list) { GList* item; if (NULL == list) { return; } item = g_list_first(list); while (NULL != item) { if (NULL != item->data) { free_op((lrm_op_t*)item->data); } list = g_list_delete_link(list, item); item = g_list_first(list); } } void lrm_free_str_table(GHashTable* table) { if (NULL != table) { free_str_table(table); } } const char * execra_code2string(uniform_ret_execra_t code) { switch(code) { case EXECRA_EXEC_UNKNOWN_ERROR: return "unknown exec error"; case EXECRA_NO_RA: return "no RA"; case EXECRA_OK: return "ok"; case EXECRA_UNKNOWN_ERROR: return "unknown error"; case EXECRA_INVALID_PARAM: return "invalid parameter"; case EXECRA_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case EXECRA_INSUFFICIENT_PRIV: return "insufficient privileges"; case EXECRA_NOT_INSTALLED: return "not installed"; case EXECRA_NOT_CONFIGURED: return "not configured"; case EXECRA_NOT_RUNNING: return "not running"; /* For status command only */ case EXECRA_RUNNING_MASTER: 
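/*
 * Illustrative sketch (not from the original source): a completion callback
 * that reports results with execra_code2string() below.  It assumes the
 * lrm_ops member used to register it is named set_lrm_callback, mirroring
 * lrm_set_lrm_callback above; the op fields shown are the ones unpacked by
 * msg_to_op(), and op->rc is only meaningful once op_status is LRM_OP_DONE.
 *
 *      static void on_op_done(lrm_op_t *op)
 *      {
 *              cl_log(LOG_INFO, "%s: %s on %s finished: %s",
 *                     op->app_name, op->op_type, op->rsc_id,
 *                     execra_code2string((uniform_ret_execra_t)op->rc));
 *      }
 *
 *      lrm->lrm_ops->set_lrm_callback(lrm, on_op_done);
 *      ... then drive delivery with the msgready/rcvmsg entries or a mainloop ...
 */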
return "master"; case EXECRA_FAILED_MASTER: return "master (failed)"; case EXECRA_RA_DEAMON_DEAD1: return "status: deamon dead"; case EXECRA_RA_DEAMON_DEAD2: return "status: deamon dead"; case EXECRA_RA_DEAMON_STOPPED: return "status: deamon stopped"; case EXECRA_STATUS_UNKNOWN: return "status: unknown"; default: break; } return ""; } Heartbeat-3-0-7e3a82377fa8/lib/lrm/lrm_msg.c0000644000000000000000000001135711576626513020352 0ustar00usergroup00000000000000/* * Message Functions For Local Resource Manager * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * By Huang Zhen 2004/2/13 * */ #include #include #include #include #include #include #include #include #include #define LOG_BASIC_ERROR(apiname) \ cl_log(LOG_ERR, "%s(%d): %s failed.", __FUNCTION__, __LINE__, apiname) const lrm_op_t lrm_zero_op; /* Default initialized to zeros */ static void copy_pair(gpointer key, gpointer value, gpointer user_data) { GHashTable* taget_table = (GHashTable*)user_data; g_hash_table_insert(taget_table, g_strdup(key), g_strdup(value)); } GHashTable* copy_str_table(GHashTable* src_table) { GHashTable* target_table = NULL; if ( NULL == src_table) { return NULL; } target_table = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_foreach(src_table, copy_pair, target_table); return target_table; } static void merge_pair(gpointer key, gpointer value, gpointer user_data) { GHashTable *merged = (GHashTable*)user_data; if (g_hash_table_lookup(merged, key)) { return; } g_hash_table_insert(merged, g_strdup(key), g_strdup(value)); } GHashTable* merge_str_tables(GHashTable* old, GHashTable* new) { GHashTable* merged = NULL; if ( NULL == old ) { return copy_str_table(new); } if ( NULL == new ) { return copy_str_table(old); } merged = copy_str_table(new); g_hash_table_foreach(old, merge_pair, merged); return merged; } static gboolean free_pair(gpointer key, gpointer value, gpointer user_data) { g_free(key); g_free(value); return TRUE; } void free_str_table(GHashTable* hash_table) { g_hash_table_foreach_remove(hash_table, free_pair, NULL); g_hash_table_destroy(hash_table); } struct ha_msg* create_lrm_msg (const char* msg) { struct ha_msg* ret; if ((NULL == msg) || (0 == strlen(msg))) { return NULL; } ret = ha_msg_new(1); if (HA_OK != ha_msg_add(ret, F_LRM_TYPE, msg)) { ha_msg_del(ret); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } return ret; } struct ha_msg* create_lrm_reg_msg(const char* app_name) { struct ha_msg* ret; if ((NULL == app_name) || (0 == strlen(app_name))) { return NULL; } ret = ha_msg_new(5); if(HA_OK != ha_msg_add(ret, F_LRM_TYPE, REGISTER) || HA_OK != ha_msg_add(ret, F_LRM_APP, app_name) || HA_OK != ha_msg_add_int(ret, F_LRM_PID, getpid()) || HA_OK != ha_msg_add_int(ret, F_LRM_GID, getegid()) || HA_OK != ha_msg_add_int(ret, F_LRM_UID, getuid())) { ha_msg_del(ret); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } return ret; } struct 
ha_msg* create_lrm_addrsc_msg(const char* rid, const char* class, const char* type, const char* provider, GHashTable* params) { struct ha_msg* msg; if (NULL==rid||NULL==class||NULL==type) { return NULL; } msg = ha_msg_new(5); if(HA_OK != ha_msg_add(msg, F_LRM_TYPE, ADDRSC) || HA_OK != ha_msg_add(msg, F_LRM_RID, rid) || HA_OK != ha_msg_add(msg, F_LRM_RCLASS, class) || HA_OK != ha_msg_add(msg, F_LRM_RTYPE, type)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } if( provider ) { if (HA_OK != ha_msg_add(msg, F_LRM_RPROVIDER, provider)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } } if ( params ) { if (HA_OK != ha_msg_add_str_table(msg,F_LRM_PARAM,params)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } } return msg; } struct ha_msg* create_lrm_rsc_msg(const char* rid, const char* msg) { struct ha_msg* ret; if ((NULL == rid) ||(NULL == msg) || (0 == strlen(msg))) { return NULL; } ret = ha_msg_new(2); if(HA_OK != ha_msg_add(ret, F_LRM_TYPE, msg) || HA_OK != ha_msg_add(ret, F_LRM_RID, rid)) { ha_msg_del(ret); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } return ret; } struct ha_msg* create_lrm_ret(int ret, int fields) { struct ha_msg* msg = ha_msg_new(fields); if(HA_OK != ha_msg_add(msg, F_LRM_TYPE, RETURN) || HA_OK != ha_msg_add_int(msg, F_LRM_RET, ret)) { ha_msg_del(msg); LOG_BASIC_ERROR("ha_msg_add"); return NULL; } return msg; } Heartbeat-3-0-7e3a82377fa8/lib/lrm/racommon.c0000644000000000000000000000744111576626513020524 0ustar00usergroup00000000000000/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * File: raexecocf.c * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines * * This code implements the Resource Agent Plugin Module for LSB style. * It's a part of Local Resource Manager. Currently it's used by lrmd only. */ #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include #include void get_ra_pathname(const char* class_path, const char* type, const char* provider, char pathname[]) { char* type_dup; char* base_name; type_dup = g_strndup(type, RA_MAX_NAME_LENGTH); if (type_dup == NULL) { cl_log(LOG_ERR, "No enough memory to allocate."); pathname[0] = '\0'; return; } base_name = basename(type_dup); if ( strncmp(type, base_name, RA_MAX_NAME_LENGTH) == 0 ) { /*the type does not include path*/ if (provider) { snprintf(pathname, RA_MAX_NAME_LENGTH, "%s/%s/%s", class_path, provider, type); }else{ snprintf(pathname, RA_MAX_NAME_LENGTH, "%s/%s", class_path,type); } }else{ /*the type includes path, just copy it to pathname*/ g_strlcpy(pathname, type, RA_MAX_NAME_LENGTH); } g_free(type_dup); } /* * Description: Filter a file. * Return Value: * TRUE: the file is qualified. * FALSE: the file is unqualified. * Notes: A qualifed file is a regular file with execute bits * which does not start with '.' 
*/ gboolean filtered(char * file_name) { struct stat buf; char *s; if ( stat(file_name, &buf) != 0 ) { return FALSE; } if ( ((s = strrchr(file_name,'/')) && *(s+1) == '.') || *file_name == '.' ) { return FALSE; } if ( S_ISREG(buf.st_mode) && ( ( buf.st_mode & S_IXUSR ) || ( buf.st_mode & S_IXGRP ) || ( buf.st_mode & S_IXOTH ) ) ) { return TRUE; } return FALSE; } int get_runnable_list(const char* class_path, GList ** rsc_info) { struct dirent **namelist; int file_num; if ( rsc_info == NULL ) { cl_log(LOG_ERR, "Parameter error: get_resource_list"); return -2; } if ( *rsc_info != NULL ) { cl_log(LOG_ERR, "Parameter error: get_resource_list."\ "will cause memory leak."); *rsc_info = NULL; } file_num = scandir(class_path, &namelist, NULL, alphasort); if (file_num < 0) { cl_log(LOG_ERR, "scandir failed in RA plugin"); return -2; } else{ while (file_num--) { char tmp_buffer[FILENAME_MAX+1]; tmp_buffer[0] = '\0'; tmp_buffer[FILENAME_MAX] = '\0'; snprintf(tmp_buffer, FILENAME_MAX, "%s/%s", class_path, namelist[file_num]->d_name ); if ( filtered(tmp_buffer) == TRUE ) { *rsc_info = g_list_append(*rsc_info, g_strdup(namelist[file_num]->d_name)); } free(namelist[file_num]); } free(namelist); } return g_list_length(*rsc_info); } void closefiles(void) { int fd; /* close all descriptors except stdin/out/err and channels to logd */ for (fd = getdtablesize(); fd > STDERR_FILENO; fd--) { if (!cl_log_is_logd_fd(fd)) close(fd); } } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBauth/Makefile.am0000644000000000000000000000275111576626513022642 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls AM_CFLAGS = @CFLAGS@ ## libraries halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/HBauth plugin_LTLIBRARIES = md5.la crc.la sha1.la md5_la_SOURCES = md5.c md5_la_LDFLAGS = -export-dynamic -module -avoid-version md5_la_LIBADD = -lplumb crc_la_SOURCES = crc.c crc_la_LDFLAGS = -export-dynamic -module -avoid-version sha1_la_SOURCES = sha1.c sha1_la_LDFLAGS = -export-dynamic -module -avoid-version Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBauth/crc.c0000644000000000000000000001267111576626513021523 0ustar00usergroup00000000000000#include #include #include #include #define PIL_PLUGINTYPE HB_AUTH_TYPE #define PIL_PLUGIN crc #define PIL_PLUGINTYPE_S "HBauth" #define PIL_PLUGIN_S "crc" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include static int crc_auth_calc(const struct HBauth_info * , const void* text, size_t text_len, char * result, int resultlen); static int crc_auth_needskey (void); static struct HBAuthOps crcOps = { crc_auth_calc , crc_auth_needskey }; static unsigned long const crctab[256] = { 0x0ul, 0x04C11DB7ul, 0x09823B6Eul, 0x0D4326D9ul, 0x130476DCul, 0x17C56B6Bul, 0x1A864DB2ul, 0x1E475005ul, 0x2608EDB8ul, 0x22C9F00Ful, 0x2F8AD6D6ul, 0x2B4BCB61ul, 0x350C9B64ul, 0x31CD86D3ul, 0x3C8EA00Aul, 0x384FBDBDul, 0x4C11DB70ul, 0x48D0C6C7ul, 0x4593E01Eul, 0x4152FDA9ul, 0x5F15ADACul, 0x5BD4B01Bul, 0x569796C2ul, 0x52568B75ul, 0x6A1936C8ul, 0x6ED82B7Ful, 0x639B0DA6ul, 0x675A1011ul, 0x791D4014ul, 0x7DDC5DA3ul, 0x709F7B7Aul, 0x745E66CDul, 0x9823B6E0ul, 0x9CE2AB57ul, 0x91A18D8Eul, 0x95609039ul, 0x8B27C03Cul, 0x8FE6DD8Bul, 0x82A5FB52ul, 0x8664E6E5ul, 0xBE2B5B58ul, 0xBAEA46EFul, 0xB7A96036ul, 0xB3687D81ul, 0xAD2F2D84ul, 0xA9EE3033ul, 0xA4AD16EAul, 0xA06C0B5Dul, 0xD4326D90ul, 0xD0F37027ul, 0xDDB056FEul, 0xD9714B49ul, 0xC7361B4Cul, 0xC3F706FBul, 0xCEB42022ul, 0xCA753D95ul, 0xF23A8028ul, 0xF6FB9D9Ful, 0xFBB8BB46ul, 0xFF79A6F1ul, 0xE13EF6F4ul, 0xE5FFEB43ul, 0xE8BCCD9Aul, 0xEC7DD02Dul, 0x34867077ul, 0x30476DC0ul, 0x3D044B19ul, 0x39C556AEul, 0x278206ABul, 0x23431B1Cul, 0x2E003DC5ul, 0x2AC12072ul, 0x128E9DCFul, 0x164F8078ul, 0x1B0CA6A1ul, 0x1FCDBB16ul, 0x018AEB13ul, 0x054BF6A4ul, 0x0808D07Dul, 0x0CC9CDCAul, 0x7897AB07ul, 0x7C56B6B0ul, 0x71159069ul, 0x75D48DDEul, 0x6B93DDDBul, 0x6F52C06Cul, 0x6211E6B5ul, 0x66D0FB02ul, 0x5E9F46BFul, 0x5A5E5B08ul, 0x571D7DD1ul, 0x53DC6066ul, 0x4D9B3063ul, 0x495A2DD4ul, 0x44190B0Dul, 0x40D816BAul, 0xACA5C697ul, 0xA864DB20ul, 0xA527FDF9ul, 0xA1E6E04Eul, 0xBFA1B04Bul, 0xBB60ADFCul, 0xB6238B25ul, 0xB2E29692ul, 0x8AAD2B2Ful, 0x8E6C3698ul, 0x832F1041ul, 0x87EE0DF6ul, 0x99A95DF3ul, 0x9D684044ul, 0x902B669Dul, 0x94EA7B2Aul, 0xE0B41DE7ul, 0xE4750050ul, 0xE9362689ul, 0xEDF73B3Eul, 0xF3B06B3Bul, 0xF771768Cul, 0xFA325055ul, 0xFEF34DE2ul, 0xC6BCF05Ful, 0xC27DEDE8ul, 0xCF3ECB31ul, 0xCBFFD686ul, 0xD5B88683ul, 0xD1799B34ul, 0xDC3ABDEDul, 0xD8FBA05Aul, 0x690CE0EEul, 0x6DCDFD59ul, 0x608EDB80ul, 0x644FC637ul, 0x7A089632ul, 0x7EC98B85ul, 0x738AAD5Cul, 0x774BB0EBul, 0x4F040D56ul, 0x4BC510E1ul, 0x46863638ul, 0x42472B8Ful, 0x5C007B8Aul, 0x58C1663Dul, 0x558240E4ul, 0x51435D53ul, 0x251D3B9Eul, 0x21DC2629ul, 0x2C9F00F0ul, 0x285E1D47ul, 0x36194D42ul, 0x32D850F5ul, 0x3F9B762Cul, 0x3B5A6B9Bul, 0x0315D626ul, 0x07D4CB91ul, 0x0A97ED48ul, 0x0E56F0FFul, 0x1011A0FAul, 0x14D0BD4Dul, 0x19939B94ul, 0x1D528623ul, 
0xF12F560Eul, 0xF5EE4BB9ul, 0xF8AD6D60ul, 0xFC6C70D7ul, 0xE22B20D2ul, 0xE6EA3D65ul, 0xEBA91BBCul, 0xEF68060Bul, 0xD727BBB6ul, 0xD3E6A601ul, 0xDEA580D8ul, 0xDA649D6Ful, 0xC423CD6Aul, 0xC0E2D0DDul, 0xCDA1F604ul, 0xC960EBB3ul, 0xBD3E8D7Eul, 0xB9FF90C9ul, 0xB4BCB610ul, 0xB07DABA7ul, 0xAE3AFBA2ul, 0xAAFBE615ul, 0xA7B8C0CCul, 0xA379DD7Bul, 0x9B3660C6ul, 0x9FF77D71ul, 0x92B45BA8ul, 0x9675461Ful, 0x8832161Aul, 0x8CF30BADul, 0x81B02D74ul, 0x857130C3ul, 0x5D8A9099ul, 0x594B8D2Eul, 0x5408ABF7ul, 0x50C9B640ul, 0x4E8EE645ul, 0x4A4FFBF2ul, 0x470CDD2Bul, 0x43CDC09Cul, 0x7B827D21ul, 0x7F436096ul, 0x7200464Ful, 0x76C15BF8ul, 0x68860BFDul, 0x6C47164Aul, 0x61043093ul, 0x65C52D24ul, 0x119B4BE9ul, 0x155A565Eul, 0x18197087ul, 0x1CD86D30ul, 0x029F3D35ul, 0x065E2082ul, 0x0B1D065Bul, 0x0FDC1BECul, 0x3793A651ul, 0x3352BBE6ul, 0x3E119D3Ful, 0x3AD08088ul, 0x2497D08Dul, 0x2056CD3Aul, 0x2D15EBE3ul, 0x29D4F654ul, 0xC5A92679ul, 0xC1683BCEul, 0xCC2B1D17ul, 0xC8EA00A0ul, 0xD6AD50A5ul, 0xD26C4D12ul, 0xDF2F6BCBul, 0xDBEE767Cul, 0xE3A1CBC1ul, 0xE760D676ul, 0xEA23F0AFul, 0xEEE2ED18ul, 0xF0A5BD1Dul, 0xF464A0AAul, 0xF9278673ul, 0xFDE69BC4ul, 0x89B8FD09ul, 0x8D79E0BEul, 0x803AC667ul, 0x84FBDBD0ul, 0x9ABC8BD5ul, 0x9E7D9662ul, 0x933EB0BBul, 0x97FFAD0Cul, 0xAFB010B1ul, 0xAB710D06ul, 0xA6322BDFul, 0xA2F33668ul, 0xBCB4666Dul, 0xB8757BDAul, 0xB5365D03ul, 0xB1F740B4ul }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S , &crcOps , NULL /*close */ , &OurInterface , &OurImports , interfprivate); } static int crc_auth_needskey (void) { return 0; } static int crc_auth_calc (const struct HBauth_info * info , const void * value, size_t valuelen, char * result, int resultlen) { const char* valuechar=value; unsigned long crc = 0; (void)info; while(valuelen--) crc = (crc << 8) ^ crctab[((crc >> 24) ^ *(valuechar++)) & 0xFF]; crc = ~crc & 0xFFFFFFFFul; snprintf(result, resultlen, "%lx", crc); return 1; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBauth/md5.c0000644000000000000000000000526711576626513021444 0ustar00usergroup00000000000000/* * The code to implement the MD5 message-digest algorithm is moved * to lib/clplumbing/md5.c * * Cleaned up by Mitja Sarp for heartbeat * * Significant changed by Sun Jiang Dong * * */ #include #ifdef HAVE_STDINT_H #include #endif #include /* for sprintf() */ #include #include /* for memcpy() */ #include /* for stupid systems */ #include /* for ntohl() */ #include #include #define PIL_PLUGINTYPE HB_AUTH_TYPE #define PIL_PLUGIN md5 #define PIL_PLUGINTYPE_S "HBauth" #define PIL_PLUGIN_S "md5" #define PIL_PLUGINLICENSE LICENSE_PUBDOM #define PIL_PLUGINLICENSEURL URL_PUBDOM #include /* The Ops we export to the world... 
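 * Each HBauth plugin exposes the same two operations through struct HBAuthOps:
 * auth_calc() writes a NUL-terminated lowercase-hex digest of the given text
 * into 'result' (md5 and sha1 return 0 if the result buffer is too small,
 * otherwise all three plugins here return nonzero on success), and needskey()
 * reports whether the method requires a configured shared key (1 for md5 and
 * sha1, 0 for crc).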
*/ static int md5_auth_calc(const struct HBauth_info *t , const void * text, size_t textlen ,char * result, int resultlen); static int md5_auth_needskey(void); /* Authentication plugin operations */ static struct HBAuthOps md5ops = { md5_auth_calc , md5_auth_needskey }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S , &md5ops , NULL /*close */ , &OurInterface , &OurImports , interfprivate); } /* * Real work starts here ;-) */ #define MD5_DIGESTSIZE 16 #define md5byte unsigned char static int md5_auth_needskey(void) { return 1; /* Yes, we require (need) a key */ } #define byteSwap(buf,words) static int md5_auth_calc(const struct HBauth_info *t, const void * text , size_t textlen, char * result, int resultlen) { unsigned char digest[MD5_DIGESTSIZE]; const unsigned char * key = (unsigned char *)t->key; int i, key_len; if (resultlen <= (MD5_DIGESTSIZE+1) *2) { return 0; } key_len = strlen((const char *)key); HMAC(key, key_len, text, textlen, digest); /* And show the result in human-readable form */ for (i = 0; i < MD5_DIGESTSIZE; i++) { sprintf(result, "%02x", digest[i]); result +=2; } return 1; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBauth/sha1.c0000644000000000000000000002251711576626513021610 0ustar00usergroup00000000000000/* SHA-1 in C By Steve Reid 100% Public Domain Test Vectors (from FIPS PUB 180-1) "abc" A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 A million repetitions of "a" 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F Cleaned up by Mitja Sarp for heartbeat */ /* #define LITTLE_ENDIAN * This should be #define'd if true. */ /* #define SHA1HANDSOFF * Copies data before messing with it. 
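 * With SHA1HANDSOFF defined (as it is just below), SHA1Transform() first
 * memcpy()s each 64-byte block into a local workspace, so the expansion
 * macros never modify the caller's input buffer in place.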
*/ #define SHA1HANDSOFF 1 #include #include #ifdef HAVE_STDINT_H #include #endif #include #include #include #define PIL_PLUGINTYPE HB_AUTH_TYPE #define PIL_PLUGINTYPE_S "HBauth" #define PIL_PLUGIN sha1 #define PIL_PLUGIN_S "sha1" #define PIL_PLUGINLICENSE LICENSE_PUBDOM #define PIL_PLUGINLICENSEURL URL_PUBDOM #include #define SHA_DIGESTSIZE 20 #define SHA_BLOCKSIZE 64 typedef struct SHA1Context_st{ uint32_t state[5]; uint32_t count[2]; unsigned char buffer[64]; } SHA1_CTX; void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]); void SHA1Init(SHA1_CTX* context); void SHA1Update(SHA1_CTX* context, const unsigned char* data, unsigned int len); void SHA1Final(unsigned char digest[20], SHA1_CTX* context); static int sha1_auth_calc (const struct HBauth_info *info , const void * text, size_t textlen, char * result, int resultlen); static int sha1_auth_needskey(void); static struct HBAuthOps sha1Ops = { sha1_auth_calc , sha1_auth_needskey }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S , &sha1Ops , NULL /*close */ , &OurInterface , &OurImports , interfprivate); } static int sha1_auth_needskey(void) { return 1; } #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) /* blk0() and blk() perform the initial expand. */ /* I got the idea of expanding during the round function from SSLeay */ #ifdef LITTLE_ENDIAN #define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00U) \ |(rol(block->l[i],8)&0x00FF00FFU)) #else #define blk0(i) block->l[i] #endif #define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ ^block->l[(i+2)&15]^block->l[i&15],1)) /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ #define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999u+rol(v,5);w=rol(w,30); #define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999u+rol(v,5);w=rol(w,30); #define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1u+rol(v,5);w=rol(w,30); #define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDCu+rol(v,5);w=rol(w,30); #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6u+rol(v,5);w=rol(w,30); /* Hash a single 512-bit block. This is the core of the algorithm. */ void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]) { uint32_t a, b, c, d, e; typedef union { unsigned char c[64]; uint32_t l[16]; } CHAR64LONG16; CHAR64LONG16* block; #ifdef SHA1HANDSOFF CHAR64LONG16 workspace; block = &workspace; memcpy(block, buffer, 64); #else block = (CHAR64LONG16*)buffer; #endif /* Copy context->state[] to working vars */ a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; /* 4 rounds of 20 operations each. Loop unrolled. 
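 * Per FIPS PUB 180-1: R0/R1 use the "choice" function ((w&(x^y))^y) with
 * constant 0x5A827999, R2 and R4 use parity (w^x^y) with 0x6ED9EBA1 and
 * 0xCA62C1D6 respectively, and R3 uses majority (((w|x)&y)|(w&x)) with
 * 0x8F1BBCDC, as defined by the macros above.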
*/ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); /* Add the working vars back into context.state[] */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; /* Wipe variables */ a = b = c = d = e = 0; } /* SHA1Init - Initialize new context */ void SHA1Init(SHA1_CTX* context) { /* SHA1 initialization constants */ context->state[0] = 0x67452301u; context->state[1] = 0xEFCDAB89u; context->state[2] = 0x98BADCFEu; context->state[3] = 0x10325476u; context->state[4] = 0xC3D2E1F0u; context->count[0] = context->count[1] = 0; } /* Run your data through this. */ void SHA1Update(SHA1_CTX* context, const unsigned char* data, unsigned int len) { unsigned int i, j; j = (context->count[0] >> 3) & 63; if ((context->count[0] += len << 3) < (len << 3)) { context->count[1]++; } context->count[1] += (len >> 29); if ((j + len) > 63) { memcpy(&context->buffer[j], data, (i = 64-j)); SHA1Transform(context->state, context->buffer); for ( ; i + 63 < len; i += 64) { SHA1Transform(context->state, &data[i]); } j = 0; } else { i = 0; } memcpy(&context->buffer[j], &data[i], len - i); } /* Add padding and return the message digest. */ void SHA1Final(unsigned char digest[20], SHA1_CTX* context) { uint32_t i, j; unsigned char finalcount[8]; unsigned char twohundred [] = "\200"; unsigned char twozeroes [] = "\00"; for (i = 0; i < 8; i++) { finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ } SHA1Update(context, twohundred, 1); while ((context->count[0] & 504) != 448) { SHA1Update(context, twozeroes, 1); } SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ for (i = 0; i < 20; i++) { digest[i] = (unsigned char) ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); } /* Wipe variables */ i = j = 0; memset(context->buffer, 0, 64); memset(context->state, 0, 20); memset(context->count, 0, 8); memset(&finalcount, 0, 8); #ifdef SHA1HANDSOFF /* make SHA1Transform overwrite it's own static vars */ SHA1Transform(context->state, context->buffer); #endif } static int sha1_auth_calc (const struct HBauth_info *info , const void * text, size_t textlen, char * result, int resultlen) { SHA1_CTX ictx, octx ; unsigned char isha[SHA_DIGESTSIZE]; unsigned char osha[SHA_DIGESTSIZE]; unsigned char tk[SHA_DIGESTSIZE]; unsigned char buf[SHA_BLOCKSIZE]; int i, key_len; unsigned char * key; if (resultlen <= SHA_DIGESTSIZE) { return FALSE; } key = (unsigned char *)g_strdup(info->key); key_len = strlen((char *)key); if (key_len > SHA_BLOCKSIZE) { SHA1_CTX tctx ; SHA1Init(&tctx); SHA1Update(&tctx, key, key_len); SHA1Final(tk, &tctx); g_free(key); key = tk; key_len = SHA_DIGESTSIZE; } /**** Inner Digest ****/ SHA1Init(&ictx) ; /* Pad the key for inner digest */ for (i = 0 ; i < key_len ; ++i) { buf[i] = key[i] ^ 0x36;}; /* Should this be a call to to memset? */ for (i = key_len ; i < SHA_BLOCKSIZE ; ++i) { buf[i] = 0x36;}; SHA1Update(&ictx, buf, SHA_BLOCKSIZE) ; SHA1Update(&ictx, (const unsigned char *)text, textlen) ; SHA1Final(isha, &ictx) ; /**** Outer Digest ****/ SHA1Init(&octx) ; /* Pad the key for outer digest */ for (i = 0 ; i < key_len ; ++i) {buf[i] = key[i] ^ 0x5C;}; /* Should this be a call to memset? */ for (i = key_len ; i < SHA_BLOCKSIZE ; ++i) { buf[i] = 0x5C;}; SHA1Update(&octx, buf, SHA_BLOCKSIZE) ; SHA1Update(&octx, isha, SHA_DIGESTSIZE) ; SHA1Final(osha, &octx) ; result[0] = '\0'; for (i = 0; i < SHA_DIGESTSIZE; i++) { sprintf((char *)tk, "%02x", osha[i]); strcat(result, (char *)tk); } if (key != tk) { g_free(key); } return TRUE; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/Makefile.am0000644000000000000000000000612611576626513022634 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in if HBAAPI_BUILD HBAPING = hbaping.la endif if BUILD_OPENAIS_MODULE OPENAIS=openais.la endif if BUILD_TIPC_MODULE TIPC=tipc.la endif if BUILD_RDS_MODULE RDS=rds.la endif SUBDIRS = INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls \ @TIPC_INCLUDE@ AM_CFLAGS = @CFLAGS@ ## libraries halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/HBcomm plugin_LTLIBRARIES = bcast.la mcast.la mcast6.la ucast.la \ serial.la \ ping.la ping6.la ping_group.la \ $(HBAPING) $(OPENAIS) $(TIPC) $(RDS) bcast_la_SOURCES = bcast.c bcast_la_LDFLAGS = -export-dynamic -module -avoid-version ucast_la_SOURCES = ucast.c ucast_la_LDFLAGS = -export-dynamic -module -avoid-version rds_la_SOURCES = rds.c rds_la_LDFLAGS = -export-dynamic -module -avoid-version mcast_la_SOURCES = mcast.c mcast_la_LDFLAGS = -export-dynamic -module -avoid-version mcast_la_LIBADD = $(top_builddir)/replace/libreplace.la mcast6_la_SOURCES = mcast6.c mcast6_la_LDFLAGS = -export-dynamic -module -avoid-version mcast6_la_LIBADD = $(top_builddir)/replace/libreplace.la serial_la_SOURCES = serial.c serial_la_LDFLAGS = -export-dynamic -module -avoid-version ping_la_SOURCES = ping.c ping_la_LDFLAGS = -export-dynamic -module -avoid-version ping_la_LIBADD = $(top_builddir)/replace/libreplace.la ping6_la_SOURCES = ping6.c ping6_la_LDFLAGS = -export-dynamic -module -avoid-version ping6_la_LIBADD = $(top_builddir)/replace/libreplace.la ping_group_la_SOURCES = ping_group.c ping_group_la_LDFLAGS = -export-dynamic -module -avoid-version ping_group_la_LIBADD = $(top_builddir)/replace/libreplace.la hbaping_la_SOURCES = hbaping.c hbaping_la_LDFLAGS = -export-dynamic -module -avoid-version hbaping_la_LIBADD = $(top_builddir)/replace/libreplace.la openais_la_SOURCES = openais.c openais_la_LDFLAGS = -export-dynamic -module -avoid-version openais_la_LIBADD = $(top_builddir)/replace/libreplace.la -levs tipc_la_SOURCES = tipc.c tipc_la_LDFLAGS = -export-dynamic -module -avoid-version tipc_la_LIBADD = $(top_builddir)/replace/libreplace.la Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/bcast.c0000644000000000000000000004375111576626513022045 0ustar00usergroup00000000000000/* * bcast.c: UDP/IP broadcast-based communication code for heartbeat. * * Copyright (C) 1999, 2000,2001 Alan Robertson * * About 150 lines of the code in this file originally borrowed in * 1999 from Tom Vogt's "Heart" program, and significantly mangled by * Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SO_BINDTODEVICE) # include #endif #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN bcast #define PIL_PLUGIN_S "bcast" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct ip_private { char * interface; /* Interface name */ struct in_addr bcast; /* Broadcast address */ struct sockaddr_in addr; /* Broadcast addr */ int port; int rsocket; /* Read-socket */ int wsocket; /* Write-socket */ }; static int bcast_init(void); struct hb_media* bcast_new(const char* interface); static int bcast_open(struct hb_media* mp); static int bcast_close(struct hb_media* mp); static void* bcast_read(struct hb_media* mp, int *lenp); static int bcast_write(struct hb_media* mp, void* msg, int len); static int bcast_make_receive_sock(struct hb_media* ei); static int bcast_make_send_sock(struct hb_media * mp); static struct ip_private * new_ip_interface(const char * ifn, int port); static int bcast_descr(char** buffer); static int bcast_mtype(char** buffer); static int bcast_isping(void); static int localudpport = -1; int if_get_broadaddr(const char *ifn, struct in_addr *broadaddr); static struct hb_media_fns bcastOps ={ bcast_new, /* Create single object function */ NULL, /* whole-line parse function */ bcast_open, bcast_close, bcast_read, bcast_write, bcast_mtype, bcast_descr, bcast_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &bcastOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } #define ISBCASTOBJECT(mp) ((mp) && ((mp)->vf == (void*)&bcastOps)) #define BCASTASSERT(mp) g_assert(ISBCASTOBJECT(mp)) static int bcast_mtype(char** buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return STRLEN_CONST(PIL_PLUGIN_S); } static int bcast_descr(char **buffer) { const char constret[] = "UDP/IP broadcast"; *buffer = STRDUP(constret); if (!*buffer) { return 0; } return STRLEN_CONST(constret); } static int bcast_isping(void) { return 0; } static int bcast_init(void) { struct servent* service; g_assert(OurImports != NULL); if (localudpport <= 0) { const char * chport; if ((chport = OurImports->ParamValue("udpport")) != NULL) { if (sscanf(chport, "%d", &localudpport) <= 0 || localudpport <= 0) { PILCallLog(LOG, PIL_CRIT , "bad port number %s" , chport); return HA_FAIL; } } } /* No port specified in the configuration... 
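 * (no "udpport" parameter was given), so fall back to the udp service entry
 * registered for HA_SERVICENAME in /etc/services, or to the compiled-in
 * UDPPORT default if no such entry exists.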
*/ if (localudpport <= 0) { /* If our service name is in /etc/services, then use it */ if ((service=getservbyname(HA_SERVICENAME, "udp")) != NULL){ localudpport = ntohs(service->s_port); }else{ localudpport = UDPPORT; } } return(HA_OK); } /* * Create new UDP/IP broadcast heartbeat object * Name of interface is passed as a parameter */ struct hb_media * bcast_new(const char * intf) { struct ip_private* ipi; struct hb_media * ret; bcast_init(); ipi = new_ip_interface(intf, localudpport); if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "bcast_new: attempting to open %s:%d", intf , localudpport); } if (ipi == NULL) { PILCallLog(LOG, PIL_CRIT, "IP interface [%s] does not exist" , intf); return(NULL); } ret = (struct hb_media*) MALLOC(sizeof(struct hb_media)); if (ret != NULL) { char * name; memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; name = STRDUP(intf); if (name != NULL) { ret->name = name; } else { FREE(ret); ret = NULL; } } if (ret != NULL) { if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "bcast_new: returning ret (%s)", ret->name); } }else{ FREE(ipi->interface); FREE(ipi); if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "bcast_new: ret was NULL"); } } return(ret); } /* * Open UDP/IP broadcast heartbeat interface */ static int bcast_open(struct hb_media* mp) { struct ip_private * ei; BCASTASSERT(mp); ei = (struct ip_private *) mp->pd; if ((ei->wsocket = bcast_make_send_sock(mp)) < 0) { return(HA_FAIL); } if ((ei->rsocket = bcast_make_receive_sock(mp)) < 0) { bcast_close(mp); return(HA_FAIL); } PILCallLog(LOG, PIL_INFO , "UDP Broadcast heartbeat started on port %d (%d) interface %s" , localudpport, ei->port, mp->name); if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_open : Socket %d opened for reading" ", socket %d opened for writing." , ei->rsocket, ei->wsocket); } return(HA_OK); } /* * Close UDP/IP broadcast heartbeat interface */ static int bcast_close(struct hb_media* mp) { struct ip_private * ei; int rc = HA_OK; BCASTASSERT(mp); ei = (struct ip_private *) mp->pd; if (ei->rsocket >= 0) { if (close(ei->rsocket) < 0) { rc = HA_FAIL; } ei->rsocket=-1; } if (ei->wsocket >= 0) { if (close(ei->wsocket) < 0) { rc = HA_FAIL; } ei->wsocket=-1; } PILCallLog(LOG, PIL_INFO , "UDP Broadcast heartbeat closed on port %d interface %s - Status: %d" , localudpport, mp->name, rc); return(rc); } /* * Receive a heartbeat broadcast packet from BCAST interface */ char bcast_pkt[MAXMSG]; void * bcast_read(struct hb_media* mp, int * lenp) { struct ip_private * ei; socklen_t addr_len = sizeof(struct sockaddr); struct sockaddr_in their_addr; /* connector's addr information */ int numbytes; BCASTASSERT(mp); ei = (struct ip_private *) mp->pd; if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_read : reading from socket %d (writing to socket %d)" , ei->rsocket, ei->wsocket); } if ((numbytes=recvfrom(ei->rsocket, bcast_pkt, MAXMSG-1, MSG_WAITALL , (struct sockaddr *)&their_addr, &addr_len)) == -1) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT , "Error receiving from socket: %s" , strerror(errno)); } return NULL; } /* Avoid possible buffer overruns */ bcast_pkt[numbytes] = EOS; if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_ntoa(their_addr.sin_addr)); } if (DEBUGPKTCONT && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", bcast_pkt); } *lenp = numbytes +1; return bcast_pkt; } /* * Send a heartbeat packet over broadcast UDP/IP interface */ static int bcast_write(struct hb_media* mp, void *pkt, int len) { struct ip_private * ei; int rc; BCASTASSERT(mp); ei = (struct ip_private *) 
mp->pd; if ((rc=sendto(ei->wsocket, pkt, len, 0 , (struct sockaddr *)&ei->addr , sizeof(struct sockaddr))) != len) { struct ha_msg* m; int err = errno; if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT , "%s: Unable to send " PIL_PLUGINTYPE_S " packet %s %s:%u len=%d [%d]: %s" , __FUNCTION__, ei->interface, inet_ntoa(ei->addr.sin_addr), ei->port , len, rc, strerror(errno)); } if (ANYDEBUG) { m = wirefmt2msg(pkt, len,MSG_NEEDAUTH); if (m){ cl_log_message(LOG_ERR, m); ha_msg_del(m); } } errno = err; return(HA_FAIL); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_write : writing %d bytes to %s (socket %d)" , rc, inet_ntoa(ei->addr.sin_addr), ei->wsocket); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "bcast pkt out: [%s]", (char*)pkt); } return(HA_OK); } /* * Set up socket for sending broadcast UDP heartbeats */ static int bcast_make_send_sock(struct hb_media * mp) { int sockfd, one = 1; struct ip_private * ei; BCASTASSERT(mp); ei = (struct ip_private *) mp->pd; if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { PILCallLog(LOG, PIL_CRIT , "Error getting socket: %s", strerror(errno)); return(sockfd); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_make_send_sock: Opened socket %d", sockfd); } /* Warn that we're going to broadcast */ if (setsockopt(sockfd, SOL_SOCKET, SO_BROADCAST, (const void *) &one, sizeof(one))==-1){ PILCallLog(LOG, PIL_CRIT , "Error setting socket option SO_BROADCAST: %s" , strerror(errno)); close(sockfd); return(-1); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_make_send_sock: Modified %d" " Added option SO_BROADCAST." , sockfd); } #if defined(SO_DONTROUTE) && !defined(USE_ROUTING) /* usually, we don't want to be subject to routing. */ if (setsockopt(sockfd, SOL_SOCKET, SO_DONTROUTE,(const void *) &one,sizeof(int))==-1) { PILCallLog(LOG, PIL_CRIT , "Error setting socket option SO_DONTROUTE: %s" , strerror(errno)); close(sockfd); return(-1); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "bcast_make_send_sock:" " Modified %d Added option SO_DONTROUTE." , sockfd); } #endif #if defined(SO_BINDTODEVICE) { /* * We want to send out this particular interface * * This is so we can have redundant NICs, and heartbeat on both */ struct ifreq i; strcpy(i.ifr_name, mp->name); if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE , (const void *) &i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT , "Error setting socket option SO_BINDTODEVICE" ": %s" , strerror(errno)); close(sockfd); return(-1); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_make_send_sock: Modified %d" " Added option SO_BINDTODEVICE." , sockfd); } } #endif if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_CRIT , "Error setting close-on-exec flag: %s" , strerror(errno)); } return(sockfd); } /* * Set up socket for listening to heartbeats (UDP broadcasts) */ #define MAXBINDTRIES 10 static int bcast_make_receive_sock(struct hb_media * mp) { struct ip_private * ei; struct sockaddr_in my_addr; /* my address information */ int sockfd; int bindtries; int boundyet=0; int j; BCASTASSERT(mp); ei = (struct ip_private *) mp->pd; memset(&(my_addr), 0, sizeof(my_addr)); /* zero my address struct */ my_addr.sin_family = AF_INET; /* host byte order */ my_addr.sin_port = htons(ei->port); /* short, network byte order */ my_addr.sin_addr.s_addr = INADDR_ANY; /* auto-fill with my IP */ if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "Error getting socket: %s" , strerror(errno)); return(-1); } /* * Set SO_REUSEADDR on the server socket s. 
Variable j is used * as a scratch varable. * * 16th February 2000 * Added by Horms * with thanks to Clinton Work */ j = 1; if(setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR, (const void *)&j, sizeof j) <0){ /* Ignore it. It will almost always be OK anyway. */ PILCallLog(LOG, PIL_CRIT , "Error setting socket option SO_REUSEADDR: %s" , strerror(errno)); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_make_receive_sock: Modified %d Added option SO_REUSEADDR." , sockfd); } #if defined(SO_BINDTODEVICE) { /* * We want to receive packets only from this interface... */ struct ifreq i; strcpy(i.ifr_name, ei->interface); if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE , (const void *)&i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT , "Error setting socket option" " SO_BINDTODEVICE(r) on %s: %s" , i.ifr_name, strerror(errno)); close(sockfd); return(-1); } if (ANYDEBUG) { PILCallLog(LOG, PIL_DEBUG , "SO_BINDTODEVICE(r) set for device %s" , i.ifr_name); } } #endif /* Try binding a few times before giving up */ /* Sometimes a process with it open is exiting right now */ for(bindtries=0; !boundyet && bindtries < MAXBINDTRIES; ++bindtries) { if (bind(sockfd, (struct sockaddr *)&my_addr , sizeof(struct sockaddr)) < 0) { PILCallLog(LOG, PIL_CRIT , "Error binding socket (%s). Retrying." , strerror(errno)); sleep(1); }else{ boundyet = 1; } } if (!boundyet) { #if !defined(SO_BINDTODEVICE) if (errno == EADDRINUSE) { /* This happens with multiple bcast or ppp interfaces */ PILCallLog(LOG, PIL_INFO , "Someone already listening on port %d [%s]" , ei->port , ei->interface); PILCallLog(LOG, PIL_INFO, "BCAST read process exiting"); close(sockfd); cleanexit(0); } #else PILCallLog(LOG, PIL_CRIT , "Unable to bind socket (%s). Giving up." , strerror(errno)); close(sockfd); return(-1); #endif } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_CRIT , "Error setting the close-on-exec flag: %s" , strerror(errno)); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG , "bcast_make_receive_sock: Returning %d", sockfd); } return(sockfd); } static struct ip_private * new_ip_interface(const char * ifn, int port) { struct ip_private * ep; struct in_addr broadaddr; /* Fetch the broadcast address for this interface */ if (if_get_broadaddr(ifn, &broadaddr) < 0) { /* this function whines about problems... */ return (NULL); } /* * We now have all the information we need. Populate our * structure with the information we've gotten. */ ep = (struct ip_private *)MALLOC(sizeof(struct ip_private)); if (ep == NULL) { return(NULL); } memset(ep, 0, sizeof(*ep)); ep->bcast = broadaddr; ep->interface = (char *)STRDUP(ifn); if(ep->interface == NULL) { FREE(ep); return(NULL); } memset(&ep->addr, 0, sizeof(ep->addr)); /* zero the struct */ ep->addr.sin_family = AF_INET; /* host byte order */ ep->addr.sin_port = htons(port); /* short, network byte order */ ep->port = port; ep->wsocket = -1; ep->rsocket = -1; ep->addr.sin_addr = ep->bcast; return(ep); } /* * ha_if.c - code that extracts information about a network interface * * See the linux ifconfig source code for more examples. * * Works on HP_UX 10.20, freebsd, linux rh6.2 * Works on solaris or Unixware (SVR4) with: * gcc -DBSD_COMP -c ha_if.c * Doesn't seem to work at all on Digital Unix (?) * * Author: Eric Z. 
Ayers * * Copyright (C) 2000 Computer Generation Incorporated * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ #include #ifdef HAVE_SYS_SOCKIO_H # include #endif /* if_get_broadaddr Retrieve the ipv4 broadcast address for the specified network interface. Inputs: ifn - the name of the network interface: e.g. eth0, eth1, ppp0, plip0, plusb0 ... Outputs: broadaddr - returned broadcast address. Returns: 0 on success -1 on failure - sets errno. */ int if_get_broadaddr(const char *ifn, struct in_addr *broadaddr) { int return_val; int fd = -1; struct ifreq ifr; /* points to one interface returned from ioctl */ fd = socket (PF_INET, SOCK_DGRAM, 0); if (fd < 0) { PILCallLog(LOG, PIL_CRIT , "Error opening socket for interface %s: %s" , ifn, strerror(errno)); return -1; } strncpy (ifr.ifr_name, ifn, sizeof(ifr.ifr_name)); /* Fetch the broadcast address of this interface by calling ioctl() */ return_val = ioctl(fd,SIOCGIFBRDADDR, &ifr); if (return_val == 0 ) { if (ifr.ifr_broadaddr.sa_family == AF_INET) { struct sockaddr_in sin_ptr; memcpy(&sin_ptr, &ifr.ifr_broadaddr, sizeof(sin_ptr)); memcpy(broadaddr, &sin_ptr.sin_addr, sizeof(*broadaddr)); /* leave return_val set to 0 to return success! */ }else{ PILCallLog(LOG, PIL_CRIT , "Wrong family for broadcast interface %s: %s" , ifn, strerror(errno)); return_val = -1; } }else{ PILCallLog(LOG, PIL_CRIT , "Get broadcast for interface %s failed: %s" , ifn, strerror(errno)); return_val = -1; } close (fd); return return_val; } /* end if_get_broadaddr() */ Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/hbaping.c0000644000000000000000000002445011576626513022354 0ustar00usergroup00000000000000/* * hbaping.c: Fiber Channel Host Bus Adapters (HBA) aliveness code for heartbeat * * Copyright (C) 2004 Alain St-Denis * * The checksum code in this file code was borrowed from the ping program. * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * This plugin only checks if it can reach device 0 by sending a HBA_SendScsiInquiry. * There is room for improvement here. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_NETINET_IN_SYSTM_H # include #endif /* HAVE_NETINET_IN_SYSTM_H */ #ifdef HAVE_NETINET_IP_VAR_H # include #endif /* HAVE_NETINET_IP_VAR_H */ #ifdef HAVE_NETINET_IP_H # include #endif /* HAVE_NETINET_IP_H */ #ifdef HAVE_NETINET_IP_COMPAT_H # include #endif /* HAVE_NETINET_IP_COMPAT_H */ #ifdef HAVE_NETINET_IP_FW_H # include #endif /* HAVE_NETINET_IP_FW_H */ #include #include #include #include #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN hbaping #define PIL_PLUGIN_S "hbaping" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct hbaping_private { HBA_WWN addr; /* dest WWN */ HBA_HANDLE handle; /* hba handle */ char namebuf[1028]; /* interface name */ int ident; /* heartbeat pid */ int iseq; /* sequence number */ }; static struct hb_media* hbaping_new (const char* interface); static int hbaping_open (struct hb_media* mp); static int hbaping_close (struct hb_media* mp); static void * hbaping_read (struct hb_media* mp, int* lenp); static int hbaping_write (struct hb_media* mp, void* p, int len); static struct hbaping_private * new_hbaping_interface(const char * host); static int hbaping_mtype(char **buffer); static int hbaping_descr(char **buffer); static int hbaping_isping(void); #define ISPINGOBJECT(mp) ((mp) && ((mp)->vf == (void*)&hbapingOps)) #define PINGASSERT(mp) g_assert(ISPINGOBJECT(mp)) static struct hb_media_fns hbapingOps ={ hbaping_new, /* Create single object function */ NULL, /* whole-line parse function */ hbaping_open, hbaping_close, hbaping_read, hbaping_write, hbaping_mtype, hbaping_descr, hbaping_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug); static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &hbapingOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int hbaping_mtype(char **buffer) { *buffer = MALLOC((strlen(PIL_PLUGIN_S) * sizeof(char)) + 1); strcpy(*buffer, PIL_PLUGIN_S); return strlen(PIL_PLUGIN_S); } static int hbaping_descr(char **buffer) { const char *str = "hbaping membership"; *buffer = MALLOC((strlen(str) * sizeof(char)) + 1); strcpy(*buffer, str); return strlen(str); } /* Yes, a ping device */ static int hbaping_isping(void) { return 1; } static struct hbaping_private * new_hbaping_interface(const char * host) { struct hbaping_private* ppi; HBA_UINT32 hba_cnt; 
if ((ppi = (struct hbaping_private*)MALLOC(sizeof(struct hbaping_private))) == NULL) { return NULL; } memset(ppi, 0, sizeof (*ppi)); if (HBA_LoadLibrary() != HBA_STATUS_OK) { LOG(PIL_CRIT, "error loading hbaapi: %s", strerror(errno)); return(NULL); } hba_cnt = HBA_GetNumberOfAdapters(); if (hba_cnt == 0) { LOG(PIL_CRIT, "no HBA found"); return(NULL); } /* adapter identified by its name (e.g. qlogic-qla2200-0) * only one HBA and one port per HBA. Bad. */ strncpy(ppi->namebuf, host, sizeof(ppi->namebuf)); return(ppi); } /* * Create new ping heartbeat object * Name of host is passed as a parameter */ static struct hb_media * hbaping_new(const char * host) { struct hbaping_private* ipi; struct hb_media * ret; ipi = new_hbaping_interface(host); if (ipi == NULL) { return(NULL); } ret = (struct hb_media *) MALLOC(sizeof(struct hb_media)); if (ret != NULL) { char * name; memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; name = MALLOC(strlen(host)+1); strcpy(name, host); ret->name = name; add_node(host, PINGNODE_I); }else{ FREE(ipi); ipi = NULL; } return(ret); } /* * Close HBA connection */ static int hbaping_close(struct hb_media* mp) { struct hbaping_private * ei; int rc = HA_OK; PINGASSERT(mp); ei = (struct hbaping_private *) mp->pd; if (ei->handle >= 0) { HBA_CloseAdapter(ei->handle); ei->handle=-1; } return(rc); } /* * Receive a heartbeat ping reply packet. * Here we do a HBA_SendScsiInquiry and build a HA msg if it is * successful. */ char hbaping_pkt[MAXLINE]; static void * hbaping_read(struct hb_media* mp, int *lenp) { struct hbaping_private * ei; char RspBuffer[96], SenseBuffer[96]; int rc; struct ha_msg *nmsg; static char ts[32]; void *pkt; sleep(1); /* since we are not polling... */ PINGASSERT(mp); ei = (struct hbaping_private *) mp->pd; rc = HBA_SendScsiInquiry(ei->handle, ei->addr, 0, 0, 0, (void *)&RspBuffer, sizeof(RspBuffer), (void *)&SenseBuffer, sizeof(SenseBuffer)); if (rc != HBA_STATUS_OK) { /* LOG(PIL_CRIT, "can't inquiry to %s, status = %d", ei->namebuf, rc); */ return NULL; } if ((nmsg = ha_msg_new(5)) == NULL) { LOG(PIL_CRIT, "cannot create new message"); return NULL; } sprintf(ts, "%lx", time(NULL)); if (ha_msg_add(nmsg, F_TYPE, T_NS_STATUS) != HA_OK || ha_msg_add(nmsg, F_STATUS, PINGSTATUS) != HA_OK || ha_msg_add(nmsg, F_ORIG, ei->namebuf) != HA_OK || ha_msg_add(nmsg, F_TIME, ts) != HA_OK) { ha_msg_del(nmsg); nmsg = NULL; LOG(PIL_CRIT, "cannot add fields to message"); return NULL; } if (add_msg_auth(nmsg) != HA_OK) { LOG(PIL_CRIT, "cannot add auth field to message"); ha_msg_del(nmsg); nmsg = NULL; return NULL; } pkt = msg2wirefmt(nmsg, lenp); if( pkt == NULL){ LOG(PIL_WARN, "containg msg to wirefmt failed in hbaping_read()\n"); return NULL; } ha_msg_del(nmsg); memcpy(hbaping_pkt, pkt, *lenp); free(pkt); return hbaping_pkt; } /* * Send a heartbeat packet over broadcast UDP/IP interface * * This is a noop. * * We ignore packets we're given to write that aren't "status" packets. 
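 * Liveness is established entirely on the read side, where hbaping_read()
 * issues an HBA_SendScsiInquiry() and synthesizes a status message on
 * success, so this write path can simply report HA_OK.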
* */ static int hbaping_write(struct hb_media* mp, void *p, int len) { /* struct hbaping_private * ei; */ /* HBA_PORTATTRIBUTES hba_portattrs; */ /* int rc; */ /* const char * type; */ /* const char * ts; */ PINGASSERT(mp); /* ei = (struct hbaping_private *) mp->pd; */ /* type = ha_msg_value(msg, F_TYPE); */ /* if (type == NULL || strcmp(type, T_STATUS) != 0 */ /* || ((ts = ha_msg_value(msg, F_TIME)) == NULL)) { */ /* return HA_OK; */ /* } */ /*rc = HBA_GetAdapterPortAttributes(ei->handle, 0, &hba_portattrs);*/ /* rc = HBA_STATUS_OK; */ /* if (rc != HBA_STATUS_OK) { */ /* LOG(PIL_CRIT, "can't get %s hba attributes (status = %d)!", ei->namebuf, rc); */ /* return HA_FAIL; */ /* } */ /*if (hba_portattrs.PortState != HBA_PORTSTATE_ONLINE) { LOG(PIL_CRIT, "hba %s is not online!", ei->namebuf); return HA_FAIL; }*/ return HA_OK; } /* * Open hbaping. Basically a noop... */ static int hbaping_open(struct hb_media* mp) { struct hbaping_private * ppi; int retval; union { HBA_FCPTARGETMAPPING fcp_tmap; struct { HBA_UINT32 cnt; HBA_FCPSCSIENTRY entry[32]; } fcp_tmapi; } map; PINGASSERT(mp); ppi = (struct hbaping_private *) mp->pd; if ((ppi->handle = HBA_OpenAdapter(ppi->namebuf)) == 0) { LOG(PIL_CRIT, "can't open adapter %s", ppi->namebuf); return(HA_FAIL); } /* discover target mapping, use the first port only * will be used to contact shared device controller * in hbaping_write */ map.fcp_tmapi.cnt = 32; retval = HBA_GetFcpTargetMapping(ppi->handle, &map.fcp_tmap); if (retval != HBA_STATUS_OK) { LOG(PIL_CRIT, "failure of HBA_GetFcpTargetMapping: %d", retval); return(HA_FAIL); } if (map.fcp_tmap.NumberOfEntries == 0) { LOG(PIL_CRIT, "no target found for adapter %s", ppi->namebuf); return(HA_FAIL); } /*memcpy(&(ppi->addr), &(map.fcp_tmap.entry[0].FcpId.PortWWN), sizeof(HBA_WWN));*/ ppi->addr = map.fcp_tmap.entry[0].FcpId.PortWWN; ppi->ident = getpid() & 0xFFFF; LOG(LOG_NOTICE, "hbaping heartbeat started."); return HA_OK; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/mcast.c0000644000000000000000000005231211576626513022051 0ustar00usergroup00000000000000/* * mcast.c: implements hearbeat API for UDP multicast communication * * Copyright (C) 2000 Alan Robertson * Copyright (C) 2000 Chris Wright * * Thanks to WireX for providing hardware to test on. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SOCKIO_H # include #endif #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN mcast #define PIL_PLUGIN_S "mcast" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include #include struct mcast_private { char * interface; /* Interface name */ struct in_addr mcast; /* multicast address */ struct sockaddr_in addr; /* multicast addr */ u_short port; int rsocket; /* Read-socket */ int wsocket; /* Write-socket */ u_char ttl; /* TTL value for outbound packets */ u_char loop; /* boolean, loop back outbound packets */ }; static int mcast_parse(const char* configline); static struct hb_media * mcast_new(const char * intf, const char *mcast , u_short port, u_char ttl, u_char loop); static int mcast_open(struct hb_media* mp); static int mcast_close(struct hb_media* mp); static void* mcast_read(struct hb_media* mp, int* lenp); static int mcast_write(struct hb_media* mp, void* p, int len); static int mcast_descr(char** buffer); static int mcast_mtype(char** buffer); static int mcast_isping(void); static struct hb_media_fns mcastOps ={ NULL, /* Create single object function */ mcast_parse, /* whole-line parse function */ mcast_open, mcast_close, mcast_read, mcast_write, mcast_mtype, mcast_descr, mcast_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &mcastOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } /* helper functions */ static int mcast_make_receive_sock(struct hb_media* hbm); static int mcast_make_send_sock(struct hb_media * hbm); static struct mcast_private * new_mcast_private(const char *ifn, const char *mcast, u_short port, u_char ttl, u_char loop); static int set_mcast_if(int sockfd, char *ifname); static int set_mcast_loop(int sockfd, u_char loop); static int set_mcast_ttl(int sockfd, u_char ttl); static int join_mcast_group(int sockfd, struct in_addr *addr, char *ifname); static int if_getaddr(const char *ifname, struct in_addr *addr); static int is_valid_dev(const char *dev); static int is_valid_mcast_addr(const char *addr); static int get_port(const char *port, u_short *p); static int get_ttl(const char *ttl, u_char *t); static int get_loop(const char *loop, u_char *l); #define ISMCASTOBJECT(mp) ((mp) && ((mp)->vf == (void*)&mcastOps)) #define MCASTASSERT(mp) g_assert(ISMCASTOBJECT(mp)) static int mcast_mtype(char** buffer) { *buffer = 
STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return STRLEN_CONST(PIL_PLUGIN_S); } static int mcast_descr(char **buffer) { const char cret[] = "UDP/IP multicast"; *buffer = STRDUP(cret); if (!*buffer) { return 0; } return STRLEN_CONST(cret); } static int mcast_isping(void) { /* nope, this is not a ping device */ return 0; } /* mcast_parse will parse the line in the config file that is * associated with the media's type (hb_dev_mtype). It should * receive the rest of the line after the mtype. And it needs * to call hb_dev_new, add the media to the list of available media. * * So in this case, the config file line should look like * mcast [device] [mcast group] [port] [mcast ttl] [mcast loop] * for example: * mcast eth0 225.0.0.1 694 1 0 */ static int mcast_parse(const char *line) { const char * bp = line; char dev[MAXLINE]; char mcast[MAXLINE]; char token[MAXLINE]; u_short port = 0; /* Bogus */ u_char ttl = 10; /* Bogus */ u_char loop = 10; /* Bogus */ int toklen; struct hb_media * mp; /* Skip over white space, then grab the device */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(dev, bp, toklen); bp += toklen; dev[toklen] = EOS; if (*dev != EOS) { if (!is_valid_dev(dev)) { PILCallLog(LOG, PIL_CRIT, "mcast device [%s] is invalid or not set up properly", dev); return HA_FAIL; } /* Skip over white space, then grab the multicast group */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(mcast, bp, toklen); bp += toklen; mcast[toklen] = EOS; if (*mcast == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast [%s] missing mcast address", dev); return(HA_FAIL); } if (!is_valid_mcast_addr(mcast)) { PILCallLog(LOG, PIL_CRIT, "mcast [%s] bad addr [%s]", dev, mcast); return(HA_FAIL); } /* Skip over white space, then grab the port */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(token, bp, toklen); bp += toklen; token[toklen] = EOS; if (*token == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast [%s] missing port" , dev); return(HA_FAIL); } if (get_port(token, &port) < 0 || port <= 0) { PILCallLog(LOG, PIL_CRIT, " mcast [%s] bad port [%d]", dev, port); return HA_FAIL; } /* Skip over white space, then grab the ttl */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(token, bp, toklen); bp += toklen; token[toklen] = EOS; if (*token == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast [%s] missing ttl", dev); return(HA_FAIL); } if (get_ttl(token, &ttl) < 0 || ttl > 4) { PILCallLog(LOG, PIL_CRIT, " mcast [%s] bad ttl [%d]", dev, ttl); return HA_FAIL; } /* Skip over white space, then grab the loop */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(token, bp, toklen); bp += toklen; token[toklen] = EOS; if (*token == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast [%s] missing loop", dev); return(HA_FAIL); } if (get_loop(token, &loop) < 0 || loop > 1) { PILCallLog(LOG, PIL_CRIT, " mcast [%s] bad loop [%d]", dev, loop); return HA_FAIL; } if ((mp = mcast_new(dev, mcast, port, ttl, loop)) == NULL) { return(HA_FAIL); } OurImports->RegisterNewMedium(mp); } return(HA_OK); } /* * Create new UDP/IP multicast heartbeat object * pass in name of interface, multicast address, port, multicast * ttl, and multicast loopback value as parameters. * This should get called from hb_dev_parse(). */ static struct hb_media * mcast_new(const char * intf, const char *mcast, u_short port, u_char ttl, u_char loop) { struct mcast_private* mcp; struct hb_media * ret; /* create new mcast_private struct...hmmm...who frees it? 
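 * Answer: if building the hb_media wrapper fails, mcast_new() frees it below
 * (FREE(mcp->interface), then FREE(mcp)); otherwise ownership passes to the
 * hb_media object through ret->pd, and nothing in this file frees it afterwards.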
*/ mcp = new_mcast_private(intf, mcast, port, ttl, loop); if (mcp == NULL) { PILCallLog(LOG, PIL_WARN, "Error creating mcast_private(%s, %s, %d, %d, %d)", intf, mcast, port, ttl, loop); return(NULL); } ret = (struct hb_media*) MALLOC(sizeof(struct hb_media)); if (ret != NULL) { char * name; memset(ret, 0, sizeof(*ret)); ret->pd = (void*)mcp; name = STRDUP(intf); if (name != NULL) { ret->name = name; } else { FREE(ret); ret = NULL; } } if(ret == NULL) { FREE(mcp->interface); FREE(mcp); } return(ret); } /* * Open UDP/IP multicast heartbeat interface */ static int mcast_open(struct hb_media* hbm) { struct mcast_private * mcp; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if ((mcp->wsocket = mcast_make_send_sock(hbm)) < 0) { return(HA_FAIL); } if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: write socket: %d" , __FUNCTION__, mcp->wsocket); } if ((mcp->rsocket = mcast_make_receive_sock(hbm)) < 0) { mcast_close(hbm); return(HA_FAIL); } if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: read socket: %d" , __FUNCTION__, mcp->rsocket); } PILCallLog(LOG, PIL_INFO, "UDP multicast heartbeat started for group %s " "port %d interface %s (ttl=%d loop=%d)" , inet_ntoa(mcp->mcast), mcp->port, mcp->interface, mcp->ttl, mcp->loop); return(HA_OK); } /* * Close UDP/IP multicast heartbeat interface */ static int mcast_close(struct hb_media* hbm) { struct mcast_private * mcp; int rc = HA_OK; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if (mcp->rsocket >= 0) { if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: Closing socket %d" , __FUNCTION__, mcp->rsocket); } if (close(mcp->rsocket) < 0) { rc = HA_FAIL; } mcp->rsocket = -1; } if (mcp->wsocket >= 0) { if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: Closing socket %d" , __FUNCTION__, mcp->wsocket); } if (close(mcp->wsocket) < 0) { rc = HA_FAIL; } mcp->rsocket = -1; } return(rc); } /* * Receive a heartbeat multicast packet from UDP interface */ char mcast_pkt[MAXMSG]; static void * mcast_read(struct hb_media* hbm, int *lenp) { struct mcast_private * mcp; socklen_t addr_len = sizeof(struct sockaddr); struct sockaddr_in their_addr; /* connector's addr information */ int numbytes; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if ((numbytes=recvfrom(mcp->rsocket, mcast_pkt, MAXMSG-1, 0 , (struct sockaddr *)&their_addr, &addr_len)) < 0) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "Error receiving from socket: %s" , strerror(errno)); } return NULL; } /* Avoid possible buffer overruns */ mcast_pkt[numbytes] = EOS; if (Debug >= PKTTRACE) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_ntoa(their_addr.sin_addr)); } if (Debug >= PKTCONTTRACE && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", mcast_pkt); } *lenp = numbytes + 1 ; return mcast_pkt;; } /* * Send a heartbeat packet over multicast UDP/IP interface */ static int mcast_write(struct hb_media* hbm, void *pkt, int len) { struct mcast_private * mcp; int rc; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if ((rc=sendto(mcp->wsocket, pkt, len, 0 , (struct sockaddr *)&mcp->addr , sizeof(struct sockaddr))) != len) { if (!hbm->suppresserrs) { PILCallLog(LOG, PIL_CRIT , "%s: Unable to send " PIL_PLUGINTYPE_S " packet %s %s:%u len=%d [%d]: %s" , __FUNCTION__, mcp->interface, inet_ntoa(mcp->addr.sin_addr), mcp->port , len, rc, strerror(errno)); } return(HA_FAIL); } if (Debug >= PKTTRACE) { PILCallLog(LOG, PIL_DEBUG, "sent %d bytes to %s" , rc, inet_ntoa(mcp->addr.sin_addr)); } if (Debug >= PKTCONTTRACE) { PILCallLog(LOG, PIL_DEBUG, "%s", (const char *)pkt); } 
return(HA_OK); } /* * Set up socket for sending multicast UDP heartbeats */ static int mcast_make_send_sock(struct hb_media * hbm) { int sockfd; struct mcast_private * mcp; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { PILCallLog(LOG, PIL_WARN, "Error getting socket: %s", strerror(errno)); return(sockfd); } if (set_mcast_if(sockfd, mcp->interface) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast interface: %s", strerror(errno)); } if (set_mcast_loop(sockfd, mcp->loop) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast loopback value: %s", strerror(errno)); } if (set_mcast_ttl(sockfd, mcp->ttl) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast TTL: %s", strerror(errno)); } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_WARN, "Error setting the close-on-exec flag: %s", strerror(errno)); } return(sockfd); } /* * Set up socket for listening to heartbeats (UDP multicasts) */ #define MAXBINDTRIES 50 static int mcast_make_receive_sock(struct hb_media * hbm) { struct mcast_private * mcp; int sockfd; int bindtries; int boundyet=0; int one=1; int rc; int binderr=0; MCASTASSERT(hbm); mcp = (struct mcast_private *) hbm->pd; if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "Error getting socket"); return -1; } /* set REUSEADDR option on socket so you can bind a multicast */ /* reader to multiple interfaces */ if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (void *)&one, sizeof(one)) < 0){ PILCallLog(LOG, PIL_CRIT, "Error setsockopt(SO_REUSEADDR)"); } /* ripped off from udp.c, if we all use SO_REUSEADDR */ /* this shouldn't be necessary */ /* Try binding a few times before giving up */ /* Sometimes a process with it open is exiting right now */ for(bindtries=0; !boundyet && bindtries < MAXBINDTRIES; ++bindtries) { rc=bind(sockfd, (struct sockaddr *)&mcp->addr, sizeof(mcp->addr)); binderr=errno; if (rc==0) { boundyet=1; } else if (rc == -1) { if (binderr == EADDRINUSE) { PILCallLog(LOG, PIL_CRIT, "Can't bind (EADDRINUSE), " "retrying"); sleep(1); } else { /* don't keep trying if the error isn't caused by */ /* the address being in use already...real error */ break; } } } if (!boundyet) { if (binderr == EADDRINUSE) { /* This happens with multiple udp or ppp interfaces */ PILCallLog(LOG, PIL_INFO , "Someone already listening on port %d [%s]" , mcp->port , mcp->interface); PILCallLog(LOG, PIL_INFO, "multicast read process exiting"); close(sockfd); cleanexit(0); } else { PILCallLog(LOG, PIL_WARN, "Unable to bind socket. 
Giving up: %s", strerror(errno)); close(sockfd); return(-1); } } /* join the multicast group...this is what really makes this a */ /* multicast reader */ if (join_mcast_group(sockfd, &mcp->mcast, mcp->interface) == -1) { PILCallLog(LOG, PIL_CRIT, "Can't join multicast group %s on interface %s", inet_ntoa(mcp->mcast), mcp->interface); PILCallLog(LOG, PIL_INFO, "multicast read process exiting"); close(sockfd); cleanexit(0); } if (ANYDEBUG) PILCallLog(LOG, PIL_DEBUG, "Successfully joined multicast group %s on" "interface %s", inet_ntoa(mcp->mcast), mcp->interface); if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_WARN, "Error setting the close-on-exec flag: %s", strerror(errno)); } return(sockfd); } static struct mcast_private * new_mcast_private(const char *ifn, const char *mcast, u_short port, u_char ttl, u_char loop) { struct mcast_private *mcp; mcp = MALLOCT(struct mcast_private); if (mcp == NULL) { return NULL; } memset(mcp, 0, sizeof(*mcp)); mcp->interface = (char *)STRDUP(ifn); if(mcp->interface == NULL) { FREE(mcp); return NULL; } /* Set up multicast address */ if (inet_pton(AF_INET, mcast, (void *)&mcp->mcast) <= 0) { FREE(mcp->interface); FREE(mcp); return NULL; } mcp->addr.sin_family = AF_INET; /* host byte order */ mcp->addr.sin_port = htons(port); /* short, network byte order */ mcp->addr.sin_addr = mcp->mcast; mcp->port = port; mcp->wsocket = -1; mcp->rsocket = -1; mcp->ttl=ttl; mcp->loop=loop; return(mcp); } /* set_mcast_loop takes a boolean flag, loop, which is useful on * a writing socket. with loop enabled (the default on a multicast socket) * the outbound packet will get looped back and received by the sending * interface, if it is listening for the multicast group and port that the * packet was sent to. Returns 0 on success -1 on failure. */ static int set_mcast_loop(int sockfd, u_char loop) { return setsockopt(sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); } /* set_mcast_ttl will set the time-to-live value for the writing socket. * the socket default is TTL=1. The TTL is used to limit the scope of the * packet and can range from 0-255. * TTL Scope * ---------------------------------------------------------------------- * 0 Restricted to the same host. Won't be output by any interface. * 1 Restricted to the same subnet. Won't be forwarded by a router. * <32 Restricted to the same site, organization or department. * <64 Restricted to the same region. * <128 Restricted to the same continent. * <255 Unrestricted in scope. Global. * * Returns 0 on success -1 on failure. */ static int set_mcast_ttl(int sockfd, u_char ttl) { return setsockopt(sockfd, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); } /* * set_mcast_if takes the name of an interface (i.e. eth0) and then * sets that as the interface to use for outbound multicast traffic. * If ifname is NULL, then it the OS will assign the interface. * Returns 0 on success -1 on faliure. */ static int set_mcast_if(int sockfd, char *ifname) { int rc; struct in_addr addr; /* Zero out the struct... we only care about the address... */ memset(&addr, 0, sizeof(addr)); rc = if_getaddr(ifname, &addr); if (rc == -1) return -1; return setsockopt(sockfd, IPPROTO_IP, IP_MULTICAST_IF , (void*)&addr, sizeof(addr)); } /* join_mcast_group is used to join a multicast group. the group is * specified by a class D multicast address 224.0.0.0/8 in the in_addr * structure passed in as a parameter. 
The interface name can be used * to "bind" the multicast group to a specific interface (or any * interface if ifname is NULL); * returns 0 on success, -1 on failure. */ static int join_mcast_group(int sockfd, struct in_addr *addr, char *ifname) { struct ip_mreq mreq_add; memset(&mreq_add, 0, sizeof(mreq_add)); memcpy(&mreq_add.imr_multiaddr, addr, sizeof(struct in_addr)); if (ifname) { if_getaddr(ifname, &mreq_add.imr_interface); } return setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (void*)&mreq_add, sizeof(mreq_add)); } /* if_getaddr gets the ip address from an interface * specified by name and places it in addr. * returns 0 on success and -1 on failure. */ static int if_getaddr(const char *ifname, struct in_addr *addr) { struct ifreq if_info; int j; int maxtry = 120; gboolean gotaddr = FALSE; int err = 0; if (!addr) { return -1; } addr->s_addr = INADDR_ANY; memset(&if_info, 0, sizeof(if_info)); if (ifname) { strncpy(if_info.ifr_name, ifname, IFNAMSIZ-1); }else{ /* ifname is NULL, so use any address */ return 0; } if (Debug > 0) { PILCallLog(LOG, PIL_DEBUG, "looking up address for %s" , if_info.ifr_name); } for (j=0; j < maxtry && !gotaddr; ++j) { int fd; if ((fd=socket(AF_INET, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "Error getting socket"); return -1; } if (ioctl(fd, SIOCGIFADDR, &if_info) >= 0) { gotaddr = TRUE; }else{ err = errno; switch(err) { case EADDRNOTAVAIL: sleep(1); break; default: close(fd); goto getout; } } close(fd); } getout: if (!gotaddr) { PILCallLog(LOG, PIL_CRIT , "Unable to retrieve local interface address" " for interface [%s] using ioctl(SIOCGIFADDR): %s" , ifname, strerror(err)); return -1; } /* * This #define w/void cast is to quiet alignment errors on some * platforms (notably Solaris) */ #define SOCKADDR_IN(a) ((struct sockaddr_in *)((void*)(a))) memcpy(addr, &(SOCKADDR_IN(&if_info.ifr_addr)->sin_addr) , sizeof(struct in_addr)); return 0; } /* returns true or false */ static int is_valid_dev(const char *dev) { int rc=0; if (dev) { struct in_addr addr; if (if_getaddr(dev, &addr) != -1) rc = 1; } return rc; } /* returns true or false */ #define MCAST_NET 0xf0000000 #define MCAST_BASE 0xe0000000 static int is_valid_mcast_addr(const char *addr) { unsigned long mc_addr; /* make sure address is in host byte order */ mc_addr = ntohl(inet_addr(addr)); if ((mc_addr & MCAST_NET) == MCAST_BASE) return 1; return 0; } /* return port number on success, 0 on failure */ static int get_port(const char *port, u_short *p) { /* not complete yet */ *p=(u_short)atoi(port); return 0; } /* returns ttl on succes, -1 on failure */ static int get_ttl(const char *ttl, u_char *t) { /* not complete yet */ *t=(u_char)atoi(ttl); return 0; } /* returns loop on success, -1 on failure */ static int get_loop(const char *loop, u_char *l) { /* not complete yet */ *l=(u_char)atoi(loop); return 0; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/mcast6.c0000644000000000000000000005245511576626513022147 0ustar00usergroup00000000000000/* * mcast6.c: implements hearbeat API for UDP IPv6 multicast communication * * Copyright (C) 2010 Lars Ellenberg * based on mcast6.c, which is * Copyright (C) 2000 Alan Robertson * Copyright (C) 2000 Chris Wright * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SOCKIO_H # include #endif #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN mcast6 #define PIL_PLUGIN_S "mcast6" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include #include static int largest_msg_size = 0; struct mcast6_private { char *interface; /* Interface name */ char mcast6_s[64]; /* multicast address and port */ char port_s[8]; /* as read in from config */ struct sockaddr_in6 maddr; /* multicast addr */ struct sockaddr_in6 saddr; /* local addr to bind() to */ int rsocket; /* Read-socket */ int wsocket; /* Write-socket */ u_char hops; /* TTL value for outbound packets */ u_char loop; /* boolean, loop back outbound packets */ }; static int mcast6_parse(const char* configline); static struct hb_media * mcast6_new(const char * intf, const char *mcast6 , const char *port, u_char hops, u_char loop); static int mcast6_open(struct hb_media* mp); static int mcast6_close(struct hb_media* mp); static void* mcast6_read(struct hb_media* mp, int* lenp); static int mcast6_write(struct hb_media* mp, void* p, int len); static int mcast6_descr(char** buffer); static int mcast6_mtype(char** buffer); static int mcast6_isping(void); static struct hb_media_fns mcast6Ops ={ NULL, /* Create single object function */ mcast6_parse, /* whole-line parse function */ mcast6_open, mcast6_close, mcast6_read, mcast6_write, mcast6_mtype, mcast6_descr, mcast6_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &mcast6Ops , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } /* helper functions */ static int mcast6_make_receive_sock(struct hb_media* hbm); static int mcast6_make_send_sock(struct hb_media * hbm); static struct mcast6_private * new_mcast6_private(const char *ifn, const char *mcast6, const char *port, u_char hops, u_char loop); static int set_mcast6_if(int sockfd, char *ifname); static int set_mcast6_loop(int sockfd, unsigned int loop); static int set_mcast6_hops(int sockfd, int hops); static int join_mcast6_group(int sockfd, struct in6_addr *addr, char *ifname); static int is_valid_dev(const char *dev); static int 
is_valid_mcast6_addr(const char *addr); static int get_hops(const char *hops, u_char *t); static int get_loop(const char *loop, u_char *l); #define ISMCASTOBJECT(mp) ((mp) && ((mp)->vf == (void*)&mcast6Ops)) #define MCASTASSERT(mp) g_assert(ISMCASTOBJECT(mp)) static int mcast6_mtype(char** buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return STRLEN_CONST(PIL_PLUGIN_S); } static int mcast6_descr(char **buffer) { const char cret[] = "UDP/IP multicast"; *buffer = STRDUP(cret); if (!*buffer) { return 0; } return STRLEN_CONST(cret); } static int mcast6_isping(void) { /* nope, this is not a ping device */ return 0; } /* mcast6_parse will parse the line in the config file that is * associated with the media's type (hb_dev_mtype). It should * receive the rest of the line after the mtype. And it needs * to call hb_dev_new, add the media to the list of available media. * * So in this case, the config file line should look like * mcast6 [device] [mcast6 group] [port] [mcast6 hops] [mcast6 loop] * for example (using link-local scope with some "transient" group): * mcast6 eth0 ff12::1:2:3:4 694 1 0 */ #define GET_NEXT_TOKEN(bp, token) do { \ int toklen; \ bp += strspn(bp, WHITESPACE); \ toklen = strcspn(bp, WHITESPACE); \ strncpy(token, bp, toklen); \ bp += toklen; \ token[toklen] = EOS; \ } while(0) static int mcast6_parse(const char *line) { const char * bp = line; char dev[MAXLINE]; char mcast6[MAXLINE]; char port[MAXLINE]; char token[MAXLINE]; u_char hops = 10; /* Bogus */ u_char loop = 10; /* Bogus */ struct hb_media * mp; GET_NEXT_TOKEN(bp, dev); if (*dev == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast6 statement without device"); return HA_FAIL; } if (!is_valid_dev(dev)) { PILCallLog(LOG, PIL_CRIT, "mcast6 device [%s] is invalid or not set up properly", dev); return HA_FAIL; } GET_NEXT_TOKEN(bp, mcast6); if (*mcast6 == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast6 [%s] missing mcast6 address", dev); return(HA_FAIL); } if (!is_valid_mcast6_addr(mcast6)) { PILCallLog(LOG, PIL_CRIT, " mcast6 [%s] bad addr [%s]", dev, mcast6); return(HA_FAIL); } GET_NEXT_TOKEN(bp, port); if (*port == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast6 [%s] missing port", dev); return(HA_FAIL); } /* further validation on the port and mcast6 will be done with getaddrinfo later */ /* hops */ GET_NEXT_TOKEN(bp, token); if (*token == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast6 [%s] missing hops", dev); return(HA_FAIL); } if (get_hops(token, &hops) < -1 || hops > 4) { PILCallLog(LOG, PIL_CRIT, " mcast6 [%s] bad hops [%d]", dev, hops); return HA_FAIL; } /* loop */ GET_NEXT_TOKEN(bp, token); if (*token == EOS) { PILCallLog(LOG, PIL_CRIT, "mcast6 [%s] missing loop", dev); return(HA_FAIL); } if (get_loop(token, &loop) < 0 || loop > 1) { PILCallLog(LOG, PIL_CRIT, " mcast6 [%s] bad loop [%d]", dev, loop); return HA_FAIL; } if ((mp = mcast6_new(dev, mcast6, port, hops, loop)) == NULL) { return(HA_FAIL); } OurImports->RegisterNewMedium(mp); return(HA_OK); } /* * Create new UDP/IPv6 multicast heartbeat object * pass in name of interface, multicast address, port, multicast * hops, and multicast loopback value as parameters. * This should get called from hb_dev_parse(). */ static struct hb_media * mcast6_new(const char * intf, const char *mcast6, const char *port, u_char hops, u_char loop) { struct mcast6_private* mcp; struct hb_media * ret; /* create new mcast6_private struct...hmmm...who frees it? 
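 * (Answer: mcast6_new() itself frees it, just below, if allocating the
 * hb_media or duplicating the interface name fails; on success it is
 * kept as ret->pd for the lifetime of the medium.)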
*/ mcp = new_mcast6_private(intf, mcast6, port, hops, loop); if (mcp == NULL) { PILCallLog(LOG, PIL_WARN, "Error creating mcast6_private(%s, %s, %s, %d, %d)", intf, mcast6, port, hops, loop); return(NULL); } ret = (struct hb_media*) MALLOC(sizeof(struct hb_media)); if (ret != NULL) { char * name; memset(ret, 0, sizeof(*ret)); ret->pd = (void*)mcp; name = STRDUP(intf); if (name != NULL) { ret->name = name; } else { FREE(ret); ret = NULL; } } if(ret == NULL) { FREE(mcp->interface); FREE(mcp); } return(ret); } /* * Open UDP/IP multicast heartbeat interface */ static int mcast6_open(struct hb_media* hbm) { struct mcast6_private * mcp; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; if ((mcp->wsocket = mcast6_make_send_sock(hbm)) < 0) { return(HA_FAIL); } if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: write socket: %d" , __FUNCTION__, mcp->wsocket); } if ((mcp->rsocket = mcast6_make_receive_sock(hbm)) < 0) { mcast6_close(hbm); return(HA_FAIL); } if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: read socket: %d" , __FUNCTION__, mcp->rsocket); } PILCallLog(LOG, PIL_INFO, "UDP multicast heartbeat started for [%s]:%s " "on interface %s (hops=%d loop=%d)" , mcp->mcast6_s, mcp->port_s, mcp->interface, mcp->hops, mcp->loop); return(HA_OK); } /* * Close UDP/IP multicast heartbeat interface */ static int mcast6_close(struct hb_media* hbm) { struct mcast6_private * mcp; int rc = HA_OK; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; if (mcp->rsocket >= 0) { if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: Closing socket %d" , __FUNCTION__, mcp->rsocket); } if (close(mcp->rsocket) < 0) { rc = HA_FAIL; } mcp->rsocket = -1; } if (mcp->wsocket >= 0) { if (Debug) { PILCallLog(LOG, PIL_DEBUG , "%s: Closing socket %d" , __FUNCTION__, mcp->wsocket); } if (close(mcp->wsocket) < 0) { rc = HA_FAIL; } mcp->rsocket = -1; } return(rc); } /* * Receive a heartbeat multicast packet from UDP interface */ char mcast6_pkt[MAXMSG]; static void * mcast6_read(struct hb_media* hbm, int *lenp) { struct mcast6_private * mcp; socklen_t addr_len = sizeof(struct sockaddr); struct sockaddr_in their_addr; /* connector's addr information */ int numbytes; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; if ((numbytes=recvfrom(mcp->rsocket, mcast6_pkt, MAXMSG-1, 0 , (struct sockaddr *)&their_addr, &addr_len)) < 0) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "Error receiving from socket: %s" , strerror(errno)); } return NULL; } /* Avoid possible buffer overruns */ mcast6_pkt[numbytes] = EOS; if (numbytes > largest_msg_size) { PILCallLog(LOG, PIL_INFO, "mcast6: maximum received message: %d bytes from %s", numbytes, mcp->mcast6_s); largest_msg_size = numbytes; } if (Debug >= PKTTRACE) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_ntoa(their_addr.sin_addr)); } if (Debug >= PKTCONTTRACE && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", mcast6_pkt); } *lenp = numbytes + 1 ; return mcast6_pkt;; } /* * Send a heartbeat packet over multicast UDP/IP interface */ static int mcast6_write(struct hb_media* hbm, void *pkt, int len) { struct mcast6_private * mcp; int rc; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; rc = sendto(mcp->wsocket, pkt, len, 0 , (struct sockaddr *)&mcp->maddr, sizeof(struct sockaddr_in6)); if (rc != len) { if (!hbm->suppresserrs) { PILCallLog(LOG, PIL_CRIT , "%s: Unable to send " PIL_PLUGINTYPE_S " packet %s[%s]:%s len=%d [%d]: %s" , __FUNCTION__, mcp->interface, mcp->mcast6_s, mcp->port_s , len, rc, strerror(errno)); } return(HA_FAIL); } if (len > 
largest_msg_size) { PILCallLog(LOG, PIL_INFO, "mcast6: maximum sent message: %d bytes to %s", rc, mcp->mcast6_s); largest_msg_size = len; } if (Debug >= PKTTRACE) { PILCallLog(LOG, PIL_DEBUG, "sent %d bytes to %s", rc, mcp->mcast6_s); } if (Debug >= PKTCONTTRACE) { PILCallLog(LOG, PIL_DEBUG, "%s", (const char *)pkt); } return(HA_OK); } static void adjust_socket_bufs(int sockfd, int bytes) { setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &bytes, sizeof(bytes)); setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &bytes, sizeof(bytes)); /* FIXME error handling, logging */ } /* * Set up socket for sending multicast UDP heartbeats */ static int mcast6_make_send_sock(struct hb_media * hbm) { int sockfd; struct mcast6_private * mcp; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; if ((sockfd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { PILCallLog(LOG, PIL_WARN, "Error getting socket: %s", strerror(errno)); return(sockfd); } adjust_socket_bufs(sockfd, 1024*1024); if (set_mcast6_if(sockfd, mcp->interface) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast6 interface: %s", strerror(errno)); } if (set_mcast6_loop(sockfd, mcp->loop) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast6 loopback value: %s", strerror(errno)); } if (set_mcast6_hops(sockfd, mcp->hops) < 0) { PILCallLog(LOG, PIL_WARN, "Error setting outbound mcast6 hops: %s", strerror(errno)); } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_WARN, "Error setting the close-on-exec flag: %s", strerror(errno)); } return(sockfd); } /* * Set up socket for listening to heartbeats (UDP multicasts) */ #define MAXBINDTRIES 50 static int mcast6_make_receive_sock(struct hb_media * hbm) { struct mcast6_private * mcp; int sockfd; int bindtries; int boundyet=0; int one=1; int rc; int error=0; MCASTASSERT(hbm); mcp = (struct mcast6_private *) hbm->pd; if ((sockfd = socket(AF_INET6, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "Error getting socket"); return -1; } /* set REUSEADDR option on socket so you can bind a multicast */ /* reader to multiple interfaces */ if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (void *)&one, sizeof(one)) < 0){ PILCallLog(LOG, PIL_CRIT, "Error setsockopt(SO_REUSEADDR)"); } adjust_socket_bufs(sockfd, 1024*1024); /* ripped off from udp.c, if we all use SO_REUSEADDR */ /* this shouldn't be necessary */ /* Try binding a few times before giving up */ /* Sometimes a process with it open is exiting right now */ for(bindtries=0; !boundyet && bindtries < MAXBINDTRIES; ++bindtries) { rc = bind(sockfd, (void*)&mcp->saddr, sizeof(mcp->saddr)); error = errno; if (rc==0) { boundyet=1; } else if (rc == -1) { if (error == EADDRINUSE) { PILCallLog(LOG, PIL_CRIT, "Can't bind (EADDRINUSE), " "retrying"); sleep(1); } else { /* don't keep trying if the error isn't caused by */ /* the address being in use already...real error */ break; } } } if (!boundyet) { if (error == EADDRINUSE) { /* This happens with multiple udp or ppp interfaces */ PILCallLog(LOG, PIL_INFO , "Someone already listening on port %s [%s]" , mcp->port_s , mcp->interface); PILCallLog(LOG, PIL_INFO, "multicast read process exiting"); close(sockfd); cleanexit(0); } else { PILCallLog(LOG, PIL_WARN, "Unable to bind socket to %s %s. 
Giving up: %s", mcp->mcast6_s, mcp->port_s, strerror(errno)); close(sockfd); return(-1); } } /* join the multicast group...this is what really makes this a */ /* multicast reader */ if (join_mcast6_group(sockfd, &mcp->maddr.sin6_addr, mcp->interface) == -1) { char buf[/* 16 * 3 + some */ 64]; PILCallLog(LOG, PIL_CRIT, "Can't join multicast group %s on interface %s" , inet_ntop(AF_INET6, &mcp->maddr.sin6_addr, buf, sizeof(buf)) , mcp->interface); PILCallLog(LOG, PIL_INFO, "multicast read process exiting"); close(sockfd); cleanexit(0); } if (ANYDEBUG) { PILCallLog(LOG, PIL_DEBUG, "Successfully joined multicast group %s on interface %s", mcp->mcast6_s, mcp->interface); } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_WARN, "Error setting the close-on-exec flag: %s", strerror(errno)); } return(sockfd); } static struct mcast6_private * new_mcast6_private(const char *ifn, const char *mcast6, const char *port, u_char hops, u_char loop) { struct addrinfo hints; struct addrinfo *res; struct mcast6_private *mcp; int error; mcp = MALLOCT(struct mcast6_private); if (mcp == NULL) { return NULL; } memset(mcp, 0, sizeof(*mcp)); mcp->interface = (char *)STRDUP(ifn); if(mcp->interface == NULL) { FREE(mcp); return NULL; } /* mcast group destination address */ memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET6; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_NUMERICHOST; error = getaddrinfo(mcast6, port, &hints, &res); if (error) { PILCallLog(LOG, PIL_CRIT, "getaddrinfo([%s]:%s): %s", mcast6, port, gai_strerror(error)); goto getout; } memcpy(&mcp->maddr, res->ai_addr, res->ai_addrlen); freeaddrinfo(res); /* store canonicalized input as char* again. */ inet_ntop(AF_INET6, &mcp->maddr.sin6_addr, mcp->mcast6_s, sizeof(mcp->mcast6_s)); /* byte order! */ sprintf(mcp->port_s, "%u", ntohs(mcp->maddr.sin6_port)); /* local address to bind() to, results usually in [::]:someport */ memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET6; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = AI_PASSIVE; error = getaddrinfo(NULL, port, &hints, &res); if (error) { PILCallLog(LOG, PIL_CRIT, "getaddrinfo([::]:%s): %s", port, gai_strerror(error)); goto getout; } memcpy(&mcp->saddr, res->ai_addr, res->ai_addrlen); freeaddrinfo(res); mcp->wsocket = -1; mcp->rsocket = -1; mcp->hops = hops; mcp->loop = loop; return mcp; getout: FREE(mcp->interface); FREE(mcp); return NULL; } /* set_mcast6_loop takes a boolean flag, loop, which is useful on * a writing socket. with loop enabled (the default on a multicast socket) * the outbound packet will get looped back and received by the sending * interface, if it is listening for the multicast group and port that the * packet was sent to. Returns 0 on success -1 on failure. */ static int set_mcast6_loop(int sockfd, unsigned int loop) { loop = !!loop; return setsockopt(sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &loop, sizeof(loop)); } /* set_mcast6_hops will set the multicast hop limit for the writing socket. * the socket default is hop=-1 (route default). * The hop is used to limit the scope of the packet and can range from 0-255. * Returns 0 on success -1 on failure. */ static int set_mcast6_hops(int sockfd, int hops) { return setsockopt(sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hops, sizeof(hops)); } /* * set_mcast6_if takes the name of an interface (i.e. eth0) and then * sets that as the interface to use for outbound multicast traffic. * If ifname is NULL, then it the OS will assign the interface. * Returns 0 on success -1 on faliure. 
*/ static int set_mcast6_if(int sockfd, char *ifname) { int rc; rc = if_nametoindex(ifname); if (rc == 0) return -1; return setsockopt(sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF , &rc, sizeof(rc)); } /* join_mcast6_group is used to join a multicast group. the group is * specified by an IPv6 multicast group address in the in_addr * structure passed in as a parameter. The interface name can be used * to "bind" the multicast group to a specific interface (or any * interface if ifname is NULL); * returns 0 on success, -1 on failure. */ static int join_mcast6_group(int sockfd, struct in6_addr *addr, char *ifname) { struct ipv6_mreq mreq6; memset(&mreq6, 0, sizeof(mreq6)); memcpy(&mreq6.ipv6mr_multiaddr, addr, sizeof(struct in6_addr)); if (ifname) { mreq6.ipv6mr_interface = if_nametoindex(ifname); } return setsockopt(sockfd, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6, sizeof(mreq6)); } /* returns true or false */ static int is_valid_dev(const char *dev) { int rc=0; if (dev) { if (if_nametoindex(dev) > 0) rc = 1; } return rc; } /* returns true or false */ static int is_valid_mcast6_addr(const char *addr) { unsigned char mc_addr[sizeof(struct in6_addr)]; if (inet_pton(AF_INET6, addr, &mc_addr) <= 0) return 0; /* http://tools.ietf.org/html/rfc3513#section-2.7 */ if (mc_addr[0] != 0xff) return 0; /* flags. the 0x10 bit marks "transient" */ if ((mc_addr[1] & 0xe0) != 0) return 0; /* scope */ switch (mc_addr[1] & 0x0f) { case 0x0: return 0; /* reserved */ /* heartbeats on interface-local scope are not useful. */ case 0x1: return 0; case 0x2: break; /* link-local scope */ case 0x3: return 0; /* reserved */ case 0x4: break; /* admin-local scope */ case 0x5: break; /* site-local scope */ case 0x6: return 0; /* (unassigned) */ case 0x7: return 0; /* (unassigned) */ case 0x8: break; /* organization-local scope */ case 0x9: return 0; /* (unassigned) */ case 0xA: return 0; /* (unassigned) */ case 0xB: return 0; /* (unassigned) */ case 0xC: return 0; /* (unassigned) */ case 0xD: return 0; /* (unassigned) */ /* heartbeats SHALL NOT be in the global scope */ case 0xE: return 0; case 0xF: return 0; /* reserved */ } /* all trailing zeros? reserved. */ if (!memcmp(mc_addr+2, "\0\0" "\0\0\0\0" "\0\0\0\0" "\0\0\0\0", 14)) return 0; /* still here? plausibility check passed */ return 1; } /* returns hops on succes, -2 on failure */ static int get_hops(const char *hops, u_char *t) { /* not complete yet */ *t=(u_char)atoi(hops); return 0; } /* returns loop on success, -1 on failure */ static int get_loop(const char *loop, u_char *l) { /* not complete yet */ *l=(u_char)atoi(loop); return 0; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/openais.c0000644000000000000000000001746311576626513022410 0ustar00usergroup00000000000000/* * openais.c: openais communication code for heartbeat. * * Copyright (C) 2005 Guochun Shi * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN openais #define PIL_PLUGIN_S "openais" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct ais_private { char * interface; /* Interface name */ evs_handle_t handle; int fd; }; static struct hb_media_fns openaisOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &openaisOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } #define ISOPENAISOBJECT(mp) ((mp) && ((mp)->vf == (void*)&openaisOps)) #define OPENAISASSERT(mp) g_assert(ISOPENAISOBJECT(mp)) static int openais_mtype(char** buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return STRLEN_CONST(PIL_PLUGIN_S); } static int openais_descr(char **buffer) { const char constret[] = "openais communication module"; *buffer = STRDUP(constret); if (!*buffer) { return 0; } return STRLEN_CONST(constret); } static int openais_isping(void) { return 0; } static gboolean openais_msg_ready = FALSE; static char openais_pkt[MAXMSG]; static int openais_pktlen =0; static void evs_deliver_fn(struct in_addr source_addr, void* msg, int msg_len){ if (openais_msg_ready){ PILCallLog(LOG, PIL_CRIT, "message overwrite"); return; } memcpy(openais_pkt, msg, msg_len); openais_pktlen = msg_len; openais_pkt[msg_len] = 0; openais_msg_ready = TRUE; return; } static void evs_confchg_fn(struct in_addr *member_list, int member_list_entries, struct in_addr *left_list, int left_list_entries, struct in_addr *joined_list, int joined_list_entries){ PILCallLog(LOG, PIL_INFO, "evs_confchg_fn is called"); return; } static evs_callbacks_t callbacks = { evs_deliver_fn, evs_confchg_fn }; static struct evs_group groups[]={ {"openais_comm"} }; static int openais_init(evs_handle_t* handle){ if (evs_initialize(handle, &callbacks) != EVS_OK){ PILCallLog(LOG, PIL_INFO, "evs_initialize failed"); return HA_FAIL; } return(HA_OK); } static struct hb_media * openais_new(const char * intf) { struct ais_private* ais; struct hb_media * ret; ais = MALLOC(sizeof(struct ais_private)); if (ais == NULL){ PILCallLog(LOG, PIL_CRIT, "%s: malloc failed for ais_if", __FUNCTION__); return NULL; } memset(ret, 0, sizeof(*ret)); ais->interface = (char*) STRDUP(intf); if (ais->interface == NULL){ PILCallLog(LOG, PIL_CRIT, "%s: STRDUP failed", __FUNCTION__); return NULL; } if (openais_init(&ais->handle) != HA_OK){ PILCallLog(LOG, PIL_CRIT, "%s: 
initialization failed", __FUNCTION__); } ret = (struct hb_media*) MALLOC(sizeof(struct hb_media)); if (ret != NULL) { char * name; memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ais; name = STRDUP(intf); if (name != NULL) { ret->name = name; } else { FREE(ret); ret = NULL; } } if (ret != NULL) { if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "openais_new: returning ret (%s)", ret->name); } }else{ FREE(ais->interface); FREE(ais); if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "openais_new: ret was NULL"); } } return(ret); } static int openais_open(struct hb_media* mp) { struct ais_private * ais; PILCallLog(LOG, PIL_INFO, "%s is called", __FUNCTION__); OPENAISASSERT(mp); ais = (struct ais_private *) mp->pd; if (evs_join(ais->handle, groups, 1) != EVS_OK){ PILCallLog(LOG, PIL_CRIT, "%s: evs_join failed", __FUNCTION__); return HA_FAIL; } if (evs_fd_get(ais->handle, &ais->fd) != EVS_OK){ PILCallLog(LOG, PIL_CRIT, "%s: evs_fd_get failed", __FUNCTION__); return HA_FAIL; } return(HA_OK); } static int openais_close(struct hb_media* mp) { struct ais_private * ais; PILCallLog(LOG, PIL_INFO, "%s is called", __FUNCTION__); OPENAISASSERT(mp); ais = (struct ais_private *) mp->pd; if (ais->handle >= 0) { if (evs_finalize(ais->handle) != EVS_OK){ PILCallLog(LOG,PIL_CRIT, "%s: evs_finalize failed", __FUNCTION__); return HA_FAIL; } ais_handle=-1; } return HA_OK; } /* * Receive a heartbeat broadcast packet from OPENAIS interface */ static void * openais_read(struct hb_media* mp, int * lenp) { struct ais_private * ais; struct pollfd pfd; ais= (struct ais_private *) mp->pd; while (!openais_msg_ready){ pfd.fd = ais->fd; pfd.events = POLLIN|POLLPRI; pfd.revents = 0; OPENAISASSERT(mp); if (poll(&pfd, 1, -1) < 0){ if (errno == EINTR){ break; }else{ PILCallLog(LOG, PIL_CRIT, "%s: poll failed, errno =%d", __FUNCTION__, errno); return NULL; } } if (pfd.revents & (POLLERR|POLLNVAL|POLLHUP)){ PILCallLog(LOG, PIL_CRIT, "%s: poll returns bad revents(%d)", __FUNCTION__, pfd.revents); return NULL; }else if (!pfd.revents & POLLIN){ PILCallLog(LOG, PIL_CRIT, "%s: poll returns but no input data", __FUNCTION__); return NULL; }; if (evs_dispatch(ais->handle, EVS_DISPATCH_ONE) != EVS_OK){ PILCallLog(LOG, PIL_CRIT, "%s: evs_dispatch() failed", __FUNCTION__); return NULL; } } if (openais_msg_ready){ openais_msg_ready = FALSE; *lenp = openais_pktlen +1; return openais_pkt; }else{ return NULL; } } /* * Send a heartbeat packet over openais interface */ static int openais_write(struct hb_media* mp, void *pkt, int len) { struct ais_private * ais; struct iovec iov={ .iov_base = pkt, .iov_len = len, }; OPENAISASSERT(mp); ais = (struct ais_private *) mp->pd; if (evs_mcast_joined(ais->handle, EVS_TYPE_AGREED, &iov, 1) != EVS_OK){ if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "%s: evs_mcast_joined failed", __FUNCTION__); } return HA_FAIL; } return(HA_OK); } static struct hb_media_fns openaisOps ={ openais_new, NULL, openais_open, openais_close, openais_read, openais_write, openais_mtype, openais_descr, openais_isping, }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/ping.c0000644000000000000000000003413211576626513021677 0ustar00usergroup00000000000000/* * ping.c: ICMP-echo-based heartbeat code for heartbeat. * * Copyright (C) 2000 Alan Robertson * * The checksum code in this file code was borrowed from the ping program. * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. 
* This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_NETINET_IN_H #include #endif /* HAVE_NETINET_IN_H */ #ifdef HAVE_NETINET_IN_SYSTM_H # include #endif /* HAVE_NETINET_IN_SYSTM_H */ #ifdef HAVE_NETINET_IP_VAR_H # include #endif /* HAVE_NETINET_IP_VAR_H */ #ifdef HAVE_NETINET_IP_FW_H # include #endif /* HAVE_NETINET_IP_FW_H */ #ifdef HAVE_NETINET_IP_H # include #endif /* HAVE_NETINET_IP_H */ #include #ifdef HAVE_NETINET_IP_COMPAT_H # include #endif /* HAVE_NETINET_IP_COMPAT_H */ #include #include #include #include #include #include #ifdef linux # define ICMP_HDR_SZ sizeof(struct icmphdr) /* 8 */ #else # define ICMP_HDR_SZ 8 #endif #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN ping #define PIL_PLUGIN_S "ping" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct ping_private { struct sockaddr_in addr; /* ping addr */ int sock; /* ping socket */ int ident; /* heartbeat pid */ int iseq; /* sequence number */ }; static struct hb_media* ping_new (const char* interface); static int ping_open (struct hb_media* mp); static int ping_close (struct hb_media* mp); static void* ping_read (struct hb_media* mp, int* lenp); static int ping_write (struct hb_media* mp, void* p, int len); static struct ping_private * new_ping_interface(const char * host); static int in_cksum (u_short * buf, size_t nbytes); static int ping_mtype(char **buffer); static int ping_descr(char **buffer); static int ping_isping(void); #define ISPINGOBJECT(mp) ((mp) && ((mp)->vf == (void*)&pingOps)) #define PINGASSERT(mp) g_assert(ISPINGOBJECT(mp)) static struct hb_media_fns pingOps ={ ping_new, /* Create single object function */ NULL, /* whole-line parse function */ ping_open, ping_close, ping_read, ping_write, ping_mtype, ping_descr, ping_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* 
Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &pingOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int ping_mtype(char **buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return strlen(*buffer); } static int ping_descr(char **buffer) { *buffer = STRDUP("ping membership"); if (!*buffer) { return 0; } return strlen(*buffer); } /* Yes, a ping device */ static int ping_isping(void) { return 1; } static struct ping_private * new_ping_interface(const char * host) { struct ping_private* ppi; struct sockaddr_in *to; if ((ppi = (struct ping_private*)MALLOC(sizeof(struct ping_private))) == NULL) { return NULL; } memset(ppi, 0, sizeof (*ppi)); to = &ppi->addr; #ifdef HAVE_SOCKADDR_IN_SIN_LEN ppi->addr.sin_len = sizeof(struct sockaddr_in); #endif ppi->addr.sin_family = AF_INET; if (inet_pton(AF_INET, host, (void *)&ppi->addr.sin_addr) <= 0) { struct hostent *hep; hep = gethostbyname(host); if (hep == NULL) { PILCallLog(LOG, PIL_CRIT, "unknown host: %s: %s" , host, strerror(errno)); FREE(ppi); ppi = NULL; return NULL; } ppi->addr.sin_family = hep->h_addrtype; memcpy(&ppi->addr.sin_addr, hep->h_addr, hep->h_length); } ppi->ident = getpid() & 0xFFFF; return(ppi); } /* * Create new ping heartbeat object * Name of host is passed as a parameter */ static struct hb_media * ping_new(const char * host) { struct ping_private* ipi; struct hb_media * ret; char * name; ipi = new_ping_interface(host); if (ipi == NULL) { return(NULL); } ret = (struct hb_media *) MALLOC(sizeof(struct hb_media)); if (ret == NULL) { FREE(ipi); ipi = NULL; return(NULL); } memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; name = STRDUP(host); if(name == NULL || add_node(host, PINGNODE_I) != HA_OK) { FREE(ipi); ipi = NULL; FREE(ret); ret = NULL; return(NULL); } ret->name = name; return(ret); } /* * Close ICMP ping heartbeat interface */ static int ping_close(struct hb_media* mp) { struct ping_private * ei; int rc = HA_OK; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; if (ei->sock >= 0) { if (close(ei->sock) < 0) { rc = HA_FAIL; } ei->sock = -1; } return(rc); } /* * Receive a heartbeat ping reply packet. * NOTE: This code only needs to run once for ALL ping nodes. * FIXME!! */ static char ping_pkt[MAXLINE]; static void * ping_read(struct hb_media* mp, int *lenp) { struct ping_private * ei; union { char cbuf[MAXLINE+ICMP_HDR_SZ]; struct ip ip; }buf; const char * bufmax = ((char *)&buf)+sizeof(buf); char * msgstart; socklen_t addr_len = sizeof(struct sockaddr); struct sockaddr_in their_addr; /* connector's addr information */ struct ip * ip; struct icmp icp; int numbytes; int hlen; struct ha_msg * msg; const char *comment; int pktlen; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; ReRead: /* We recv lots of packets that aren't ours */ if ((numbytes=recvfrom(ei->sock, (void *) &buf.cbuf , sizeof(buf.cbuf)-1, 0, (struct sockaddr *)&their_addr , &addr_len)) < 0) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "Error receiving from socket: %s" , strerror(errno)); } return NULL; } /* Avoid potential buffer overruns */ buf.cbuf[numbytes] = EOS; /* Check the IP header */ ip = &buf.ip; hlen = ip->ip_hl * 4; if (numbytes < hlen + ICMP_MINLEN) { PILCallLog(LOG, PIL_WARN, "ping packet too short (%d bytes) from %s" , numbytes , inet_ntoa(*(struct in_addr *) & their_addr.sin_addr.s_addr)); return NULL; } /* Now the ICMP part */ /* (there may be a better way...) 
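 * For now the header is copied into a local struct icmp, presumably
 * because the ICMP header inside the receive buffer is not guaranteed
 * to be suitably aligned on every platform.  Only ICMP_ECHOREPLY
 * packets whose icmp_id matches our getpid()-derived ident are treated
 * as heartbeat replies; anything else loops back to ReRead.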
*/ memcpy(&icp, (buf.cbuf + hlen), sizeof(icp)); if (icp.icmp_type != ICMP_ECHOREPLY || icp.icmp_id != ei->ident) { goto ReRead; /* Not one of ours */ } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_ntoa(their_addr.sin_addr)); } msgstart = (buf.cbuf + hlen + ICMP_HDR_SZ); if (DEBUGPKTCONT && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", msgstart); } pktlen = numbytes - hlen - ICMP_HDR_SZ; memcpy(ping_pkt, buf.cbuf + hlen + ICMP_HDR_SZ, pktlen); ping_pkt[pktlen] = 0; *lenp = pktlen + 1; msg = wirefmt2msg(msgstart, bufmax - msgstart, MSG_NEEDAUTH); if (msg == NULL) { errno = EINVAL; return(NULL); } comment = ha_msg_value(msg, F_COMMENT); if (comment == NULL || strcmp(comment, PIL_PLUGIN_S) != 0) { ha_msg_del(msg); errno = EINVAL; return(NULL); } ha_msg_del(msg); return (ping_pkt); } /* * Send a heartbeat packet over ICMP ping channel * * The peculiar thing here is that we don't send the packet we're given at all * * Instead, we send out the packet we want to hear back from them, just * as though we were they ;-) That's what comes of having such a dumb * device as a "member" of our cluster... * * We ignore packets we're given to write that aren't "status" packets. * */ static int ping_write(struct hb_media* mp, void *p, int len) { struct ping_private * ei; int rc; char* pkt; union{ char* buf; struct icmp ipkt; }*icmp_pkt; size_t size; struct icmp * icp; size_t pktsize; const char * type; const char * ts; struct ha_msg * nmsg; struct ha_msg * msg; static gboolean needroot = FALSE; msg = wirefmt2msg(p, len, MSG_NEEDAUTH); if( !msg){ PILCallLog(LOG, PIL_CRIT, "ping_write(): cannot convert wirefmt to msg"); return(HA_FAIL); } PINGASSERT(mp); ei = (struct ping_private *) mp->pd; type = ha_msg_value(msg, F_TYPE); if (type == NULL || strcmp(type, T_STATUS) != 0 || ((ts = ha_msg_value(msg, F_TIME)) == NULL)) { ha_msg_del(msg); return HA_OK; } /* * We populate the following fields in the packet we create: * * F_TYPE: T_NS_STATUS * F_STATUS: ping * F_COMMENT: ping * F_ORIG: destination name * F_TIME: local timestamp (from "msg") * F_AUTH: added by add_msg_auth() */ if ((nmsg = ha_msg_new(5)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot create new message"); ha_msg_del(msg); return(HA_FAIL); } if (ha_msg_add(nmsg, F_TYPE, T_NS_STATUS) != HA_OK || ha_msg_add(nmsg, F_STATUS, PINGSTATUS) != HA_OK || ha_msg_add(nmsg, F_COMMENT, PIL_PLUGIN_S) != HA_OK || ha_msg_add(nmsg, F_ORIG, mp->name) != HA_OK || ha_msg_add(nmsg, F_TIME, ts) != HA_OK) { ha_msg_del(nmsg); nmsg = NULL; PILCallLog(LOG, PIL_CRIT, "cannot add fields to message"); ha_msg_del(msg); return HA_FAIL; } if (add_msg_auth(nmsg) != HA_OK) { PILCallLog(LOG, PIL_CRIT, "cannot add auth field to message"); ha_msg_del(nmsg); nmsg = NULL; ha_msg_del(msg); return HA_FAIL; } if ((pkt = msg2wirefmt(nmsg, &size)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot convert message to string"); ha_msg_del(msg); return HA_FAIL; } ha_msg_del(nmsg); nmsg = NULL; pktsize = size + ICMP_HDR_SZ; if ((icmp_pkt = MALLOC(pktsize)) == NULL) { PILCallLog(LOG, PIL_CRIT, "out of memory"); free(pkt); ha_msg_del(msg); return HA_FAIL; } icp = &(icmp_pkt->ipkt); icp->icmp_type = ICMP_ECHO; icp->icmp_code = 0; icp->icmp_cksum = 0; icp->icmp_seq = htons(ei->iseq); icp->icmp_id = ei->ident; /* Only used by us */ ++ei->iseq; memcpy(icp->icmp_data, pkt, size); free(pkt); pkt = NULL; /* Compute the ICMP checksum */ icp->icmp_cksum = in_cksum((u_short *)icp, pktsize); retry: if (needroot) { return_to_orig_privs(); } if ((rc=sendto(ei->sock, (void *) 
icmp_pkt, pktsize, MSG_DONTWAIT , (struct sockaddr *)&ei->addr , sizeof(struct sockaddr))) != (ssize_t)pktsize) { if (errno == EPERM && !needroot) { needroot=TRUE; goto retry; } if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "Error sending packet: %s", strerror(errno)); PILCallLog(LOG, PIL_INFO, "euid=%lu egid=%lu" , (unsigned long) geteuid() , (unsigned long) getegid()); } FREE(icmp_pkt); ha_msg_del(msg); return(HA_FAIL); } if (needroot) { return_to_dropped_privs(); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "sent %d bytes to %s" , rc, inet_ntoa(ei->addr.sin_addr)); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "ping pkt: %s" , icp->icmp_data); } FREE(icmp_pkt); ha_msg_del(msg); return HA_OK; } /* * Open ping socket. */ static int ping_open(struct hb_media* mp) { struct ping_private * ei; int sockfd; struct protoent *proto; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; if ((proto = getprotobyname("icmp")) == NULL) { PILCallLog(LOG, PIL_CRIT, "protocol ICMP is unknown: %s", strerror(errno)); return HA_FAIL; } if ((sockfd = socket(AF_INET, SOCK_RAW, proto->p_proto)) < 0) { PILCallLog(LOG, PIL_CRIT, "Can't open RAW socket.: %s", strerror(errno)); return HA_FAIL; } if (fcntl(sockfd, F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_CRIT, "Error setting the close-on-exec flag: %s" , strerror(errno)); } ei->sock = sockfd; PILCallLog(LOG, PIL_INFO, "ping heartbeat started."); return HA_OK; } /* * in_cksum -- * Checksum routine for Internet Protocol family headers (C Version) * This function taken from Mike Muuss' ping program. */ static int in_cksum (u_short *addr, size_t len) { size_t nleft = len; u_short * w = addr; int sum = 0; u_short answer = 0; /* * The IP checksum algorithm is simple: using a 32 bit accumulator (sum) * add sequential 16 bit words to it, and at the end, folding back all * the carry bits from the top 16 bits into the lower 16 bits. */ while (nleft > 1) { sum += *w++; nleft -= 2; } /* Mop up an odd byte, if necessary */ if (nleft == 1) { sum += *(u_char*)w; } /* Add back carry bits from top 16 bits to low 16 bits */ sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ sum += (sum >> 16); /* add carry */ answer = ~sum; /* truncate to 16 bits */ return answer; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/ping6.c0000644000000000000000000003221011576626513021760 0ustar00usergroup00000000000000/* * ping.c: ICMP-echo-based heartbeat code for heartbeat. * * Copyright (C) 2000 Alan Robertson * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_NETINET_IN_H #include #endif /* HAVE_NETINET_IN_H */ #ifdef HAVE_NETINET_IN_SYSTM_H # include #endif /* HAVE_NETINET_IN_SYSTM_H */ #ifdef HAVE_NETINET_IP_VAR_H # include #endif /* HAVE_NETINET_IP_VAR_H */ #ifdef HAVE_NETINET_IP_FW_H # include #endif /* HAVE_NETINET_IP_FW_H */ #ifdef HAVE_NETINET_IP_H # include #endif /* HAVE_NETINET_IP_H */ #include #include #ifdef HAVE_NETINET_IP_COMPAT_H # include #endif /* HAVE_NETINET_IP_COMPAT_H */ #include #include #include #include #include #include #ifdef linux # define ICMP6_HDR_SZ sizeof(struct icmp6_hdr) /* 8 */ #else # define ICMP6_HDR_SZ 8 #endif #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN ping6 #define PIL_PLUGIN_S "ping6" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct ping_private { struct sockaddr_storage addr; /* ping addr */ int sock; /* ping socket */ int ident; /* heartbeat pid */ int iseq; /* sequence number */ }; static struct hb_media* ping_new (const char* interface); static int ping_open (struct hb_media* mp); static int ping_close (struct hb_media* mp); static void* ping_read (struct hb_media* mp, int* lenp); static int ping_write (struct hb_media* mp, void* p, int len); static struct ping_private * new_ping_interface(const char * host); static int ping_mtype(char **buffer); static int ping_descr(char **buffer); static int ping_isping(void); #define ISPINGOBJECT(mp) ((mp) && ((mp)->vf == (void*)&pingOps)) #define PINGASSERT(mp) g_assert(ISPINGOBJECT(mp)) static struct hb_media_fns pingOps ={ ping_new, /* Create single object function */ NULL, /* whole-line parse function */ ping_open, ping_close, ping_read, ping_write, ping_mtype, ping_descr, ping_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree static const char *inet_satop(void *sa) { static char buf[INET6_ADDRSTRLEN]; struct sockaddr_in *sin = (struct sockaddr_in *)sa; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; switch (sin->sin_family) { case AF_INET6: return inet_ntop(sin6->sin6_family, &(sin6->sin6_addr), buf, INET6_ADDRSTRLEN); case AF_INET: return inet_ntop(sin->sin_family, &(sin->sin_addr), buf, INET6_ADDRSTRLEN); } errno = EAFNOSUPPORT; return NULL; } PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &pingOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int ping_mtype(char **buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return 
strlen(*buffer); } static int ping_descr(char **buffer) { *buffer = STRDUP("ping6 membership"); if (!*buffer) { return 0; } return strlen(*buffer); } /* Yes, a ping device */ static int ping_isping(void) { return 1; } static struct ping_private * new_ping_interface(const char * host) { struct ping_private* ppi; struct sockaddr_in6 *to6; if ((ppi = (struct ping_private*)MALLOC(sizeof(struct ping_private))) == NULL) { return NULL; } memset(ppi, 0, sizeof (*ppi)); to6 = (struct sockaddr_in6 *)&ppi->addr; ppi->ident = getpid() & 0xFFFF; if (inet_pton(AF_INET6, host, (void *)&to6->sin6_addr) > 0) { #ifdef HAVE_SOCKADDR_IN_SIN_LEN to6->sin6_len = sizeof(struct sockaddr_in6); #endif to6->sin6_family = AF_INET6; return(ppi); } FREE(ppi); return NULL; } /* * Create new ping heartbeat object * Name of host is passed as a parameter */ static struct hb_media * ping_new(const char * host) { struct ping_private* ipi; struct hb_media * ret; char * name; ipi = new_ping_interface(host); if (ipi == NULL) { return(NULL); } ret = (struct hb_media *) MALLOC(sizeof(struct hb_media)); if (ret == NULL) { FREE(ipi); ipi = NULL; return(NULL); } memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; name = STRDUP(host); if(name == NULL) { FREE(ipi); ipi = NULL; FREE(ret); ret = NULL; return(NULL); } ret->name = name; add_node(host, PINGNODE_I); return(ret); } /* * Close ICMP ping heartbeat interface */ static int ping_close(struct hb_media* mp) { struct ping_private * ei; int rc = HA_OK; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; if (ei->sock >= 0) { if (close(ei->sock) < 0) { rc = HA_FAIL; } ei->sock = -1; } return(rc); } /* * Receive a heartbeat ping reply packet. * NOTE: This code only needs to run once for ALL ping nodes. * FIXME!! */ static char ping_pkt[MAXLINE]; static void * ping_read(struct hb_media* mp, int *lenp) { struct ping_private * ei; union { char cbuf[MAXLINE+ICMP6_HDR_SZ]; }buf; const char * bufmax = ((char *)&buf)+sizeof(buf); char * msgstart; socklen_t addr_len = sizeof(struct sockaddr_in6); struct sockaddr_in6 their_addr; /* connector's addr information */ struct icmp6_hdr icp; int numbytes; struct ha_msg * msg; const char *comment; int pktlen; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; ReRead: /* We recv lots of packets that aren't ours */ if ((numbytes=recvfrom(ei->sock, (void *) &buf.cbuf , sizeof(buf.cbuf)-1, 0, &their_addr , &addr_len)) < 0) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "Error receiving from socket: %s" , strerror(errno)); } return NULL; } /* Avoid potential buffer overruns */ buf.cbuf[numbytes] = EOS; if (numbytes < ICMP6_HDR_SZ) { PILCallLog(LOG, PIL_WARN, "ping packet too short (%d bytes) from %s" , numbytes , inet_satop(&their_addr)); return NULL; } /* Now the ICMP part */ /* (there may be a better way...) 
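 * Unlike the IPv4 case in ping.c, an AF_INET6 raw socket does not
 * deliver the IP header with the payload, so there is no ip_hl offset
 * to skip: the ICMPv6 header starts at offset 0 of the buffer.  The
 * reply is matched on ICMP6_ECHO_REPLY and on icmp6_id equal to our
 * getpid()-derived ident; anything else loops back to ReRead.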
*/ memcpy(&icp, (buf.cbuf), sizeof(icp)); if (icp.icmp6_type != ICMP6_ECHO_REPLY || icp.icmp6_id != ei->ident) { goto ReRead; /* Not one of ours */ } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_satop(&their_addr)); } msgstart = (buf.cbuf + ICMP6_HDR_SZ); if (DEBUGPKTCONT && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", msgstart); } pktlen = numbytes - ICMP6_HDR_SZ; memcpy(ping_pkt, buf.cbuf + ICMP6_HDR_SZ, pktlen); ping_pkt[pktlen] = 0; *lenp = pktlen + 1; msg = wirefmt2msg(msgstart, bufmax - msgstart, MSG_NEEDAUTH); if (msg == NULL) { errno = EINVAL; return(NULL); } comment = ha_msg_value(msg, F_COMMENT); if (comment == NULL || strcmp(comment, PIL_PLUGIN_S) != 0) { ha_msg_del(msg); errno = EINVAL; return(NULL); } ha_msg_del(msg); return (ping_pkt); } /* * Send a heartbeat packet over ICMP ping channel * * The peculiar thing here is that we don't send the packet we're given at all * * Instead, we send out the packet we want to hear back from them, just * as though we were they ;-) That's what comes of having such a dumb * device as a "member" of our cluster... * * We ignore packets we're given to write that aren't "status" packets. * */ static int ping_write(struct hb_media* mp, void *p, int len) { struct ping_private * ei; int rc; char* pkt; union{ char* buf; struct icmp6_hdr ipkt; }*icmp_pkt; size_t size; struct icmp6_hdr * icp; size_t pktsize; const char * type; const char * ts; struct ha_msg * nmsg; struct ha_msg * msg; static gboolean needroot = FALSE; msg = wirefmt2msg(p, len, MSG_NEEDAUTH); if( !msg){ PILCallLog(LOG, PIL_CRIT, "ping_write(): cannot convert wirefmt to msg"); return(HA_FAIL); } PINGASSERT(mp); ei = (struct ping_private *) mp->pd; type = ha_msg_value(msg, F_TYPE); if (type == NULL || strcmp(type, T_STATUS) != 0 || ((ts = ha_msg_value(msg, F_TIME)) == NULL)) { ha_msg_del(msg); return HA_OK; } /* * We populate the following fields in the packet we create: * * F_TYPE: T_NS_STATUS * F_STATUS: ping * F_COMMENT: ping * F_ORIG: destination name * F_TIME: local timestamp (from "msg") * F_AUTH: added by add_msg_auth() */ if ((nmsg = ha_msg_new(5)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot create new message"); ha_msg_del(msg); return(HA_FAIL); } if (ha_msg_add(nmsg, F_TYPE, T_NS_STATUS) != HA_OK || ha_msg_add(nmsg, F_STATUS, PINGSTATUS) != HA_OK || ha_msg_add(nmsg, F_COMMENT, PIL_PLUGIN_S) != HA_OK || ha_msg_add(nmsg, F_ORIG, mp->name) != HA_OK || ha_msg_add(nmsg, F_TIME, ts) != HA_OK) { ha_msg_del(nmsg); nmsg = NULL; PILCallLog(LOG, PIL_CRIT, "cannot add fields to message"); ha_msg_del(msg); return HA_FAIL; } if (add_msg_auth(nmsg) != HA_OK) { PILCallLog(LOG, PIL_CRIT, "cannot add auth field to message"); ha_msg_del(nmsg); nmsg = NULL; ha_msg_del(msg); return HA_FAIL; } if ((pkt = msg2wirefmt(nmsg, &size)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot convert message to string"); ha_msg_del(msg); return HA_FAIL; } ha_msg_del(nmsg); nmsg = NULL; pktsize = size + ICMP6_HDR_SZ; if ((icmp_pkt = MALLOC(pktsize)) == NULL) { PILCallLog(LOG, PIL_CRIT, "out of memory"); free(pkt); ha_msg_del(msg); return HA_FAIL; } icp = &(icmp_pkt->ipkt); icp->icmp6_type = ICMP6_ECHO_REQUEST; icp->icmp6_code = 0; icp->icmp6_cksum = 0; icp->icmp6_seq = htons(ei->iseq); icp->icmp6_id = ei->ident; /* Only used by us */ ++ei->iseq; memcpy((char *)icmp_pkt + ICMP6_HDR_SZ, pkt, size); free(pkt); pkt = NULL; retry: if (needroot) { return_to_orig_privs(); } if ((rc=sendto(ei->sock, (void *) icmp_pkt, pktsize, MSG_DONTWAIT , (struct sockaddr *)&ei->addr , 
sizeof(struct sockaddr_in6))) != (ssize_t)pktsize) { if (errno == EPERM && !needroot) { needroot=TRUE; goto retry; } if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "Error sending packet: %s", strerror(errno)); PILCallLog(LOG, PIL_INFO, "euid=%lu egid=%lu" , (unsigned long) geteuid() , (unsigned long) getegid()); } FREE(icmp_pkt); ha_msg_del(msg); return(HA_FAIL); } if (needroot) { return_to_dropped_privs(); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "sent %d bytes to %s" , rc, inet_satop(&ei->addr)); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "ping pkt: %s" , (char *)icmp_pkt + ICMP6_HDR_SZ); } FREE(icmp_pkt); ha_msg_del(msg); return HA_OK; } /* * Open ping socket. */ static int ping_open(struct hb_media* mp) { struct ping_private * ei; int sockfd; struct protoent *proto; PINGASSERT(mp); ei = (struct ping_private *) mp->pd; if ((proto = getprotobyname("ipv6-icmp")) == NULL) { PILCallLog(LOG, PIL_CRIT, "protocol IPv6-ICMP is unknown: %s", strerror(errno)); return HA_FAIL; } if ((sockfd = socket(AF_INET6, SOCK_RAW, proto->p_proto)) < 0) { PILCallLog(LOG, PIL_CRIT, "Can't open RAW socket.: %s", strerror(errno)); return HA_FAIL; } if (fcntl(sockfd, F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_CRIT, "Error setting the close-on-exec flag: %s" , strerror(errno)); } ei->sock = sockfd; PILCallLog(LOG, PIL_INFO, "ping heartbeat started."); return HA_OK; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/ping_group.c0000644000000000000000000004363511576626513023123 0ustar00usergroup00000000000000/* * ping_group.c: ICMP-echo-based heartbeat code for heartbeat. * * This allows a group of nodes to be pinged. The group is * considered to be available if any of the nodes are available. * * Copyright (C) 2003 Horms * * Based heavily on ping.c * Copyright (C) 2000 Alan Robertson * * The checksum code in this file code was borrowed from the ping program. * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_NETINET_IN_SYSTM_H # include #endif /* HAVE_NETINET_IN_SYSTM_H */ #ifdef HAVE_NETINET_IP_H # include #endif /* HAVE_NETINET_IP_H */ #include #ifdef HAVE_NETINET_IP_H # include #endif /* HAVE_NETINET_IP_H */ #ifdef HAVE_NETINET_IP_VAR_H # include #endif /* HAVE_NETINET_IP_VAR_H */ #ifdef HAVE_NETINET_IP_COMPAT_H # include #endif /* HAVE_NETINET_IP_COMPAT_H */ #ifdef HAVE_NETINET_IP_FW_H # include #endif /* HAVE_NETINET_IP_FW_H */ #include #include #include #include #include #ifdef linux # define ICMP_HDR_SZ sizeof(struct icmphdr) /* 8 */ #else # define ICMP_HDR_SZ 8 #endif #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN ping_group #define PIL_PLUGIN_S "ping_group" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include #define NSLOT 128 /* How old ping sequence numbers can be to still count */ typedef struct ping_group_node ping_group_node_t; struct ping_group_node { struct sockaddr_in addr; /* ping addr */ ping_group_node_t *next; }; typedef struct { int ident; /* heartbeat pid */ int sock; /* ping socket */ ping_group_node_t *node; size_t nnode; int slot[NSLOT]; int iseq; /* sequence number */ } ping_group_private_t; static int ping_group_parse(const char *line); static int ping_group_open (struct hb_media* mp); static int ping_group_close (struct hb_media* mp); static void* ping_group_read (struct hb_media* mp, int* lenp); static int ping_group_write (struct hb_media* mp ,void* msg, int len); static struct hb_media * ping_group_new(const char *name); static int in_cksum (u_short * buf, size_t nbytes); static int ping_group_mtype(char **buffer); static int ping_group_descr(char **buffer); static int ping_group_isping(void); #define ISPINGGROUPOBJECT(mp) \ ((mp) && ((mp)->vf == (void*)&ping_group_ops)) #define PINGGROUPASSERT(mp) g_assert(ISPINGGROUPOBJECT(mp)) static struct hb_media_fns ping_group_ops ={ NULL, /* Create single object function */ ping_group_parse, /* whole-line parse function */ ping_group_open, ping_group_close, ping_group_read, ping_group_write, ping_group_mtype, ping_group_descr, ping_group_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &ping_group_ops , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int ping_group_mtype(char **buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return strlen(*buffer); } static 
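/* Configuration sketch: the comment in front of ping_group_parse()
 * further down still talks about mcast; for this plugin the ha.cf line
 * is a group name followed by one or more member hosts, for example
 * (addresses illustrative only):
 *
 *	ping_group mygateways 10.0.0.1 10.0.0.2
 *
 * The group name becomes the media/node name, and the group is
 * reported as up as long as any one member answers.
 */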
int ping_group_descr(char **buffer) { *buffer = STRDUP("ping group membership"); if (!*buffer) { return 0; } return strlen(*buffer); } /* Yes, a ping device */ static int ping_group_isping(void) { return 1; } static ping_group_node_t * new_ping_group_node(const char *host) { ping_group_node_t* node; node = (ping_group_node_t*)MALLOC(sizeof(ping_group_node_t)); if(!node) { return(NULL); } memset(node, 0, sizeof(ping_group_node_t)); #ifdef HAVE_SOCKADDR_IN_SIN_LEN node->addr.sin_len = sizeof(struct sockaddr_in); #endif node->addr.sin_family = AF_INET; if (inet_pton(AF_INET, host, (void *)&node->addr.sin_addr) <= 0) { struct hostent *hp; hp = gethostbyname(host); if (hp == NULL) { PILCallLog(LOG, PIL_CRIT, "unknown host: %s: %s" , host, strerror(errno)); FREE(node); return NULL; } node->addr.sin_family = hp->h_addrtype; memcpy(&node->addr.sin_addr, hp->h_addr, hp->h_length); } return(node); } static int ping_group_add_node(struct hb_media* media, const char *host) { ping_group_private_t *priv; ping_group_node_t *node; PINGGROUPASSERT(media); priv = (ping_group_private_t *)media->pd; node = new_ping_group_node(host); if(!node) { return(HA_FAIL); } node->next = priv->node; priv->node = node; priv->nnode++; return(HA_OK); } /* * Create new ping heartbeat object * Name of host is passed as a parameter */ static struct hb_media * ping_group_new(const char *name) { ping_group_private_t* priv; struct hb_media * media; char * tmp; priv = (ping_group_private_t*)MALLOC(sizeof(ping_group_private_t)); if(!priv) { return(NULL); } memset(priv, 0, sizeof(ping_group_private_t)); priv->ident = getpid() & 0xFFFF; media = (struct hb_media *) MALLOC(sizeof(struct hb_media)); if (!media) { FREE(priv); return(NULL); } memset(media, 0, sizeof(*media)); media->pd = (void*)priv; tmp = STRDUP(name); if(!tmp) { FREE(priv); FREE(media); return(NULL); } media->name = tmp; add_node(tmp, PINGNODE_I); /* Fake it so that PINGGROUPASSERT() will work * before the media is registered */ media->vf = (void*)&ping_group_ops; return(media); } static void ping_group_destroy_data(struct hb_media* media) { ping_group_private_t* priv; ping_group_node_t * node; PINGGROUPASSERT(media); priv = (ping_group_private_t *)media->pd; while(priv->node) { node = priv->node; priv->node = node->next; FREE(node); } } static void ping_group_destroy(struct hb_media* media) { ping_group_private_t* priv; PINGGROUPASSERT(media); priv = (ping_group_private_t *)media->pd; ping_group_destroy_data(media); FREE(priv); media->pd = NULL; /* XXX: How can we free this? Should media->name really be const? * And on the same topic, how are media unregistered / freed ? */ /* tmp = (char *)media->name; FREE(tmp); media->name = NULL; */ } /* * Close UDP/IP broadcast heartbeat interface */ static int ping_group_close(struct hb_media* mp) { ping_group_private_t * ei; int rc = HA_OK; PINGGROUPASSERT(mp); ei = (ping_group_private_t *) mp->pd; if (ei->sock >= 0) { if (close(ei->sock) < 0) { rc = HA_FAIL; } ei->sock=-1; } ping_group_destroy_data(mp); return(rc); } /* * Receive a heartbeat ping reply packet. 
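 * In outline: pull one packet off the raw ICMP socket, discard it
 * unless it is an echo reply carrying our ident, match the sender
 * against the configured group members, authenticate the embedded
 * status message, and finally de-duplicate by sequence number via
 * slot[seq % NSLOT], so that replies from several group members to the
 * same request are only delivered upward once.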
*/ char pinggroup_pkt[MAXLINE]; static void * ping_group_read(struct hb_media* mp, int *lenp) { ping_group_private_t * ei; union { char cbuf[MAXLINE+ICMP_HDR_SZ]; struct ip ip; }buf; const char * bufmax = ((char *)&buf)+sizeof(buf); char * msgstart; socklen_t addr_len = sizeof(struct sockaddr); struct sockaddr_in their_addr; /* connector's addr information */ struct ip * ip; struct icmp icp; int numbytes; int hlen; int seq; size_t slotn; ping_group_node_t *node; struct ha_msg *msg = NULL; const char *comment; int pktlen; PINGGROUPASSERT(mp); ei = (ping_group_private_t *) mp->pd; ReRead: /* We recv lots of packets that aren't ours */ *lenp = 0; if ((numbytes=recvfrom(ei->sock, (void *) &buf.cbuf , sizeof(buf.cbuf)-1, 0, (struct sockaddr *)&their_addr , &addr_len)) < 0) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "Error receiving from socket: %s" , strerror(errno)); } return(NULL); } /* Avoid potential buffer overruns */ buf.cbuf[numbytes] = EOS; /* Check the IP header */ ip = &buf.ip; hlen = ip->ip_hl * 4; if (numbytes < hlen + ICMP_MINLEN) { PILCallLog(LOG, PIL_WARN, "ping packet too short (%d bytes) from %s" , numbytes , inet_ntoa(*(struct in_addr *) & their_addr.sin_addr.s_addr)); return(NULL); } /* Now the ICMP part */ /* (there may be a better way...) */ memcpy(&icp, (buf.cbuf + hlen), sizeof(icp)); if (icp.icmp_type != ICMP_ECHOREPLY || icp.icmp_id != ei->ident) { goto ReRead; /* Not ours */ } seq = ntohs(icp.icmp_seq); if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "got %d byte packet from %s" , numbytes, inet_ntoa(their_addr.sin_addr)); } msgstart = (buf.cbuf + hlen + ICMP_HDR_SZ); if (DEBUGPKTCONT && numbytes > 0) { PILCallLog(LOG, PIL_DEBUG, "%s", msgstart); } for(node = ei->node; node; node = node->next) { if(!memcmp(&(their_addr.sin_addr), &(node->addr.sin_addr) , sizeof(struct in_addr))) { break; } } if(!node) { goto ReRead; /* Not ours */ } msg = wirefmt2msg(msgstart, bufmax - msgstart, MSG_NEEDAUTH); if(msg == NULL) { errno = EINVAL; return(NULL); } comment = ha_msg_value(msg, F_COMMENT); if(comment == NULL || strcmp(comment, PIL_PLUGIN_S)) { ha_msg_del(msg); errno = EINVAL; return(NULL); } slotn = seq % NSLOT; if(ei->slot[slotn] == seq) { /* Duplicate within window */ ha_msg_del(msg); goto ReRead; /* Not ours */ } ei->slot[slotn] = seq; pktlen = numbytes - hlen - ICMP_HDR_SZ; pinggroup_pkt[pktlen] = 0; memcpy(pinggroup_pkt, buf.cbuf + hlen + ICMP_HDR_SZ, pktlen); *lenp = pktlen + 1; ha_msg_del(msg); return(pinggroup_pkt); } /* * Send a heartbeat packet over broadcast UDP/IP interface * * The peculiar thing here is that we don't send the packet we're given at all * * Instead, we send out the packet we want to hear back from them, just * as though we were they ;-) That's what comes of having such a dumb * device as a "member" of our cluster... * * We ignore packets we're given to write that aren't "status" packets. 
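 * Unlike the IPv6 ping plugin this is an IPv4 raw socket, so the ICMP
 * checksum has to be computed here with in_cksum() before sending.
 * The same echo request is then sent to every configured member of the
 * group, with a short sleep between the individual sends.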
* */ static int ping_group_write(struct hb_media* mp, void *p, int len) { ping_group_private_t * ei; int rc; char* pkt; union{ char* buf; struct icmp ipkt; }*icmp_pkt; size_t size; struct icmp * icp; size_t pktsize; const char * type; const char * ts; struct ha_msg * nmsg; ping_group_node_t * node; struct ha_msg* msg; static gboolean needroot=FALSE; PINGGROUPASSERT(mp); if ((msg = wirefmt2msg(p, len, MSG_NEEDAUTH)) == NULL) { PILCallLog(LOG, PIL_CRIT, "ping_write(): cannot convert wirefmt to msg"); return(HA_FAIL); } ei = (ping_group_private_t *) mp->pd; type = ha_msg_value(msg, F_TYPE); if (type == NULL || strcmp(type, T_STATUS) != 0 || ((ts = ha_msg_value(msg, F_TIME)) == NULL)) { ha_msg_del(msg); return HA_OK; } /* * We populate the following fields in the packet we create: * * F_TYPE: T_NS_STATUS * F_STATUS: ping * F_COMMENT: ping_group * F_ORIG: destination name * F_TIME: local timestamp (from "msg") * F_AUTH: added by add_msg_auth() */ if ((nmsg = ha_msg_new(5)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot create new message"); ha_msg_del(msg); return(HA_FAIL); } if (ha_msg_add(nmsg, F_TYPE, T_NS_STATUS) != HA_OK || ha_msg_add(nmsg, F_STATUS, PINGSTATUS) != HA_OK || ha_msg_add(nmsg, F_COMMENT, PIL_PLUGIN_S) != HA_OK || ha_msg_add(nmsg, F_ORIG, mp->name) != HA_OK || ha_msg_add(nmsg, F_TIME, ts) != HA_OK) { ha_msg_del(nmsg); nmsg = NULL; PILCallLog(LOG, PIL_CRIT, "cannot add fields to message"); ha_msg_del(msg); return HA_FAIL; } if (add_msg_auth(nmsg) != HA_OK) { PILCallLog(LOG, PIL_CRIT, "cannot add auth field to message"); ha_msg_del(nmsg); nmsg = NULL; ha_msg_del(msg); return HA_FAIL; } if ((pkt = msg2wirefmt(nmsg, &size)) == NULL) { PILCallLog(LOG, PIL_CRIT, "cannot convert message to string"); ha_msg_del(msg); return HA_FAIL; } ha_msg_del(nmsg); nmsg = NULL; pktsize = size + ICMP_HDR_SZ; if ((icmp_pkt = MALLOC(pktsize)) == NULL) { PILCallLog(LOG, PIL_CRIT, "out of memory"); free(pkt); ha_msg_del(msg); return HA_FAIL; } icp = &(icmp_pkt->ipkt); icp->icmp_type = ICMP_ECHO; icp->icmp_code = 0; icp->icmp_cksum = 0; icp->icmp_seq = htons(ei->iseq); icp->icmp_id = ei->ident; /* Only used by us */ ++ei->iseq; memcpy(icp->icmp_data, pkt, size); free(pkt); pkt = NULL; /* Compute the ICMP checksum */ icp->icmp_cksum = in_cksum((u_short *)icp, pktsize); retry: if (needroot) { return_to_orig_privs(); } for(node = ei->node; node; node = node->next) { if ((rc=sendto(ei->sock, (void *) icmp_pkt, pktsize , MSG_DONTWAIT , (struct sockaddr *)&node->addr , sizeof(struct sockaddr))) != (ssize_t)pktsize) { if (errno == EPERM && !needroot) { needroot=TRUE; goto retry; } if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "Error sending packet: %s" , strerror(errno)); PILCallLog(LOG, PIL_INFO, "euid=%lu egid=%lu" , (unsigned long) geteuid() , (unsigned long) getegid()); } FREE(icmp_pkt); ha_msg_del(msg); return(HA_FAIL); } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "sent %d bytes to %s" , rc, inet_ntoa(node->addr.sin_addr)); } cl_shortsleep(); } if (needroot) { return_to_dropped_privs(); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s" , (const char*)icp->icmp_data); } FREE(icmp_pkt); ha_msg_del(msg); return HA_OK; } /* * Open ping socket. 
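 * "Opening" means creating a raw ICMP socket, which normally needs
 * root (CAP_NET_RAW on Linux); the EPERM/needroot retry in
 * ping_group_write() exists because heartbeat usually runs with its
 * privileges dropped.  The socket is marked close-on-exec.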
*/ static int ping_group_open(struct hb_media* mp) { ping_group_private_t * ei; int sockfd; struct protoent *proto; PINGGROUPASSERT(mp); ei = (ping_group_private_t *) mp->pd; if ((proto = getprotobyname("icmp")) == NULL) { PILCallLog(LOG, PIL_CRIT, "protocol ICMP is unknown: %s", strerror(errno)); return HA_FAIL; } if ((sockfd = socket(AF_INET, SOCK_RAW, proto->p_proto)) < 0) { PILCallLog(LOG, PIL_CRIT, "Can't open RAW socket.: %s", strerror(errno)); return HA_FAIL; } if (fcntl(sockfd, F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_CRIT, "Error setting the close-on-exec flag: %s" , strerror(errno)); } ei->sock = sockfd; PILCallLog(LOG, PIL_INFO, "ping group heartbeat started."); return HA_OK; } /* * in_cksum -- * Checksum routine for Internet Protocol family headers (C Version) * This function taken from Mike Muuss' ping program. */ static int in_cksum (u_short *addr, size_t len) { size_t nleft = len; u_short * w = addr; int sum = 0; u_short answer = 0; /* * The IP checksum algorithm is simple: using a 32 bit accumulator (sum) * add sequential 16 bit words to it, and at the end, folding back all * the carry bits from the top 16 bits into the lower 16 bits. */ while (nleft > 1) { sum += *w++; nleft -= 2; } /* Mop up an odd byte, if necessary */ if (nleft == 1) { sum += *(u_char*)w; } /* Add back carry bits from top 16 bits to low 16 bits */ sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ sum += (sum >> 16); /* add carry */ answer = ~sum; /* truncate to 16 bits */ return answer; } /* mcast_parse will parse the line in the config file that is * associated with the media's type (hb_dev_mtype). It should * receive the rest of the line after the mtype. And it needs * to call hb_dev_new, add the media to the list of available media. * * So in this case, the config file line should look like * mcast [device] [mcast group] [port] [mcast ttl] [mcast loop] * for example: * mcast eth0 225.0.0.1 694 1 0 */ static int ping_group_parse(const char *line) { char tmp[MAXLINE]; size_t len; size_t nhost = 0; struct hb_media *media; /* Skip over white space, then grab the name */ line += strspn(line, WHITESPACE); len = strcspn(line, WHITESPACE); strncpy(tmp, line, len); line += len; *(tmp+len) = EOS; if(*tmp == EOS) { return(HA_FAIL); } media = ping_group_new(tmp); if (!media) { return(HA_FAIL); } while(1) { /* Skip over white space, then grab the host */ line += strspn(line, WHITESPACE); len = strcspn(line, WHITESPACE); strncpy(tmp, line, len); line += len; *(tmp+len) = EOS; if(*tmp == EOS) { break; } if(ping_group_add_node(media, tmp) < 0) { ping_group_destroy(media); return(HA_FAIL); } nhost++; } if(nhost == 0) { ping_group_destroy(media); return(HA_FAIL); } OurImports->RegisterNewMedium(media); return(HA_OK); } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/rds.c0000644000000000000000000004402111576626513021530 0ustar00usergroup00000000000000/* * (c) 2010 Lars Ellenberg * RDS adaption of ucast.c, which in turn was: ... * ... well, see the first comment there. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NOTE * This is * * ==================================== * == NOT PRODUCTION CODE, == * ==================================== * * but proof of concept only. It will break if things break. * * It is here only in case someone finds time to pick this up and add RDS * specific error handling to sendto() and others, figure out when to use * RDS_CANCEL_SENT_TO (and how to get the necessary information into the * plugin), how to handle necessary retries on congestion and whatever else is * necessary to make it actually work. * * And, how to sensibly configure (and reconfigure, preferably at runtime) * the list of peers this thing talks to. * * The easiest way to configure it will be to just list all the node names, * and have them map to ipv6 addresses using /etc/hosts. * rds eth1 node-a node-b node-c node-d node-e * anything that resolves to the ip of eth1 on this node will be skipped, * so you can have ha.cf identical on all nodes. * If this is ever made fit for production, most likely it should read the list * of peers from some config file instead, or get it via some additional API * plugin hook. * * For some information about RDS, see * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob;f=Documentation/networking/rds.txt */ #include #include #include #include #include #include #ifdef HAVE_STRINGS_H #include #endif #include #include #include #include #include #include #ifndef HAVE_INET_ATON extern int inet_aton(const char *, struct in_addr *); #endif #include #include #include #if defined(SO_BINDTODEVICE) #include #endif #include #include /* * Plugin information */ #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN rds #define PIL_PLUGIN_S "rds" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include /* * Macros/Defines */ #define ISRDSOBJECT(mp) ((mp) && ((mp)->vf == (void*)&rdsOps)) #define RDSASSERT(mp) g_assert(ISRDSOBJECT(mp)) #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree #define MAXBINDTRIES 1 static int largest_msg_size = 0; /* * Structure Declarations */ struct rds_private { char* interface; /* Interface name */ struct sockaddr_in my_addr; /* Local address */ int port; /* RDS port */ int socket; /* Read/Write-socket */ int n_peers; /* how many peers? */ /* Not a list, but a hash table: * "soon" we should be able to send node messages * only to the destination node, not to all nodes, * and then a "node name" -> "in_addr" key value pair * comes in handy. 
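 * For now rds_write() simply fans every outgoing packet out to all
 * entries in this table via g_hash_table_foreach(); the disabled
 * "#if 0" block there sketches how a per-destination send could work
 * once F_TO handling is sorted out.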
*/ GHashTable *peer_addresses; }; /* * Function Prototypes */ PIL_rc PIL_PLUGIN_INIT(PILPlugin *us, const PILPluginImports *imports); static int rds_parse(const char *line); static struct hb_media* rds_new(const char *intf); static int rds_open(struct hb_media *mp); static int rds_close(struct hb_media *mp); static void* rds_read(struct hb_media *mp, int* lenp); static int rds_write(struct hb_media *mp, void *msg, int len); static int HB_make_sock(struct hb_media *mp); static struct rds_private* new_ip_interface(const char *ifn); static int rds_descr(char **buffer); static int rds_mtype(char **buffer); static int rds_isping(void); /* * External Data */ extern struct hb_media *sysmedia[]; extern int nummedia; /* * Module Public Data */ const char hb_media_name[] = "RDS/IP"; static struct hb_media_fns rdsOps = { NULL, rds_parse, rds_open, rds_close, rds_read, rds_write, rds_mtype, rds_descr, rds_isping }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; static int localrdsport; /* * Implmentation */ PIL_rc PIL_PLUGIN_INIT(PILPlugin *us, const PILPluginImports *imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &rdsOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } #define GET_NEXT_TOKEN(bp, token) do { \ int toklen; \ bp += strspn(bp, WHITESPACE); \ toklen = strcspn(bp, WHITESPACE); \ strncpy(token, bp, toklen); \ bp += toklen; \ token[toklen] = EOS; \ } while(0) static void free_key_value(gpointer kv) { FREE(kv); } static int rds_parse(const char *line) { const char *bp = line; struct hb_media *mp; struct rds_private *ei; char dev[MAXLINE]; char ip[MAXLINE]; GET_NEXT_TOKEN(bp, dev); if (*dev == EOS) { PILCallLog(LOG, PIL_CRIT, "rds statement without device"); return HA_FAIL; } mp = rds_new(dev); if (!mp) return HA_FAIL; ei = mp->pd; PILCallLog(LOG, PIL_DEBUG, "rds: on %s %s:%d", ei->interface, inet_ntoa(ei->my_addr.sin_addr), localrdsport); ei->peer_addresses = g_hash_table_new_full(g_str_hash, g_str_equal, free_key_value, free_key_value); if (ei->peer_addresses == NULL) { PILCallLog(LOG, PIL_CRIT, "rds: g_hash_table_new_full failed"); goto fail; } for (;;) { char *name; struct sockaddr_in *addr; struct hostent *h; /* FIXME get node names from somewhere else, * not specify them on the rds media line again */ GET_NEXT_TOKEN(bp, ip); if (*ip == EOS) break; h = gethostbyname(ip); if (!h) { PILCallLog(LOG, PIL_CRIT, "rds: cannot resolve hostname %s", ip); goto fail; } if (ei->my_addr.sin_addr.s_addr == ((struct in_addr *)h->h_addr_list[0])->s_addr) { PILCallLog(LOG, PIL_DEBUG, "rds: %s skipping my own address", ei->interface); continue; } addr = MALLOC(sizeof(*addr)); if (!addr) { PILCallLog(LOG, PIL_CRIT, "rds: cannot alloc addr"); goto fail; } name = STRDUP(h->h_name); if (!name) { PILCallLog(LOG, PIL_CRIT, "rds: cannot strdup name"); FREE(addr); goto fail; } addr->sin_family = AF_INET; addr->sin_port = htons(localrdsport); memcpy(&addr->sin_addr, h->h_addr_list[0], sizeof(*addr)); g_hash_table_insert(ei->peer_addresses, name, addr); PILCallLog(LOG, PIL_DEBUG, "rds: %s %s -> %s", ei->interface, name, 
inet_ntoa(addr->sin_addr)); ei->n_peers++; } /* we found some, and now reached the end of the list */ if (ei->n_peers) { sysmedia[nummedia++] = mp; return HA_OK; } /* empty list? */ PILCallLog(LOG, PIL_CRIT, "rds: [%s] missing target IP address/hostname", dev); fail: if (ei->peer_addresses) { g_hash_table_destroy(ei->peer_addresses); ei->peer_addresses = NULL; } FREE(ei->interface); FREE(mp->pd); FREE((void*)(unsigned long)(mp->name)); FREE(mp); return HA_FAIL; } static int rds_mtype(char **buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { PILCallLog(LOG, PIL_CRIT, "rds: memory allocation error (line %d)", (__LINE__ - 2) ); return 0; } return strlen(*buffer); } static int rds_descr(char **buffer) { *buffer = strdup(hb_media_name); if (!*buffer) { PILCallLog(LOG, PIL_CRIT, "rds: memory allocation error (line %d)", (__LINE__ - 2) ); return 0; } return strlen(*buffer); } static int rds_isping(void) { return 0; } static int rds_init(void) { struct servent *service; g_assert(OurImports != NULL); if (localrdsport <= 0) { const char *chport; if ((chport = OurImports->ParamValue("rdsport")) != NULL) { if (sscanf(chport, "%d", &localrdsport) <= 0 || localrdsport <= 0) { PILCallLog(LOG, PIL_CRIT, "rds: bad port number %s", chport); return HA_FAIL; } } } /* No port specified in the configuration... */ if (localrdsport <= 0) { /* If our service name is in /etc/services, then use it */ if ((service=getservbyname(HA_SERVICENAME, "rds")) != NULL) localrdsport = ntohs(service->s_port); else localrdsport = UDPPORT; } return HA_OK; } /* * Create new RDS/IP heartbeat object * Name of interface and address are passed as parameters */ static struct hb_media* rds_new(const char *intf) { struct rds_private *ipi; struct hb_media *ret; rds_init(); ipi = new_ip_interface(intf); if (!ipi) { PILCallLog(LOG, PIL_CRIT, "rds: interface [%s] does not exist", intf); return NULL; } ret = (struct hb_media*)MALLOC(sizeof(struct hb_media)); if (!ret) { PILCallLog(LOG, PIL_CRIT, "rds: cannot alloc hb_media"); goto out1; } else { memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; ret->name = STRDUP(intf); if (!ret->name) { PILCallLog(LOG, PIL_CRIT, "rds: cannot strdup name"); goto out2; } } return ret; out2: FREE(ret); out1: FREE(ipi->interface); FREE(ipi); return NULL; } /* * Open RDS/IP unicast heartbeat interface */ static int rds_open(struct hb_media* mp) { struct rds_private * ei; RDSASSERT(mp); ei = (struct rds_private*)mp->pd; ei->socket = HB_make_sock(mp); if (ei->socket < 0) return HA_FAIL; PILCallLog(LOG, PIL_INFO, "rds: started on %s %s:%d", ei->interface, inet_ntoa(ei->my_addr.sin_addr), localrdsport); return HA_OK; } /* * Close RDS/IP unicast heartbeat interface */ static int rds_close(struct hb_media* mp) { struct rds_private *ei; int rc = HA_OK; RDSASSERT(mp); ei = (struct rds_private*)mp->pd; if (ei->socket >= 0) { if (close(ei->socket) < 0) { rc = HA_FAIL; } ei->socket = -1; } return rc; } /* * Receive a heartbeat unicast packet from RDS interface */ char rds_pkt[MAXMSG]; static void * rds_read(struct hb_media* mp, int *lenp) { struct rds_private *ei; socklen_t addr_len; struct sockaddr_in their_addr; int numbytes; RDSASSERT(mp); ei = (struct rds_private*)mp->pd; addr_len = sizeof(struct sockaddr); if ((numbytes = recvfrom(ei->socket, rds_pkt, MAXMSG-1, 0, (struct sockaddr *)&their_addr, &addr_len)) == -1) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "rds: error receiving from socket: %s", strerror(errno)); } return NULL; } if (numbytes == 0) { PILCallLog(LOG, PIL_CRIT, "rds: received zero 
bytes"); return NULL; } if (numbytes > largest_msg_size) { PILCallLog(LOG, PIL_INFO, "rds: %s maximum received message: %d bytes from %s", ei->interface, numbytes, inet_ntoa(their_addr.sin_addr)); largest_msg_size = numbytes; } rds_pkt[numbytes] = EOS; if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "rds: received %d byte packet from %s", numbytes, inet_ntoa(their_addr.sin_addr)); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s", rds_pkt); } *lenp = numbytes +1; return rds_pkt; } /* * Send a heartbeat packet over unicast RDS/IP interface */ struct state_on_stack { struct hb_media *mp; char *peer; void *pkt; int len; int err_count; }; static void rds_sendto_one(gpointer key, gpointer value, gpointer user_data) { struct state_on_stack *s = user_data; struct sockaddr_in *addr = value; struct rds_private *ei = s->mp->pd; const char *this_peer = key; int rc; if (s->peer && strcmp(s->peer, this_peer)) { if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "rds: %s != %s, NOT sending to %s", this_peer, s->peer, inet_ntoa(addr->sin_addr)); } return; } rc = sendto(ei->socket, s->pkt, s->len, MSG_DONTWAIT, addr, sizeof(*addr)); /* FIXME * handle RDS specific meaning of error codes like EMSGSIZE, EAGAIN, ENOBUFS */ if (rc != s->len) { s->err_count++; if (!s->mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "sendto(%s) failed: [%d] %s", inet_ntoa(addr->sin_addr), rc, strerror(errno)); } return; } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "rds: sent %d bytes to %s", rc, inet_ntoa(addr->sin_addr)); } } static int rds_write(struct hb_media* mp, void *pkt, int len) { struct state_on_stack s; struct rds_private *ei = mp->pd; #if 0 char node[64]; char ns_len[3]; char delim = EOS; char *dest = NULL; #endif RDSASSERT(mp); s.mp = mp; s.peer = NULL; s.pkt = pkt; s.len = len; s.err_count = 0; #if 0 /* We assume that the F_TO field, if present, is always the first field * in a message. A follow-up commit to hb_msg_internal.c will actually * assure that. */ /* Unfortunately it is not that easy. * Cluster wide sequence numbers (F_SEQ) will get out-of-sync, triggering * rexmit. Unless we send "dummy" messages, or piggy-back some * information about node-messages not sent to everyone to the next * cluster wide message. Impact on F_ORDERSEQ is even worse. */ if (*(const unsigned*)pkt == *(const unsigned *)MSG_START_NETSTRING) { int rc = sscanf((char*)pkt+4, "%2[0-9]:(0)dest=%63[^,]%c", ns_len, node, &delim); /* TOBEDONE if you are paranoid, you need to double check * that this was correct netstring encoding, by checking the length */ if (rc == 3 && delim == ',') dest = node; } else if (*(const unsigned*)pkt == *(const unsigned *)MSG_START) { int rc = sscanf((char*)pkt+4, "dest=%63[^\n]%c", node, &delim); if (rc == 2 && delim == '\n') dest = node; } if (dest) { if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "rds: detected node message to %s", dest); } /* not yet enabled! s.peer = dest; */ } #endif g_hash_table_foreach(ei->peer_addresses, rds_sendto_one, &s); if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s", (const char*)pkt); } if (s.err_count >= ei->n_peers) return HA_FAIL; if (len > largest_msg_size) { PILCallLog(LOG, PIL_INFO, "rds: %s maximum sent message: %d bytes", ei->interface, len); largest_msg_size = len; } return HA_OK; } /* if_getaddr gets the ip address from an interface * specified by name and places it in addr. * returns 0 on success and -1 on failure. 
*/ static int if_getaddr(const char *ifname, struct in_addr *addr) { struct ifreq if_info; int j; int maxtry = 120; gboolean gotaddr = FALSE; int err = 0; if (!addr) { return -1; } addr->s_addr = INADDR_ANY; memset(&if_info, 0, sizeof(if_info)); if (ifname) { strncpy(if_info.ifr_name, ifname, IFNAMSIZ-1); }else{ /* ifname is NULL, so use any address */ return 0; } if (Debug > 0) { PILCallLog(LOG, PIL_DEBUG, "looking up address for %s" , if_info.ifr_name); } for (j=0; j < maxtry && !gotaddr; ++j) { int fd; if ((fd=socket(AF_INET, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "Error getting socket"); return -1; } if (ioctl(fd, SIOCGIFADDR, &if_info) >= 0) { gotaddr = TRUE; }else{ err = errno; switch(err) { case EADDRNOTAVAIL: sleep(1); break; default: close(fd); goto getout; } } close(fd); } getout: if (!gotaddr) { PILCallLog(LOG, PIL_CRIT , "Unable to retrieve local interface address" " for interface [%s] using ioctl(SIOCGIFADDR): %s" , ifname, strerror(err)); return -1; } /* * This #define w/void cast is to quiet alignment errors on some * platforms (notably Solaris) */ #define SOCKADDR_IN(a) ((struct sockaddr_in *)((void*)(a))) memcpy(addr, &(SOCKADDR_IN(&if_info.ifr_addr)->sin_addr) , sizeof(struct in_addr)); return 0; } static void adjust_socket_bufs(int sockfd, int bytes) { setsockopt(sockfd, SOL_SOCKET, SO_SNDBUFFORCE, &bytes, sizeof(bytes)); setsockopt(sockfd, SOL_SOCKET, SO_RCVBUFFORCE, &bytes, sizeof(bytes)); getsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &bytes, NULL); PILCallLog(LOG, PIL_INFO, "rds: set sndbuf to %d", bytes); getsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &bytes, NULL); PILCallLog(LOG, PIL_INFO, "rds: set rcvbuf to %d", bytes); } /* * Set up socket for sending unicast RDS heartbeats */ static int HB_make_sock(struct hb_media *mp) { int sockfd; struct rds_private *ei; RDSASSERT(mp); ei = (struct rds_private*)mp->pd; if ((sockfd = socket(AF_RDS, SOCK_SEQPACKET, 0)) < 0) { PILCallLog(LOG, PIL_CRIT, "rds: Error creating socket: %s", strerror(errno)); return -1; } adjust_socket_bufs(sockfd, ei->n_peers * 512*1024); if (bind(sockfd, &ei->my_addr, sizeof(struct sockaddr_in)) < 0) { PILCallLog(LOG, PIL_CRIT, "rds: unable to bind socket: %s", strerror(errno)); close(sockfd); return -1; } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC) < 0) { PILCallLog(LOG, PIL_CRIT, "rds: error setting close-on-exec flag: %s", strerror(errno)); } return sockfd; } static struct rds_private* new_ip_interface(const char *ifn) { struct rds_private *ep; ep = MALLOC(sizeof(struct rds_private)); if (!ep) { PILCallLog(LOG, PIL_CRIT, "rds: cannot alloc rds_private"); return NULL; } memset(ep, 0, sizeof(*ep)); /* zero the struct */ ep->interface = STRDUP(ifn); if (!ep->interface) { PILCallLog(LOG, PIL_CRIT, "rds: cannot strdup interface"); goto out1; } if (if_getaddr(ep->interface, &ep->my_addr.sin_addr)) goto out2; ep->my_addr.sin_family = AF_INET; /* host byte order */ ep->my_addr.sin_port = htons(localrdsport); /* short, network byte order */ ep->port = localrdsport; ep->socket = -1; return ep; out2: FREE(ep->interface); out1: FREE(ep); return NULL; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/serial.c0000644000000000000000000003717411576626513022232 0ustar00usergroup00000000000000/* * Linux-HA serial heartbeat code * * The basic facilities for round-robin (ring) heartbeats are * contained within. 
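 * On the wire each message travels as plain text framed by MSG_START
 * and MSG_END marker lines; serial_read() reassembles it one line at a
 * time via ttygets(), while serial_write() pushes it out in
 * FRAGSIZE-byte fragments, pacing them with a delay derived from the
 * configured baud rate.
 *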
* * Copyright (C) 1999, 2000, 2001 Alan Robertson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN serial #define PIL_PLUGIN_S "serial" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct serial_private { char * ttyname; int ttyfd; /* For direct TTY i/o */ int consecutive_errors; struct hb_media* next; }; static int serial_baud = 0; static const char * baudstring; /* Used to maintain a list of our serial ports in the ring */ static struct hb_media* lastserialport; static struct hb_media* serial_new(const char * value); static void* serial_read(struct hb_media *mp, int* lenp); static char * ttygets(char * inbuf, int length , struct serial_private *tty); static int serial_write(struct hb_media*mp, void *msg , int len); static int serial_open(struct hb_media* mp); static int ttysetup(int fd, const char * ourtty); static int opentty(char * serial_device); static int serial_close(struct hb_media* mp); static int serial_init(void); static void serial_localdie(void); static int serial_mtype(char **buffer); static int serial_descr(char **buffer); static int serial_isping(void); static struct hb_media_fns serialOps ={ serial_new, /* Create single object function */ NULL, /* whole-line parse function */ serial_open, serial_close, serial_read, serial_write, serial_mtype, serial_descr, serial_isping, }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; static int fragment_write_delay = 0; #define FRAGSIZE 512 #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { PIL_rc rc; /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ rc = imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &serialOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); serial_init(); return rc; } #define IsTTYOBJECT(mp) ((mp) && ((mp)->vf == (void*)&serial_media_fns)) /* #define TTYASSERT(mp) ASSERT(IsTTYOBJECT(mp)) */ #define TTYASSERT(mp) #define RTS_WARNTIME 3600 static int serial_mtype (char **buffer) { *buffer = STRDUP("serial"); 
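/* Fragment pacing, roughly: a FRAGSIZE-byte chunk needs
 * FRAGSIZE / (baud / 8) seconds to drain, and
 * compute_fragment_write_delay() below turns that into microseconds.
 * Worked example (figures illustrative): at 19200 baud, 19200/8 = 2400
 * bytes/s, so a 512-byte fragment gives 512/2400 s, roughly 213333
 * usec between the fragment writes in serial_write().
 */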
if (!*buffer) { return 0; } return strlen(*buffer); } static int serial_descr (char **buffer) { *buffer = STRDUP("serial ring"); if (!*buffer) { return 0; } return strlen(*buffer); } static int serial_isping (void) { return 0; } static int compute_fragment_write_delay(void) { int rate_bps = atoi(baudstring); if (rate_bps < 300 ){ cl_log(LOG_ERR, "%s: invalid baud rate(%s)", __FUNCTION__, baudstring); return HA_FAIL; } fragment_write_delay = (1.0*FRAGSIZE)/(rate_bps/8)*1000000; return HA_OK; } /* Initialize global serial data structures */ static int serial_init (void) { lastserialport = NULL; /* This eventually ought be done through the configuration API */ if (serial_baud <= 0) { if ((baudstring = OurImports->ParamValue("baud")) != NULL) { serial_baud = OurImports->StrToBaud(baudstring); } } if (serial_baud <= 0 || baudstring == NULL) { serial_baud = DEFAULTBAUD; baudstring = DEFAULTBAUDSTR; } if (ANYDEBUG) { PILCallLog(LOG, PIL_DEBUG, "serial_init: serial_baud = 0x%x" , serial_baud); } if(compute_fragment_write_delay() != HA_OK){ return HA_FAIL; } return HA_OK; } /* Process a serial port declaration */ static struct hb_media * serial_new (const char * port) { struct stat sbuf; struct hb_media * ret; /* Let's see if this looks like it might be a serial port... */ if (*port != '/') { PILCallLog(LOG, PIL_CRIT , "Serial port not full pathname [%s] in config file" , port); return(NULL); } if (stat(port, &sbuf) < 0) { PILCallLog(LOG, PIL_CRIT, "Nonexistent serial port [%s] in config file" , port); return(NULL); } if (!S_ISCHR(sbuf.st_mode)) { PILCallLog(LOG, PIL_CRIT , "Serial port [%s] not a char device in config file" , port); return(NULL); } ret = (struct hb_media*)MALLOC(sizeof(struct hb_media)); if (ret != NULL) { struct serial_private * sp; memset(ret, 0, sizeof(*ret)); sp = (struct serial_private*) MALLOC(sizeof(struct serial_private)); if (sp != NULL) { /* * This implies we have to process the "new" * for this object in the parent process of us all... * otherwise we can't do this linking stuff... */ sp->next = lastserialport; lastserialport=ret; sp->ttyname = STRDUP(port); if (sp->ttyname != NULL) { sp->consecutive_errors = 0; ret->name = sp->ttyname; ret->pd = sp; }else{ FREE(sp); sp = NULL; } } if (sp == NULL) { FREE(ret); ret = NULL; PILCallLog(LOG, PIL_CRIT, "Out of memory (private serial data)"); } }else{ PILCallLog(LOG, PIL_CRIT, "Out of memory (serial data)"); } return(ret); } static int serial_open (struct hb_media* mp) { struct serial_private* sp; TTYASSERT(mp); sp = (struct serial_private*)mp->pd; if (OurImports->devlock(sp->ttyname) < 0) { PILCallLog(LOG, PIL_CRIT, "cannot lock line %s", sp->ttyname); return(HA_FAIL); } if ((sp->ttyfd = opentty(sp->ttyname)) < 0) { return(HA_FAIL); } PILCallLog(LOG, PIL_INFO, "Starting serial heartbeat on tty %s (%s baud)" , sp->ttyname, baudstring); return(HA_OK); } static int serial_close (struct hb_media* mp) { struct serial_private* sp; int rc=HA_OK; TTYASSERT(mp); sp = (struct serial_private*)mp->pd; if (sp->ttyfd >= 0) { rc = close(sp->ttyfd) < 0 ? 
HA_FAIL : HA_OK; OurImports->devunlock(sp->ttyname); sp->ttyfd=-1; } return rc; } /* Set up a serial line the way we want it be done */ static int ttysetup(int fd, const char * ourtty) { struct TERMIOS ti; if (GETATTR(fd, &ti) < 0) { PILCallLog(LOG, PIL_CRIT, "cannot get tty attributes: %s", strerror(errno)); return(HA_FAIL); } #ifndef IUCLC # define IUCLC 0 /* Ignore it if not supported */ #endif #ifndef CBAUD # define CBAUD 0 #endif ti.c_iflag &= ~(IGNBRK|IUCLC|IXANY|IXOFF|IXON|ICRNL|PARMRK); /* Unsure if I want PARMRK or not... It may not matter much */ ti.c_iflag |= (INPCK|ISTRIP|IGNCR|BRKINT); ti.c_oflag &= ~(OPOST); ti.c_cflag &= ~(CBAUD|CSIZE|PARENB); #ifndef CRTSCTS # define CRTSCTS 0 /* AIX and others don't have this */ #endif /* * Make a silly Linux/Gcc -Wtraditional warning go away * This is not my fault, you understand... ;-) * Suggestions on how to better work around it would be welcome. */ #if CRTSCTS == 020000000000 # undef CRTSCTS # define CRTSCTS 020000000000U #endif ti.c_cflag |= (serial_baud|(unsigned)CS8|(unsigned)CREAD | (unsigned)CLOCAL|(unsigned)CRTSCTS); ti.c_lflag &= ~(ICANON|ECHO|ISIG); #ifdef HAVE_TERMIOS_C_LINE ti.c_line = 0; #endif ti.c_cc[VMIN] = 1; ti.c_cc[VTIME] = 1; if (SETATTR(fd, &ti) < 0) { PILCallLog(LOG, PIL_CRIT, "cannot set tty attributes: %s" , strerror(errno)); return(HA_FAIL); } if (ANYDEBUG) { PILCallLog(LOG, PIL_DEBUG, "tty setup on %s complete.", ourtty); PILCallLog(LOG, PIL_DEBUG, "Baud rate set to: 0x%x" , (unsigned)serial_baud); PILCallLog(LOG, PIL_DEBUG, "ti.c_iflag = 0x%x" , (unsigned)ti.c_iflag); PILCallLog(LOG, PIL_DEBUG, "ti.c_oflag = 0x%x" , (unsigned)ti.c_oflag); PILCallLog(LOG, PIL_DEBUG,"ti.c_cflag = 0x%x" , (unsigned)ti.c_cflag); PILCallLog(LOG, PIL_DEBUG, "ti.c_lflag = 0x%x" , (unsigned)ti.c_lflag); } /* For good measure */ FLUSH(fd); tcsetpgrp(fd, getsid(getpid())); return(HA_OK); } #ifndef O_NOCTTY # define O_NOCTTY 0 /* Ignore it if not supported */ #endif /* Open a tty and set it's line parameters */ static int opentty(char * serial_device) { int fd; if ((fd=open(serial_device, O_RDWR|O_NOCTTY)) < 0 ) { PILCallLog(LOG, PIL_CRIT, "cannot open %s: %s", serial_device , strerror(errno)); return(fd); } if (!ttysetup(fd, serial_device)) { close(fd); return(-1); } if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { PILCallLog(LOG, PIL_WARN,"Error setting the close-on-exec flag: %s" , strerror(errno)); } /* Cause the other guy to flush his I/O */ tcsendbreak(fd, 0); return(fd); } static struct hb_media* ourmedia = NULL; static void serial_localdie(void) { int ourtty; if (!ourmedia || !ourmedia->pd) { return; } ourtty = ((struct serial_private*)(ourmedia->pd))->ttyfd; if (ourtty >= 0) { if (ANYDEBUG) { PILCallLog(LOG, PIL_DEBUG, "serial_localdie: Flushing tty"); } tcflush(ourtty, TCIOFLUSH); } } static char serial_pkt[MAXMSG]; /* This function does all the reading from our tty ports */ static void * serial_read(struct hb_media* mp, int *lenp) { char buf[MAXMSG]; struct serial_private* thissp; int startlen; const char * start = MSG_START; const char * end = MSG_END; int endlen; char *p; int len = 0; int tmplen; TTYASSERT(mp); thissp = (struct serial_private*)mp->pd; startlen = strlen(start); if (start[startlen-1] == '\n') { --startlen; } endlen = strlen(end); if (end[endlen-1] == '\n') { --endlen; } memset(serial_pkt, 0, MAXMSG); serial_pkt[0] = 0; p = serial_pkt; /* Skip until we find a MSG_START (hopefully we skip nothing) */ while (ttygets(buf, MAXMSG, thissp) != NULL && strncmp(buf, start, startlen) != 0) { /*nothing*/ } len = strnlen(buf, 
MAXMSG) + 1; if(len >= MAXMSG){ PILCallLog(LOG, PIL_CRIT, "serial_read:MSG_START exceeds MAXMSG"); return(NULL); } tmplen = strnlen(buf, MAXMSG); strcat(p, buf); p += tmplen; strcat(p, "\n"); p++; while (ttygets(buf, MAXMSG, thissp) != NULL && strncmp(buf, MSG_END, endlen) != 0) { len += strnlen(buf, MAXMSG) + 1; if(len >= MAXMSG){ PILCallLog(LOG, PIL_CRIT, "serial_read:serial_pkt exceeds MAXMSG"); return(NULL); } tmplen = strnlen(buf, MAXMSG); memcpy(p, buf, tmplen); p += tmplen; strcat(p, "\n"); p++; } if(strncmp(buf, MSG_END, endlen) == 0){ len += strnlen(buf, MAXMSG) + 2; if(len >= MAXMSG){ PILCallLog(LOG, PIL_CRIT, "serial_read:serial_pkt exceeds MAXMSG after adding MSG_END"); return(NULL); } tmplen = strnlen(buf, MAXMSG); memcpy(p, buf, tmplen); p += tmplen; strcat(p, "\n"); p++; p[0] = 0; } if (buf[0] == EOS ) { return NULL; }else{ thissp->consecutive_errors=0; } *lenp = len; return(serial_pkt); } /* This function does all the writing to our tty ports */ static int serial_write(struct hb_media* mp, void *p, int len) { int string_startlen = sizeof(MSG_START)-1; int netstring_startlen = sizeof(MSG_START_NETSTRING) - 1; char *str; int str_new = 0; int wrc; int size; int ourtty; static gboolean warnyet=FALSE; static longclock_t warninterval; static longclock_t lastwarn; int i; int loop; char* datastr; if (strncmp(p, MSG_START, string_startlen) == 0) { str = p; size = strlen(str); if(size > len){ return(HA_FAIL); } } else if(strncmp(p, MSG_START_NETSTRING, netstring_startlen) == 0) { struct ha_msg * msg; msg = wirefmt2msg(p, len, MSG_NEEDAUTH); if(!msg){ ha_log(PIL_WARN, "serial_write(): wirefmt2msg() failed"); return(HA_FAIL); } add_msg_auth(msg); str = msg2string(msg); str_new = 1; size = strlen(str); ha_msg_del(msg); } else{ return(HA_FAIL); } TTYASSERT(mp); if (!warnyet) { warninterval = msto_longclock(RTS_WARNTIME*1000L); } ourmedia = mp; /* Only used for the "localdie" function */ OurImports->RegisterCleanup(serial_localdie); ourtty = ((struct serial_private*)(mp->pd))->ttyfd; if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "Sending pkt to %s [%d bytes]" , mp->name, size); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s", str); } loop = size / FRAGSIZE + ((size%FRAGSIZE == 0)?0:1); datastr =str; for (i = 0; i < loop; i++){ int datalen ; datalen = FRAGSIZE; if ( (i == loop -1 ) && (size% FRAGSIZE != 0)){ datalen = size %FRAGSIZE; } setmsalarm(500); wrc = write(ourtty, datastr, datalen); cancelmstimer(); if (i != (loop -1)) { usleep(fragment_write_delay); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "serial write returned %d", wrc); } if (wrc < 0 || wrc != datalen) { if (DEBUGPKTCONT && wrc < 0) { PILCallLog(LOG, PIL_DEBUG, "serial write errno was %d", errno); } if (wrc > 0 || (wrc < 0 && errno == EINTR)) { longclock_t now = time_longclock(); tcflush(ourtty, TCIOFLUSH); if (!warnyet || cmp_longclock(sub_longclock(now, lastwarn) , warninterval) >= 0) { lastwarn = now; warnyet = TRUE; PILCallLog(LOG, PIL_WARN , "TTY write timeout on [%s]" " (no connection or bad cable" "? 
[see documentation])" , mp->name); PILCallLog(LOG, PIL_INFO , "See %s for details" , HAURL("FAQ#TTY_timeout")); } }else{ PILCallLog(LOG, PIL_CRIT, "TTY write failure on [%s]: %s" , mp->name, strerror(errno)); } } datastr +=datalen; } if(str_new){ free(str); str = NULL; } return(HA_OK); } /* Gets function for our tty */ static char * ttygets(char * inbuf, int length, struct serial_private *tty) { char * cp; char * end = inbuf + length; int rc; int fd = tty->ttyfd; for(cp=inbuf; cp < end; ++cp) { int saverr; errno = 0; /* One read per char -- yecch (but it's easy) */ rc = read(fd, cp, 1); saverr = errno; OurImports->CheckForEvents(); errno = saverr; if (rc != 1) { if (rc == 0 || errno == EINTR) { PILCallLog(LOG, PIL_CRIT, "EOF in ttygets [%s]: %s [%d]" , tty->ttyname, strerror(errno), rc); ++tty->consecutive_errors; tcsetpgrp(fd, getsid(getpid())); if ((tty->consecutive_errors % 10) == 0) { PILCallLog(LOG, PIL_WARN , "10 consecutive EOF" " errors from serial port %s" , tty->ttyname); PILCallLog(LOG, PIL_INFO , "%s pgrp: %d", tty->ttyname , tcgetpgrp(fd)); sleep(10); } return(NULL); } errno = 0; continue; }else{ tty->consecutive_errors = 0; } if (*cp == '\n') { break; } } *cp = '\0'; return(inbuf); } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/tipc.c0000644000000000000000000003052511576626513021703 0ustar00usergroup00000000000000/* * tipc.c - tipc communication module for heartbeat * * Author: Jia Ming Pan * Copyright (c) 2005 International Business Machines * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN tipc #define PIL_PLUGIN_S "tipc" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include struct tipc_private { struct sockaddr_tipc maddr; int recvfd; int sendfd; unsigned int name_type; unsigned int seq_lower; unsigned int seq_upper; }; static struct hb_media * tipc_new(unsigned int name_type, unsigned int seq_lower, unsigned int seq_upper); static int tipc_parse(const char * line); static int tipc_open(struct hb_media * mp); static int tipc_close(struct hb_media * mp); static void * tipc_read(struct hb_media * mp, int * lenp); static int tipc_write(struct hb_media * mp, void * msg, int len); static int tipc_make_receive_sock(struct hb_media * mp); static int tipc_make_send_sock(struct hb_media * mp); static int tipc_descr(char ** buffer); static int tipc_mtype(char ** buffer); static int tipc_isping(void); static struct hb_media_fns tipcOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports * PluginImports; static PILPlugin * OurPlugin; static PILInterface * OurInterface; static struct hb_media_imports * OurImports; static void * interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree #define IS_TIPC_OBJECT(mp) ((mp) && ((mp)->vf == (void*)&tipcOps)) #define TIPC_ASSERT(mp) g_assert(IS_TIPC_OBJECT(mp)) PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &tipcOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static struct hb_media * tipc_new(unsigned int name_type, unsigned int seq_lower, unsigned int seq_upper) { struct tipc_private * tipc = NULL; struct hb_media * mp = NULL; mp = MALLOC(sizeof(struct hb_media)); if ( mp == NULL ){ PILCallLog(LOG, PIL_CRIT, "%s: malloc failed for hb_media", __FUNCTION__); return NULL; } memset(mp, 0, sizeof(*mp)); tipc = MALLOC(sizeof(struct tipc_private)); if ( tipc == NULL ){ PILCallLog(LOG, PIL_CRIT, "%s: malloc failed for tipc_private", __FUNCTION__); FREE(mp); return NULL; } tipc->name_type = name_type; tipc->seq_lower = seq_lower; tipc->seq_upper = seq_upper; /* setting mcast addr */ tipc->maddr.family = AF_TIPC; tipc->maddr.addrtype = TIPC_ADDR_MCAST; tipc->maddr.addr.name.domain = 0; tipc->maddr.addr.nameseq.type = name_type; tipc->maddr.addr.nameseq.lower = seq_lower; tipc->maddr.addr.nameseq.upper = seq_upper; mp->pd = (void *)tipc; return mp; } /* tipc name_type seq_lower seq_upper */ #define GET_NEXT_TOKEN(bp, token) do { \ int toklen; \ bp += strspn(bp, WHITESPACE); \ toklen = strcspn(bp, WHITESPACE); \ strncpy(token, bp, toklen); \ bp += toklen; \ 
token[toklen] = EOS; \ }while(0) static int tipc_parse(const char * line) { const char * bp = NULL; struct hb_media * media = NULL; char token[MAXLINE]; unsigned int name_type; unsigned int seq_lower; unsigned int seq_upper; bp = line; /* name_type */ GET_NEXT_TOKEN(bp, token); name_type = (unsigned int)atoi(token); /* seq_lower */ GET_NEXT_TOKEN(bp, token); seq_lower = (unsigned int)atoi(token); /* seq_upper */ GET_NEXT_TOKEN(bp, token); seq_upper = (unsigned int)atoi(token); PILCallLog(LOG, PIL_INFO, "%s: name type: %u, sequence lower: %u, sequence upper: %u", __FUNCTION__, name_type, seq_lower, seq_upper); media = tipc_new (name_type, seq_lower, seq_upper ); if ( media == NULL ) { PILCallLog(LOG, PIL_CRIT, "%s: Could not create media", __FUNCTION__); return HA_FAIL; } sprintf(token, "TIPC:<%u>", name_type); media->name = STRDUP(token); if ( media->name == NULL ) { PILCallLog(LOG, PIL_CRIT, "%s: Could not alloc media's name", __FUNCTION__); FREE(media); return HA_FAIL; } OurImports->RegisterNewMedium(media); PILCallLog(LOG, PIL_INFO, "%s: register new medium OK", __FUNCTION__); return HA_OK; } static int tipc_open(struct hb_media * mp) { struct tipc_private * tipc = NULL; tipc = (struct tipc_private *) mp->pd; TIPC_ASSERT(mp); tipc->recvfd = tipc_make_receive_sock(mp); if ( tipc->recvfd < 0 ) { PILCallLog(LOG, PIL_CRIT, "%s: Open receive socket failed", __FUNCTION__); return HA_FAIL; } tipc->sendfd = tipc_make_send_sock(mp); if ( tipc->sendfd < 0 ) { close(tipc->recvfd); PILCallLog(LOG, PIL_CRIT, "%s: Open send socket failed", __FUNCTION__); return HA_FAIL; } PILCallLog(LOG, PIL_INFO, "%s: Open tipc successfully", __FUNCTION__); return HA_OK; } static int tipc_close(struct hb_media * mp) { struct tipc_private * tipc; TIPC_ASSERT(mp); tipc = (struct tipc_private *) mp->pd; if ( tipc->recvfd >= 0 ) { close(tipc->recvfd); tipc->recvfd = -1; } if ( tipc->sendfd >= 0 ) { close(tipc->sendfd); tipc->sendfd = -1; } PILCallLog(LOG, PIL_INFO, "%s: tipc closed", __FUNCTION__); return HA_OK; } char tipc_pkt[MAXMSG]; static void * tipc_read(struct hb_media * mp, int * len) { struct sockaddr_tipc client_addr; struct tipc_private * tipc; socklen_t sock_len; int numbytes; TIPC_ASSERT(mp); tipc = (struct tipc_private *) mp->pd; sock_len = sizeof(struct sockaddr_tipc); if (( numbytes = recvfrom(tipc->recvfd, tipc_pkt, MAXMSG, 0, (struct sockaddr*)&client_addr, &sock_len)) < 0) { if ( errno != EINTR ) { PILCallLog(LOG, PIL_CRIT, "%s: Error receiving message: %s", __FUNCTION__, strerror(errno)); } return NULL; } tipc_pkt[numbytes] = EOS; *len = numbytes + 1; if ( Debug >= PKTTRACE ) { PILCallLog(LOG, PIL_INFO, "%s: Got %d bytes", __FUNCTION__, numbytes); } return tipc_pkt; } static int tipc_write(struct hb_media * mp, void * msg, int len) { struct tipc_private * tipc; int numbytes; TIPC_ASSERT(mp); tipc = (struct tipc_private *) mp->pd; if ( (numbytes = sendto(tipc->sendfd, msg, len, 0, (struct sockaddr *)&tipc->maddr, sizeof(struct sockaddr_tipc))) < 0 ){ if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT, "%s: Unable to send message: %s", __FUNCTION__, strerror(errno)); } return HA_FAIL; } if ( numbytes != len ) { PILCallLog(LOG, PIL_WARN, "%s: Sent %d bytes, message length is %d", __FUNCTION__, numbytes, len); return HA_FAIL; } if ( Debug >= PKTTRACE ) { PILCallLog(LOG, PIL_INFO, "%s: Sent %d bytes", __FUNCTION__, numbytes); } return HA_OK; } static int tipc_mtype(char ** buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { return 0; } return STRLEN_CONST(PIL_PLUGIN_S); } static int tipc_descr(char 
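/* Per tipc_parse() above, the ha.cf line is simply
 *	tipc <name_type> <seq_lower> <seq_upper>
 * (e.g. "tipc 4711 100 100", made-up numbers).  Every node binds the
 * same TIPC name sequence on a SOCK_RDM socket and multicasts its
 * heartbeats to that sequence, so all peers using the same triple
 * receive them.
 */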
** buffer) { const char constret[] = "tipc communication module"; *buffer = STRDUP(constret); if (!*buffer) { return 0; } return STRLEN_CONST(constret); } static int tipc_isping(void) { return 0; } static int tipc_make_receive_sock(struct hb_media * mp) { struct sockaddr_tipc server_addr; struct tipc_private * tipc = NULL; int sd; sd = socket (AF_TIPC, SOCK_RDM, 0); tipc = (struct tipc_private *) mp->pd; server_addr.family = AF_TIPC; server_addr.addrtype = TIPC_ADDR_NAMESEQ; server_addr.scope = TIPC_CLUSTER_SCOPE; server_addr.addr.nameseq.type = tipc->name_type; server_addr.addr.nameseq.lower = tipc->seq_lower; server_addr.addr.nameseq.upper = tipc->seq_upper; /* Bind port to sequence */ if (bind (sd, (struct sockaddr*)&server_addr, sizeof(struct sockaddr_tipc)) != 0){ PILCallLog(LOG, PIL_CRIT, "%s: Could not bind to sequence <%u,%u,%u> scope %u", __FUNCTION__, server_addr.addr.nameseq.type, server_addr.addr.nameseq.lower, server_addr.addr.nameseq.upper, server_addr.scope); return -1; } PILCallLog(LOG, PIL_INFO, "%s: Bound to name sequence <%u,%u,%u> scope %u", __FUNCTION__, server_addr.addr.nameseq.type, server_addr.addr.nameseq.lower, server_addr.addr.nameseq.upper, server_addr.scope); return sd; } static int tipc_make_send_sock(struct hb_media * mp) { int sd = socket (AF_TIPC, SOCK_RDM, 0); return sd; } static struct hb_media_fns tipcOps ={ NULL, tipc_parse, tipc_open, tipc_close, tipc_read, tipc_write, tipc_mtype, tipc_descr, tipc_isping, }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcomm/ucast.c0000644000000000000000000004016711576626513022066 0ustar00usergroup00000000000000/* * Adapted from alanr's UDP broadcast heartbeat bcast.c by Stéphane Billiart * * * (c) 2002 Stéphane Billiart * (c) 2002 Alan Robertson * * Brian Tinsley * - allow use of hostname in ha.cf * - set IP type of service of write socket * - many MALLOC calls were not checked for failure * - code janitor * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #ifdef HAVE_STRINGS_H #include #endif #include #include #include #include #include #ifndef HAVE_INET_ATON extern int inet_aton(const char *, struct in_addr *); #endif #include #include #include #if defined(SO_BINDTODEVICE) #include #endif #include #include /* * Plugin information */ #define PIL_PLUGINTYPE HB_COMM_TYPE #define PIL_PLUGINTYPE_S HB_COMM_TYPE_S #define PIL_PLUGIN ucast #define PIL_PLUGIN_S "ucast" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include /* * Macros/Defines */ #define ISUCASTOBJECT(mp) ((mp) && ((mp)->vf == (void*)&ucastOps)) #define UCASTASSERT(mp) g_assert(ISUCASTOBJECT(mp)) #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree #define MAXBINDTRIES 10 /* * Structure Declarations */ struct ip_private { char* interface; /* Interface name */ struct in_addr heartaddr; /* Peer node address */ struct sockaddr_in addr; /* Local address */ int port; /* UDP port */ int rsocket; /* Read-socket */ int wsocket; /* Write-socket */ }; /* * Function Prototypes */ PIL_rc PIL_PLUGIN_INIT(PILPlugin *us, const PILPluginImports *imports); static int ucast_parse(const char *line); static struct hb_media* ucast_new(const char *intf, const char *addr); static int ucast_open(struct hb_media *mp); static int ucast_close(struct hb_media *mp); static void* ucast_read(struct hb_media *mp, int* lenp); static int ucast_write(struct hb_media *mp, void *msg, int len); static int HB_make_receive_sock(struct hb_media *ei); static int HB_make_send_sock(struct hb_media *mp); static struct ip_private* new_ip_interface(const char *ifn, const char *hbaddr, int port); static int ucast_descr(char **buffer); static int ucast_mtype(char **buffer); static int ucast_isping(void); /* * External Data */ extern struct hb_media *sysmedia[]; extern int nummedia; /* * Module Public Data */ const char hb_media_name[] = "UDP/IP unicast"; static struct hb_media_fns ucastOps = { NULL, ucast_parse, ucast_open, ucast_close, ucast_read, ucast_write, ucast_mtype, ucast_descr, ucast_isping }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; static int localudpport; /* * Implmentation */ PIL_rc PIL_PLUGIN_INIT(PILPlugin *us, const PILPluginImports *imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &ucastOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int ucast_parse(const char *line) { const char *bp = line; int toklen; struct hb_media *mp; char dev[MAXLINE]; char ucast[MAXLINE]; /* Skip over white space, then grab the device */ bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(dev, bp, toklen); bp += toklen; dev[toklen] = EOS; if (*dev != EOS) { #ifdef NOTYET if (!is_valid_dev(dev)) { PILCallLog(LOG, PIL_CRIT, "ucast: bad device [%s]", dev); 
return HA_FAIL; } #endif bp += strspn(bp, WHITESPACE); toklen = strcspn(bp, WHITESPACE); strncpy(ucast, bp, toklen); bp += toklen; ucast[toklen] = EOS; if (*ucast == EOS) { PILCallLog(LOG, PIL_CRIT, "ucast: [%s] missing target IP address/hostname", dev); return HA_FAIL; } if (!(mp = ucast_new(dev, ucast))) { return HA_FAIL; } sysmedia[nummedia++] = mp; } return HA_OK; } static int ucast_mtype(char **buffer) { *buffer = STRDUP(PIL_PLUGIN_S); if (!*buffer) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 2) ); return 0; } return strlen(*buffer); } static int ucast_descr(char **buffer) { *buffer = strdup(hb_media_name); if (!*buffer) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 2) ); return 0; } return strlen(*buffer); } static int ucast_isping(void) { return 0; } static int ucast_init(void) { struct servent *service; g_assert(OurImports != NULL); if (localudpport <= 0) { const char *chport; if ((chport = OurImports->ParamValue("udpport")) != NULL) { if (sscanf(chport, "%d", &localudpport) <= 0 || localudpport <= 0) { PILCallLog(LOG, PIL_CRIT, "ucast: bad port number %s", chport); return HA_FAIL; } } } /* No port specified in the configuration... */ if (localudpport <= 0) { /* If our service name is in /etc/services, then use it */ if ((service=getservbyname(HA_SERVICENAME, "udp")) != NULL) localudpport = ntohs(service->s_port); else localudpport = UDPPORT; } return HA_OK; } /* * Create new UDP/IP unicast heartbeat object * Name of interface and address are passed as parameters */ static struct hb_media* ucast_new(const char *intf, const char *addr) { struct ip_private *ipi; struct hb_media *ret; char *name; ucast_init(); if (!(ipi = new_ip_interface(intf, addr, localudpport))) { PILCallLog(LOG, PIL_CRIT, "ucast: interface [%s] does not exist", intf); return NULL; } if (!(ret = (struct hb_media*)MALLOC(sizeof(struct hb_media)))) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 2) ); FREE(ipi->interface); FREE(ipi); } else { memset(ret, 0, sizeof(*ret)); ret->pd = (void*)ipi; if (!(name = STRDUP(intf))) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 3) ); FREE(ipi->interface); FREE(ipi); FREE(ret); ret = NULL; } else { ret->name = name; } } return ret; } /* * Open UDP/IP unicast heartbeat interface */ static int ucast_open(struct hb_media* mp) { struct ip_private * ei; UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; if ((ei->wsocket = HB_make_send_sock(mp)) < 0) return HA_FAIL; if ((ei->rsocket = HB_make_receive_sock(mp)) < 0) { ucast_close(mp); return HA_FAIL; } PILCallLog(LOG, PIL_INFO, "ucast: started on port %d interface %s to %s", localudpport, ei->interface, inet_ntoa(ei->addr.sin_addr)); return HA_OK; } /* * Close UDP/IP unicast heartbeat interface */ static int ucast_close(struct hb_media* mp) { struct ip_private *ei; int rc = HA_OK; UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; if (ei->rsocket >= 0) { if (close(ei->rsocket) < 0) { rc = HA_FAIL; } ei->rsocket = -1; } if (ei->wsocket >= 0) { if (close(ei->wsocket) < 0) { rc = HA_FAIL; } ei->wsocket = -1; } return rc; } /* * Receive a heartbeat unicast packet from UDP interface */ char ucast_pkt[MAXMSG]; static void * ucast_read(struct hb_media* mp, int *lenp) { struct ip_private *ei; socklen_t addr_len; struct sockaddr_in their_addr; int numbytes; UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; addr_len = sizeof(struct sockaddr); if ((numbytes = recvfrom(ei->rsocket, ucast_pkt, 
MAXMSG-1, 0, (struct sockaddr *)&their_addr, &addr_len)) == -1) { if (errno != EINTR) { PILCallLog(LOG, PIL_CRIT, "ucast: error receiving from socket: %s", strerror(errno)); } return NULL; } if (numbytes == 0) { PILCallLog(LOG, PIL_CRIT, "ucast: received zero bytes"); return NULL; } ucast_pkt[numbytes] = EOS; if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "ucast: received %d byte packet from %s", numbytes, inet_ntoa(their_addr.sin_addr)); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s", ucast_pkt); } *lenp = numbytes +1; return ucast_pkt; } /* * Send a heartbeat packet over unicast UDP/IP interface */ static int ucast_write(struct hb_media* mp, void *pkt, int len) { struct ip_private *ei; int rc; UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; if ((rc = sendto(ei->wsocket, pkt, len, 0 , (struct sockaddr *)&ei->addr , sizeof(struct sockaddr))) != len) { if (!mp->suppresserrs) { PILCallLog(LOG, PIL_CRIT , "%s: Unable to send " PIL_PLUGINTYPE_S " packet %s %s:%u len=%d [%d]: %s" , __FUNCTION__, ei->interface, inet_ntoa(ei->addr.sin_addr), ei->port , len, rc, strerror(errno)); } return HA_FAIL; } if (DEBUGPKT) { PILCallLog(LOG, PIL_DEBUG, "ucast: sent %d bytes to %s", rc, inet_ntoa(ei->addr.sin_addr)); } if (DEBUGPKTCONT) { PILCallLog(LOG, PIL_DEBUG, "%s", (const char*)pkt); } return HA_OK; } /* * Set up socket for sending unicast UDP heartbeats */ static int HB_make_send_sock(struct hb_media *mp) { int sockfd; struct ip_private *ei; int tos; #if defined(SO_BINDTODEVICE) struct ifreq i; #endif #if defined(SO_REUSEPORT) int i = 1; #endif UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { PILCallLog(LOG, PIL_CRIT, "ucast: Error creating write socket: %s", strerror(errno)); } /* * 21 December 2002 * Added by Brian TInsley */ tos = IPTOS_LOWDELAY; if (setsockopt(sockfd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)) < 0) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting socket option IP_TOS: %s", strerror(errno)); } else { PILCallLog(LOG, PIL_INFO, "ucast: write socket priority set to IPTOS_LOWDELAY on %s", ei->interface); } #if defined(SO_BINDTODEVICE) { /* * We want to send out this particular interface * * This is so we can have redundant NICs, and heartbeat on both */ strcpy(i.ifr_name, ei->interface); if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE, &i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting option SO_BINDTODEVICE(w) on %s: %s", i.ifr_name, strerror(errno)); close(sockfd); return -1; } PILCallLog(LOG, PIL_INFO, "ucast: bound send socket to device: %s", i.ifr_name); } #endif #if defined(SO_REUSEPORT) { /* this is for OpenBSD to allow multiple * * ucast connections, e.g. 
a more than * * two node cluster */ if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting option SO_REUSEPORT(w): %s", strerror(errno)); close(sockfd); return -1; } PILCallLog(LOG, PIL_INFO, "ucast: set SO_REUSEPORT(w)"); } #endif if (fcntl(sockfd,F_SETFD, FD_CLOEXEC) < 0) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting close-on-exec flag: %s", strerror(errno)); } return sockfd; } /* * Set up socket for listening to heartbeats (UDP unicast) */ static int HB_make_receive_sock(struct hb_media *mp) { struct ip_private *ei; struct sockaddr_in my_addr; int sockfd; int bindtries; int boundyet = 0; int j; UCASTASSERT(mp); ei = (struct ip_private*)mp->pd; memset(&(my_addr), 0, sizeof(my_addr)); /* zero my address struct */ my_addr.sin_family = AF_INET; /* host byte order */ my_addr.sin_port = htons(ei->port); /* short, network byte order */ my_addr.sin_addr.s_addr = INADDR_ANY; /* auto-fill with my IP */ if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { PILCallLog(LOG, PIL_CRIT, "ucast: error creating read socket: %s", strerror(errno)); return -1; } /* * Set SO_REUSEADDR on the server socket s. Variable j is used * as a scratch varable. * * 16th February 2000 * Added by Horms * with thanks to Clinton Work */ j = 1; if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (void *)&j, sizeof j) < 0) { /* Ignore it. It will almost always be OK anyway. */ PILCallLog(LOG, PIL_CRIT, "ucast: error setting socket option SO_REUSEADDR: %s", strerror(errno)); } #if defined(SO_BINDTODEVICE) { /* * We want to receive packets only from this interface... */ struct ifreq i; strcpy(i.ifr_name, ei->interface); if (setsockopt(sockfd, SOL_SOCKET, SO_BINDTODEVICE, &i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting option SO_BINDTODEVICE(r) on %s: %s", i.ifr_name, strerror(errno)); close(sockfd); return -1; } PILCallLog(LOG, PIL_INFO, "ucast: bound receive socket to device: %s", i.ifr_name); } #endif #if defined(SO_REUSEPORT) { /* * Needed for OpenBSD for more than two nodes in a ucast cluster */ int i = 1; if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &i, sizeof(i)) == -1) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting option SO_REUSEPORT(r) %s", strerror(errno)); close(sockfd); return -1; } PILCallLog(LOG, PIL_INFO, "ucast: set SO_REUSEPORT(w)"); } #endif /* Try binding a few times before giving up */ /* Sometimes a process with it open is exiting right now */ for (bindtries=0; !boundyet && bindtries < MAXBINDTRIES; ++bindtries) { if (bind(sockfd, (struct sockaddr *)&my_addr, sizeof(struct sockaddr)) < 0) { PILCallLog(LOG, PIL_CRIT, "ucast: error binding socket. Retrying: %s", strerror(errno)); sleep(1); } else{ boundyet = 1; } } if (!boundyet) { #if !defined(SO_BINDTODEVICE) if (errno == EADDRINUSE) { /* This happens with multiple udp or ppp interfaces */ PILCallLog(LOG, PIL_INFO, "ucast: someone already listening on port %d [%s]", ei->port, ei->interface); PILCallLog(LOG, PIL_INFO, "ucast: UDP read process exiting"); close(sockfd); cleanexit(0); } #else PILCallLog(LOG, PIL_CRIT, "ucast: unable to bind socket. 
Giving up: %s", strerror(errno)); close(sockfd); return -1; #endif } if (fcntl(sockfd,F_SETFD, FD_CLOEXEC) < 0) { PILCallLog(LOG, PIL_CRIT, "ucast: error setting close-on-exec flag: %s", strerror(errno)); } return sockfd; } static struct ip_private* new_ip_interface(const char *ifn, const char *hbaddr, int port) { struct ip_private *ep; struct hostent *h; /* * 21 December 2002 * Added by Brian TInsley */ if (!(h = gethostbyname(hbaddr))) { PILCallLog(LOG, PIL_CRIT, "ucast: cannot resolve hostname"); return NULL; } if (!(ep = (struct ip_private*) MALLOC(sizeof(struct ip_private)))) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 2) ); return NULL; } /* * use address from gethostbyname */ memcpy(&ep->heartaddr, h->h_addr_list[0], sizeof(ep->heartaddr)); if (!(ep->interface = STRDUP(ifn))) { PILCallLog(LOG, PIL_CRIT, "ucast: memory allocation error (line %d)", (__LINE__ - 2) ); FREE(ep); return NULL; } memset(&ep->addr, 0, sizeof(ep->addr)); /* zero the struct */ ep->addr.sin_family = AF_INET; /* host byte order */ ep->addr.sin_port = htons(port); /* short, network byte order */ ep->port = port; ep->wsocket = -1; ep->rsocket = -1; ep->addr.sin_addr = ep->heartaddr; return ep; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcompress/Makefile.am0000644000000000000000000000325111576626513023530 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in if BUILD_ZLIB_COMPRESS_MODULE zlibmodule = zlib.la endif if BUILD_BZ2_COMPRESS_MODULE bz2module = bz2.la endif SUBDIRS = INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls AM_CFLAGS = @CFLAGS@ ## libraries halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/HBcompress plugin_LTLIBRARIES = $(zlibmodule) $(bz2module) zlib_la_SOURCES = zlib.c zlib_la_LDFLAGS = -export-dynamic -module -avoid-version -lz zlib_la_LIBADD = $(top_builddir)/replace/libreplace.la bz2_la_SOURCES = bz2.c bz2_la_LDFLAGS = -export-dynamic -module -avoid-version -lbz2 bz2_la_LIBADD = $(top_builddir)/replace/libreplace.la Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcompress/bz2.c0000644000000000000000000000730011576626513022334 0ustar00usergroup00000000000000 /* bz2.c: compression module using bz2 for heartbeat. * * Copyright (C) 2005 Guochun Shi * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. 
* This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #define PIL_PLUGINTYPE HB_COMPRESS_TYPE #define PIL_PLUGINTYPE_S HB_COMPRESS_TYPE_S #define PIL_PLUGIN bz2 #define PIL_PLUGIN_S "bz2" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include #include #include #include #include #include #include static struct hb_compress_fns bz2Ops; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &bz2Ops , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int bz2_compress(char* dest, size_t* destlen, const char* _src, size_t srclen) { int ret; char* src; unsigned int tmpdestlen; memcpy(&src, &_src, sizeof(char*)); tmpdestlen = *destlen; ret = BZ2_bzBuffToBuffCompress(dest, &tmpdestlen, src, srclen, 1, 0, 30); if (ret != BZ_OK){ cl_log(LOG_ERR, "%s: compression failed", __FUNCTION__); return HA_FAIL; } *destlen = tmpdestlen; return HA_OK; } static int bz2_decompress(char* dest, size_t* destlen, const char* _src, size_t srclen) { int ret; char* src; unsigned int tmpdestlen; memcpy(&src, &_src, sizeof(char*)); tmpdestlen = *destlen; ret = BZ2_bzBuffToBuffDecompress(dest, &tmpdestlen, src, srclen, 1, 0); if (ret != BZ_OK){ cl_log(LOG_ERR, "%s: decompression failed", __FUNCTION__); return HA_FAIL; } *destlen = tmpdestlen; return HA_OK; } static const char* bz2_getname(void) { return "bz2"; } static struct hb_compress_fns bz2Ops ={ bz2_compress, bz2_decompress, bz2_getname, }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/HBcompress/zlib.c0000644000000000000000000000707311576626513022606 0ustar00usergroup00000000000000 /* zlib.c: compression module using zlib for heartbeat. * * Copyright (C) 2005 Guochun Shi * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. 
If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #define PIL_PLUGINTYPE HB_COMPRESS_TYPE #define PIL_PLUGINTYPE_S HB_COMPRESS_TYPE_S #define PIL_PLUGIN zlib #define PIL_PLUGIN_S "zlib" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL #include #include #include #include #include #include static struct hb_compress_fns zlibOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S , PIL_PLUGIN_S , &zlibOps , NULL /*close */ , &OurInterface , (void*)&OurImports , interfprivate); } static int zlib_compress(char* dest, size_t* _destlen, const char* src, size_t _srclen) { int ret; uLongf destlen = *_destlen; uLongf srclen = _srclen; ret = compress((Bytef *)dest, &destlen, (const Bytef *)src, srclen); if (ret != Z_OK){ cl_log(LOG_ERR, "%s: compression failed", __FUNCTION__); return HA_FAIL; } *_destlen = destlen; return HA_OK; } static int zlib_decompress(char* dest, size_t* _destlen, const char* src, size_t _srclen) { int ret; uLongf destlen = *_destlen; uLongf srclen = _srclen; ret = uncompress((Bytef *)dest, &destlen, (const Bytef *)src, srclen); if (ret != Z_OK){ cl_log(LOG_ERR, "%s: decompression failed", __FUNCTION__); return HA_FAIL; } *_destlen = destlen; return HA_OK; } static const char* zlib_getname(void) { return "zlib"; } static struct hb_compress_fns zlibOps ={ zlib_compress, zlib_decompress, zlib_getname, }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/Makefile.am0000644000000000000000000000170011576626513021460 0ustar00usergroup00000000000000# # heartbeat lib/plugin directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of 
the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in if QUORUMD_BUILD QUORUMD_DIR = quorumd endif SUBDIRS = HBauth HBcomm HBcompress \ quorum tiebreaker $(QUORUMD_DIR) Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorum/Makefile.am0000644000000000000000000000414211576626513023013 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2005 Guochun Shi # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls COMMONLIBS = -lplumb \ $(GLIBLIB) AM_CFLAGS = @CFLAGS@ ## libraries if QUORUMD_BUILD QUORUMD_PLUGIN = quorumd.la endif halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/quorum plugin_LTLIBRARIES = majority.la twonodes.la weight.la $(QUORUMD_PLUGIN) majority_la_SOURCES = majority.c majority_la_LDFLAGS = -export-dynamic -module -avoid-version -lz majority_la_LIBADD = $(top_builddir)/replace/libreplace.la twonodes_la_SOURCES = twonodes.c twonodes_la_LDFLAGS = -export-dynamic -module -avoid-version -lz twonodes_la_LIBADD = $(top_builddir)/replace/libreplace.la weight_la_SOURCES = weight.c weight_la_LDFLAGS = -export-dynamic -module -avoid-version -lz weight_la_LIBADD = $(top_builddir)/replace/libreplace.la if QUORUMD_BUILD quorumd_la_SOURCES = quorumd.c quorumd_la_LDFLAGS = -export-dynamic -module -avoid-version -lz $(GNUTLSLIBS) quorumd_la_LIBADD = $(top_builddir)/replace/libreplace.la $(COMMONLIBS) quorumd_la_CFLAGS = $(INCLUDES) $(GNUTLSHEAD) endif Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorum/majority.c0000644000000000000000000000733311576626513022766 0ustar00usergroup00000000000000/* majority.c: quorum module * policy --- if it has more than half of total number of nodes, you have the quorum * if you have exactly half othe total number of nodes, you don't have the quorum * otherwise you have a tie * * Copyright (C) 2005 Guochun Shi * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. 
* This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define PIL_PLUGINTYPE HB_QUORUM_TYPE #define PIL_PLUGINTYPE_S HB_QUORUM_TYPE_S #define PIL_PLUGIN majority #define PIL_PLUGIN_S "majority" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_quorum_fns majorityOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate = NULL; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &majorityOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int majority_getquorum(const char* cluster , int member_count, int member_quorum_votes , int total_node_count, int total_quorum_votes) { cl_log(LOG_DEBUG, "quorum plugin: majority"); cl_log(LOG_DEBUG, "cluster:%s, member_count=%d, member_quorum_votes=%d", cluster, member_count, member_quorum_votes); cl_log(LOG_DEBUG, "total_node_count=%d, total_quorum_votes=%d", total_node_count, total_quorum_votes); if(member_count >= total_node_count/2 + 1){ return QUORUM_YES; } else if ( total_node_count % 2 == 0 && member_count == total_node_count/2){ return QUORUM_TIE; } return QUORUM_NO; } static int majority_init(callback_t notify, const char* cl_name, const char* qs_name) { return 0; } static void majority_stop(void) { } static struct hb_quorum_fns majorityOps ={ majority_getquorum, majority_init, majority_stop }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorum/quorumd.c0000644000000000000000000002440211576626513022620 0ustar00usergroup00000000000000/* quorumd.c: quorum module * policy --- connect to quorumd for asking whether we have quorum. * * Copyright (C) 2006 Huang Zhen * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. 
* This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PIL_PLUGINTYPE HB_QUORUM_TYPE #define PIL_PLUGINTYPE_S HB_QUORUM_TYPE_S #define PIL_PLUGIN quorumd #define PIL_PLUGIN_S "quorumd" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_quorum_fns Ops; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate = NULL; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree #define DH_BITS 1024 #define MAX_BUF 1024 #define CACERT HA_HBCONF_DIR"/ca-cert.pem" #define CLIENTKEY HA_HBCONF_DIR"/client-key.pem" #define CLIENTCERT HA_HBCONF_DIR"/client-cert.pem" static int verify_certificate (gnutls_session session); static gnutls_session initialize_tls_session (int sd); static void initialize_tls_global(void); static gboolean query_quorum(gpointer data); static gboolean connect_quorum_server(gpointer data); static void quorumd_stop(void); static gnutls_certificate_credentials xcred; PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &Ops, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int sock = 0; static gnutls_session session = NULL; static guint repeat_timer = 0; static int nodenum = 0; static int weight = 0; static int cur_quorum = -1; static callback_t callback = NULL; static const char* cluster = NULL; static const char* quorum_server = NULL; static int interval = 0; static int quorumd_getquorum(const char* cluster , int member_count, int member_quorum_votes , int total_node_count, int total_quorum_votes) { cl_log(LOG_DEBUG, "quorum plugin: quorumd"); cl_log(LOG_DEBUG, "cluster:%s, member_count=%d, member_quorum_votes=%d", cluster, member_count, member_quorum_votes); cl_log(LOG_DEBUG, "total_node_count=%d, total_quorum_votes=%d", total_node_count, total_quorum_votes); nodenum = member_count; weight = member_quorum_votes; if (cur_quorum == -1) { 
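		/* cur_quorum is -1 when we have no cached answer (first call, or the
		 * quorumd session was torn down): connect to the quorum server and
		 * query it now; connect_quorum_server() also arms the periodic
		 * re-query timer. */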
connect_quorum_server(NULL); } cl_log(LOG_DEBUG,"zhenh: return cur_quorum %d\n", cur_quorum); return cur_quorum==1? QUORUM_YES:QUORUM_NO; } static int quorumd_init(callback_t notify, const char* cl_name, const char* qs_name) { cl_log(LOG_DEBUG, "quorum plugin: quorumd, quorumd_init()"); cl_log(LOG_DEBUG, "quorum plugin: cluster:%s, quorum_server:%s", cl_name, qs_name); callback = notify; cluster = cl_name; quorum_server = qs_name; return 0; } static void quorumd_stop(void) { cl_log(LOG_DEBUG, "quorum plugin: quorumd, quorumd_stop()"); if (repeat_timer != 0) { g_source_remove(repeat_timer); repeat_timer = 0; } if (session != NULL) { gnutls_bye (session, GNUTLS_SHUT_WR); gnutls_deinit (session); close(sock); session = NULL; } cur_quorum = -1; } static struct hb_quorum_fns Ops ={ quorumd_getquorum, quorumd_init, quorumd_stop }; gboolean connect_quorum_server(gpointer data) { struct sockaddr_in addr; struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; const char* version = "2_0_8"; struct hostent* hp; int quorum; size_t len; char* s = NULL; char buf[MAXMSG]; cl_log(LOG_DEBUG, "quorum plugin: quorumd, connect_quorum_server"); /* initialize gnutls */ initialize_tls_global(); /* create socket */ sock=socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock == -1 ) { return FALSE; } /* connect to server*/ memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; hp = gethostbyname(quorum_server); if (hp == NULL) { return FALSE; } memcpy(&addr.sin_addr, hp->h_addr_list[0], sizeof(struct in_addr)); addr.sin_port = htons(5561); if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) == -1) { return FALSE; } session = initialize_tls_session(sock); if (session == NULL) { close(sock); session = NULL; return FALSE; } /* send the version */ gnutls_record_send(session, version, strlen(version)+1); /* send initialize msg */ msg = ha_msg_new(10); ha_msg_add(msg, "t","init"); ha_msg_add(msg, "cl_name", cluster); s = msg2wirefmt(msg, &len); gnutls_record_send(session, s, len); free(s); len = gnutls_record_recv(session, buf, MAXMSG); if ((ssize_t)len <=0) { close(sock); session = NULL; return FALSE; } ret = wirefmt2msg(buf, len, FALSE); if (STRNCMP_CONST(ha_msg_value(ret, "result"), "ok") != 0) { close(sock); session = NULL; return FALSE; } if (ha_msg_value_int(ret, "interval", &interval)!= HA_OK) { close(sock); session = NULL; return FALSE; } ha_msg_del(ret); ha_msg_del(msg); /* send quorum query msg */ msg = ha_msg_new(10); ha_msg_add(msg, "t","quorum"); ha_msg_add_int(msg, "nodenum", nodenum); ha_msg_add_int(msg, "weight", weight); s = msg2wirefmt(msg, &len); gnutls_record_send(session, s, len); free(s); len = gnutls_record_recv(session, buf, MAXMSG); ret = wirefmt2msg(buf, len, FALSE); ha_msg_value_int(ret, "quorum", &quorum); LOG(LOG_DEBUG,"quorum:%d\n", quorum); cur_quorum = quorum; ha_msg_del(ret); ha_msg_del(msg); /* set the repeatly query */ repeat_timer = g_timeout_add(interval, query_quorum, NULL); return FALSE; } gboolean query_quorum(gpointer data) { int quorum; size_t len; char* s = NULL; char buf[MAXMSG]; struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; if(session != NULL) { msg = ha_msg_new(10); ha_msg_add(msg, "t","quorum"); ha_msg_add_int(msg, "nodenum", nodenum); ha_msg_add_int(msg, "weight", weight); s = msg2wirefmt(msg, &len); gnutls_record_send(session, s, len); free(s); len = gnutls_record_recv(session, buf, MAXMSG); if ((ssize_t)len < 0) { gnutls_bye (session, GNUTLS_SHUT_WR); gnutls_deinit (session); close(sock); session = NULL; cur_quorum = -1; ha_msg_del(msg); return TRUE; } ret 
= wirefmt2msg(buf, len, FALSE); ha_msg_value_int(ret, "quorum", &quorum); ha_msg_del(ret); ha_msg_del(msg); if (cur_quorum!=-1 && cur_quorum!=quorum && callback!=NULL){ cur_quorum = quorum; callback(); } cur_quorum = quorum; } else { connect_quorum_server(NULL); } return TRUE; } void initialize_tls_global(void) { gnutls_global_init (); gnutls_certificate_allocate_credentials (&xcred); gnutls_certificate_set_x509_trust_file (xcred, CACERT, GNUTLS_X509_FMT_PEM); gnutls_certificate_set_x509_key_file (xcred, CLIENTCERT, CLIENTKEY, GNUTLS_X509_FMT_PEM); } gnutls_session initialize_tls_session (int sd) { int ret; gnutls_session session; const int cert_type_priority[2] = { GNUTLS_CRT_X509,0}; gnutls_init (&session, GNUTLS_CLIENT); gnutls_set_default_priority (session); gnutls_certificate_type_set_priority (session, cert_type_priority); gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, xcred); gnutls_transport_set_ptr (session, (gnutls_transport_ptr) GINT_TO_POINTER(sd)); ret = gnutls_handshake (session); if (ret < 0) { close (sd); gnutls_deinit (session); fprintf (stderr, "*** Handshake failed\n"); gnutls_perror (ret); return NULL; } verify_certificate(session); return session; } int verify_certificate (gnutls_session session) { unsigned int cert_list_size; const gnutls_datum *cert_list; int ret; gnutls_x509_crt cert; ret = gnutls_certificate_verify_peers (session); if (ret < 0) { printf("gnutls_certificate_verify_peers2 returns error.\n"); return -1; } if (gnutls_certificate_type_get (session) != GNUTLS_CRT_X509) { printf("The certificate is not a x.509 cert\n"); return -1; } if (gnutls_x509_crt_init (&cert) < 0) { printf("error in gnutls_x509_crt_init\n"); return -1; } cert_list = gnutls_certificate_get_peers (session, &cert_list_size); if (cert_list == NULL) { printf("No certificate was found!\n"); return -1; } if (gnutls_x509_crt_import (cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) { printf("error parsing certificate\n"); return -1; } if (gnutls_x509_crt_get_expiration_time (cert) < time (0)) { printf("The certificate has expired\n"); return -1; } if (gnutls_x509_crt_get_activation_time (cert) > time (0)) { printf("The certificate is not yet activated\n"); return -1; } gnutls_x509_crt_deinit (cert); return 0; } Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorum/twonodes.c0000644000000000000000000000620511576626513022767 0ustar00usergroup00000000000000 /* twonodes.c: tiebreaker module * this module breaks the tie if number of nodes is 2, otherwise the tie * is not broken. * * Copyright (C) 2005 Guochun Shi * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define PIL_PLUGINTYPE HB_QUORUM_TYPE #define PIL_PLUGINTYPE_S HB_QUORUM_TYPE_S #define PIL_PLUGIN twonodes #define PIL_PLUGIN_S "twonodes" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_quorum_fns twonodesOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate = NULL; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &twonodesOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int twonodes_break_tie(const char* cluster , int member_count, int member_quorum_votes , int total_node_count, int total_quorum_votes) { cl_log(LOG_DEBUG, "quorum plugin: twonodes"); cl_log(LOG_DEBUG, "cluster:%s, member_count=%d, member_quorum_votes=%d", cluster, member_count, member_quorum_votes); cl_log(LOG_DEBUG, "total_node_count=%d, total_quorum_votes=%d", total_node_count, total_quorum_votes); if (total_node_count == 2) { cl_log(LOG_INFO, "Break tie for 2 nodes cluster"); return QUORUM_YES; } return QUORUM_NO; } static int twonodes_init(callback_t notify, const char* cl_name, const char* qs_name) { return 0; } static void twonodes_stop(void) { } static struct hb_quorum_fns twonodesOps ={ twonodes_break_tie, twonodes_init, twonodes_stop }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorum/weight.c0000644000000000000000000000733311576626513022417 0ustar00usergroup00000000000000/* majority.c: quorum module * policy --- if it has more than half of total number of nodes, you have the quorum * if you have exactly half othe total number of nodes, you don't have the quorum * otherwise you have a tie * * Copyright (C) 2005 Guochun Shi * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define PIL_PLUGINTYPE HB_QUORUM_TYPE #define PIL_PLUGINTYPE_S HB_QUORUM_TYPE_S #define PIL_PLUGIN weight #define PIL_PLUGIN_S "weight" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_quorum_fns weightOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate = NULL; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &weightOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int weight_getquorum(const char* cluster , int member_count, int member_quorum_votes , int total_node_count, int total_quorum_votes) { cl_log(LOG_DEBUG, "quorum plugin: weight"); cl_log(LOG_DEBUG, "cluster:%s, member_count=%d, member_quorum_votes=%d", cluster, member_count, member_quorum_votes); cl_log(LOG_DEBUG, "total_node_count=%d, total_quorum_votes=%d", total_node_count, total_quorum_votes); if(member_quorum_votes >= total_quorum_votes/2 + 1){ return QUORUM_YES; } else if ( total_quorum_votes % 2 == 0 && member_quorum_votes == total_quorum_votes/2){ return QUORUM_TIE; } return QUORUM_NO; } static int weight_init(callback_t notify, const char* cl_name, const char* qs_name) { return 0; } static void weight_stop(void) { } static struct hb_quorum_fns weightOps ={ weight_getquorum, weight_init, weight_stop }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorumd/2_0_8.c0000644000000000000000000004242411576626513022103 0ustar00usergroup00000000000000/* quorumd.c: quorum module * policy --- connect the the quorum server configured in ha.cf to query quorum * * Author: Huang Zhen * Copyright (c) 2006 International Business Machines * * SECURITY NOTE: It would be very easy for someone to masquerade as the * device that you're pinging. If they don't know the password, all they can * do is echo back the packets that you're sending out, or send out old ones. * This does mean that if you're using such an approach, that someone could * make you think you have quorum when you don't during a cluster partition. * The danger in that seems small, but you never know ;-) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #define PIL_PLUGINTYPE HB_QUORUMD_TYPE #define PIL_PLUGINTYPE_S HB_QUORUMD_TYPE_S #define PIL_PLUGIN 2_0_8 #define PIL_PLUGIN_S "2_0_8" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_quorumd_fns Ops; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate = NULL; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &Ops, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int test(void) { cl_log(LOG_DEBUG, "quorumd plugin 2.0.8, test()"); return 123; } #define DEFAULT_TIMEOUT 5000 #define MAX_NAME_LEN 255 #define MAX_DATA 1024 #define QUORUM_YES 0 #define QUORUM_NO 1 #define QUORUM_TIE 2 #define T_INIT "init" #define T_QUORUM "quorum" #define T_BRB "brb" #define T_ACK "ack" typedef struct { char name[MAXLINE]; int t_timeout; int t_interval; int t_takeover; int t_giveup; int cur_quorum; int waiting; guint waiting_src; GList* clients; int nodenum; int weight; }qs_cluster_t; typedef struct { char CN[MAX_DN_LEN]; int id; guint ch_src; guint timeout_src; int nodenum; int weight; GIOChannel* ch; qs_cluster_t* cluster; gnutls_session session; }qs_client_t; static void del_cluster(gpointer data); static gboolean del_client(gpointer data); static int load_config_file(void); static gboolean _remove_cluster(gpointer key, gpointer value, gpointer user_data); static int dump_data(int priority); static void dump_cluster(int priority, qs_cluster_t* cluster); static void _dump_cluster(gpointer key, gpointer value, gpointer user_data); static void dump_client(int priority,qs_client_t* client); static int on_connect(int sock, gnutls_session session, const char* CN); static void on_disconnect(gpointer data); static gboolean on_msg_arrived(GIOChannel *ch , GIOCondition condition , gpointer data); static struct ha_msg* on_init_msg(struct ha_msg* msg, qs_client_t* client); static struct ha_msg* on_quorum_msg(struct ha_msg* msg, qs_client_t* client); static gboolean on_cluster_finish_waiting(gpointer data); static int calculate_quorum(qs_client_t* client); static GHashTable* clusters = NULL; static int init(void) { cl_log(LOG_DEBUG, "quorumd plugin 2.0.8, init()"); clusters = g_hash_table_new_full(g_str_hash, g_str_equal, free, del_cluster); if(load_config_file() == -1) { return -1; } return 0; } #define WHITESPACE " \t\n\r\f" #define COMMENTCHAR '#' #define CRLF "\r\n" static int load_config_file(void) { FILE* f; qs_cluster_t* cluster = NULL; GList* list = NULL; int skip = 0; char buf[MAXLINE]; char key[MAXLINE]; char* p; char* cp; int value; 
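	/*
	 * The quorum server configuration is parsed line by line: a
	 * "cluster <name>" line opens a new cluster section, and within a
	 * section the recognized keys are "version", "timeout", "interval",
	 * "giveup", "takeover", "nodenum" and "weight"; '#' starts a comment.
	 * A sketch of such a file (the cluster name and values below are
	 * illustrative only, not taken from a shipped example):
	 *
	 *     cluster mycluster
	 *     version  2_0_8
	 *     timeout  5000
	 *     interval 1000
	 *     giveup   3000
	 *     takeover 5000
	 *     nodenum  2
	 *     weight   200
	 */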
quorum_log(LOG_INFO, "load config file %s", CONFIGFILE); /* read the config file*/ f = fopen(CONFIGFILE, "r"); if (f == NULL) { quorum_log(LOG_ERR, "can't open file %s", CONFIGFILE); return -1; } while (fgets(buf, MAXLINE, f) != NULL) { p = buf; p += strspn(p, WHITESPACE); if ((cp = strchr(p, COMMENTCHAR)) != NULL) { *cp = EOS; } if ((cp = strpbrk(p, CRLF)) != NULL) { *cp = EOS; } if (*p == EOS) { continue; } sscanf(p, "%s", key); if (STRNCMP_CONST(key,"cluster")==0) { if(cluster != NULL) { if(!skip) { list = g_list_append(list, cluster); } else { free(cluster); } } cluster = (qs_cluster_t*)malloc(sizeof(qs_cluster_t)); memset(cluster->name, 0, MAXLINE); sscanf(p, "%s %s", key, cluster->name); cluster->t_timeout = 5000; cluster->t_interval = 1000; cluster->t_giveup = 3000; cluster->t_takeover = 5000; cluster->clients = NULL; cluster->cur_quorum = -1; cluster->waiting = FALSE; cluster->nodenum = 0; cluster->weight = 0; skip = 0; } else { if(cluster == NULL) { fclose(f); quorum_log(LOG_ERR, "wrong format in file %s" , CONFIGFILE); return -1; } if (STRNCMP_CONST(key,"version")==0) { sscanf(p, "%s %s", key, buf); if(STRNCMP_CONST(buf,"2_0_8")!=0) { skip = 1; } } else if (!skip && STRNCMP_CONST(key,"timeout")==0) { sscanf(p, "%s %d", key, &value); cluster->t_timeout = value; } else if (!skip && STRNCMP_CONST(key,"interval")==0) { sscanf(p, "%s %d", key, &value); cluster->t_interval = value; } else if (!skip && STRNCMP_CONST(key,"giveup")==0) { sscanf(p, "%s %d", key, &value); cluster->t_giveup = value; } else if (!skip && STRNCMP_CONST(key,"takeover")==0) { sscanf(p, "%s %d", key, &value); cluster->t_takeover = value; } else if (!skip && STRNCMP_CONST(key,"nodenum")==0) { sscanf(p, "%s %d", key, &value); cluster->nodenum = value; } else if (!skip && STRNCMP_CONST(key,"weight")==0) { sscanf(p, "%s %d", key, &value); cluster->weight = value; } else if (!skip) { quorum_log(LOG_ERR, "unknown key %s in file %s" , key, CONFIGFILE); } } } if(cluster != NULL) { if(!skip) { list = g_list_append(list, cluster); } else { free(cluster); } } fclose(f); /* remove the cluster which is not in new configuration*/ g_hash_table_foreach_remove(clusters, _remove_cluster, list); /* insert or update the clusters */ while (list != NULL) { qs_cluster_t* old = NULL; qs_cluster_t* new = (qs_cluster_t*)list->data; list = g_list_remove(list, new); old = (qs_cluster_t*)g_hash_table_lookup(clusters, new->name); if (old == NULL) { g_hash_table_insert(clusters, strdup(new->name), new); } else { old->t_timeout = new->t_timeout; old->nodenum = new->nodenum; old->weight = new->weight; del_cluster(new); } } return 0; } gboolean _remove_cluster(gpointer key, gpointer value, gpointer user_data) { const char* name = (const char*) key; GList* list = (GList*)user_data; while (list != NULL) { qs_cluster_t* cluster = (qs_cluster_t*) list->data; if(strncmp(name, cluster->name, MAXLINE) == 0) { return FALSE; } list = g_list_next(list); } return TRUE; } int on_connect(int sock, gnutls_session session, const char* CN) { static int id = 1; qs_client_t* client = malloc(sizeof(qs_client_t)); if (client == NULL) { quorum_log(LOG_ERR, "malloc failed for new client"); return -1; } strncpy(client->CN, CN, MAX_DN_LEN); client->CN[MAX_DN_LEN-1] = '\0'; client->id = id; client->cluster = NULL; client->ch = g_io_channel_unix_new(sock); g_io_channel_set_close_on_unref(client->ch,TRUE); client->ch_src = g_io_add_watch_full(client->ch,G_PRIORITY_DEFAULT , G_IO_IN|G_IO_ERR|G_IO_HUP,on_msg_arrived, client, on_disconnect); client->timeout_src = -1; 
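	/* the inactivity timer is only armed once the client has joined a
	 * cluster (see on_init_msg / on_quorum_msg) */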
client->nodenum = 0; client->weight = 0; client->session = session; quorum_log(LOG_DEBUG, "create new client %d", id); id++; return 0; } void on_disconnect(gpointer data) { qs_client_t* client = (qs_client_t*)data; quorum_log(LOG_DEBUG, "client %d disconnected", client->id); if (client->timeout_src != -1) { g_source_remove(client->timeout_src); } client->timeout_src = g_timeout_add(0,del_client,client); } void del_cluster(gpointer data) { qs_client_t* client; qs_cluster_t* cluster = (qs_cluster_t*)data; while(cluster->clients != NULL) { client = (qs_client_t*)cluster->clients->data; cluster->clients = g_list_remove(cluster->clients,client); del_client(client); } if (cluster->waiting) { g_source_remove(cluster->waiting_src); } quorum_log(LOG_DEBUG, "delete cluster %s", cluster->name); free(cluster); return; } gboolean del_client(gpointer data) { qs_client_t* client = (qs_client_t*)data; if (client == NULL) { return FALSE; } if (client->session != NULL) { gnutls_bye (client->session, GNUTLS_SHUT_WR); gnutls_deinit (client->session); } if (client->ch_src != 0) { g_source_remove(client->ch_src); client->ch_src = -1; } if (client->ch != NULL) { g_io_channel_unref(client->ch); client->ch = NULL; } if (client->timeout_src != 0) { g_source_remove(client->timeout_src); client->timeout_src = -1; } if (client->cluster != NULL) { client->cluster->clients = g_list_remove(client->cluster->clients, client); if (client->cluster->cur_quorum == client->id) { client->cluster->waiting_src = g_timeout_add( client->cluster->t_takeover , on_cluster_finish_waiting, client->cluster); client->cluster->waiting = TRUE; client->cluster->cur_quorum = -1; } } quorum_log(LOG_DEBUG, "delete client %d", client->id); free(client); return FALSE; } gboolean on_msg_arrived(GIOChannel *ch, GIOCondition condition, gpointer data) { qs_client_t* client; int sock; char buf[MAXMSG]; size_t len; client = (qs_client_t*) data; if (condition & G_IO_IN) { struct ha_msg* msg; sock = g_io_channel_unix_get_fd(ch); len = gnutls_record_recv(client->session, buf, MAXMSG); if ((ssize_t)len <= 0) { quorum_log(LOG_DEBUG , "receive 0 byte or error from client %d", client->id); return FALSE; } msg = wirefmt2msg(buf, len, FALSE); if (msg != NULL) { struct ha_msg* ret = NULL; char* str; const char* type; quorum_debug(LOG_DEBUG, "receive from client %d:", client->id); type = ha_msg_value(msg, F_TYPE); if (STRNCMP_CONST(type,T_INIT)==0) { ret = on_init_msg(msg, client); } else if (STRNCMP_CONST(type,T_QUORUM)==0) { ret = on_quorum_msg(msg, client); } else { ret = ha_msg_new(1); ha_msg_add(ret, F_TYPE, T_ACK); ha_msg_add(ret, "reason", "unknown msg type"); ha_msg_add(ret, "result", "fail"); quorum_log(LOG_ERR, "UNKOWN msg %s ", type); } if (ret != NULL) { str = msg2wirefmt(ret, &len); gnutls_record_send(client->session, str, len); quorum_debug(LOG_DEBUG, "send to client %d:", client->id); free(str); ha_msg_del(ret); } ha_msg_del(msg); } } return TRUE; } struct ha_msg* on_init_msg(struct ha_msg* msg, qs_client_t* client) { struct ha_msg* ret; const char* cl_name; qs_cluster_t* cluster; ret = ha_msg_new(1); ha_msg_add(ret, F_TYPE, T_ACK); if((cl_name = ha_msg_value(msg, "cl_name")) == NULL || strncmp(cl_name, client->CN, MAX_DN_LEN) != 0 || (cluster = g_hash_table_lookup(clusters, cl_name)) == NULL) { quorum_log(LOG_DEBUG, "cl_name:%s, CN:%s",cl_name, client->CN); ha_msg_add(ret, "result", "fail"); return ret; } client->cluster = cluster; cluster->clients = g_list_append(cluster->clients, client); client->timeout_src = 
g_timeout_add(cluster->t_timeout,del_client,client); ha_msg_add_int(ret, "timeout", cluster->t_timeout); ha_msg_add_int(ret, "interval", cluster->t_interval); ha_msg_add_int(ret, "giveup", cluster->t_giveup); ha_msg_add_int(ret, "takeover", cluster->t_takeover); ha_msg_add(ret, "result", "ok"); return ret; } struct ha_msg* on_quorum_msg(struct ha_msg* msg, qs_client_t* client) { struct ha_msg* ret = ha_msg_new(1); ha_msg_add(ret, F_TYPE, T_ACK); if (client->timeout_src != -1) { g_source_remove(client->timeout_src); } client->timeout_src = g_timeout_add(client->cluster->t_timeout , del_client,client); if (ha_msg_value_int(msg, "nodenum", &client->nodenum) != HA_OK || ha_msg_value_int(msg, "weight", &client->weight) != HA_OK) { ha_msg_add_int(ret, "quorum", 0); ha_msg_add(ret, "reason", "can't find nodenum or weight"); ha_msg_add(ret, "result", "fail"); return ret; } ha_msg_add(ret, F_TYPE, T_ACK); ha_msg_add_int(ret, "quorum", calculate_quorum(client)); ha_msg_add(ret, "result", "ok"); return ret; } int calculate_quorum(qs_client_t* client) { qs_cluster_t* cluster = client->cluster; qs_client_t* cur_owner = NULL; qs_client_t* new_owner = NULL; GList* cur; int max_weight = 0; if (cluster->waiting) { return 0; } cur = cluster->clients; while (cur != NULL) { qs_client_t* cur_client = (qs_client_t*)cur->data; if (cur_client->id == cluster->cur_quorum) { cur_owner = cur_client; } if (cur_client->weight > max_weight) { max_weight = cur_client->weight; new_owner = cur_client; } cur = g_list_next(cur); } if (cur_owner == NULL) { cluster->cur_quorum = new_owner->id; } else if (new_owner != cur_owner) { cluster->waiting_src = g_timeout_add( cluster->t_timeout + cluster->t_giveup , on_cluster_finish_waiting, cluster); cluster->waiting = TRUE; cluster->cur_quorum = -1; return 0; } if (client->id == cluster->cur_quorum) { return 1; } return 0; } gboolean on_cluster_finish_waiting(gpointer data) { GList* cur; int max_weight = 0; qs_cluster_t* cluster = (qs_cluster_t*) data; cur = cluster->clients; while (cur != NULL) { qs_client_t* client = (qs_client_t*) cur->data; if (client->weight > max_weight) { cluster->cur_quorum = client-> id; max_weight = client->weight; } cur = g_list_next(cur); } cluster->waiting = FALSE; return FALSE; } void dump_client(int priority, qs_client_t* client) { /* typedef struct { char CN[MAX_DN_LEN]; int id; guint ch_src; guint timeout_src; int nodenum; int weight; GIOChannel* ch; qs_cluster_t* cluster; gnutls_session session; }qs_client_t; */ quorum_log(priority, "\t\tclient %p", client); quorum_log(priority, "\t\tCN=%s", client->CN); quorum_log(priority, "\t\tid=%d", client->id); quorum_log(priority, "\t\tch_src=%d", client->ch_src); quorum_log(priority, "\t\ttimeout_src=%d", client->timeout_src); quorum_log(priority, "\t\tnodenum=%d", client->nodenum); quorum_log(priority, "\t\tweight=%d", client->weight); quorum_log(priority, "\t\tch=%p", client->ch); quorum_log(priority, "\t\tcluster=%p", client->cluster); quorum_log(priority, "\t\tsession=%p", client->session); } void dump_cluster(int priority, qs_cluster_t* cluster) { /* typedef struct { char name[MAXLINE]; int t_timeout; int t_interval; int t_takeover; int t_giveup; int cur_quorum; int waiting; guint waiting_src; GList* clients; int nodenum; int weight; }qs_cluster_t; */ GList* cur; quorum_log(priority, "cluster %p", cluster); quorum_log(priority, "\tname=%s", cluster->name); quorum_log(priority, "\tt_timeout=%d", cluster->t_timeout); quorum_log(priority, "\tt_interval=%d", cluster->t_interval); quorum_log(priority, 
"\tt_takeover=%d", cluster->t_takeover); quorum_log(priority, "\tt_giveup=%d", cluster->t_giveup); quorum_log(priority, "\tcur_quorum=%d", cluster->cur_quorum); quorum_log(priority, "\twaiting=%d", cluster->waiting); quorum_log(priority, "\twaiting_src=%d", cluster->waiting_src); quorum_log(priority, "\tnodenum=%d", cluster->nodenum); quorum_log(priority, "\tweight=%d", cluster->weight); quorum_log(priority, "\tclients=%p(%d)", cluster->clients , g_list_length(cluster->clients)); cur = cluster->clients; while (cur != NULL) { qs_client_t* client = (qs_client_t*)cur->data; dump_client(priority, client); cur = g_list_next(cur); } } void _dump_cluster(gpointer key, gpointer value, gpointer user_data) { qs_cluster_t* cluster = (qs_cluster_t*)value; int priority = GPOINTER_TO_INT(user_data); dump_cluster(priority, cluster); } int dump_data(int priority) { quorum_log(priority, "dump data of quorum server (2_0_8):"); g_hash_table_foreach(clusters, _dump_cluster, GINT_TO_POINTER(priority)); return 0; } static struct hb_quorumd_fns Ops ={ test, init, load_config_file, dump_data, on_connect }; Heartbeat-3-0-7e3a82377fa8/lib/plugins/quorumd/Makefile.am0000644000000000000000000000271511576626513023163 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2005 Guochun Shi # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls AM_CFLAGS = @CFLAGS@ ## libraries halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/quorumd plugin_LTLIBRARIES = 2_0_8.la 2_0_8_la_SOURCES = 2_0_8.c 2_0_8_la_LDFLAGS = -export-dynamic -module -avoid-version -lz $(GNUTLSLIBS) 2_0_8_la_LIBADD = $(top_builddir)/replace/libreplace.la 2_0_8_la_CFLAGS = $(INCLUDES) $(GNUTLSHEAD) Heartbeat-3-0-7e3a82377fa8/lib/plugins/tiebreaker/Makefile.am0000644000000000000000000000264611576626513023607 0ustar00usergroup00000000000000# # InterfaceMgr: Interface manager plugins for Linux-HA # # Copyright (C) 2005 Guochun Shi # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/upmls -I$(top_srcdir)/lib/upmls AM_CFLAGS = @CFLAGS@ ## libraries halibdir = $(libdir)/@HB_PKG@ plugindir = $(halibdir)/plugins/tiebreaker plugin_LTLIBRARIES = twonodes.la twonodes_la_SOURCES = twonodes.c twonodes_la_LDFLAGS = -export-dynamic -module -avoid-version -lz twonodes_la_LIBADD = $(top_builddir)/replace/libreplace.la Heartbeat-3-0-7e3a82377fa8/lib/plugins/tiebreaker/twonodes.c0000644000000000000000000000513211576626513023552 0ustar00usergroup00000000000000 /* twonodes.c: tiebreaker module * this module breaks the tie if number of nodes is 2, otherwise the tie * is not broken. * * Copyright (C) 2005 Guochun Shi * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define PIL_PLUGINTYPE HB_TIEBREAKER_TYPE #define PIL_PLUGINTYPE_S HB_TIEBREAKER_TYPE_S #define PIL_PLUGIN twonodes #define PIL_PLUGIN_S "twonodes" #define PIL_PLUGINLICENSE LICENSE_LGPL #define PIL_PLUGINLICENSEURL URL_LGPL static struct hb_tiebreaker_fns twonodesOps; PIL_PLUGIN_BOILERPLATE2("1.0", Debug) static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static struct hb_media_imports* OurImports; static void* interfprivate; #define LOG PluginImports->log #define MALLOC PluginImports->alloc #define STRDUP PluginImports->mstrdup #define FREE PluginImports->mfree PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin*us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interface implementation */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &twonodesOps, NULL, &OurInterface, (void*)&OurImports, interfprivate); } static int twonodes_break_tie(int member_count, int total_count) { if (total_count == 2) { cl_log(LOG_INFO, "Break tie for 2 nodes cluster"); return TRUE; } return FALSE; } static struct hb_tiebreaker_fns twonodesOps ={ twonodes_break_tie }; Heartbeat-3-0-7e3a82377fa8/membership/Makefile.am0000644000000000000000000000164311576626513021372 0ustar00usergroup00000000000000# # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify 
it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in ## Subdirectories... if QUORUMD_BUILD QUORUMD_DIR = quorumd endif SUBDIRS = ccm $(QUORUMD_DIR) Heartbeat-3-0-7e3a82377fa8/membership/ccm/Makefile.am0000644000000000000000000000551711576626513022140 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(includedir)/clplumbing hadir = $(sysconfdir)/ha.d halibdir = $(libdir)/@HB_PKG@ ccmlibdir = $(halibdir) commmoddir = $(halibdir)/modules/comm havarlibdir = $(localstatedir)/lib/@HB_PKG@ havarrundir = $(localstatedir)/run/ ccmdir = $(HA_VARRUNDIR)/$(HB_PKG)/ccm apigid = @HA_APIGID@ ccmuid = @HA_CCMUID@ gliblib = @GLIBLIB@ AM_CFLAGS = @CFLAGS@ noinst_HEADERS = ccm.h ccmlib.h ## libraries lib_LTLIBRARIES = libccmclient.la libclm.la ## binary progs ccmlib_PROGRAMS = ccm ccm_testclient ## SOURCES ccm_SOURCES = ccm.c \ ccmmisc.c \ ccmgraph.c \ ccmversion.c \ ccmupdate.c \ ccmllm.c \ ccmbitmap.c \ ccm.h \ ccmmain.c \ ccmclient.c \ ccmmsg.h \ ccmmsg.c \ ccm_statemachine.c \ ccmmisc.h \ ccmmem.c ccm_LDADD = -lplumb \ $(top_builddir)/lib/hbclient/libhbclient.la \ $(gliblib) \ -lpils ccm_LDFLAGS = ccm_testclient_SOURCES = ccm_testclient.c ccm_testclient_LDADD = libccmclient.la \ -lplumb \ $(gliblib) \ -lpils libccmclient_la_SOURCES = ccmlib_memapi.c ccmlib_eventapi.c ccmlib.h libccmclient_la_LDFLAGS = -version-info 1:0:0 ## library libclm.la libclmdir = $(libdir)/@HB_PKG@ ## binary program libclm_PROGRAMS = clmtest libclm_la_SOURCES = ccmlib_clm.c ## Add -DPOSIX_THREADS to CFLAGS to compile a thread-safe version library libclm_la_CFLAGS = -g #-DPOSIX_THREADS libclm_la_LDFLAGS = -version-info 1:0:0 clmtest_SOURCES = clmtest.c clmtest_LDADD = libclm.la libccmclient.la \ -lplumb \ $(gliblib)\ -lpils ## additional Makefile targets # additional installations not covered normally install-exec-local: $(mkinstalldirs) $(DESTDIR)$(ccmdir) -chgrp $(apigid) $(DESTDIR)/$(ccmdir) -chown $(ccmuid) $(DESTDIR)/$(ccmdir) chmod 750 $(DESTDIR)/$(ccmdir) uninstall-local: rm 
-fr $(DESTDIR)$(ccmdir) Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccm.c0000644000000000000000000002057411576626513021012 0ustar00usergroup00000000000000 /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "ccm.h" #include "ccmmsg.h" #include "ccmmisc.h" #include #include #include #include extern state_msg_handler_t state_msg_handler[]; struct ha_msg * ccm_readmsg(ccm_info_t *info, ll_cluster_t *hb); static GList* quorum_list = NULL; static struct ha_msg* ccm_handle_hbapiclstat(ccm_info_t *info, const char *orig, const char *status) { int index; enum ccm_state state = info->state; if(state == CCM_STATE_NONE || state == CCM_STATE_VERSION_REQUEST) { return NULL; } assert(status); if(strncmp(status, JOINSTATUS, 5) == 0) { ccm_debug2(LOG_DEBUG, "ccm on %s started", orig); return NULL; } if(!orig){ return NULL; } index = llm_get_index(&info->llm, orig); if(index == -1) { return NULL; } return(ccm_create_leave_msg(info, index)); } /* * The callback function which is called when the status * of a node changes. */ static int nodelist_update(ll_cluster_t* hb, ccm_info_t* info, const char *id, const char *status) { llm_info_t *llm; char oldstatus[STATUSSIZE]; /* update the low level membership of the node * if the status moves from active to dead and if the member * is already part of the ccm, then we have to mimic a * leave message for us */ ccm_debug2(LOG_DEBUG, "nodelist update: Node %s now has status %s", id, status); llm = &info->llm; if (llm_status_update(llm, id, status,oldstatus) != HA_OK) { ccm_log(LOG_ERR, "%s: updating status for node %s failed", __FUNCTION__, id); return HA_FAIL; } if (STRNCMP_CONST(status, DEADSTATUS) == 0){ if(node_is_member(info, id)){ leave_cache(llm_get_index(llm, id)); } } if ( strncmp(llm_get_mynodename(llm), id,NODEIDSIZE ) == 0){ return HA_OK; } ccm_debug(LOG_DEBUG, "status of node %s: %s -> %s" , id, oldstatus, status); if ( part_of_cluster(info->state) && ( STRNCMP_CONST(oldstatus, DEADSTATUS) == 0 && STRNCMP_CONST(status, DEADSTATUS) != 0)){ ccm_send_state_info(hb, info, id); } return HA_OK; } int ccm_control_process(ccm_info_t *info, ll_cluster_t * hb) { struct ha_msg* msg; struct ha_msg* newmsg; const char* type; int ccm_msg_type; const char* orig=NULL; const char* status=NULL; llm_info_t* llm= &info->llm; const char* mynode = llm_get_mynodename(llm); const char* numnodes; int numnodes_val; repeat: /* read the next available message */ msg = ccm_readmsg(info, hb); /* this is non-blocking */ if (msg) { type = ha_msg_value(msg, F_TYPE); orig = ha_msg_value(msg, F_ORIG); status = ha_msg_value(msg, F_STATUS); ccm_debug(LOG_DEBUG, "recv msg %s from %s, status:%s" , type, orig, (status ? 
status : "[null ptr]")); ccm_message_debug2(LOG_DEBUG, msg); if(strcmp(type, T_APICLISTAT) == 0){ /* handle ccm status of on other nodes of the cluster */ if((newmsg = ccm_handle_hbapiclstat(info, orig, status)) == NULL) { ha_msg_del(msg); return TRUE; } ha_msg_del(msg); msg = newmsg; } else if(strcasecmp(type, T_STATUS) == 0){ const char* nodetype; const char* site; int weight; if (llm_is_valid_node(&info->llm, orig)){ if (nodelist_update(hb, info,orig, status) != HA_OK){ ccm_log(LOG_ERR, "%s: updating node status for " "nodelist failed(%s-%s)", __FUNCTION__, orig, status); return FALSE; } ha_msg_del(msg); return TRUE; } nodetype = hb->llc_ops->node_type(hb, orig); if (nodetype == NULL){ ccm_log(LOG_ERR, "%s: get node %s's type failed", __FUNCTION__, orig); return TRUE; } if (STRNCMP_CONST(nodetype, NORMALNODE) !=0 ){ return TRUE; } nodetype = hb->llc_ops->node_type(hb, orig); site = hb->llc_ops->node_site(hb, orig); weight = hb->llc_ops->node_weight(hb, orig); if (llm_add(llm, orig, status, mynode, site, weight) != HA_OK){ ccm_log(LOG_ERR, "%s: adding node(%s) to llm failed", __FUNCTION__,orig); return FALSE; } ccm_debug2(LOG_INFO, "after adding node %s", orig); llm_display(llm); jump_to_joining_state(hb, info, msg); } else if (strcasecmp(type, T_DELNODE) ==0){ const char* node = ha_msg_value(msg, F_NODE); if (node == NULL){ ccm_log(LOG_ERR, "%s: field node not found", __FUNCTION__); return FALSE; } if (llm_del(llm, node) != HA_OK){ ccm_log(LOG_ERR, "%s: deleting node %s failed", __FUNCTION__, node); return FALSE; } jump_to_joining_state(hb, info, msg); } } else { msg = timeout_msg_mod(info); } type = ha_msg_value(msg, F_TYPE); ccm_msg_type = ccm_string2type(type); if (ccm_msg_type < 0){ goto out; } numnodes = ha_msg_value(msg, F_NUMNODES); if(numnodes != NULL){ numnodes_val = atoi(numnodes); if (numnodes_val != info->llm.nodecount){ ccm_log(LOG_ERR , "%s: Node count from node %s does not agree" ": local count=%d, count in message=%d" , __FUNCTION__ , orig, info->llm.nodecount, numnodes_val); ccm_log(LOG_ERR, "Please make sure ha.cf files on all" " nodes have same nodes list or add \"autojoin any\" " "to ha.cf"); ccm_log(LOG_INFO, "%s", "If this problem persists" ", check the heartbeat 'hostcache' files" " in the cluster to look for problems."); exit(1); } } state_msg_handler[info->state](ccm_msg_type, msg, hb, info); out: if(ccm_msg_type != CCM_TYPE_TIMEOUT) { ha_msg_del(msg); } /* If there is another message in the channel, * process it now. */ if (hb->llc_ops->msgready(hb)) goto repeat; return TRUE; } #define QUORUM_S "HA_quorum" #define TIEBREAKER_S "HA_tiebreaker" gboolean ccm_calculate_quorum(ccm_info_t* info) { struct hb_quorum_fns* funcs = NULL; const char* quorum_env = NULL; char* quorum_str = NULL; char* end = NULL; char* begin = NULL; GList* cur = NULL; int rc; if (quorum_list == NULL){ quorum_env = cl_get_env(QUORUM_S); if (quorum_env == NULL){ ccm_debug(LOG_DEBUG, "No quorum selected," "using default quorum plugin(majority:twonodes)"); quorum_str = strdup("majority:twonodes"); } else { quorum_str = strdup(quorum_env); } begin = quorum_str; while (begin != NULL) { end = strchr(begin, ':'); if (end != NULL) { *end = 0; } funcs = cl_load_plugin("quorum", begin); if (funcs == NULL){ ccm_log(LOG_ERR, "%s: loading plugin %s failed", __FUNCTION__, begin); } else { funcs->init(ccm_on_quorum_changed , info->cluster, info->quorum_server); quorum_list = g_list_append(quorum_list, funcs); } begin = (end == NULL)? 
NULL:end+1; } free(quorum_str); } cur = g_list_first(quorum_list); while (cur != NULL) { int mem_weight = 0; int total_weight = 0; int i, node; for (i=0; imemcount; i++) { node = info->ccm_member[i]; mem_weight+=info->llm.nodes[node].weight; } for (i=0; illm.nodecount; i++) { total_weight+=info->llm.nodes[i].weight; } funcs = (struct hb_quorum_fns*)cur->data; rc = funcs->getquorum(info->cluster, info->memcount, mem_weight , info->llm.nodecount, total_weight); if (rc == QUORUM_YES){ return TRUE; } else if (rc == QUORUM_NO){ return FALSE; } cur = g_list_next(cur); } ccm_debug(LOG_ERR, "all quorum plugins can't make a decision! " "assume lost quorum"); return FALSE; } gboolean ccm_stop_query_quorum(void) { struct hb_quorum_fns* funcs = NULL; GList* cur = NULL; if (quorum_list == NULL){ return TRUE; } cur = g_list_first(quorum_list); while (cur != NULL) { funcs = (struct hb_quorum_fns*)cur->data; funcs->stop(); cur = g_list_next(cur); } return FALSE; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccm.h0000644000000000000000000004253711576626513021022 0ustar00usergroup00000000000000/* * ccm.h: definitions Consensus Cluster Manager internal header * file * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef _CLUSTER_MANAGER_H_ #define _CLUSTER_MANAGER_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* */ /* ccm defined new type tokens used by the CCM protocol. */ /* */ #define CCM_VERSIONVAL "ccmpverval" /* version value token */ #define CCM_UPTIME "ccmuptime" /* Uptime for Consensus */ #define CCM_MEMLIST "ccmmemlist" /* bitmap for membership */ #define CCM_PROTOCOL "ccmproto" /* protocol version */ #define CCM_MAJORTRANS "ccmmajor" /* major transition version*/ #define CCM_MINORTRANS "ccmminor" /* minor transition version */ #define CCM_MAXTRANS "ccmmaxt" /* minor transition version */ #define CCM_COOKIE "ccmcookie" /* communication context */ #define CCM_NEWCOOKIE "ccmnewcookie" /* new communication context */ #define CCM_CLSIZE "ccmclsize" /* new cluster size */ #define CCM_UPTIMELIST "ccmuptimelist" /*uptime list*/ #define CCM_QUORUM "ccmquorum" /*do we have quorum?*/ /* ccm_types for easier processing. 
*/ enum ccm_type { CCM_TYPE_PROTOVERSION, CCM_TYPE_PROTOVERSION_RESP, CCM_TYPE_JOIN, CCM_TYPE_REQ_MEMLIST, CCM_TYPE_RES_MEMLIST, CCM_TYPE_FINAL_MEMLIST, CCM_TYPE_ABORT, CCM_TYPE_LEAVE, CCM_TYPE_TIMEOUT, CCM_TYPE_NODE_LEAVE_NOTICE, CCM_TYPE_NODE_LEAVE, CCM_TYPE_MEM_LIST, CCM_TYPE_ALIVE, CCM_TYPE_NEW_NODE, CCM_TYPE_STATE_INFO, CCM_TYPE_RESTART, CCM_TYPE_LAST }; #define BitsInByte CHAR_BIT /* BEGINNING OF version request tracking interfaces */ typedef struct ccm_version_s { longclock_t time; int numtries; int n_resp; /* keeps track of the number of version */ /* responses recevied from other nodes */ /* after we received the first response. */ } ccm_version_t; void version_reset(ccm_version_t *); void version_some_activity(ccm_version_t *); int version_retry(ccm_version_t *, longclock_t); void version_inc_nresp(ccm_version_t *); void version_set_nresp(ccm_version_t *, int); unsigned int version_get_nresp(ccm_version_t *); #define VER_TRY_AGAIN 1 #define VER_NO_CHANGE 2 #define VER_TRY_END 3 /* END OF version request tracking interfaces */ /* BEGINING OF Low Level Membership interfaces */ #define NODEIDSIZE 255 /* if this value is changed, change it also in ccmlib.h */ #define STATUSSIZE 15 #define CCMFIFO HA_VARRUNDIR "/heartbeat/ccm/ccm" /* if this value is changed change it also in ccmlib.h */ typedef struct llm_node_s { char nodename[NODEIDSIZE]; char status[STATUSSIZE]; int uptime; gboolean join_request; int join_request_major_trans; gboolean receive_change_msg; char site[PATH_MAX]; int weight; }llm_node_t; typedef struct llm_info_s { int nodecount; int myindex; llm_node_t nodes[MAXNODE]; } llm_info_t; int llm_get_live_nodecount(llm_info_t *); int llm_node_cmp(llm_info_t *llm, int indx1, int indx2); const char* llm_get_nodename(llm_info_t *, const int ); int llm_status_update(llm_info_t *, const char *, const char *, char*); void llm_display(llm_info_t *llm); int llm_init(llm_info_t *); int llm_is_valid_node(llm_info_t *, const char *); int llm_add(llm_info_t *, const char *, const char *, const char * , const char *,int); int llm_del(llm_info_t* llm,const char* node); int llm_get_index(llm_info_t *, const char *); int llm_get_myindex(llm_info_t *); int llm_get_nodecount(llm_info_t* llm); const char* llm_get_mynodename(llm_info_t* llm); char* llm_get_nodestatus(llm_info_t* llm, const int index); int llm_set_joinrequest(llm_info_t* llm, int index, gboolean value, int ); gboolean llm_get_joinrequest(llm_info_t* llm, int index); gboolean llm_get_joinrequest_majortrans(llm_info_t* llm, int index); int llm_set_change(llm_info_t* llm, int index, gboolean value); gboolean llm_get_change(llm_info_t* llm, int index); int llm_set_uptime(llm_info_t* llm, int index, int uptime); int llm_get_uptime(llm_info_t* llm, int index); /* ccm prototypes */ longclock_t ccm_get_time(void); int ccm_timeout(longclock_t, longclock_t, unsigned long); int ccm_need_control(void *); int ccm_take_control(void *); void* ccm_initialize(void); void ccm_on_quorum_changed(void); IPC_Channel * ccm_get_ipcchan(void *); void ccm_send_init_state(void *); void ccm_check_memoryleak(void); /* BEGINING OF update interfaces */ /* structure that keeps track of new joining requests. 
*/ typedef struct update_s { int index; /* index of the node in the ccm_llm table */ int uptime;/* uptime as specified by the node */ } update_t; typedef struct ccm_update_s { int leader; uint nodeCount; longclock_t inittime; update_t update[MAXNODE]; GSList *cl_head; /* a linked list of cached cluster leader */ /* requests */ } ccm_update_t; #define UPDATE_GET_LEADER(updt) updt->leader #define UPDATE_GET_NODECOUNT(updt) updt->nodeCount #define UPDATE_GET_INITTIME(updt) updt->inittime #define UPDATE_GET_INDEX(updt, i) updt->update[i].index #define UPDATE_GET_UPTIME(updt, i) updt->update[i].uptime #define UPDATE_GET_CLHEAD(updt) (updt)->cl_head #define UPDATE_SET_LEADER(updt, lead) updt->leader = lead #define UPDATE_SET_NODECOUNT(updt, count) updt->nodeCount = count #define UPDATE_SET_INITTIME(updt, time) updt->inittime = time #define UPDATE_SET_INDEX(updt, i, value) updt->update[i].index = value #define UPDATE_SET_UPTIME(updt, i, value) updt->update[i].uptime = value #define UPDATE_SET_CLHEAD(updt, ptr) (updt)->cl_head = ptr #define UPDATE_INCR_NODECOUNT(updt) (updt->nodeCount)++ #define UPDATE_DECR_NODECOUNT(updt) (updt->nodeCount)-- void update_add_memlist_request(ccm_update_t *, llm_info_t *, const char *, const int); void update_free_memlist_request(ccm_update_t *); void update_reset(ccm_update_t *); void update_init(ccm_update_t *); int update_timeout_expired(ccm_update_t *, unsigned long); void update_add(ccm_update_t *, llm_info_t *, const char *, int, gboolean); void update_remove(ccm_update_t *, llm_info_t *, const char *); int update_am_i_leader(ccm_update_t *, llm_info_t *); int update_can_be_leader(ccm_update_t *, llm_info_t *llm, const char *, int ); const char * update_get_cl_name(ccm_update_t *, llm_info_t *); void * update_initlink(ccm_update_t *); const char * update_next_link(ccm_update_t *, llm_info_t *, void *, uint *); void update_freelink(ccm_update_t *, void *); int update_get_next_index(ccm_update_t *, llm_info_t *, int *); int update_strcreate(ccm_update_t *tab, char *memlist,llm_info_t *llm); int update_is_node_updated(ccm_update_t *, llm_info_t *, const char *); int update_get_uptime(ccm_update_t *, llm_info_t *, int ); void update_display(int pri,llm_info_t* llm, ccm_update_t* tab); /* END OF update interfaces */ /* BEGINNING OF graph interfaces */ typedef struct vertex_s { char *bitmap; /* bitmap sent by each node */ int count; /* connectivity number for each node */ int uuid; /* the uuid of the node */ } vertex_t; typedef struct graph_s { vertex_t *graph_node[MAXNODE]; int graph_nodes;/* no of nodes that had sent the join message */ /* whose bitmaps we are now expecting */ int graph_rcvd; /* no of nodes that have sent a memlistbitmap */ } graph_t; graph_t * graph_init(void); void graph_free(graph_t *); void graph_add_uuid(graph_t *, int ); void graph_update_membership(graph_t *, int , char *); int graph_filled_all(graph_t *); int graph_get_maxclique(graph_t *, char **); void graph_add_to_membership(graph_t *, int, int); /* END OF graph interfaces */ /* BEGINNING OF bitmap interfaces */ int bitmap_create(char **, int); void bitmap_delete(char *); void bitmap_mark(int, char *, int); void bitmap_clear(int, char *, int); int bitmap_test(int, const char *, int); int bitmap_count(const char *, int); void bitmap_print(char *, int, char *); void bitmap_reset(char *, int); int bitmap_size(int); int bitmap_copy(char *, char *); /* END OF bitmap interfaces */ size_t strnlen(const char *, size_t); /*TOBEDONE*/ /* end ccm */ /* BEGINNING OF client management interfaces */ 
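/*
 * Added note: the client management interfaces below are what the CCM
 * state machine uses to notify connected library clients.  For example,
 * ccm_set_state() calls client_influx() when the node (re)enters the
 * JOINING state, and ccm_reset() calls client_evicted() when an
 * already-joined node is evicted from the cluster.
 */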
void client_init(void); int client_add(struct IPC_CHANNEL *); void client_delete(struct IPC_CHANNEL *); void client_delete_all(void); void client_llm_init(llm_info_t *); void client_influx(void); void client_evicted(void); /* END OF client management interfaces */ /* */ /* the various states of the CCM state machine. */ /* */ enum ccm_state { CCM_STATE_NONE=0, /* is in NULL state */ CCM_STATE_VERSION_REQUEST, /* sent a request for protocol version */ CCM_STATE_JOINING, /* has initiated a join protocol */ CCM_STATE_SENT_MEMLISTREQ, /* CL has sent a request for member list */ /* this state is applicable only on CL */ CCM_STATE_MEMLIST_RES, /* Responded member list to the Cluster */ /* Leader */ CCM_STATE_JOINED, /* PART of the CCM cluster membership! */ CCM_STATE_WAIT_FOR_MEM_LIST, CCM_STATE_WAIT_FOR_CHANGE, CCM_STATE_NEW_NODE_WAIT_FOR_MEM_LIST, CCM_STATE_END }; /* the times for repeating sending message */ #define REPEAT_TIMES 10 /* add new enums to this structure as and when new protocols are added */ enum ccm_protocol { CCM_VER_NONE = 0, CCM_VER_1, CCM_VER_LAST }; typedef struct ccm_proto_s { enum ccm_protocol com_hiproto;/* highest protocol version that */ /* this node can handle */ int com_active_proto;/* protocol version */ } ccm_proto_t; typedef struct memcomp_s { graph_t *mem_graph; /* memlist calculation graph */ GSList *mem_maxt; /* the maxtrans of each node */ /* participating in the computation . */ /* NOTE: the transition number of the */ /* next transition is always 1 higher */ /* than that of all transitions seen */ /* by each node participating in the */ /* membership */ longclock_t mem_inittime; /* the time got intialized */ } memcomp_t; #define MEMCOMP_GET_GRAPH(memc) memc->mem_graph #define MEMCOMP_GET_MAXT(memc) memc->mem_maxt #define MEMCOMP_GET_INITTIME(memc) memc->mem_inittime #define MEMCOMP_SET_GRAPH(memc, gr) memc->mem_graph=gr #define MEMCOMP_SET_MAXT(memc, list) memc->mem_maxt=list #define MEMCOMP_SET_INITTIME(memc,time) memc->mem_inittime=time #define CCM_SET_ACTIVEPROTO(info, val) \ info->ccm_active_proto = val #define CCM_SET_MAJORTRANS(info, val) \ { \ info->ccm_transition_major = val; \ info->ccm_max_transition = \ (info->ccm_max_transition < val ? 
\ val: info->ccm_max_transition); \ } #define CCM_SET_MINORTRANS(info, val) \ info->ccm_transition_minor = val #define CCM_INIT_MAXTRANS(info) \ info->ccm_max_transition = 0 /* NOTE the reason the increment for majortrans is done */ /* as below is to force recomputation of ccm_max_transition */ #define CCM_INCREMENT_MAJORTRANS(info) \ CCM_SET_MAJORTRANS(info, \ CCM_GET_MAJORTRANS(info)+1) #define CCM_INCREMENT_MINORTRANS(info) \ info->ccm_transition_minor++ #define CCM_RESET_MAJORTRANS(info) \ info->ccm_transition_major = 0 #define CCM_RESET_MINORTRANS(info) \ info->ccm_transition_minor = 0 #define CCM_SET_JOINED_TRANSITION(info, trans) \ info->ccm_joined_transition = trans #define CCM_SET_COOKIE(info, val) \ strncpy(info->ccm_cookie, val, COOKIESIZE) #define CCM_SET_CL(info, index) info->ccm_cluster_leader = index #define CCM_GET_ACTIVEPROTO(info) info->ccm_active_proto #define CCM_GET_MAJORTRANS(info) info->ccm_transition_major #define CCM_GET_MINORTRANS(info) info->ccm_transition_minor #define CCM_GET_MAXTRANS(info) info->ccm_max_transition #define CCM_GET_STATE(info) info->state #define CCM_GET_HIPROTO(info) info->ccm_hiproto #define CCM_GET_LLM(info) (&(info->llm)) #define CCM_GET_UPDATETABLE(info) (&(info->ccm_update)) #define CCM_GET_MEMCOMP(info) (&(info->ccm_memcomp)) #define CCM_GET_JOINED_TRANSITION(info) info->ccm_joined_transition #define CCM_GET_LLM_NODECOUNT(info) llm_get_nodecount(&info->llm) #define CCM_GET_MY_HOSTNAME(info) ccm_get_my_hostname(info) #define CCM_GET_COOKIE(info) info->ccm_cookie #define CCM_GET_MEMINDEX(info, i) info->ccm_member[i] #define CCM_GET_MEMTABLE(info) info->ccm_member #define CCM_GET_CL(info) info->ccm_cluster_leader #define CCM_TRANS_EARLIER(trans1, trans2) (trans1 < trans2) /*TOBEDONE*/ #define CCM_GET_VERSION(info) &(info->ccm_version) #define CCM_TMOUT_SET_U(info,t) info->tmout.u=t #define CCM_TMOUT_SET_LU(info,t) info->tmout.lu=t #define CCM_TMOUT_SET_VRS(info,t) info->tmout.vrs=t #define CCM_TMOUT_SET_ITF(info,t) info->tmout.itf=t #define CCM_TMOUT_SET_IFF(info,t) info->tmout.iff=t #define CCM_TMOUT_SET_FL(info,t) info->tmout.fl=t #define CCM_TMOUT_GET_U(info) info->tmout.u #define CCM_TMOUT_GET_LU(info) info->tmout.lu #define CCM_TMOUT_GET_VRS(info) info->tmout.vrs #define CCM_TMOUT_GET_ITF(info) info->tmout.itf #define CCM_TMOUT_GET_IFF(info) info->tmout.iff #define CCM_TMOUT_GET_FL(info) info->tmout.fl typedef struct ccm_tmout_s { long iff; /* membership_Info_From_Followers_timeout */ long itf; /* membership_Info_To_Followers_timeout */ long fl; /* membership_Final_List_timeout */ long u; /* update timeout */ long lu; /* long update timeout */ long vrs; /* version timeout */ } ccm_tmout_t; enum change_event_type{ TYPE_NONE, NODE_LEAVE, NEW_NODE }; #define COOKIESIZE 15 typedef struct ccm_info_s { llm_info_t llm; /* low level membership info */ int memcount; /* number of nodes in the ccm cluster */ int ccm_member[MAXNODE];/* members of the ccm cluster */ memcomp_t ccm_memcomp; /* the datastructure to compute the */ /* final membership for each membership */ /* computation instance of the ccm protocol. */ /* used by the leader only. */ ccm_proto_t ccm_proto; /* protocol version information */ #define ccm_active_proto ccm_proto.com_active_proto #define ccm_hiproto ccm_proto.com_hiproto char ccm_cookie[COOKIESIZE];/* context identification string. */ uint32_t ccm_transition_major;/* transition number of the cluster */ int ccm_cluster_leader; /* cluster leader of the last major */ /* transition. 
index of cl in ccm_member table */ int ccm_joined_transition; /* this indicates the major transition */ /* number during which this node became */ /* a member of the cluster. */ /* A sideeffect of this is it also */ /* is used to figure out if this node */ /* was ever a part of the cluster. */ /* Should be intially set to 0 */ uint32_t ccm_max_transition; /* the maximum transition number seen */ /* by this node ever since it was born. */ enum ccm_state state; /* cluster state of this node */ uint32_t ccm_transition_minor;/* minor transition number of the */ /* cluster */ ccm_update_t ccm_update; /* structure that keeps track */ /* of uptime of each member */ ccm_version_t ccm_version; /* keeps track of version request */ /* related info */ ccm_tmout_t tmout; uint32_t change_event_remaining_count; enum change_event_type change_type; char change_node_id[NODEIDSIZE]; char cluster[PATH_MAX]; char quorum_server[PATH_MAX]; int has_quorum; /* -1, not set, 0, no quorum, 1, has quorum */ } ccm_info_t; /* * datastructure passed to the event loop. * This acts a handle, and should not be interpreted * by the event loop. */ typedef struct ccm_s { ll_cluster_t *hbfd; void *info; } ccm_t; void client_new_mbrship(ccm_info_t*, void*); void ccm_reset(ccm_info_t *info); const char* state2string(int state); int ccm_control_process(ccm_info_t *info, ll_cluster_t * hb); int jump_to_joining_state(ll_cluster_t* hb, ccm_info_t* info, struct ha_msg* msg); gboolean ccm_calculate_quorum(ccm_info_t* info); gboolean ccm_stop_query_quorum(void); typedef void (*state_msg_handler_t)(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info); #define ccm_log(priority, fmt...); \ cl_log(priority, fmt); \ #define ccm_debug(priority, fmt...); \ if ( debug_level >= 1 ) { \ cl_log(priority, fmt); \ } #define ccm_debug2(priority, fmt...); \ if ( debug_level >= 2 ) { \ cl_log(priority, fmt); \ } #define ccm_message_debug2(priority,msg); \ if ( debug_level >= 2) { \ cl_log_message(priority, msg); \ } #endif Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccm_statemachine.c0000644000000000000000000030625111576626513023536 0ustar00usergroup00000000000000/* * ccm.c: Consensus Cluster Service Program * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #ifdef HAVE_STDINT_H #include #endif #include #include #include "ccmmsg.h" #include "ccmmisc.h" /* PROTOTYPE */ static void ccm_reset_all_join_request(ccm_info_t* info); static void report_reset(void); static int ccm_already_joined(ccm_info_t *); static void ccm_memcomp_reset(ccm_info_t *); /* For enhanced membership service */ static void append_change_msg(ccm_info_t *info,const char *node); static int received_all_change_msg(ccm_info_t *info); static int is_expected_change_msg(ccm_info_t *info, const char *node, enum change_event_type); static void add_change_msg(ccm_info_t *info, const char *node, const char *orig, enum change_event_type); static void reset_change_info(ccm_info_t *info); static void send_mem_list_to_all(ll_cluster_t *hb, ccm_info_t *info, char *cookie); static void ccm_fill_update_table(ccm_info_t *info, ccm_update_t *update_table, const void *uptime_list); static void dump_mbrs(ccm_info_t *info); static longclock_t change_time; static gboolean gl_membership_converged = FALSE; const char state_strings[12][64]={ "CCM_STATE_NONE", "CCM_STATE_VERSION_REQUEST", "CCM_STATE_JOINING", "CCM_STATE_SENT_MEMLISTREQ", "CCM_STATE_MEMLIST_RES", "CCM_STATE_JOINED", "CCM_STATE_WAIT_FOR_MEM_LIST", "CCM_STATE_WAIT_FOR_CHANGE", "CCM_STATE_NEW_NODE_WAIT_FOR_MEM_LIST", "CCM_STATE_END" }; const char* state2string(int state){ if (state > CCM_STATE_END){ return "INVALID STATE"; } return state_strings[state]; } static int string2state(const char* state_str) { int i; if (state_str == NULL){ ccm_log(LOG_ERR, "%s: state_str is NULL", __FUNCTION__); return -1; } for (i = 0 ; i < DIMOF(state_strings); i++){ if (strncmp(state_strings[i], state_str, 64) == 0){ return i; } } ccm_log(LOG_ERR, "%s: Cannot find a match for string %s", __FUNCTION__, state_str); return -1; } static void ccm_set_state(ccm_info_t* info, int istate,const struct ha_msg* msg) { int oldstate = info->state; info->state = (istate); if((istate)==CCM_STATE_JOINING){ client_influx(); } if (istate == CCM_STATE_JOINED){ gl_membership_converged =TRUE; } if (llm_get_myindex(CCM_GET_LLM(info)) == info->ccm_cluster_leader && CCM_STATE_JOINED == istate) { info->has_quorum = ccm_calculate_quorum(info); } else { ccm_stop_query_quorum (); } ccm_debug(LOG_DEBUG,"node state %s -> %s" , state2string(oldstate),state2string(istate)); } static void change_time_init(void) { change_time = ccm_get_time(); } static int change_timeout(unsigned long timeout) { return(ccm_timeout(change_time, ccm_get_time(), timeout)); } static longclock_t mem_list_time; static void mem_list_time_init(void) { mem_list_time = ccm_get_time(); } static int mem_list_timeout(unsigned long timeout) { return(ccm_timeout(mem_list_time, ccm_get_time(), timeout)); } static longclock_t new_node_mem_list_time; static void new_node_mem_list_time_init(void) { new_node_mem_list_time = ccm_get_time(); } static int new_node_mem_list_timeout(unsigned long timeout) { return(ccm_timeout(new_node_mem_list_time, ccm_get_time(), timeout)); } #define CCM_GET_MYNODE_ID(info) \ info->llm.nodes[info->llm.myindex].nodename #define CCM_GET_CL_NODEID(info) \ info->llm.nodes[CCM_GET_CL(info)].nodename #define CCM_GET_RECEIVED_CHANGE_MSG(info, node) \ llm_get_change(CCM_GET_LLM(info),llm_get_index(&info->llm, node)) #define CCM_SET_RECEIVED_CHANGE_MSG(info, node, value) \ 
llm_set_change(CCM_GET_LLM(info), llm_get_index(&info->llm, node), value) /* //////////////////////////////////////////////////////////////// // BEGIN OF Functions associated with CCM token types that are // communicated accross nodes and their values. //////////////////////////////////////////////////////////////// */ static void ccm_state_wait_for_mem_list(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info); static void ccm_state_new_node_wait_for_mem_list(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info); /* END OF TYPE_STR datastructure and associated functions */ /* */ /* timeout configuration function */ /* */ static void ccm_configure_timeout(ll_cluster_t *hb, ccm_info_t *info) { long keepalive = hb->llc_ops->get_keepalive(hb); ccm_debug2(LOG_DEBUG, "%s: keepalive=%ld", __FUNCTION__, keepalive); CCM_TMOUT_SET_U(info, 5*keepalive); CCM_TMOUT_SET_LU(info, 30*keepalive); CCM_TMOUT_SET_VRS(info, 9*keepalive); CCM_TMOUT_SET_ITF(info, 18*keepalive); CCM_TMOUT_SET_IFF(info, 12*keepalive); CCM_TMOUT_SET_FL(info, CCM_TMOUT_GET_ITF(info)+5); } /* */ /* timeout_msg_create: */ /* fake up a timeout message, which is in the */ /* same format as the other messages that are */ /* communicated across the nodes. */ /* */ #ifdef TIMEOUT_MSG_FUNCTIONS_NEEDED /* */ /* timeout_msg_done: */ /* done with the processing of this message. */ static void timeout_msg_done(void) { /* nothing to do. */ return; } /* */ /* timeout_msg_del: */ /* delete the given timeout message. */ /* nobody calls this function. */ /* someday somebody will call it :) */ static void timeout_msg_del(void) { ha_msg_del(timeout_msg); timeout_msg = NULL; } #endif /* */ /* These are the function that keep track of number of time a version */ /* response message has been dropped. These function are consulted by */ /* the CCM algorithm to determine if a version response message has */ /* to be dropped or not. */ /* */ static int respdrop=0; #define MAXDROP 3 static int resp_can_i_drop(void) { if (respdrop >= MAXDROP){ return FALSE; } return TRUE; } static void resp_dropped(void) { respdrop++; } static void resp_reset(void) { respdrop=0; } /* */ /* End of response processing messages. */ /* */ /* */ /* BEGIN OF functions that track the time since a connectivity reply has */ /* been sent to the leader. */ /* */ static longclock_t finallist_time; static void finallist_init(void) { finallist_time = ccm_get_time(); } static void finallist_reset(void) { finallist_time = 0; } static int finallist_timeout(unsigned long timeout) { return(ccm_timeout(finallist_time, ccm_get_time(), timeout)); } /* */ /* END OF functions that track the time since a connectivity reply has */ /* been sent to the leader. */ /* */ /* Reset all the datastructures. Go to a state which is equivalent */ /* to a state when the node is just about to join a cluster. 
*/ void ccm_reset(ccm_info_t *info) { if(ccm_already_joined(info)){ client_evicted(); } ccm_mem_reset(info); ccm_memcomp_reset(info); CCM_SET_ACTIVEPROTO(info, CCM_VER_NONE); CCM_SET_COOKIE(info,""); CCM_SET_MAJORTRANS(info,0); CCM_SET_MINORTRANS(info,0); CCM_SET_CL(info,-1); CCM_SET_JOINED_TRANSITION(info, 0); ccm_set_state(info, CCM_STATE_NONE, NULL); info->has_quorum = -1; update_reset(CCM_GET_UPDATETABLE(info)); ccm_reset_all_join_request(info); version_reset(CCM_GET_VERSION(info)); finallist_reset(); leave_reset(); report_reset(); } static void ccm_init(ccm_info_t *info) { update_init(CCM_GET_UPDATETABLE(info)); ccm_reset_all_join_request(info); CCM_INIT_MAXTRANS(info); leave_init(); (void)timeout_msg_init(info); ccm_reset(info); } /* * BEGIN OF ROUTINES THAT REPORT THE MEMBERSHIP TO CLIENTS. */ static void report_reset(void) { return; } /* */ /* print and report the cluster membership to clients. */ /* */ static void report_mbrs(ccm_info_t *info) { int i; const char *nodename; static struct born_s { int index; int bornon; } bornon[MAXNODE];/*avoid making it a stack variable*/ if(ccm_get_memcount(info)==1){ bornon[0].index = CCM_GET_MEMINDEX(info,0); bornon[0].bornon = CCM_GET_MAJORTRANS(info); } else for(i=0; i < ccm_get_memcount(info); i++){ bornon[i].index = CCM_GET_MEMINDEX(info,i); bornon[i].bornon = update_get_uptime(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), CCM_GET_MEMINDEX(info,i)); if(bornon[i].bornon==0) bornon[i].bornon=CCM_GET_MAJORTRANS(info); assert(bornon[i].bornon!=-1); } ccm_debug2(LOG_DEBUG,"\t\t the following are the members " "of the group of transition=%d", CCM_GET_MAJORTRANS(info)); for (i=0 ; i < ccm_get_memcount(info); i++) { nodename = llm_get_nodename(CCM_GET_LLM(info), CCM_GET_MEMINDEX(info,i)); ccm_debug2(LOG_DEBUG,"\t\tnodename=%s bornon=%d", nodename, bornon[i].bornon); } /* * report to clients, the new membership */ dump_mbrs(info); client_new_mbrship(info, bornon); return; } /* */ /* generate a random cookie. */ /* NOTE: cookie is a mechanism of seperating out the contexts */ /* of messages of partially partitioned clusters. */ /* For example, consider a case where node A is physically */ /* in the partition X and partition Y, and but has joined */ /* membership in partition X. It will end up getting ccm protocol */ /* messages sent by members in both the partitions. In order to */ /* seperate out messages belonging to individual partition, a */ /* random string is used as a identifier by each partition to */ /* identify its messages. In the above case A will get message */ /* from both the partitions but only listens to messages from */ /* partition X and drops messages from partition Y. */ /* */ static char * ccm_generate_random_cookie(void) { char *cookie; int i; struct timeval tmp; cookie = g_malloc(COOKIESIZE*sizeof(char)); /* g_malloc never returns NULL: assert(cookie); */ /* seed the random with a random value */ gettimeofday(&tmp, NULL); srandom((unsigned int)tmp.tv_usec); for ( i = 0 ; i < COOKIESIZE-1; i++ ) { cookie[i] = random()%(127-'!')+'!'; } cookie[i] = '\0'; return cookie; } static void ccm_free_random_cookie(char *cookie) { assert(cookie && *cookie); g_free(cookie); } /* BEGIN OF FUNCTIONS that keep track of connectivity information */ /* conveyed by individual members of the cluster. These functions */ /* are used by only the cluster leader. Ultimately these connectivity */ /* information is used by the cluster to extract out the members */ /* of the cluster that have total connectivity. 
*/ static int ccm_memcomp_cmpr(gconstpointer a, gconstpointer b) { return(*((const uint32_t *)a)-*((const uint32_t *)b)); } static void ccm_memcomp_free(gpointer data, gpointer userdata) { if(data) { g_free(data); } return; } static void ccm_memcomp_note(ccm_info_t *info, const char *orig, uint32_t maxtrans, const char *memlist) { int index, numbytes; char *bitmap = NULL; uint32_t *ptr; memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); bitmap_create(&bitmap, MAXNODE); if (bitmap == NULL){ ccm_log(LOG_ERR, "bitmap creatation failed"); return; } /* find the index of the originator */ index = llm_get_index(CCM_GET_LLM(info), orig); /* convert the memlist into a bit map and feed it to the graph */ numbytes = ccm_str2bitmap(memlist, strlen(memlist), bitmap); graph_update_membership(MEMCOMP_GET_GRAPH(mem_comp), index, bitmap); /*NOTE DO NOT DELETE bitlist, because it is * being handled by graph*/ ptr = (uint32_t *)g_malloc(2*sizeof(uint32_t)); ptr[0] = maxtrans; ptr[1] = index; MEMCOMP_SET_MAXT(mem_comp, (g_slist_insert_sorted(MEMCOMP_GET_MAXT(mem_comp), ptr, ccm_memcomp_cmpr))); return; } /* called by the cluster leader only */ static void ccm_memcomp_note_my_membership(ccm_info_t *info) { char memlist[MAX_MEMLIST_STRING]; int str_len; str_len = update_strcreate(CCM_GET_UPDATETABLE(info), memlist, CCM_GET_LLM(info)); ccm_memcomp_note(info, llm_get_mynodename(&info->llm), CCM_GET_MAXTRANS(info), memlist); return; } /* add a new member to the membership list */ static void ccm_memcomp_add(ccm_info_t *info, const char *orig) { int index, myindex; memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); index = llm_get_index(CCM_GET_LLM(info), orig); myindex = llm_get_myindex(&info->llm); graph_add_uuid(MEMCOMP_GET_GRAPH(mem_comp), index); graph_add_to_membership(MEMCOMP_GET_GRAPH(mem_comp), myindex, index); /* ccm_memcomp_note(info, orig, maxtrans, memlist); */ return; } static void ccm_memcomp_init(ccm_info_t *info) { int track=-1; int index; memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); MEMCOMP_SET_GRAPH(mem_comp, graph_init()); /* go through the update list and note down all the members who * had participated in the join messages. We should be expecting * reply memlist bitmaps atleast from these nodes. 
*/ while((index = update_get_next_index(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), &track)) != -1) { graph_add_uuid(MEMCOMP_GET_GRAPH(mem_comp),index); } MEMCOMP_SET_MAXT(mem_comp, NULL); MEMCOMP_SET_INITTIME(mem_comp, ccm_get_time()); } static void ccm_memcomp_reset(ccm_info_t *info) { GSList *head; memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); graph_free(MEMCOMP_GET_GRAPH(mem_comp)); MEMCOMP_SET_GRAPH(mem_comp,NULL); head = MEMCOMP_GET_MAXT(mem_comp); g_slist_foreach(MEMCOMP_GET_MAXT(mem_comp), ccm_memcomp_free, NULL); g_slist_free(MEMCOMP_GET_MAXT(mem_comp)); MEMCOMP_SET_MAXT(mem_comp, NULL); return; } static int ccm_memcomp_rcvd_all(ccm_info_t *info) { return graph_filled_all(MEMCOMP_GET_GRAPH(CCM_GET_MEMCOMP(info))); } static int ccm_memcomp_timeout(ccm_info_t *info, long timeout) { memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); return(ccm_timeout(MEMCOMP_GET_INITTIME(mem_comp), ccm_get_time(), timeout)); } static int ccm_memcomp_get_maxmembership(ccm_info_t *info, char **bitmap) { GSList *head; uint32_t *ptr; int uuid; memcomp_t *mem_comp = CCM_GET_MEMCOMP(info); (void)graph_get_maxclique(MEMCOMP_GET_GRAPH(mem_comp), bitmap); head = MEMCOMP_GET_MAXT(mem_comp); while (head) { ptr = (uint32_t *)g_slist_nth_data(head, 0); uuid = ptr[1]; if(bitmap_test(uuid, *bitmap, MAXNODE)) { return ptr[0]; } head = g_slist_next(head); } return 0; } /* */ /* END OF the membership tracking functions. */ /* */ static int ccm_am_i_leader(ccm_info_t *info) { llm_info_t *llm = CCM_GET_LLM(info); if ( llm_get_myindex(llm) == CCM_GET_CL(info)){ return TRUE; } return FALSE; } static gboolean node_is_leader(ccm_info_t* info, const char* nodename) { return( llm_get_index(&info->llm, nodename) == CCM_GET_CL(info)); } static int ccm_already_joined(ccm_info_t *info) { if (CCM_GET_JOINED_TRANSITION(info)) { return TRUE; } return FALSE; } /* * END OF FUNCTIONS that keep track of stablized membership list */ /* * BEGIN OF FUNCTIONS THAT KEEP TRACK of cluster nodes that have shown * interest in joining the cluster. * * NOTE: when a new node wants to join the cluster, it multicasts a * message asking for the necessary information to send out a join * message. (it needs the current major transistion number, the context * string i.e cookie, the protocol number that everybody is operating * in). * * The functions below track these messages sent out by new potential * members showing interest in acquiring the initial context. 
*/ static void ccm_add_new_joiner(ccm_info_t *info, const char *orig, struct ha_msg* msg) { llm_info_t* llm = &info->llm; int idx = llm_get_index(&info->llm, orig); const char* major_trans = 0; int trans_val; /* get the major transition version */ if ((major_trans = ha_msg_value(msg, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "ccm_state_version_request: " "no protocol information"); return; } trans_val = atoi(major_trans); llm_set_joinrequest(llm, idx, TRUE, trans_val); return; } static gboolean ccm_get_all_active_join_request(ccm_info_t* info) { llm_info_t* llm = &info->llm; size_t i; for (i = 0 ; i < llm->nodecount; i++){ if (STRNCMP_CONST(llm->nodes[i].status,"dead") != 0 && llm_get_joinrequest(llm, i) == FALSE ){ return FALSE; } } return TRUE; } static void ccm_reset_all_join_request(ccm_info_t* info) { llm_info_t* llm = &info->llm; size_t i; for (i = 0 ; i < llm->nodecount; i++){ llm_set_joinrequest(llm, i, FALSE, 0); } } static int ccm_am_i_highest_joiner(ccm_info_t *info) { llm_info_t* llm = &info->llm; int total_nodes =llm->nodecount; int my_indx = llm->myindex; int i; for (i =0; i < total_nodes;i++){ if (i == my_indx) continue; if ( llm_get_joinrequest(llm, i)){ int major_trans =llm_get_joinrequest_majortrans(llm, i); int my_major_trans = CCM_GET_MAJORTRANS(info); if (major_trans > my_major_trans ){ return FALSE; }else if (major_trans == my_major_trans){ if (i > my_indx){ return FALSE; } } } } return TRUE; } static void ccm_remove_new_joiner(ccm_info_t *info, const char *orig) { llm_info_t* llm = &info->llm; int index = llm_get_index(llm, orig); llm_set_joinrequest(llm, index, FALSE, 0); return; } /* send reply to a join quest and clear the request*/ static void ccm_send_join_reply(ll_cluster_t *hb, ccm_info_t *info) { llm_info_t* llm = &info->llm; size_t i; for (i = 0 ; i < llm->nodecount; i++){ if ( i == (size_t)llm->myindex){ continue; } if (llm_get_joinrequest(llm, i)){ ccm_send_one_join_reply(hb,info, llm->nodes[i].nodename); llm_set_joinrequest(llm, i, FALSE, 0); } } } /* */ /* END OF FUNCTIONS THAT KEEP TRACK of cluster nodes that have shown */ /* interest in joining the cluster. */ /* */ /* ///////////////////////////////////////////////////////////////////// // // BEGIN OF FUNCTIONS THAT SEND OUT messages to nodes of the cluster // ///////////////////////////////////////////////////////////////////// */ /* compute the final membership list from the acquired connectivity */ /* information from other nodes. And send out the consolidated */ /* members of the cluster information to the all the members of */ /* that have participated in the CCM protocol. */ /* */ /* NOTE: Called by the cluster leader only. 
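 *
 * Rough outline of the steps below, for orientation:
 *   1. take the maximal clique from the connectivity graph together
 *      with the matching max transition number,
 *   2. render it as a memlist string and generate a fresh cookie,
 *   3. broadcast the final memlist with major transition maxtrans+1,
 *   4. adopt that transition locally, reset the minor transition and
 *      install the new cookie,
 *   5. answer any cached join requests, mark this node as cluster
 *      leader and move to CCM_STATE_JOINED.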
*/ /* */ static void ccm_compute_and_send_final_memlist(ll_cluster_t *hb, ccm_info_t *info) { char *bitmap; uint maxtrans; char string[MAX_MEMLIST_STRING]; char *cookie = NULL; int strsize; int repeat; /* get the maxmimum membership list */ maxtrans = ccm_memcomp_get_maxmembership(info, &bitmap); /* create a string with the membership information */ strsize = ccm_bitmap2str(bitmap, string, MAX_MEMLIST_STRING); cookie = ccm_generate_random_cookie(); repeat = 0; ccm_mem_bitmapfill(info, bitmap); bitmap_delete(bitmap); while (ccm_send_final_memlist(hb, info, cookie, string, maxtrans+1) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send finalmemlist", __FUNCTION__); cl_shortsleep(); repeat++; }else{ bitmap_delete(bitmap); return; } } /* fill my new memlist and update the new cookie if any */ /* increment the major transition number and reset the * minor transition number */ CCM_SET_MAJORTRANS(info, maxtrans+1); CCM_RESET_MINORTRANS(info); /* if cookie has changed update it. */ if (cookie) { ccm_debug2(LOG_DEBUG, "%s: cookie changed ", __FUNCTION__); CCM_SET_COOKIE(info, cookie); ccm_free_random_cookie(cookie); } /* check if any joiner is waiting for a response from us. * If so respond and free all the joiners. */ ccm_send_join_reply(hb, info); CCM_SET_CL(info, llm_get_myindex(CCM_GET_LLM(info))); report_mbrs(info);/* call this before update_reset() */ /* update_reset(CCM_GET_UPDATETABLE(info));*/ ccm_memcomp_reset(info); ccm_set_state(info, CCM_STATE_JOINED, NULL); if(!ccm_already_joined(info)) { CCM_SET_JOINED_TRANSITION(info, CCM_GET_MAJORTRANS(info)); } return; } /* */ /* Browse through the list of all the connectivity request messages */ /* from cluster leaders. Send out the connectivity information only */ /* to the node which we believe is the cluster leader. To everybody */ /* else send out a null message. */ /* */ static int ccm_send_cl_reply(ll_cluster_t *hb, ccm_info_t *info) { int ret=FALSE, bitmap_strlen; char memlist[MAX_MEMLIST_STRING]; const char* cl; const char* cl_tmp; void *cltrack; uint trans; int repeat; /* * Get the name of the cluster leader */ cl = update_get_cl_name(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info)); /* search through the update list and find if any Cluster * leader has sent a memlist request. For each, check if * that node is the one which we believe is the leader. * if it is the leader, send it our membership list. * if not send it an NULL membership reply. */ cltrack = update_initlink(CCM_GET_UPDATETABLE(info)); while((cl_tmp = update_next_link(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), cltrack, &trans)) != NULL) { if(strncmp(cl, cl_tmp, NODEIDSIZE) == 0) { if(ccm_already_joined(info) && CCM_GET_MAJORTRANS(info) != trans){ ccm_log(LOG_INFO, "ccm evicted"); ccm_reset(info); return FALSE; } ret = TRUE; bitmap_strlen = update_strcreate(CCM_GET_UPDATETABLE(info), memlist, CCM_GET_LLM(info)); /* send Cluster Leader our memlist only if we are * operating in the same transition as that of * the leader, provided we have been a cluster member * in the past */ repeat = 0; while (ccm_send_memlist_res(hb, info, cl, memlist) !=HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } } else { /* I dont trust this Cluster Leader. 
Send NULL memlist message */ repeat = 0; while (ccm_send_memlist_res(hb, info, cl_tmp, NULL) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } } } update_freelink(CCM_GET_UPDATETABLE(info), cltrack); update_free_memlist_request(CCM_GET_UPDATETABLE(info)); return ret; } /* ///////////////////////////////////////////////////////////////////// // // END OF FUNCTIONS THAT SEND OUT messages to nodes of the cluster // ///////////////////////////////////////////////////////////////////// */ struct ha_msg * ccm_readmsg(ccm_info_t *info, ll_cluster_t *hb); struct ha_msg * ccm_readmsg(ccm_info_t *info, ll_cluster_t *hb) { int uuid; assert(hb); /* check if there are any leave events to be delivered */ if ((uuid=leave_get_next()) != -1) { /* create a leave message and return it */ return ccm_create_leave_msg(info, uuid); } return hb->llc_ops->readmsg(hb, 0); } /* */ /* Move the state of this ccm node, from joining state directly to */ /* the joined state. */ /* */ /* NOTE: this is generally called when a joining nodes determines */ /* that it is the only node in the cluster, and everybody else are */ /* dead. */ /* */ static void ccm_joining_to_joined(ll_cluster_t *hb, ccm_info_t *info) { char *bitmap; char *cookie = NULL; /* create a bitmap with the membership information */ (void) bitmap_create(&bitmap, MAXNODE); bitmap_mark(llm_get_myindex(&info->llm), bitmap, MAXNODE); /* * I am the only around! Lets discard any cookie that we * got from others, and create a new cookie. * This bug was noticed: when testing with partitioned * clusters. */ cookie = ccm_generate_random_cookie(); /* fill my new memlist and update the new cookie if any */ ccm_mem_bitmapfill(info, bitmap); bitmap_delete(bitmap); /* increment the major transition number and reset the * minor transition number */ CCM_INCREMENT_MAJORTRANS(info); CCM_RESET_MINORTRANS(info); /* if cookie has changed update it. */ if (cookie) { ccm_debug2(LOG_DEBUG, "%s: cookie changed ", __FUNCTION__); CCM_SET_COOKIE(info, cookie); ccm_free_random_cookie(cookie); } /* check if any joiner is waiting for a response from us. * If so respond */ ccm_send_join_reply(hb, info); CCM_SET_CL(info, llm_get_myindex(CCM_GET_LLM(info))); update_reset(CCM_GET_UPDATETABLE(info)); ccm_set_state(info, CCM_STATE_JOINED, NULL); report_mbrs(info); if(!ccm_already_joined(info)) { CCM_SET_JOINED_TRANSITION(info, 1); } return; } /* * Move the state of this ccm node, from init state directly to * the joined state. * * NOTE: this is generally called when a node when it determines * that it is all alone in the cluster. 
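 *
 * Post-conditions, as implemented below: the membership is reset so
 * that it contains only this node (uptime 1), the major transition
 * number is incremented and the minor one cleared, a fresh random
 * cookie is installed, this node becomes cluster leader, and the
 * state moves to CCM_STATE_JOINED with JOINED_TRANSITION set to 1.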
*/ static int ccm_init_to_joined(ccm_info_t *info) { char* cookie; int ret; llm_info_t* llm = &info->llm; ccm_mem_reset(info); ret = ccm_mem_add(info, llm_get_myindex(llm)); if (ret != HA_OK){ ccm_log(LOG_ERR, "%s: adding myself to membership failed", __FUNCTION__); return HA_FAIL; } llm_set_uptime(llm, llm_get_myindex(llm), 1); CCM_SET_MAJORTRANS(info, CCM_GET_MAJORTRANS(info)+1); CCM_SET_MINORTRANS(info, 0); cookie = ccm_generate_random_cookie(); CCM_SET_COOKIE(info, cookie); ccm_free_random_cookie(cookie); CCM_SET_CL(info, llm_get_myindex(CCM_GET_LLM(info))); ccm_set_state(info, CCM_STATE_JOINED, NULL); CCM_SET_JOINED_TRANSITION(info, 1); report_mbrs(info); return HA_OK; } static void ccm_all_restart(ll_cluster_t* hb, ccm_info_t* info, struct ha_msg* msg) { const char * orig; llm_info_t* llm = & info->llm; if ( (orig = ha_msg_value(msg, F_ORIG)) ==NULL){ ccm_log(LOG_ERR, "orig not found in message"); return ; } if (strncmp(orig, llm_get_mynodename(llm), NODEIDSIZE) == 0){ /*don't react to our own message*/ return ; } if (info->state != CCM_STATE_VERSION_REQUEST && gl_membership_converged ){ gl_membership_converged = FALSE; ccm_set_state(info, CCM_STATE_NONE, msg); CCM_SET_CL(info,-1); if (ccm_send_restart_msg(hb, info) != HA_OK){ ccm_log(LOG_ERR, "sending out restart msg failed"); return; } if (ccm_send_protoversion(hb, info) != HA_OK){ ccm_log(LOG_ERR, "sending protoversion failed"); return; } ccm_set_state(info, CCM_STATE_VERSION_REQUEST, NULL); } } static int ccm_handle_state_info(ll_cluster_t* hb, ccm_info_t* info, struct ha_msg* msg) { const char* other_node_state; int state; if (!part_of_cluster(info->state)){ return HA_OK; } other_node_state = ha_msg_value(msg, F_STATE); state = string2state(other_node_state); if (state < 0){ ccm_log(LOG_ERR, "%s: wrong state", __FUNCTION__); return HA_FAIL; } if (!part_of_cluster(state)){ return HA_OK; } /*both machines are already part of a cluster, i.e. we are merging two partitions */ ccm_all_restart(hb, info, msg); return HA_OK; } /* */ /* The state machine that processes message when it is */ /* the CCM_STATE_VERSION_REQUEST state */ /* */ static void ccm_state_version_request(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *proto, *cookie, *trans, *clsize; uint trans_val; int proto_val; uint clsize_val; int try; int repeat; /* who sent this message */ if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message from unknown host %s", __FUNCTION__, orig); return; } switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION_RESP: /* get the protocol version */ if ((proto = ha_msg_value(reply, CCM_PROTOCOL)) == NULL) { ccm_debug(LOG_WARNING, "%s: no protocol information", __FUNCTION__); return; } proto_val = atoi(proto); /*TOBEDONE*/ if (proto_val >= CCM_VER_LAST) { ccm_debug(LOG_WARNING, "%s: unknown protocol value", __FUNCTION__); ccm_reset(info); return; } /* if this reply has come from a node which is a member * of a larger cluster, we will try to join that cluster * else we will wait for some time, by dropping this * response. */ if(resp_can_i_drop()) { if ((clsize = ha_msg_value(reply, CCM_CLSIZE)) == NULL){ ccm_debug(LOG_WARNING, "%s: no cookie information", __FUNCTION__); return; } clsize_val = atoi(clsize); if((clsize_val+1) <= (llm_get_nodecount(CCM_GET_LLM(info))+1)/2) { /* drop the response. 
We will wait for * a response from a bigger group */ resp_dropped(); cl_shortsleep(); /* sleep for a while */ /* send a fresh version request message */ version_reset(CCM_GET_VERSION(info)); ccm_set_state(info, CCM_STATE_NONE, reply); /* free all the joiners that we accumulated */ ccm_reset_all_join_request(info); break; } } resp_reset(); /* get the cookie string */ if ((cookie = ha_msg_value(reply, CCM_COOKIE)) == NULL) { ccm_debug(LOG_WARNING, "%s: no cookie information", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no protocol information", __FUNCTION__); return; } trans_val = atoi(trans); /* send the alive message to the cluster The alive msg means: "I want to join this partition!"*/ CCM_SET_ACTIVEPROTO(info, proto_val); CCM_SET_MAJORTRANS(info, trans_val); CCM_SET_MINORTRANS(info, 0); CCM_SET_COOKIE(info, cookie); version_set_nresp(CCM_GET_VERSION(info),0); repeat = 0; while(ccm_send_alive_msg(hb, info) != HA_OK){ if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send alive", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } /* initialize the update table and set our state to NEW_NODE_WAIT_FOR_MEM_LIST */ update_reset(CCM_GET_UPDATETABLE(info)); new_node_mem_list_time_init(); ccm_set_state(info, CCM_STATE_NEW_NODE_WAIT_FOR_MEM_LIST, reply); /* free all the joiners that we accumulated */ ccm_reset_all_join_request(info); break; case CCM_TYPE_TIMEOUT: try = version_retry(CCM_GET_VERSION(info), CCM_TMOUT_GET_VRS(info)); switch (try) { case VER_NO_CHANGE: break; case VER_TRY_AGAIN: ccm_set_state(info, CCM_STATE_NONE, reply); break; case VER_TRY_END: if(ccm_am_i_highest_joiner(info)) { ccm_init_to_joined(info); ccm_send_join_reply(hb, info); } else { ccm_debug2(LOG_DEBUG,"joined but not really"); version_reset(CCM_GET_VERSION(info)); ccm_set_state(info, CCM_STATE_NONE, reply); ccm_reset_all_join_request(info); } break; } break; case CCM_TYPE_PROTOVERSION: /* * cache this request. If we declare ourselves as * a single member group, and if we find that * somebody else also wanted to join the group. * we will restart the join. */ ccm_add_new_joiner(info, orig, reply); if (ccm_get_all_active_join_request(info) && ccm_am_i_highest_joiner(info)){ ccm_init_to_joined(info); ccm_send_join_reply(hb, info); } break; case CCM_TYPE_ABORT: /* note down there is some activity going * on and we are not yet alone in the cluster */ version_some_activity(CCM_GET_VERSION(info)); default: /* nothing to do. Just forget the message */ break; } return; } static void ccm_state_none(enum ccm_type msgtype, struct ha_msg *msg, ll_cluster_t *hb, ccm_info_t *info) { if (ccm_send_protoversion(hb, info)!= HA_OK){ ccm_log(LOG_ERR, "sending version message failed"); return; } ccm_set_state(info, CCM_STATE_VERSION_REQUEST, NULL); ccm_state_version_request(msgtype, msg, hb, info); } /* */ /* The state machine that processes message when it is */ /* CCM_STATE_JOINED state. 
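 *
 * Quick map of the handler below: a JOIN restarts the membership
 * protocol and drops us to CCM_STATE_JOINING; a LEAVE of the current
 * leader likewise forces a rejoin, while a LEAVE or ALIVE seen by the
 * leader opens a change round and moves it to WAIT_FOR_CHANGE;
 * non-leaders forward NODE_LEAVE_NOTICE/ALIVE events to the leader and
 * wait in WAIT_FOR_MEM_LIST; MEM_LIST carries quorum information and,
 * if this node is no longer in the leader's list, drops it back to
 * CCM_STATE_NONE; PROTOVERSION is answered by the leader only;
 * REQ_MEMLIST, RES_MEMLIST, FINAL_MEMLIST and ABORT are dropped.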
*/ /* */ static void ccm_state_joined(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime; uint trans_majorval=0, trans_minorval=0, uptime_val; int repeat; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown" , __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message " "from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type != CCM_TYPE_PROTOVERSION && ccm_msg_type != CCM_TYPE_STATE_INFO && ccm_msg_type != CCM_TYPE_RESTART) { const char* tmpcookie = ha_msg_value(reply, CCM_COOKIE); if (tmpcookie == NULL){ abort(); } if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message " "with unknown cookie, just dropping", __FUNCTION__); dump_mbrs(info); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major " "information", __FUNCTION__); return; } trans_majorval = atoi(trans); /*drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received " "%s message with " "a earlier major transition number " "recv_trans=%d, mytrans=%d", __FUNCTION__, ccm_type2string(ccm_msg_type), trans_majorval, CCM_GET_MAJORTRANS(info)); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor " "information", __FUNCTION__); return; } trans_minorval = atoi(trans); } switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION_RESP: ccm_debug(LOG_WARNING, "%s: dropping message " "of type %s. 
Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); break; case CCM_TYPE_PROTOVERSION: /* If we were leader in the last successful iteration, * then we shall respond with the neccessary information */ if (ccm_am_i_leader(info)){ repeat = 0; while (ccm_send_one_join_reply(hb, info, orig) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join reply", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } } break; case CCM_TYPE_JOIN: /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update " "information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* update the minor transition number if it is of * higher value and send a fresh JOIN message */ if (trans_minorval < CCM_GET_MINORTRANS(info)) { ccm_log(LOG_WARNING, "%s: got a join message from %s from lower " "transition, restarting", __FUNCTION__, orig); ccm_all_restart(hb, info, reply); break; } update_reset(CCM_GET_UPDATETABLE(info)); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); break; case CCM_TYPE_LEAVE: if (!node_is_member(info, orig)){ return; } /* If the dead node is the partition leader, go to * JOINING state */ if (node_is_leader(info, orig)){ update_reset(CCM_GET_UPDATETABLE(info)); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING,reply); return; } /* If I'm the leader, record this "I received the * LEAVE message" and transit to WAIT_FOR_CHANGE */ if(ccm_am_i_leader(info)){ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); add_change_msg(info, orig, CCM_GET_MYNODE_ID(info), NODE_LEAVE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), CCM_GET_MYNODE_ID(info), CCM_GET_JOINED_TRANSITION(info), FALSE); if(received_all_change_msg(info)){ char *newcookie = ccm_generate_random_cookie(); ccm_mem_update(info, orig, NODE_LEAVE); send_mem_list_to_all(hb, info, newcookie); CCM_SET_MAJORTRANS(info, trans_majorval+1); CCM_RESET_MINORTRANS(info); CCM_SET_COOKIE(info, newcookie); ccm_free_random_cookie(newcookie); report_mbrs(info); return; } change_time_init(); ccm_bcast_node_leave_notice(hb,info, orig); ccm_set_state(info, CCM_STATE_WAIT_FOR_CHANGE, reply); } break; case CCM_TYPE_NODE_LEAVE_NOTICE:{ const char* node; const char* leader = orig; node = ha_msg_value(reply, F_NODE); if(node == NULL){ ccm_log(LOG_ERR, "%s: node not found in the message" , __FUNCTION__); ccm_message_debug2(LOG_INFO, reply); return; } if (!node_is_member(info, node)){ return; } if( !ccm_am_i_leader(info)){ send_node_leave_to_leader(hb, info, leader); mem_list_time_init(); ccm_set_state(info,CCM_STATE_WAIT_FOR_MEM_LIST, reply); } break; } case CCM_TYPE_NODE_LEAVE: if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update " "information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* If I'm leader, record received LEAVE message by orig * and transition to WAIT_FOR_CHANGE state */ if(ccm_am_i_leader(info)){ const char *node = ha_msg_value(reply, F_NODE); reset_change_info(info); 
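			/* From here the leader starts a fresh change round:
			 * add_change_msg() records which node is leaving and
			 * how many confirmations are still expected,
			 * update_add() books this reporter's uptime, and the
			 * leader then waits in CCM_STATE_WAIT_FOR_CHANGE until
			 * every remaining member has reported the same
			 * NODE_LEAVE. */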
update_reset(CCM_GET_UPDATETABLE(info)); add_change_msg(info,node,orig,NODE_LEAVE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, FALSE); change_time_init(); ccm_set_state(info, CCM_STATE_WAIT_FOR_CHANGE, reply); } break; case CCM_TYPE_ALIVE: /* If I'm leader, record I received the ALIVE message and * transit to WAIT_FOR_CHANGE */ if (ccm_am_i_leader(info)){ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); add_change_msg(info,orig, CCM_GET_MYNODE_ID(info), NEW_NODE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), CCM_GET_MYNODE_ID(info), CCM_GET_JOINED_TRANSITION(info), FALSE); if(received_all_change_msg(info)){ char *newcookie = ccm_generate_random_cookie(); ccm_mem_update(info, orig, NEW_NODE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), info->change_node_id, trans_majorval+1, FALSE); send_mem_list_to_all(hb, info, newcookie); CCM_SET_MAJORTRANS(info, trans_majorval+1); CCM_RESET_MINORTRANS(info); CCM_SET_COOKIE(info, newcookie); ccm_free_random_cookie(newcookie); report_mbrs(info); return; } change_time_init(); ccm_set_state(info, CCM_STATE_WAIT_FOR_CHANGE, reply); }else{ /* I'm not leader, send CCM_TYPE_NEW_NODE * to leader and transit to WAIT_FOR_MEM_LIST */ ccm_send_newnode_to_leader(hb, info, orig); mem_list_time_init(); ccm_set_state(info,CCM_STATE_WAIT_FOR_MEM_LIST, reply); } break; case CCM_TYPE_NEW_NODE: if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update " "information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* If I'm leader, record received ALIVE message by orig * and transition to WAIT_FOR_CHANGE state */ if(ccm_am_i_leader(info)){ const char *node = ha_msg_value(reply, F_NODE); reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); add_change_msg(info,node, orig, NEW_NODE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, FALSE); change_time_init(); ccm_set_state(info, CCM_STATE_WAIT_FOR_CHANGE, reply); } break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; case CCM_TYPE_MEM_LIST:{ int quorum; const char* memlist; if (strncmp(orig, llm_get_mynodename((&info->llm) ), NODEIDSIZE) == 0){ /*this message is from myself, ignore it*/ break; } memlist = ha_msg_value(reply, CCM_MEMLIST); if (memlist == NULL){ break; } if (ha_msg_value_int (reply, CCM_QUORUM, &quorum)==HA_OK){ info->has_quorum = quorum; } else { info->has_quorum = -1; } if (node_is_leader(info, orig) && !am_i_member_in_memlist(info, memlist)){ ccm_set_state(info, CCM_STATE_NONE, reply); break; } report_mbrs(info); break; } case CCM_TYPE_REQ_MEMLIST: case CCM_TYPE_RES_MEMLIST: case CCM_TYPE_FINAL_MEMLIST: case CCM_TYPE_ABORT: ccm_log(LOG_ERR, "%s: dropping message " "of type %s. Is this a Byzantine failure?" , __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. Just forget the message */ break; default: break; } } /* */ /* The state machine that processes message when it is in */ /* CCM_STATE_WAIT_FOR_CHANGE state. 
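 *
 * Only the cluster leader stays in this state: it has seen a LEAVE or
 * ALIVE event and is collecting the matching NODE_LEAVE/NEW_NODE
 * confirmations from every other member.  Once received_all_change_msg()
 * reports completion it updates the membership, pushes the new memlist
 * and a fresh cookie to everybody and returns to CCM_STATE_JOINED; an
 * unexpected change or a timeout falls back to the full join protocol.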
*/ /* */ static void ccm_state_wait_for_change(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime, *node; uint trans_majorval=0, trans_minorval=0, uptime_val=0; gboolean uptime_set = FALSE; int repeat; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown" , __FUNCTION__ ); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message " "from unknown host %s", __FUNCTION__, orig); return; } node = ha_msg_value(reply, F_NODE); if(ccm_msg_type != CCM_TYPE_PROTOVERSION && ccm_msg_type != CCM_TYPE_STATE_INFO && ccm_msg_type != CCM_TYPE_RESTART) { if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message " "with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major " "information", __FUNCTION__); return; } trans_majorval = atoi(trans); /* drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received " "%s message with " "a earlier major transition number " "recv_trans=%d, mytrans=%d", __FUNCTION__, ccm_type2string(ccm_msg_type), trans_majorval, CCM_GET_MAJORTRANS(info)); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor " "information", __FUNCTION__); return; } trans_minorval = atoi(trans); } switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION: /* * cache this request. We will respond to it, * after transition is complete. */ ccm_add_new_joiner(info, orig, reply); break; case CCM_TYPE_NODE_LEAVE_NOTICE: /* It is my own message, then I can ignore it * or from another lead, i.e. we are in split-brain * and I can do nothing about it */ break; case CCM_TYPE_LEAVE: if (!node_is_member(info, orig)){ return; } if(strcmp(info->change_node_id, orig) == 0 && info->change_type == NODE_LEAVE){ /*It is the same node leaving*/ return; } node = orig; orig = CCM_GET_MYNODE_ID(info); uptime_val = CCM_GET_JOINED_TRANSITION(info); uptime_set = TRUE; /*fall through*/ case CCM_TYPE_NODE_LEAVE: /* only leader can stay in this state */ if(!ccm_am_i_leader(info)) break; if (!uptime_set){ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); uptime_set = TRUE; } /* Record received LEAVE message by orig. * If received all change msg, send mem_list to members. 
*/ if(is_expected_change_msg(info,node,NODE_LEAVE)){ append_change_msg(info,orig); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, FALSE); if(received_all_change_msg(info)){ char *newcookie = ccm_generate_random_cookie(); ccm_mem_update(info, node, NODE_LEAVE); send_mem_list_to_all(hb, info, newcookie); CCM_SET_MAJORTRANS(info, trans_majorval+1); CCM_RESET_MINORTRANS(info); CCM_SET_COOKIE(info, newcookie); report_mbrs(info); reset_change_info(info); /* update_reset(CCM_GET_UPDATETABLE(info));*/ ccm_free_random_cookie(newcookie); ccm_send_join_reply(hb, info); CCM_SET_CL(info, llm_get_myindex(CCM_GET_LLM(info))); ccm_set_state(info, CCM_STATE_JOINED,reply); return; } }else{ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); return; } break; case CCM_TYPE_ALIVE: node = orig; orig = CCM_GET_MYNODE_ID(info); uptime_val = CCM_GET_JOINED_TRANSITION(info); uptime_set = TRUE; /*fall through*/ case CCM_TYPE_NEW_NODE: /* only leader can stay in this state */ if(!ccm_am_i_leader(info)){ assert(0); } if (!uptime_set){ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); uptime_set = TRUE; } if(is_expected_change_msg(info,node, NEW_NODE)){ append_change_msg(info,orig); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, FALSE); if(received_all_change_msg(info)){ char *newcookie = ccm_generate_random_cookie(); ccm_mem_update(info, node, NEW_NODE); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), info->change_node_id, trans_majorval+1, FALSE); send_mem_list_to_all(hb, info, newcookie); CCM_SET_MAJORTRANS(info, trans_majorval+1); CCM_RESET_MINORTRANS(info); CCM_SET_COOKIE(info, newcookie); report_mbrs(info); reset_change_info(info); /* update_reset(CCM_GET_UPDATETABLE(info));*/ ccm_free_random_cookie(newcookie); ccm_send_join_reply(hb, info); ccm_set_state(info, CCM_STATE_JOINED, reply); return; } }else{ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); while (ccm_send_join(hb, info) != HA_OK) { ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); } ccm_set_state(info, CCM_STATE_JOINING, reply); return; } break; case CCM_TYPE_TIMEOUT: if(change_timeout(CCM_TMOUT_GET_U(info))){ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); } break; case CCM_TYPE_JOIN: /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); uptime_set = TRUE; /* update the minor transition number if it is of * higher value and send a fresh JOIN message */ if (trans_minorval < CCM_GET_MINORTRANS(info)) { ccm_log(LOG_WARNING, "%s: got a join message from %s from lower " "transition, restarting", __FUNCTION__, orig); ccm_all_restart(hb, info, reply); break; } update_reset(CCM_GET_UPDATETABLE(info)); 
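		/* A JOIN at the current (or a newer) minor transition aborts
		 * the pending change: rebuild the update table starting with
		 * this sender, adopt its minor transition, broadcast our own
		 * JOIN and drop back to CCM_STATE_JOINING. */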
update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; default: ccm_log(LOG_ERR, "%s: dropping message " "of type %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. Just forget the message */ break; } } /* */ /* The state machine that processes message when it is */ /* in the CCM_STATE_SENT_MEMLISTREQ state */ /* */ static void ccm_state_sent_memlistreq(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *memlist, *uptime; uint trans_minorval=0, trans_majorval=0, trans_maxval=0; uint uptime_val; int repeat; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type == CCM_TYPE_PROTOVERSION || ccm_msg_type == CCM_TYPE_STATE_INFO || ccm_msg_type == CCM_TYPE_RESTART) { goto switchstatement; } if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message " "with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s :no transition major information", __FUNCTION__); return; } trans_majorval = atoi(trans); /* drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received CCM_TYPE_JOIN message with" "a earlier major transition number", __FUNCTION__); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor information", __FUNCTION__); return; } trans_minorval = atoi(trans); switchstatement: switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION_RESP: ccm_debug(LOG_WARNING, "%s: " "dropping message of type %s. " " Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); break; case CCM_TYPE_PROTOVERSION: /* * cache this request. We will respond to it, * if we become the leader. */ ccm_add_new_joiner(info, orig, reply); break; case CCM_TYPE_JOIN: /* The join request has come too late. * I am already the leader, and my * leadership cannot be relinquished * because that can confuse everybody. * This join request shall be considered. * But leadership shall not be relinquished. 
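 *
 * Concretely: a late JOIN that carries exactly our current major and
 * minor transition numbers is folded into the running round via
 * update_add() and ccm_memcomp_add(); a JOIN from any other transition
 * forces a full restart through ccm_all_restart().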
*/ if(trans_majorval != CCM_GET_MAJORTRANS(info) || trans_minorval != CCM_GET_MINORTRANS(info)) { ccm_log(LOG_WARNING, "%s: got a join message from %s from a wrong " "transition, restarting", __FUNCTION__, orig); ccm_all_restart(hb, info, reply); break; } ccm_debug2(LOG_DEBUG, "considering a late join message " "from orig=%s", orig); /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, FALSE); ccm_memcomp_add(info, orig); break; case CCM_TYPE_TIMEOUT: if (ccm_memcomp_timeout(info, CCM_TMOUT_GET_IFF(info))) { /* we waited long for membership response * from all nodes, stop waiting and send * final membership list */ ccm_compute_and_send_final_memlist(hb, info); } break; case CCM_TYPE_REQ_MEMLIST: /* if this is my own message just forget it */ if(strncmp(orig, llm_get_mynodename(&info->llm), NODEIDSIZE) == 0){ if(llm_get_live_nodecount(&info->llm) == 1){ ccm_log(LOG_INFO, "%s: directly call" "ccm_compute_and_send_final_memlist()", __FUNCTION__); ccm_compute_and_send_final_memlist(hb, info); } break; } /* whoever is requesting memlist from me thinks it is * the leader. Hmm....., we will send it a NULL memlist. * In partitioned network case both of us can be * leaders. Right? */ repeat = 0; while (ccm_send_memlist_res(hb, info, orig, NULL) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } break; case CCM_TYPE_RES_MEMLIST: /* mark that this node has sent us a memlist reply. * Calculate the membership list with this new message */ if(trans_minorval != CCM_GET_MINORTRANS(info)){ break; } if(trans_majorval != CCM_GET_MAJORTRANS(info)) { ccm_log(LOG_WARNING, "%s: dropping CCM_TYPE_RES_MEMLIST " "from orig=%s mymajor=%d msg_major=%d", __FUNCTION__, orig, trans_majorval, CCM_GET_MAJORTRANS(info)); break; } if ((memlist = ha_msg_value(reply, CCM_MEMLIST)) == NULL) { ccm_debug(LOG_WARNING, "%s: no memlist ", __FUNCTION__); break; } /* get the max transition version */ if (!(trans = ha_msg_value(reply, CCM_MAXTRANS))) { ccm_debug(LOG_WARNING, "%s: no max transition " "information %s, type=%d", __FUNCTION__, orig, ccm_msg_type); return; } trans_maxval = atoi(trans); ccm_memcomp_note(info, orig, trans_maxval, memlist); if (ccm_memcomp_rcvd_all(info)) { ccm_compute_and_send_final_memlist(hb,info); } break; case CCM_TYPE_LEAVE: /* since we are waiting for a memlist from all the * members who have sent me a join message, we * should be waiting for their message or their * leave message atleast. */ /* if this node had not participated in the update * exchange than just neglect it */ if(!update_is_node_updated(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig)) { break; } /* if this node had sent a memlist before dying, * reset its memlist information */ ccm_memcomp_note(info, orig, 0, ""); if (ccm_memcomp_rcvd_all(info)) { ccm_compute_and_send_final_memlist(hb, info); } break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; case CCM_TYPE_FINAL_MEMLIST: case CCM_TYPE_ABORT: default: ccm_log(LOG_ERR, "%s: dropping message of type %s. Is this " "a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. 
Just forget the message */ break; } } /* */ /* the state machine that processes messages when it is in the */ /* CCM_STATE_MEMLIST_RES state. */ /* */ static void ccm_state_memlist_res(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime, *memlist, *cookie, *cl; uint trans_majorval=0, trans_minorval=0, trans_maxval=0; uint uptime_val; uint curr_major, curr_minor; int indx; int repeat; int quorum; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type == CCM_TYPE_PROTOVERSION || ccm_msg_type == CCM_TYPE_STATE_INFO || ccm_msg_type == CCM_TYPE_RESTART) { goto switchstatement; } if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major information", __FUNCTION__); return; } trans_majorval = atoi(trans); /* drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received CCM_TYPE_JOIN message with" "a earlier major transition number", __FUNCTION__); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor information", __FUNCTION__); return; } trans_minorval = atoi(trans); switchstatement: switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION_RESP: ccm_debug(LOG_WARNING, "%s: dropping message" " of type %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); break; case CCM_TYPE_PROTOVERSION: /* * cache this request. We will respond to it, if we * become the leader. */ ccm_add_new_joiner(info, orig, reply); break; case CCM_TYPE_JOIN: /* * This could have happened because the leader died * and somebody noticed this and sent us this request. * In such a case the minor transition number should * have incremented. Or * This could have happened because the leader's * FINAL_MEMLIST * has not reach us, whereas it has reached somebody * else, and since that somebody saw a change in * membership, initiated another join protocol. * In such a case the major transition * number should have incremented. */ /* * if major number is incremented, send an abort message * to the sender. The sender must resend the message. 
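 *
 * If instead only the minor number is incremented, the code below
 * restarts the join round locally: the update table is rebuilt with
 * the sender's entry, the new minor transition is adopted, our own
 * JOIN is broadcast and we move back to CCM_STATE_JOINING.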
*/ if (trans_majorval > CCM_GET_MAJORTRANS(info)) { repeat = 0; while (ccm_send_abort(hb, info, orig, trans_majorval, trans_minorval) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send abort", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } break; } /* if minor transition number is incremented, * reset uptable table and start a join protocol */ if (trans_minorval > CCM_GET_MINORTRANS(info)) { /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); update_reset(CCM_GET_UPDATETABLE(info)); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); } break; case CCM_TYPE_REQ_MEMLIST: /* there are two reasons that can bring us here * 1. Because some other node still thinks he is * the master,(though we dont think so). Send * a NULL membership list to him immidiately. * 2. Because of byzantine failures, though we have * not received the membership list in the last * round. We have waited to such an exent that some * node already thinks he is the master of the * the new group transition. Well, there is something * seriously wrong with us. We will send a leave * message to everybody and say good bye. And we * will start all fresh! */ if (trans_minorval == CCM_GET_MINORTRANS(info)) { repeat = 0; while (ccm_send_memlist_res(hb, info, orig, NULL) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } break; } break; case CCM_TYPE_TIMEOUT: /* If we have waited too long for the leader to respond * just assume that the leader is dead and start over * a new round of the protocol */ if(!finallist_timeout(CCM_TMOUT_GET_FL(info))) { break; } update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } finallist_reset(); ccm_set_state(info, CCM_STATE_JOINING, reply); break; case CCM_TYPE_LEAVE: /* * If this message is because of loss of connectivity * with the node which we think is the master, then * restart the join. Loss of anyother node should be * confirmed by the finalmemlist of the master. */ cl = update_get_cl_name(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info)); if(strncmp(cl, orig, NODEIDSIZE) == 0) { /* increment the current minor transition value * and resend the join message */ update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } finallist_reset(); ccm_set_state(info, CCM_STATE_JOINING, reply); } break; case CCM_TYPE_FINAL_MEMLIST: /* WOW we received the membership list from the master. * Check if I am part of the membership list. 
If not, * voluntarily leave the cluster and start all over * again */ cl = update_get_cl_name(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info)); if(strncmp(cl, orig, NODEIDSIZE) != 0) { /* received memlist from a node we do not * think is the leader. We just reject the * message and wait for a message from the * our percieved master */ ccm_debug(LOG_WARNING, "%s: received final memlist from " "non-master,neglecting", __FUNCTION__); break; } /* * confirm that the major transition and minor * transition version match */ curr_major = CCM_GET_MAJORTRANS(info); curr_minor = CCM_GET_MINORTRANS(info); if(curr_major != trans_majorval || curr_minor != trans_minorval){ ccm_debug(LOG_WARNING, "%s: " "received final memlist from master, " "but transition versions do not match: " "rejecting the message", __FUNCTION__); break; } if ((memlist = ha_msg_value(reply, CCM_MEMLIST)) == NULL) { ccm_debug(LOG_WARNING, "%s: no membership list ", __FUNCTION__); return; } if ((trans = ha_msg_value(reply, CCM_MAXTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no membership list ", __FUNCTION__); return; } trans_maxval = atoi(trans); if (ha_msg_value_int (reply, CCM_QUORUM, &quorum)==HA_OK){ info->has_quorum = quorum; } else { info->has_quorum = -1; } if (!am_i_member_in_memlist(info, memlist)) { ccm_reset(info); break; } ccm_mem_strfill(info, (const char *)memlist); /* increment the major transition number and reset the * minor transition number */ CCM_SET_MAJORTRANS(info, trans_maxval); CCM_RESET_MINORTRANS(info); /* check if leader has changed the COOKIE, this can * happen if the leader sees a partitioned group */ if ((cookie = ha_msg_value(reply, CCM_NEWCOOKIE)) != NULL) { ccm_debug2(LOG_DEBUG, "%s: leader changed cookie ", __FUNCTION__); CCM_SET_COOKIE(info, cookie); } indx = llm_get_index(&info->llm, cl); assert(indx != -1); CCM_SET_CL(info, indx); report_mbrs(info); /* call before update_reset */ /* update_reset(CCM_GET_UPDATETABLE(info));*/ finallist_reset(); ccm_set_state(info, CCM_STATE_JOINED, reply); ccm_reset_all_join_request(info); if(!ccm_already_joined(info)) CCM_SET_JOINED_TRANSITION(info, CCM_GET_MAJORTRANS(info)); break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; case CCM_TYPE_ABORT: case CCM_TYPE_RES_MEMLIST: default: ccm_log(LOG_ERR, "%s: dropping message of type %s. " "Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. Just forget the message */ break; } } /* */ /* the state machine that processes messages when it is in the */ /* CCM_STATE_JOINING state. 
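 *
 * In this state every node broadcasts JOIN messages carrying its
 * uptime and collects the JOINs of the other live nodes in the update
 * table.  When the table is complete, or the update timeout expires, a
 * leader is chosen with update_am_i_leader(): the leader multicasts a
 * memlist request and moves to CCM_STATE_SENT_MEMLISTREQ, while the
 * other nodes answer the leader they believe in via ccm_send_cl_reply()
 * and move to CCM_STATE_MEMLIST_RES.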
*/ /* */ static void ccm_state_joining(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime; uint trans_majorval=0, trans_minorval=0; uint uptime_val; int repeat; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message " "from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type == CCM_TYPE_PROTOVERSION || ccm_msg_type == CCM_TYPE_STATE_INFO || ccm_msg_type == CCM_TYPE_RESTART) { goto switchstatement; } if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ if(ccm_msg_type == CCM_TYPE_PROTOVERSION_RESP) { version_inc_nresp(CCM_GET_VERSION(info)); ccm_debug(LOG_WARNING, "%s: received message " "incrementing versionresp counter %d", __FUNCTION__, version_get_nresp(CCM_GET_VERSION(info))); } ccm_debug(LOG_WARNING, "%s: received message " "with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major information", __FUNCTION__); return; } trans_majorval = atoi(trans); /* drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received CCM_TYPE_JOIN message with" "a earlier major transition number", __FUNCTION__); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor information", __FUNCTION__); return; } trans_minorval = atoi(trans); if (trans_minorval < CCM_GET_MINORTRANS(info)) { return; } switchstatement: switch (ccm_msg_type) { case CCM_TYPE_PROTOVERSION_RESP: /* If we were joined in an earlier iteration, then this * message should not have arrived. A bug in the logic! */ if(ccm_already_joined(info)) { ccm_debug(LOG_WARNING, "%s: BUG:" " received CCM_TYPE_PROTOVERSION_RESP " "message when we have not asked for " "it ", __FUNCTION__); break; } ccm_debug(LOG_WARNING, "%s: dropping message " " of type %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); break; case CCM_TYPE_PROTOVERSION: /* * cache this request. We will respond to it, * if we become the leader. */ ccm_add_new_joiner(info, orig, reply); break; case CCM_TYPE_JOIN: /* get the update value */ if((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* * note down all the information contained in the * message There is a possibility that I am the leader, * if all the nodes died, and I am the only surviving * node! If this message has originated from me, * note down the current time. This information is * needed, to later recognize that I am the only * surviving node. 
*/ /* update the minor transition number if it is of * higher value * and send a fresh JOIN message */ if (trans_minorval > CCM_GET_MINORTRANS(info)) { update_reset(CCM_GET_UPDATETABLE(info)); update_add( CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } } else { /* update the update table */ update_add( CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); /* if all nodes have responded, its time * to elect the leader */ if (UPDATE_GET_NODECOUNT( CCM_GET_UPDATETABLE(info)) == llm_get_live_nodecount(&info->llm)) { /* check if I am the leader */ if (update_am_i_leader( CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info))) { /* send out the * membershiplist request */ repeat = 0; while(ccm_send_memlist_request( hb, info)!=HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send memlist request", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_memcomp_init(info); ccm_memcomp_note_my_membership( info); ccm_set_state(info, CCM_STATE_SENT_MEMLISTREQ, reply); } else { /* check if we have already * received memlist request * from any node(which * believes itself to be the * leader) * If so,we have to reply to * them with our membership * list. But there is a catch. * If we do not think the * requestor to be the leader, * then we send it an null * membership message! */ if (ccm_send_cl_reply(hb,info) == TRUE) { finallist_init(); ccm_set_state(info, CCM_STATE_MEMLIST_RES, reply); } } break; /* done all processing */ } } break; case CCM_TYPE_REQ_MEMLIST: /* well we have not yet timedout! And a memlist * request has arrived from the cluster leader. Hmm... * We should wait till timeout, to respond. * * NOTE: there is a chance * that more than one cluster leader might request * the membership list. Due to cluster partitioning :( ) */ /* If we have received CCM_TYPE_JOIN from all nodes * we don't need wait for timeout here. */ update_add_memlist_request(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, trans_majorval); if (UPDATE_GET_NODECOUNT( CCM_GET_UPDATETABLE(info)) == llm_get_live_nodecount(&info->llm) && !update_am_i_leader(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info))) { if (ccm_send_cl_reply(hb,info) == TRUE) { finallist_init(); ccm_set_state(info, CCM_STATE_MEMLIST_RES, reply); break; } } /* * FALL THROUGH */ case CCM_TYPE_TIMEOUT: /* * If timeout expired, elect the leader. * If I am the leader, send out the membershiplist request */ if (!update_timeout_expired(CCM_GET_UPDATETABLE(info), CCM_TMOUT_GET_U(info))) { break; } if (update_am_i_leader(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info))) { /* if I am the only one around go directly * to joined state. 
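 *
 * This shortcut requires the update table to hold a single entry; it
 * then goes straight to the joined state only if we were already a
 * member before, or nobody ever answered our protocol version request,
 * and calls ccm_reset() otherwise.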
*/ if (UPDATE_GET_NODECOUNT( CCM_GET_UPDATETABLE(info)) == 1) { if(ccm_already_joined(info) || !version_get_nresp( CCM_GET_VERSION(info))){ ccm_joining_to_joined(hb, info); } else { ccm_reset(info); } break; } /* send out the membershiplist request */ repeat = 0; while (ccm_send_memlist_request(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send memlist request", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_memcomp_init(info); ccm_memcomp_note_my_membership(info); ccm_set_state(info, CCM_STATE_SENT_MEMLISTREQ, reply); } else { /* check if we have already received memlist * request from any node(which believes itself * to be the leader) * If so,we have to reply to them with our * membership list. But there is a catch. * If we do not think the * requestor to be the leader, then we send * it an abort message! */ if (ccm_send_cl_reply(hb, info) == TRUE) { /* free the update data*/ finallist_init(); ccm_set_state(info, CCM_STATE_MEMLIST_RES, reply); } } break; case CCM_TYPE_ABORT: /* * This is a case where my JOIN request is not honoured * by the recieving host(probably because it is waiting * on some message, before which it cannot initiate * the join). * We will resend the join message, incrementing the * minor version number, provided this abort is * requested * for this minor version. */ if(trans_majorval != CCM_GET_MAJORTRANS(info) || trans_minorval != CCM_GET_MINORTRANS(info)) { /* nothing to worry just forget this message */ break; } /* increment the current minor transition value * and resend the join message */ CCM_INCREMENT_MINORTRANS(info); update_reset(CCM_GET_UPDATETABLE(info)); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_log(LOG_ERR, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } break; case CCM_TYPE_LEAVE: /* * Has that node already sent a valid update message * before death. If so, remove him from the update * table. */ update_remove(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig); /* if we have any cached version-request from this node * we will get rid of that too */ ccm_remove_new_joiner(info, orig); break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; case CCM_TYPE_RES_MEMLIST: case CCM_TYPE_FINAL_MEMLIST: /* this message is from other partitions*/ ccm_debug(LOG_WARNING, "%s: received a %s message", __FUNCTION__, ccm_type2string(ccm_msg_type)); ccm_debug(LOG_WARNING, "We probably have different partitions"); break; default: ccm_log(LOG_ERR, "%s: dropping message " "of type %s from %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type), orig); /* nothing to do. Just forget the message */ break; } return; } static void ccm_control_init(ccm_info_t *info) { ccm_init(info); /* if this is the only active node in the cluster, go to the JOINED state */ if (llm_get_live_nodecount(CCM_GET_LLM(info)) == 1) { ccm_init_to_joined(info); } else { ccm_set_state(info, CCM_STATE_NONE, NULL); } return; } /* */ /* The callback function which is called when the status of a link */ /* changes. */ /* */ static void LinkStatus(const char * node, const char * lnk, const char * status , void * private) { ccm_debug2(LOG_DEBUG, "Link Status update: Link %s/%s " "now has status %s", node, lnk, status); } /* */ /* The most important function which tracks the state machine. 
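 *
 * ccm_need_control() and ccm_take_control() below are the hooks polled
 * by the heartbeat event loop: control is requested whenever a leave
 * event is pending or this node is not in CCM_STATE_JOINED, and
 * ccm_take_control() then runs ccm_control_process() over the pending
 * messages.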
*/ /* */ /* look at the current state machine and decide if */ /* the state machine needs immidiate control for further */ /* state machine processing. Called by the check function */ /* of heartbeat-source of the main event loop. */ int ccm_need_control(void *data) { ccm_info_t *info = (ccm_info_t *)((ccm_t *)data)->info; if(leave_any() || CCM_GET_STATE(info) != CCM_STATE_JOINED){ return TRUE; } return FALSE; } /* look at the current state machine and decide if */ /* the state machine needs immidiate control for further */ /* state machine processing. Called by the check function */ /* of heartbeat-source of the main event loop. */ int ccm_take_control(void *data) { ccm_info_t *info = (ccm_info_t *)((ccm_t *)data)->info; ll_cluster_t *hbfd = (ll_cluster_t *)((ccm_t *)data)->hbfd; return ccm_control_process(info, hbfd); } IPC_Channel * ccm_get_ipcchan(void *data) { ll_cluster_t *hbfd = (ll_cluster_t *)((ccm_t *)data)->hbfd; return hbfd->llc_ops->ipcchan(hbfd); } #define PINGNODE "ping" static int set_llm_from_heartbeat(ll_cluster_t* llc, ccm_info_t* info){ llm_info_t* llm = &info->llm; struct llc_ops* ops = llc->llc_ops; const char* status; const char* node; const char* mynode = ops->get_mynodeid(llc); const char* cluster; const char* quorum_server; const char* site; int weight; if (mynode == NULL){ ccm_log(LOG_ERR, "%s: mynode is NULL", __FUNCTION__); return HA_FAIL; } ccm_debug2(LOG_DEBUG, "==== Starting Node Walk ========="); if (ops->init_nodewalk(llc) != HA_OK) { ccm_log(LOG_ERR, "Cannot start node walk"); ccm_log(LOG_ERR, "REASON: %s", ops->errmsg(llc)); return HA_FAIL; } llm = CCM_GET_LLM(info); llm_init(llm); memset(info->cluster, 0, sizeof(info->cluster)); cluster = llc->llc_ops->get_parameter(llc, KEY_CLUSTER); if (cluster != NULL) { strncpy(info->cluster, cluster, PATH_MAX); } memset(info->quorum_server, 0, sizeof(info->quorum_server)); quorum_server = llc->llc_ops->get_parameter(llc, KEY_QSERVER); if (quorum_server != NULL) { strncpy(info->quorum_server, quorum_server, PATH_MAX); } while((node = ops->nextnode(llc)) != NULL) { if (strcmp(ops->node_type(llc, node), PINGNODE)==0){ continue; } status = ops->node_status(llc, node); site = ops->node_site(llc, node); weight = ops->node_weight(llc, node); ccm_debug2(LOG_DEBUG, "Cluster node: %s: status: %s", node, status); if (llm_add(llm, node, status, mynode, site, weight)!= HA_OK){ ccm_log(LOG_ERR, "%s: adding node %s to llm failed", __FUNCTION__, node); return HA_FAIL; } } llm_display(llm); if (ops->end_nodewalk(llc) != HA_OK) { ccm_log(LOG_ERR, "Cannot end node walk"); ccm_log(LOG_ERR, "REASON: %s", ops->errmsg(llc)); return HA_FAIL; } ccm_debug2(LOG_DEBUG, "======= Ending Node Walk =========="); ccm_debug2(LOG_DEBUG, "Total # of Nodes in the Cluster: %d", llm_get_nodecount(llm)); return HA_OK; } ccm_info_t* ccm_info_saved = NULL; ll_cluster_t* hb_fd_saved = NULL; void * ccm_initialize() { unsigned fmask; const char * hname; ccm_info_t *global_info = NULL; ll_cluster_t* hb_fd; ccm_t *ccmret = NULL; const char * parameter; ccm_debug2(LOG_DEBUG, "========================== Starting CCM ====" "======================"); CL_SIGINTERRUPT(SIGTERM, 1); cl_inherit_logging_environment(0); hb_fd = ll_cluster_new("heartbeat"); ccm_debug(LOG_DEBUG, "Signing in with Heartbeat"); if (hb_fd->llc_ops->signon(hb_fd, "ccm")!= HA_OK) { ccm_log(LOG_ERR, "Cannot sign on with heartbeat"); ccm_log(LOG_ERR, "REASON: %s", hb_fd->llc_ops->errmsg(hb_fd)); goto errout; } /* See if we should drop cores somewhere odd... 
*/ parameter = hb_fd->llc_ops->get_parameter(hb_fd, KEY_COREROOTDIR); if (parameter) { cl_set_corerootdir(parameter); } cl_cdtocoredir(); if((global_info = (ccm_info_t *)g_malloc(sizeof(ccm_info_t))) == NULL){ ccm_log(LOG_ERR, "Cannot allocate memory "); goto errout; } memset(global_info, 0, sizeof(ccm_info_t)); if((ccmret = (ccm_t *)g_malloc(sizeof(ccm_t))) == NULL){ ccm_log(LOG_ERR, "Cannot allocate memory"); goto errout; } if((hname = hb_fd->llc_ops->get_mynodeid(hb_fd)) == NULL) { ccm_log(LOG_ERR, "get_mynodeid() failed"); goto errout; } ccm_log(LOG_INFO, "Hostname: %s", hname); if (hb_fd->llc_ops->set_ifstatus_callback(hb_fd, LinkStatus, NULL) !=HA_OK){ ccm_log(LOG_ERR, "Cannot set if status callback"); ccm_log(LOG_ERR, "REASON: %s", hb_fd->llc_ops->errmsg(hb_fd)); goto errout; } fmask = LLC_FILTER_DEFAULT; if (hb_fd->llc_ops->setfmode(hb_fd, fmask) != HA_OK) { ccm_log(LOG_ERR, "Cannot set filter mode"); ccm_log(LOG_ERR, "REASON: %s", hb_fd->llc_ops->errmsg(hb_fd)); goto errout; } /* we'll benefit from a bigger queue length on heartbeat side. * Otherwise, if peers send messages faster than we can consume * them right now, heartbeat messaging layer will kick us out once * it's (small) default queue fills up :( * If we fail to adjust the sendq length, that's not yet fatal, though. */ if (HA_OK != hb_fd->llc_ops->set_sendq_len(hb_fd, 1024)) { ccm_log(LOG_WARNING, "Cannot set sendq length: %s", hb_fd->llc_ops->errmsg(hb_fd)); } if (set_llm_from_heartbeat(hb_fd, global_info) != HA_OK){ goto errout; } ccm_control_init(global_info); ccm_configure_timeout(hb_fd, global_info); ccmret->info = global_info; ccmret->hbfd = hb_fd; client_llm_init(&global_info->llm); ccm_info_saved = global_info; hb_fd_saved = hb_fd; return (void*)ccmret; errout: if (ccmret){ g_free(ccmret); ccmret = NULL; } if (global_info){ g_free(global_info); global_info = NULL; } return NULL; } static void add_change_msg(ccm_info_t *info, const char *node, const char *orig, enum change_event_type type) { strlcpy(info->change_node_id, node, sizeof(info->change_node_id)); info->change_type = type; if(type == NODE_LEAVE){ info->change_event_remaining_count = ccm_get_memcount(info)-1; }else{ info->change_event_remaining_count = ccm_get_memcount(info); } append_change_msg(info, orig); return; } static void append_change_msg(ccm_info_t *info, const char *node) { if (CCM_GET_RECEIVED_CHANGE_MSG(info, node) == 0){ CCM_SET_RECEIVED_CHANGE_MSG(info, node, 1); info->change_event_remaining_count--; } return; } static int received_all_change_msg(ccm_info_t *info) { if(info->change_event_remaining_count == 0){ return 1; }else{ return 0; } } static int is_expected_change_msg(ccm_info_t *info, const char *node,enum change_event_type type) { if(strcmp(info->change_node_id, node) == 0){ if(info->change_type == type){ return 1; } } return 0; } static void ccm_state_wait_for_mem_list(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime, *cookie, *memlist; int uptime_list[MAXNODE]; size_t uptime_size = MAXNODE; uint trans_majorval=0,trans_minorval=0, uptime_val; uint curr_major, curr_minor; int repeat; int quorum; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type != CCM_TYPE_PROTOVERSION && ccm_msg_type != CCM_TYPE_STATE_INFO && 
ccm_msg_type != CCM_TYPE_RESTART) { if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message" " with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major information", __FUNCTION__); return; } trans_majorval = atoi(trans); /* drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received %s message with " "a earlier major transition number " "recv_trans=%d, mytrans=%d", __FUNCTION__, ccm_type2string(ccm_msg_type), trans_majorval, CCM_GET_MAJORTRANS(info)); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor information", __FUNCTION__); return; } trans_minorval = atoi(trans); } switch(ccm_msg_type){ case CCM_TYPE_MEM_LIST: curr_major = CCM_GET_MAJORTRANS(info); curr_minor = CCM_GET_MINORTRANS(info); if(curr_major != trans_majorval || curr_minor != trans_minorval){ ccm_debug(LOG_WARNING, "%s: " "received final memlist from master, " "but transition versions do not match: " "rejecting the message", __FUNCTION__); break; } if ((memlist = ha_msg_value(reply, CCM_MEMLIST)) == NULL) { ccm_debug(LOG_WARNING, "%s: no membership list ", __FUNCTION__); return; } if (cl_msg_get_list_int(reply,CCM_UPTIMELIST, uptime_list, &uptime_size) != HA_OK){ ccm_log(LOG_ERR,"%s: geting uptie_list failed", __FUNCTION__); return; } if (ha_msg_value_int (reply, CCM_QUORUM, &quorum)==HA_OK){ info->has_quorum = quorum; } else { info->has_quorum = -1; } ccm_mem_strfill(info, (const char *)memlist); CCM_SET_MAJORTRANS(info, curr_major+1); CCM_RESET_MINORTRANS(info); if ((cookie = ha_msg_value(reply, CCM_NEWCOOKIE)) != NULL) { ccm_debug2(LOG_DEBUG, "%s: leader changed cookie ", __FUNCTION__); CCM_SET_COOKIE(info, cookie); } CCM_SET_CL(info, llm_get_index(&info->llm,orig)); ccm_fill_update_table(info, CCM_GET_UPDATETABLE(info), uptime_list); report_mbrs(info); ccm_set_state(info, CCM_STATE_JOINED, reply); break; case CCM_TYPE_TIMEOUT: if (mem_list_timeout(CCM_TMOUT_GET_U(info))){ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); } break; case CCM_TYPE_JOIN: /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* update the minor transition number if it is of * higher value and send a fresh JOIN message */ if (trans_minorval < CCM_GET_MINORTRANS(info)) { ccm_log(LOG_WARNING, "%s: got a join message from %s from earlier " "transition, restarting", __FUNCTION__, orig); ccm_all_restart(hb, info, reply); break; } update_reset(CCM_GET_UPDATETABLE(info)); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, 
CCM_STATE_JOINING, reply); break; case CCM_TYPE_LEAVE: /* if the dead node is leader, jump to CCM state machine */ if(node_is_leader(info, orig)){ update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); return; } case CCM_TYPE_ALIVE: /* We do nothing here because we believe leader * will deal with this LEAVE message. SPOF? */ break; case CCM_TYPE_PROTOVERSION: /* leader will handle this message * we can safely ignore it */ break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; default: ccm_log(LOG_ERR, "%s: dropping message " "of type %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. Just forget the message */ break; } } static void reset_change_info(ccm_info_t *info) { llm_info_t *llm = CCM_GET_LLM(info); unsigned i; for(i=0; imemcount; for (i=0; iccm_member[i]; } for (i=0; i tmp_mem[j+1]){ tmp = tmp_mem[j]; tmp_mem[j] = tmp_mem[j+1]; tmp_mem[j+1] = tmp; } } } for ( i = 0 ; i < size ; i++ ) { bitmap_mark(info->ccm_member[i], bitmap, MAXNODE); uptime[i] = htonl(update_get_uptime(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), tmp_mem[i])); } strsize = ccm_bitmap2str(bitmap, memlist, MAX_MEMLIST_STRING); bitmap_delete(bitmap); ccm_send_to_all(hb, info, memlist, cookie, uptime, size); return; } static void ccm_state_new_node_wait_for_mem_list(enum ccm_type ccm_msg_type, struct ha_msg *reply, ll_cluster_t *hb, ccm_info_t *info) { const char *orig, *trans, *uptime, *memlist, *cookie; int uptime_list[MAXNODE]; size_t uptime_size = MAXNODE; uint trans_majorval=0,trans_minorval=0, uptime_val; uint curr_major, curr_minor; int repeat; int ret; int quorum; if ((orig = ha_msg_value(reply, F_ORIG)) == NULL) { ccm_debug(LOG_WARNING, "%s: received message from unknown", __FUNCTION__); return; } if(!llm_is_valid_node(CCM_GET_LLM(info), orig)) { ccm_debug(LOG_WARNING, "%s: received message from unknown host %s", __FUNCTION__, orig); return; } if(ccm_msg_type != CCM_TYPE_PROTOVERSION && ccm_msg_type != CCM_TYPE_STATE_INFO && ccm_msg_type != CCM_TYPE_RESTART) { if(strncmp(CCM_GET_COOKIE(info), ha_msg_value(reply, CCM_COOKIE), COOKIESIZE) != 0){ ccm_debug(LOG_WARNING, "%s: received message with unknown cookie, just dropping", __FUNCTION__); return; } /* get the major transition version */ if ((trans = ha_msg_value(reply, CCM_MAJORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition major information", __FUNCTION__); return; } trans_majorval = atoi(trans); /*drop the message if it has lower major transition number */ if (CCM_TRANS_EARLIER(trans_majorval, CCM_GET_MAJORTRANS(info))) { ccm_debug(LOG_WARNING, "%s: received" " %s message with " "a earlier major transition number " "recv_trans=%d, mytrans=%d", __FUNCTION__, ccm_type2string(ccm_msg_type), trans_majorval, CCM_GET_MAJORTRANS(info)); return; } /* get the minor transition version */ if ((trans = ha_msg_value(reply, CCM_MINORTRANS)) == NULL) { ccm_debug(LOG_WARNING, "%s: no transition minor information", __FUNCTION__); return; } trans_minorval = atoi(trans); } switch(ccm_msg_type){ case CCM_TYPE_MEM_LIST: curr_major = CCM_GET_MAJORTRANS(info); curr_minor = CCM_GET_MINORTRANS(info); if(curr_major != trans_majorval || curr_minor != trans_minorval){ 
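/* the final membership list is only acceptable for the exact
 * transition we are waiting on; anything else is rejected below */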
ccm_debug(LOG_WARNING, "%s: received final memlist from master, " "but transition versions do not match: " "rejecting the message", __FUNCTION__); break; } if ((memlist = ha_msg_value(reply, CCM_MEMLIST)) == NULL) { ccm_debug(LOG_WARNING, "%s: no membership list ", __FUNCTION__); return; } if (cl_msg_get_list_int(reply,CCM_UPTIMELIST, uptime_list, &uptime_size) != HA_OK){ ccm_log(LOG_ERR,"%s: geting uptie_list failed", __FUNCTION__); return; } ret = ccm_mem_strfill(info, (const char *)memlist); if (ret != HA_OK){ ccm_log(LOG_ERR, "%s: filling membership from string failed", __FUNCTION__); return; } ret = ccm_mem_filluptime(info, uptime_list, uptime_size); if (ret != HA_OK){ ccm_log(LOG_ERR, "%s: filling uptime failed", __FUNCTION__); return; } if (ha_msg_value_int (reply, CCM_QUORUM, &quorum)==HA_OK){ info->has_quorum = quorum; } else { info->has_quorum = -1; } if (i_am_member(info) == FALSE){ version_reset(CCM_GET_VERSION(info)); ccm_set_state(info, CCM_STATE_NONE, reply); ccm_reset_all_join_request(info); break; } CCM_SET_MAJORTRANS(info, curr_major+1); CCM_RESET_MINORTRANS(info); if ((cookie = ha_msg_value(reply, CCM_NEWCOOKIE)) != NULL) { ccm_debug2(LOG_DEBUG, "%s: leader changed cookie ", __FUNCTION__); CCM_SET_COOKIE(info, cookie); } CCM_SET_CL(info,llm_get_index(&info->llm, orig)); CCM_SET_JOINED_TRANSITION(info, CCM_GET_MAJORTRANS(info)); ccm_fill_update_table(info, CCM_GET_UPDATETABLE(info), uptime_list); ccm_set_state(info, CCM_STATE_JOINED, reply); report_mbrs(info); break; case CCM_TYPE_TIMEOUT: if (new_node_mem_list_timeout(CCM_TMOUT_GET_U(info))){ update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); } break; case CCM_TYPE_JOIN: /* get the update value */ if ((uptime = ha_msg_value(reply, CCM_UPTIME)) == NULL){ ccm_debug(LOG_WARNING, "%s: no update information", __FUNCTION__); return; } uptime_val = atoi(uptime); /* update the minor transition number if it is of * higher value and send a fresh JOIN message */ if (trans_minorval < CCM_GET_MINORTRANS(info)) { ccm_log(LOG_WARNING, "%s: got a join message from %s from earlier " "transition, restarting", __FUNCTION__, orig); ccm_all_restart(hb, info, reply); break; } update_reset(CCM_GET_UPDATETABLE(info)); update_add(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), orig, uptime_val, TRUE); CCM_SET_MINORTRANS(info, trans_minorval); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); break; case CCM_TYPE_LEAVE: /* if the dead node is leader, jump to CCM state machine */ if(node_is_leader(info, orig)){ update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); repeat = 0; while (ccm_send_join(hb, info) != HA_OK) { if(repeat < REPEAT_TIMES){ ccm_debug(LOG_WARNING, "%s: failure to send join", __FUNCTION__); cl_shortsleep(); repeat++; }else{ break; } } ccm_set_state(info, CCM_STATE_JOINING, reply); } case CCM_TYPE_ALIVE: /* We do nothing here because we believe leader * will deal with this LEAVE message. SPOF? 
*/ break; case CCM_TYPE_PROTOVERSION: /* we are waiting for the leader for membership list * it's ok if someone want to join -- just ignore * the message and let the leader handl it */ break; case CCM_TYPE_PROTOVERSION_RESP: break; case CCM_TYPE_STATE_INFO: ccm_handle_state_info(hb, info, reply); break; case CCM_TYPE_RESTART: ccm_all_restart(hb, info, reply); break; default: ccm_log(LOG_ERR,"%s: dropping message" " of type %s. Is this a Byzantine failure?", __FUNCTION__, ccm_type2string(ccm_msg_type)); /* nothing to do. Just forget the message */ break; } } static void ccm_fill_update_table(ccm_info_t *info, ccm_update_t *update_table, const void *uptime_list) { const int *uptime; int i; uptime = (const int *)uptime_list; UPDATE_SET_NODECOUNT(update_table, info->memcount); for (i = 0; i< info->memcount; i++){ update_table->update[i].index = info->ccm_member[i]; update_table->update[i].uptime = ntohl(uptime[i]); } return; } int jump_to_joining_state(ll_cluster_t *hb, ccm_info_t *info, struct ha_msg* msg){ reset_change_info(info); update_reset(CCM_GET_UPDATETABLE(info)); CCM_INCREMENT_MINORTRANS(info); if (ccm_send_join(hb, info) != HA_OK){ ccm_log(LOG_ERR, "sending joining message failed"); return HA_FAIL; } ccm_set_state(info, CCM_STATE_JOINING, msg); return HA_OK; } state_msg_handler_t state_msg_handler[]={ ccm_state_none, ccm_state_version_request, ccm_state_joining, ccm_state_sent_memlistreq, ccm_state_memlist_res, ccm_state_joined, ccm_state_wait_for_mem_list, ccm_state_wait_for_change, ccm_state_new_node_wait_for_mem_list, }; static void dump_mbrs(ccm_info_t *info) { int i; const char *nodename; int leader; static struct born_s { int index; int bornon; } bornon[MAXNODE];/*avoid making it a stack variable*/ if(ccm_get_memcount(info)==1){ bornon[0].index = CCM_GET_MEMINDEX(info,0); bornon[0].bornon = CCM_GET_MAJORTRANS(info); } else for(i=0; i < ccm_get_memcount(info); i++){ bornon[i].index = CCM_GET_MEMINDEX(info,i); bornon[i].bornon = update_get_uptime(CCM_GET_UPDATETABLE(info), CCM_GET_LLM(info), CCM_GET_MEMINDEX(info,i)); if(bornon[i].bornon==0) bornon[i].bornon=CCM_GET_MAJORTRANS(info); } ccm_debug(LOG_DEBUG,"dump current membership %p", info); leader = info->ccm_cluster_leader; ccm_debug(LOG_DEBUG,"\tleader=%s" , leader < 0 ?"none": info->llm.nodes[leader].nodename); ccm_debug(LOG_DEBUG,"\ttransition=%d", CCM_GET_MAJORTRANS(info)); ccm_debug(LOG_DEBUG,"\tstatus=%s",state2string(info->state)); ccm_debug(LOG_DEBUG,"\thas_quorum=%d",info->has_quorum); for (i=0 ; i < ccm_get_memcount(info); i++) { nodename = llm_get_nodename(CCM_GET_LLM(info), CCM_GET_MEMINDEX(info,i)); ccm_debug(LOG_DEBUG,"\tnodename=%s bornon=%d", nodename, bornon[i].bornon); } return; } void ccm_on_quorum_changed(void) { ccm_debug(LOG_DEBUG,"quorum changed"); if (ccm_info_saved->state != CCM_STATE_JOINED) { ccm_debug(LOG_DEBUG,"we are not in CCM_STATE_JOINED, ignore"); return; } send_mem_list_to_all(hb_fd_saved, ccm_info_saved, ccm_info_saved->ccm_cookie); report_mbrs(ccm_info_saved); } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccm_testclient.c0000644000000000000000000000744611576626513023253 0ustar00usergroup00000000000000/* * ccm.c: A consensus cluster membership sample client * * Copyright (c) International Business Machines Corp., 2000 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later 
version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include static oc_ev_t *ev_token; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); static void my_ms_events(oc_ed_t event, void *cookie, size_t size, const void *data) { const oc_ev_membership_t *oc = (const oc_ev_membership_t *)data; uint i; int i_am_in; cl_log(LOG_INFO,"event=%s:", event==OC_EV_MS_NEW_MEMBERSHIP?"NEW MEMBERSHIP": event==OC_EV_MS_NOT_PRIMARY?"NOT PRIMARY": event==OC_EV_MS_PRIMARY_RESTORED?"PRIMARY RESTORED": event==OC_EV_MS_EVICTED?"EVICTED": "NO QUORUM MEMBERSHIP" ); if(OC_EV_MS_EVICTED == event) { oc_ev_callback_done(cookie); return; } cl_log(LOG_INFO,"instance=%d\n" "# ttl members=%d, ttl_idx=%d\n" "# new members=%d, new_idx=%d\n" "# out members=%d, out_idx=%d", oc->m_instance, oc->m_n_member, oc->m_memb_idx, oc->m_n_in, oc->m_in_idx, oc->m_n_out, oc->m_out_idx); i_am_in=0; cl_log(LOG_INFO, "NODES IN THE PRIMARY MEMBERSHIP"); for(i=0; im_n_member; i++) { cl_log(LOG_INFO,"\tnodeid=%d, uname=%s, born=%d", oc->m_array[oc->m_memb_idx+i].node_id, oc->m_array[oc->m_memb_idx+i].node_uname, oc->m_array[oc->m_memb_idx+i].node_born_on); if(oc_ev_is_my_nodeid(ev_token, &(oc->m_array[i]))){ i_am_in=1; } } if(i_am_in) { cl_log(LOG_INFO,"MY NODE IS A MEMBER OF THE MEMBERSHIP LIST"); } cl_log(LOG_INFO, "NEW MEMBERS"); if(oc->m_n_in==0) cl_log(LOG_INFO, "\tNONE"); for(i=0; im_n_in; i++) { cl_log(LOG_INFO,"\tnodeid=%d, uname=%s, born=%d", oc->m_array[oc->m_in_idx+i].node_id, oc->m_array[oc->m_in_idx+i].node_uname, oc->m_array[oc->m_in_idx+i].node_born_on); } cl_log(LOG_INFO, "MEMBERS LOST"); if(oc->m_n_out==0) cl_log(LOG_INFO, "\tNONE"); for(i=0; im_n_out; i++) { cl_log(LOG_INFO,"\tnodeid=%d, uname=%s, born=%d", oc->m_array[oc->m_out_idx+i].node_id, oc->m_array[oc->m_out_idx+i].node_uname, oc->m_array[oc->m_out_idx+i].node_born_on); } cl_log(LOG_INFO, "-----------------------"); oc_ev_callback_done(cookie); } int main(int argc, char *argv[]) { int ret; fd_set rset; int my_ev_fd; cl_log_set_entity(argv[0]); cl_log_enable_stderr(TRUE); cl_log_set_facility(LOG_USER); oc_ev_register(&ev_token); oc_ev_set_callback(ev_token, OC_EV_MEMB_CLASS, my_ms_events, NULL); oc_ev_special(ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); ret = oc_ev_activate(ev_token, &my_ev_fd); if(ret){ oc_ev_unregister(ev_token); return(1); } for (;;) { FD_ZERO(&rset); FD_SET(my_ev_fd, &rset); if(select(my_ev_fd + 1, &rset, NULL,NULL,NULL) == -1){ perror("select"); return(1); } if(oc_ev_handle_event(ev_token)){ cl_log(LOG_ERR,"terminating"); return(1); } } return 0; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmbitmap.c0000644000000000000000000000622711576626513022206 0ustar00usergroup00000000000000/* * ccmbitmap.c: functions that manipulate bitmaps * * Copyright (C) 2001 Aatash Patel * Copyright (C) 2001 Dr Xu * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /* bitmap.c */ /* Routines to manage a bitmap -- an array of bits each of which */ /* can be either on or off. Represented as an array of integers. */ #include #include #ifndef TRUE # define TRUE 1 # define FALSE 0 #endif /* Initialize a bitmap with "nitems" bits, so that every bit is clear. */ /* it can be added somewhere on a list. */ int bitmap_create(char **map, int numBits) { int i, numBytes; if (numBits%BitsInByte == 0) { numBytes = numBits/BitsInByte; } else { numBytes = numBits/BitsInByte+1; } *map = g_malloc(sizeof(char)*numBytes); assert(*map); for ( i = 0 ; i < numBytes; i++ ) (*map)[i] = 0; return(numBytes); } /* return the number of bytes required to represent numBits */ int bitmap_size(int numBits) { int numBytes; if (numBits%BitsInByte == 0) { numBytes = numBits/BitsInByte; } else { numBytes = numBits/BitsInByte+1; } return(numBytes); } /* delete bitmap */ void bitmap_delete(char *map) { g_free(map); } /* mark the which bit as set */ void bitmap_mark(int which, char *map, int numBits) { assert(which >= 0 && which < numBits); map[which / BitsInByte] |= 1 << (which % BitsInByte); } /* Clear the "which" bit in a bitmap. */ void bitmap_clear(int which, char *map, int numBits) { assert(which >= 0 && which < numBits); map[which / BitsInByte] &= ~(1 << (which % BitsInByte)); } /* Return TRUE if the "which" bit is set. */ int bitmap_test(int which, const char *map, int numBits) { assert(which >= 0 && which < numBits); if (map[which / BitsInByte] & (1 << (which % BitsInByte))) return(TRUE); else return(FALSE); } /* Return total number of bits already set */ int bitmap_count(const char *map, int numBits) { int count, i; count = 0; for (i = 0; i < numBits; i++) if (bitmap_test(i, map, numBits)) count++; return count; } /* Print the contents of the bitmap, for debugging. */ void bitmap_print(char *map, int numBits, char * comments) { int i; fprintf(stderr, "%s\n", comments); for (i = 0; i < numBits; i++) if (bitmap_test(i, map, numBits)) fprintf(stderr, "%d, ", i); fprintf(stderr, "\n"); } /* Reset the bitmap. */ void bitmap_reset(char *map, int numBits) { int i; for(i=0; i #include #include typedef struct ccm_client_s { int ccm_clid; int ccm_flags; struct IPC_CHANNEL *ccm_ipc_client; } ccm_client_t; #define CL_INIT 0x0 #define CL_LLM 0x1 #define CL_MEM 0x2 #define CL_ERROR 0x4 typedef struct ccm_ipc_s { int count; struct IPC_MESSAGE ipcmsg;/*this should be the last field*/ } ccm_ipc_t; static ccm_ipc_t *ipc_llm_message = NULL; /* active low level membership */ static ccm_ipc_t *ipc_mem_message = NULL; /* active membership */ static ccm_ipc_t *ipc_misc_message = NULL; /* active misc information */ #define MAXIPC 100 static gboolean membership_ready = FALSE; static void refresh_llm_msg(llm_info_t *llm); /* * the fully initialized clients. 
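* (keyed by IPC channel, one ccm_client_t per connected client;
* CCM_NEW_MEMBERSHIP, CCM_INFLUX and CCM_EVICTED notifications are
* fanned out to every entry via send_all()/send_func() below)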
*/ static GHashTable *ccm_hashclient = NULL; static void send_message(ccm_client_t *ccm_client, ccm_ipc_t *msg) { int send_rc; int do_warn = 0; struct IPC_CHANNEL *chan = ccm_client->ccm_ipc_client; ++(msg->count); do { send_rc = chan->ops->send(chan, &(msg->ipcmsg)); if (send_rc == IPC_OK) break; if (chan->ops->get_chan_status(chan) != IPC_CONNECT) { ccm_debug(LOG_WARNING, "Channel is dead. Cannot send message." " farside_pid=%u", chan->farside_pid); break; } if (10 == ++do_warn) { cl_log(LOG_WARNING, "ipc channel blocked, farside_pid=%u, reason: %s", chan->farside_pid, chan->failreason); } /* FIXME this can livelock, if a ccm client does not consume * its messages! If we want to block, why not set the channel * to blocking mode in the first place? */ cl_shortsleep(); } while(send_rc == IPC_FAIL); return; } static void send_func(gpointer key, gpointer value, gpointer user_data) { ccm_client_t *ccm_client = (ccm_client_t *)value; int msg_type = GPOINTER_TO_INT(user_data); switch (msg_type) { case CCM_EVICTED: if(ccm_client->ccm_flags == CL_MEM) { struct IPC_CHANNEL* chan = ccm_client->ccm_ipc_client; if (chan->ops->get_chan_status(chan) == IPC_CONNECT){ send_message(ccm_client, ipc_misc_message); }else { /* IPC is broken, the client is already gone * Do nothing */ } ccm_client->ccm_flags = CL_INIT; } break; case CCM_INFLUX: send_message(ccm_client, ipc_misc_message); break; case CCM_NEW_MEMBERSHIP: if(membership_ready) { send_message(ccm_client, ipc_llm_message); send_message(ccm_client, ipc_mem_message); } break; default: ccm_log(LOG_ERR, "send_func:unknown message"); } } static void delete_message(ccm_ipc_t *ccmipc) { g_free(ccmipc); } static void send_func_done(struct IPC_MESSAGE *ipcmsg) { ccm_ipc_t *ccmipc = (ccm_ipc_t *)ipcmsg->msg_private; int count = --(ccmipc->count); if(count==0){ delete_message(ccmipc); } return; } static ccm_ipc_t * create_message(void *data, int size) { ccm_ipc_t *ipcmsg; ipcmsg = g_malloc(sizeof(ccm_ipc_t)+size); ipcmsg->count = 0; memset(&ipcmsg->ipcmsg, 0, sizeof(IPC_Message)); ipcmsg->ipcmsg.msg_body = ipcmsg+1; memcpy(ipcmsg->ipcmsg.msg_body, data, size); ipcmsg->ipcmsg.msg_len = size; ipcmsg->ipcmsg.msg_done = send_func_done; ipcmsg->ipcmsg.msg_private = ipcmsg; ipcmsg->ipcmsg.msg_buf = NULL; return ipcmsg; } static void send_all(int msg_type) { if(g_hash_table_size(ccm_hashclient)) { g_hash_table_foreach(ccm_hashclient, send_func, GINT_TO_POINTER(msg_type)); } return; } static void flush_func(gpointer key, gpointer value, gpointer user_data) { struct IPC_CHANNEL *ipc_client = (struct IPC_CHANNEL *)key; int do_warn = 0; while(ipc_client->ops->is_sending_blocked(ipc_client)) { /* FIXME misbehaving client can live lock whole ccm layer! */ if (10 == ++do_warn) { cl_log(LOG_WARNING, "ipc channel blocked, farside_pid=%u", ipc_client->farside_pid); } cl_shortsleep(); if(ipc_client->ops->resume_io(ipc_client) == IPC_BROKEN) { break; } } } static void flush_all(void) { if(g_hash_table_size(ccm_hashclient)) { g_hash_table_foreach(ccm_hashclient, flush_func, NULL); } return; } static void cleanup(void) { membership_ready=FALSE; flush_all(); /* flush out all the messages to all the clients*/ if (ipc_mem_message) { delete_message(ipc_mem_message); } if (ipc_misc_message) { delete_message(ipc_misc_message); } ipc_mem_message = NULL; ipc_misc_message = NULL; /* NOTE: ipc_llm_message is never destroyed. */ /* Also, do not free the client structure. 
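* (client structures are released individually by client_destroy()
* once their IPC channel goes away)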
*/ return; } void client_init(void) { if(ccm_hashclient) { ccm_log(LOG_INFO, "client already initialized"); return; } ccm_hashclient = g_hash_table_new(g_direct_hash, g_direct_equal); return; } int client_add(struct IPC_CHANNEL *ipc_client) { ccm_client_t *ccm_client; if(!ccm_hashclient) { ccm_log(LOG_ERR, "client subsystem not initialized"); return -1; } ccm_client = (ccm_client_t *) g_malloc(sizeof(ccm_client_t)); ccm_client->ccm_clid = 0; /* don't care, TOBEDONE */ ccm_client->ccm_ipc_client = ipc_client; ccm_client->ccm_flags = CL_INIT; send_func(ipc_client, ccm_client, (gpointer)CCM_NEW_MEMBERSHIP); g_hash_table_insert(ccm_hashclient, ipc_client, ccm_client); return 0; } static void client_destroy(struct IPC_CHANNEL *ipc_client) { ccm_client_t *ccm_client; if((ccm_client = g_hash_table_lookup(ccm_hashclient, ipc_client)) != NULL){ g_free(ccm_client); } /* IPC_Channel is automatically destroyed when channel is disconnected */ } void client_delete(struct IPC_CHANNEL *ipc_client) { g_hash_table_remove(ccm_hashclient, ipc_client); client_destroy(ipc_client); return; } static gboolean destroy_func(gpointer key, gpointer value, gpointer user_data) { struct IPC_CHANNEL *ipc_client = (struct IPC_CHANNEL *)key; client_destroy(ipc_client); return TRUE; } void client_delete_all(void) { if(g_hash_table_size(ccm_hashclient)) { g_hash_table_foreach_remove(ccm_hashclient, destroy_func, NULL); } return; } static gboolean get_quorum(ccm_info_t* info) { if (info->has_quorum != -1) { return info->has_quorum; } return ccm_calculate_quorum(info); } static void display_func(gpointer key, gpointer value, gpointer user_data) { ccm_client_t * ccm_client = (ccm_client_t*) value; ccm_debug(LOG_DEBUG, "client: pid =%d", ccm_client->ccm_ipc_client->farside_pid); return; } void client_new_mbrship(ccm_info_t* info, void* borndata) { /* creating enough heap memory in order to avoid allocation */ static struct born_s bornbuffer[MAXNODE+10]; ccm_meminfo_t *ccm=(ccm_meminfo_t *)bornbuffer; struct born_s *born_arry = (struct born_s *)borndata; int n = info->memcount; int trans = info->ccm_transition_major; int* member = info->ccm_member; int i, j; assert( n<= MAXNODE); membership_ready=TRUE; ccm->ev = CCM_NEW_MEMBERSHIP; ccm->n = n; ccm->trans = trans; ccm->quorum = get_quorum(info); (void)get_quorum; ccm_debug(LOG_DEBUG, "quorum is %d", ccm->quorum); for (i = 0; i < n; i++) { ccm->member[i].index = member[i]; ccm->member[i].bornon = -1; for (j = 0; j < n; j ++) { if (born_arry[j].index == ccm->member[i].index) { ccm->member[i].bornon = born_arry[j].bornon; } } } if(ipc_mem_message && --(ipc_mem_message->count)==0){ delete_message(ipc_mem_message); } ipc_mem_message = create_message(ccm, (sizeof(ccm_meminfo_t) + n*sizeof(born_t))); ipc_mem_message->count++; refresh_llm_msg(&info->llm); #if 1 ccm_debug(LOG_DEBUG, "delivering new membership to %d clients: ", g_hash_table_size(ccm_hashclient)); if(g_hash_table_size(ccm_hashclient)){ g_hash_table_foreach(ccm_hashclient, display_func, NULL); } #else (void)display_func; #endif send_all(CCM_NEW_MEMBERSHIP); ccm_debug2(LOG_DEBUG, "membership state: new membership"); } void client_influx(void) { int type = CCM_INFLUX; if(membership_ready){ membership_ready = FALSE; if(ipc_misc_message && --(ipc_misc_message->count)==0){ delete_message(ipc_misc_message); } ipc_misc_message = create_message(&type, sizeof(int)); ipc_misc_message->count++; send_all(CCM_INFLUX); } ccm_debug2(LOG_DEBUG, "membership state: not primary"); } void client_evicted(void) { int type = CCM_EVICTED; 
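/* Drop our reference to the previous "misc" notification (it is freed
 * only when no pending send still holds a reference), then build a
 * fresh CCM_EVICTED message, broadcast it to every registered client
 * and clean up the local membership state. */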
if(ipc_misc_message && --(ipc_misc_message->count)==0){ delete_message(ipc_misc_message); } ipc_misc_message = create_message(&type, sizeof(int)); ipc_misc_message->count++; send_all(CCM_EVICTED); cleanup(); ccm_debug2(LOG_DEBUG, "membership state: evicted"); } void client_llm_init(llm_info_t *llm) { refresh_llm_msg(llm); return; } void refresh_llm_msg(llm_info_t *llm) { int maxnode = llm_get_nodecount(llm); int size = sizeof(ccm_llm_t)+ maxnode*sizeof(struct node_s); ccm_llm_t *data = (ccm_llm_t *)g_malloc(size); int i; data->ev = CCM_LLM; /* copy the relevent content of llm into data */ CLLM_SET_NODECOUNT(data,maxnode); CLLM_SET_MYNODE(data, llm_get_myindex(llm)); for ( i = 0; i < maxnode; i ++ ) { CLLM_SET_NODEID(data,i,llm_get_nodename(llm,i)); CLLM_SET_UUID(data,i,i); } if(ipc_llm_message && --(ipc_llm_message->count)==0){ delete_message(ipc_llm_message); } ipc_llm_message = create_message(data, size); ipc_llm_message->count++; g_free(data); return; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmgraph.c0000644000000000000000000002654611576626513022041 0ustar00usergroup00000000000000/* * ccmgraph.c: Keeps track of the connectivity within the cluster members * to derive the largest totally connected subgraph. * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include /* ASSUMPTIONS IN THIS FILE. * we assume that there can be at most MAXNODE number of nodes in the graph. * we assume that we are working with only one graph at a time, from the call * to graph_init() to graph_delete(). If Multiple graph_init() are done * simultaneously then results may be unpredictable. */ static vertex_t graph[MAXNODE]; /* allocate memory statically */ static char vyesorno='n'; #define GRAPH_TIMEOUT 15 #define GRAPH_TIMEOUT_TOO_LONG 25 static void bitmap_display(char* bitmap) { ccm_debug2(LOG_DEBUG, "bitmap_display:%x", (unsigned int) bitmap[0]); } static void graph_display(graph_t* gr) { int i; if (gr == NULL){ ccm_log(LOG_ERR, "graph_display:graph is NULL"); return; } for ( i = 0 ; i < gr->graph_nodes; i++ ) { char* bitmap = gr->graph_node[i]->bitmap; int index = gr->graph_node[i]->uuid; ccm_debug2(LOG_DEBUG, "graph_display:node[%d]'s bitmap is:", index); if(bitmap != NULL) { bitmap_display(bitmap); } } } /* */ /* clean up the unneccessary bits in the graph and check for */ /* inconsistency. */ /* */ static void graph_sanitize(graph_t *gr) { char *bitmap; int i,j, uuid_i, uuid_j; vertex_t **graph_node; (void)graph_display; graph_node = gr->graph_node; for ( i=0; i < gr->graph_nodes; i++ ) { uuid_i = graph_node[i]->uuid; assert(uuid_i >= 0 && uuid_i < MAXNODE); if(graph_node[i]->bitmap == NULL) { bitmap_create(&bitmap, MAXNODE); graph_node[i]->bitmap = bitmap; graph_node[i]->count = 0; } /* * Loop through each uuid from 0 to MAXNODE. 
* If there is no vertex with the corresponding uuid, * reset the bit corresponding to this uuid. */ for(uuid_j=0; uuid_j < MAXNODE; uuid_j++) { for (j=0; jgraph_nodes; j++) { if(uuid_j == graph_node[j]->uuid) { break; } } if(j == gr->graph_nodes) { /* node uuid_j is not in the graph, so clear its * bits. */ bitmap_clear(uuid_j, graph_node[i]->bitmap, MAXNODE); } else { if(uuid_i == uuid_j) { continue; } if(graph_node[j]->bitmap == NULL) { bitmap_create(&bitmap, MAXNODE); graph_node[j]->bitmap = bitmap; graph_node[j]->count = 0; } if(!bitmap_test(uuid_j,graph_node[i]->bitmap, MAXNODE) || !bitmap_test(uuid_i,graph_node[j]->bitmap, MAXNODE)){ bitmap_clear(uuid_j, graph_node[i]->bitmap,MAXNODE); bitmap_clear(uuid_i, graph_node[j]->bitmap,MAXNODE); } } } graph_node[i]->count = bitmap_count(graph_node[i]->bitmap,MAXNODE); } return; } /* */ /* print the vertices that belong the largest totally connected subgraph. */ /* */ static void print_vertex(vertex_t **vertex, int maxnode) { int i,j; for ( i = 0 ; i < maxnode ; i++) { printf("%d:\t",i); for ( j = 0 ; j < maxnode ; j++) { if(bitmap_test(j, vertex[i]->bitmap, maxnode)) { printf(" 1 "); }else{ printf(" 0 "); } } printf("uuid=%d, count=%d\n",vertex[i]->uuid,vertex[i]->count); printf("\n"); } printf("----------------------------------------\n"); } /* */ /* BEGIN OF FUNCTIONS THAT FORM THE CORE OF THE ALGORITHM */ /* */ /* */ /* the function that orders the vertices in the graph while sorting. */ /* */ static int compare(const void *value1, const void *value2) { const vertex_t *t1 = *(const vertex_t * const *)value1; const vertex_t *t2 = *(const vertex_t * const *)value2; return(t2->count - t1->count); } static void relocate(vertex_t **vertex, int indx, int size, int *indxtab, int maxnode) { vertex_t *tmp_vertex; int i; tmp_vertex = vertex[indx]; for ( i = indx+1; i < size; i++ ) { if(tmp_vertex->count >= vertex[i]->count) { break; } vertex[i-1] = vertex[i]; indxtab[vertex[i-1]->uuid] = i-1; } vertex[i-1] = tmp_vertex; indxtab[vertex[i-1]->uuid] = i-1; } static void decrement_count(vertex_t **vertex, int indx, int size, int *indxtab, int maxnode) { vertex_t *tmp_vertex; tmp_vertex = vertex[indx]; tmp_vertex->count--; relocate(vertex, indx, size, indxtab, maxnode); } static int find_best_candidate(vertex_t **vertex, int startindx, int size, int *indxtab, int maxnode) { int min_indx, min_count; int i, uuid; int count, indx; min_indx = startindx; min_count = INT_MAX; for ( i = size-1; i >= startindx; i-- ) { count = 0; for (uuid = 0; uuid < maxnode; uuid++) { if(bitmap_test(uuid, vertex[i]->bitmap, maxnode)){ indx = indxtab[uuid]; if(indx == -1 || indx >= size) { continue; } count += vertex[indx]->count; } } if(count == min_count) { if (vyesorno == 'y') { ccm_debug2(LOG_DEBUG , "find_best_candidate:probably 1 more group exists"); } } if(count < min_count) { min_count = count; min_indx = i; } } return min_indx; } static int find_size(vertex_t **vertex, int maxnode) { int size, i; assert(vertex[0]->count == 0); size=1; for ( i= 1 ; i < maxnode; i++ ) { if(vertex[i]->count == size) { size++; } else { break; } } return size; } static int delete_entry(vertex_t **vertex, int indx, int size, int *indxtab, int maxnode) { vertex_t *tmp_vertex; int uuid; int loc; /* move this entry to the end of the table and shuffle the other * entries up */ if (vyesorno == 'y') { ccm_debug2(LOG_DEBUG, "delete_entry:k=%d is being removed",indx); } tmp_vertex = vertex[indx]; tmp_vertex->count--; relocate(vertex, indx, size, indxtab, maxnode); for ( uuid = 0 ; uuid < maxnode ; 
uuid ++ ){ if(bitmap_test(uuid, tmp_vertex->bitmap, maxnode)) { loc = indxtab[uuid]; if(loc == -1 || loc >= size-1) { continue; } decrement_count(vertex, loc, size-1, indxtab, maxnode); } } if (vyesorno == 'y') { print_vertex(vertex, maxnode); } if (tmp_vertex->count == 0) { return find_size(vertex+size-1, maxnode-indx); } return -1; } #ifdef NEED_PRINT_MEMBERS static void print_members(vertex_t **vertex, int maxmem) { int i; printf("the members are \n"); for ( i = 0 ; i < maxmem ; i++) { printf("%d ", vertex[i]->uuid); } printf("\n"); } #endif static int get_max_clique(vertex_t **vertex, int maxnode, int *loc) { int i,j,k,num; int maxconnect, tmp_maxconnect; int size; int *indxtab; /* sort the scratchpad entries with respect to their * connectivity value */ qsort(vertex, maxnode, sizeof(vertex_t *), compare); /* indx the uuid into the indx table */ indxtab = g_malloc(MAXNODE*sizeof(int)); /* * TOBEDONE: we really do not need to allocate MAXNODE size array * What is required is: Find the max uuid in the vertex[] array * and allocate a indxtab table of that size. Postponing * the implementation currently. */ for ( i = 0 ; i < MAXNODE ; i++ ) { *(indxtab+i) = -1; } for ( i = 0 ; i < maxnode ; i++ ) { if(vertex[i]->uuid != -1) { indxtab[vertex[i]->uuid] = i; } } maxconnect = 1; for ( j=i-1 ; j>=0; j-- ) { if (vyesorno == 'y') { print_vertex(vertex, maxnode); } if((j+1)count >= j+1){ break; /* done */ } /* find number of entries with the same connectivity value */ num=1; for(k=j-1; k>=0; k--) { if(vertex[j]->count == vertex[k]->count){ num++; } else { break; } } /* find the best candidate to be considered for removal */ k = find_best_candidate(vertex, j-num+1, j+1, indxtab, maxnode); if (vyesorno == 'y') { ccm_debug2(LOG_DEBUG , "get_max_clique:k=%d is the best candidate for removal",k); } /* delete the candidate */ tmp_maxconnect = delete_entry(vertex, k, j+1, indxtab, maxnode); if(tmp_maxconnect>maxconnect) { *loc = j; maxconnect=tmp_maxconnect; } } if ((j+1) < maxconnect) { size = maxconnect; } else { *loc = 0; size = j+1; } g_free(indxtab); return size; } /* */ /* END OF FUNCTIONS THAT FORM THE CORE OF THE ALGORITHM */ /* */ /* */ /* initialize the graph. */ /* */ graph_t * graph_init() { int i; graph_t *gr; if((gr = (graph_t *)g_malloc(sizeof(graph_t))) == NULL){ return NULL; } memset(gr, 0, sizeof(graph_t)); memset(graph, 0, sizeof(graph)); for ( i = 0 ; i < MAXNODE ; i++ ) { gr->graph_node[i] = &graph[i]; } return gr; } /* */ /* free all the datastructures */ /* */ void graph_free(graph_t *gr) { int i; if(!gr) { return; } for ( i = 0 ; i < gr->graph_nodes; i++ ) { if(gr->graph_node[i]->bitmap != NULL) { bitmap_delete(gr->graph_node[i]->bitmap); } } g_free(gr); return; } /* */ /* add a new member to the graph, whose id is 'uuid' */ /* */ void graph_add_uuid(graph_t *gr, int uuid) { int i; for ( i = 0 ; i < gr->graph_nodes; i++ ) { if(gr->graph_node[i]->uuid == uuid) { return; } } gr->graph_node[gr->graph_nodes++]->uuid = uuid; } /* */ /* add the member whose id is 'dst_uuid' to the connectivity list */ /* of the member with id 'src_uuid' */ /* */ void graph_add_to_membership(graph_t *gr, int src_uuid, int dst_uuid) { int i; for ( i = 0 ; i < gr->graph_nodes; i++ ) { if(gr->graph_node[i]->uuid == src_uuid) { assert(gr->graph_node[i]->bitmap); bitmap_mark(dst_uuid, gr->graph_node[i]->bitmap, MAXNODE); return; } } assert(0); } /* */ /* update the connectivity information of the member whose id is 'uuid'. 
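* For orientation, a hypothetical caller of this file's API (not part
* of this file) would proceed roughly as follows -- illustrative
* sketch only:
*
*     graph_t *gr = graph_init();
*     graph_add_uuid(gr, uuid);                  // once per expected member
*     ...
*     graph_update_membership(gr, uuid, bitmap); // per received bitmap
*     ...
*     if (graph_filled_all(gr)) {
*             char *clique = NULL;
*             int n = graph_get_maxclique(gr, &clique);
*             // 'clique' now marks the n members of the largest
*             // fully connected subgraph
*     }
*     graph_free(gr);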
*/ /* */ void graph_update_membership(graph_t *gr, int uuid, char *bitlist) { int i; for ( i = 0 ; i < gr->graph_nodes; i++ ) { if(gr->graph_node[i]->uuid == uuid) { /* assert that this is not a duplicate message */ if(gr->graph_node[i]->bitmap != NULL) { bitmap_delete(gr->graph_node[i]->bitmap); gr->graph_rcvd--; } gr->graph_node[i]->bitmap = bitlist; /* postpone the calculation of count, because * we have to sanitize this graph after * reciving all the bitmaps */ gr->graph_rcvd++; break; } } /* make sure we have not received message from unknown node */ assert(i < gr->graph_nodes); return; } /* */ /* return TRUE, if all the members of the graph have their */ /* connectivity information updated. */ /* */ int graph_filled_all(graph_t *gr) { return (gr->graph_rcvd == gr->graph_nodes); } /* */ /* return the largest fully connected subgraph. */ /* */ int graph_get_maxclique(graph_t *gr, char **bitmap) { int loc = 0; int i, size, numBytes; graph_sanitize(gr); size = get_max_clique(gr->graph_node, gr->graph_nodes, &loc); numBytes = bitmap_create(bitmap, MAXNODE); for ( i = loc ; i < size ; i++ ) { bitmap_mark(gr->graph_node[i]->uuid, *bitmap, MAXNODE); } return size; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmlib.h0000644000000000000000000000671011576626513021502 0ustar00usergroup00000000000000/* * ccmlib.h: internal definations for ccm library files. * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __CCMLIB_H_ #define __CCMLIB_H_ #include #include #include #include #include #include #include #include #include #include #ifdef __CCM_LIBRARY__ #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); #endif #define NODEIDSIZE 255 /* if this value is changed change it also in ccm.h */ #define CCMFIFO HA_VARRUNDIR "/heartbeat/ccm/ccm" /* if this value is changed change it also in ccm.h */ size_t strnlen(const char *, size_t); /*TOBEDONE*/ typedef struct born_s { int index; int bornon; } born_t; /* to be include by the client side of ccm */ typedef struct ccm_meminfo_s { int ev; int n; int trans; int quorum; born_t member[0]; } ccm_meminfo_t; /* bornon structure sent to the client */ typedef struct ccm_born_s { int n; born_t born[0]; } ccm_born_t; typedef struct ccm_llm_s { /* information about low level membership info */ int ev; uint n; /* number of nodes in the cluster */ int mynode; /* index of mynode */ struct node_s { uint Uuid; /* a cluster unique id for the node */ char Id[NODEIDSIZE]; } node[0]; } ccm_llm_t; #define CLLM_GET_MYNODE(cllm) cllm->mynode #define CLLM_GET_NODECOUNT(cllm) cllm->n #define CLLM_GET_UUID(cllm,i) cllm->node[i].Uuid #define CLLM_GET_MYUUID(cllm) CLLM_GET_UUID(cllm, CLLM_GET_MYNODE(cllm)) #define CLLM_GET_NODEID(cllm,i) cllm->node[i].Id #define CLLM_GET_MYNODEID(cllm) CLLM_GET_NODEID(cllm, CLLM_GET_MYNODE(cllm)) #define CLLM_SET_MYNODE(cllm,indx) cllm->mynode = indx #define CLLM_SET_NODECOUNT(cllm, count) cllm->n = count #define CLLM_SET_UUID(cllm,i, uuid) cllm->node[i].Uuid = uuid #define CLLM_SET_MYUUID(cllm, uuid) CLLM_SET_UUID(cllm, CLLM_GET_MYNODE(cllm), uuid) #define CLLM_SET_NODEID(cllm, i, name) \ (strncpy(cllm->node[i].Id,name,NODEIDSIZE)) #define CLLM_SET_MYNODEID(cllm, name) \ CLLM_SET_NODEID(cllm, CLLM_GET_MYNODE(cllm), name) #ifdef __CCM_LIBRARY__ typedef struct class_s { int type; oc_ev_callback_t *(*set_callback)(struct class_s *, oc_ev_callback_t(*)); gboolean (*handle_event) (struct class_s *); int (*activate) (struct class_s *); void (*unregister) (struct class_s *); gboolean (*is_my_nodeid) (struct class_s *, const oc_node_t *); void (*special) (struct class_s *, int); void *private; } class_t; class_t *oc_ev_memb_class(oc_ev_callback_t *); #endif #define CCM_EVICTED 1 #define CCM_NEW_MEMBERSHIP 2 #define CCM_INFLUX 3 #define CCM_LLM 4 #endif /* __CCMLIB_H_ */ Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmlib_clm.c0000644000000000000000000003676111576626513022341 0ustar00usergroup00000000000000/* * libclm.c: SAForum AIS Membership Service library * * Copyright (c) 2003 Intel Corp. * Author: Zhu Yi (yi.zhu@intel.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #include #include #include #include #include #ifdef POSIX_THREADS # include #endif #include #include #define CLM_TRACK_STOP 0 #define CLM_DEBUG 0 #define GET_CLM_HANDLE(x) (__clm_handle_t *)g_hash_table_lookup(__handle_hash,x) typedef struct __clm_handle_s { oc_ev_t *ev_token; SaClmCallbacksT callbacks; SaSelectionObjectT fd; SaUint8T trackflags; SaUint32T itemnum; SaClmClusterNotificationT *nbuf; SaSelectionObjectT st; } __clm_handle_t; static GHashTable *__handle_hash = NULL; static guint __handle_counter = 0; static const oc_ev_membership_t *__ccm_data = NULL; static oc_ev_t __ccm_event = OC_EV_MS_INVALID; static void *__ccm_cookie = NULL; #ifdef POSIX_THREADS static pthread_mutex_t __clmlib_mutex = PTHREAD_MUTEX_INITIALIZER; #endif static void pthread_lock(void); static void pthread_unlock(void); static void clm_init(void); extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); static void retrieve_current_buffer(__clm_handle_t *hd); static void retrieve_changes_buffer(__clm_handle_t *hd); static void retrieve_changes_only_buffer(__clm_handle_t *hd); static SaErrorT retrieve_node_buffer(SaClmNodeIdT nodeId , SaClmClusterNodeT *clusterNode); static void pthread_lock(void) { #ifdef POSIX_THREADS pthread_mutex_lock(&__clmlib_mutex); #endif } static void pthread_unlock(void) { #ifdef POSIX_THREADS pthread_mutex_unlock(&__clmlib_mutex); #endif } static void clm_init(void) { static gboolean clminit_flag = FALSE; if (clminit_flag == FALSE) { __handle_hash = g_hash_table_new(g_int_hash , g_int_equal); clminit_flag = TRUE; } return; } static void ccm_events(oc_ed_t event, void *cookie, size_t size, const void *data) { pthread_lock(); /* dereference old cache */ if (__ccm_cookie) oc_ev_callback_done(__ccm_cookie); __ccm_cookie = cookie; __ccm_event = event; __ccm_data = (const oc_ev_membership_t *)data; #if CLM_DEBUG cl_log(LOG_DEBUG, "__ccm_data = <0x%x>" , (unsigned int)data); #endif pthread_unlock(); if (event == OC_EV_MS_EVICTED || event == OC_EV_MS_NOT_PRIMARY || event == OC_EV_MS_PRIMARY_RESTORED) { /* We do not care about this info */ return; } if (!data) { cl_log(LOG_ERR, "CCM event callback return NULL data"); return; } /* * Note: No need to worry about the buffer free problem, OCF * callback mechanism did this for us. 
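* In effect this callback only caches the most recent membership
* snapshot in __ccm_data/__ccm_cookie (releasing the previously cached
* one through oc_ev_callback_done()); the SaClm* entry points below
* work from that cached snapshot.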
*/ } SaErrorT saClmInitialize(SaClmHandleT *clmHandle, const SaClmCallbacksT *clmCallbacks, const SaVersionT *version) { int ret; oc_ev_t *ev_token; __clm_handle_t *hd; SaClmHandleT *hash_key; fd_set rset; struct timeval tv; SaErrorT rc; oc_ev_register(&ev_token); if ((ret = oc_ev_set_callback(ev_token, OC_EV_MEMB_CLASS , ccm_events, NULL)) != 0) { if (ret == ENOMEM){ rc = SA_ERR_NO_MEMORY; goto err_nomem_exit; } else{ assert(0); /* Never runs here */ } } /* We must call it to get non-quorum partition info */ oc_ev_special(ev_token, OC_EV_MEMB_CLASS, 0); clm_init(); hash_key = (SaClmHandleT *)g_malloc(sizeof(SaClmHandleT)); if (!hash_key){ rc = SA_ERR_NO_MEMORY; goto err_nomem_exit; } hd = (__clm_handle_t *)g_malloc(sizeof(__clm_handle_t)); if (!hd){ g_free(hash_key); rc = SA_ERR_NO_MEMORY; goto err_nomem_exit; } *clmHandle = __handle_counter++; *hash_key = *clmHandle; hd->ev_token = ev_token; hd->callbacks = *clmCallbacks; hd->trackflags = CLM_TRACK_STOP; cl_log(LOG_INFO, "g_hash_table_insert hd = [%p]", hd); g_hash_table_insert(__handle_hash, hash_key, hd); if ((ret = oc_ev_activate(hd->ev_token, &hd->fd)) != 0) { cl_log(LOG_ERR, "oc_ev_activate error [%d]", ret); rc = SA_ERR_LIBRARY; goto err_lib_exit; } /* Prepare information for saClmClusterNodeGet() series calls */ while (!__ccm_data) { FD_ZERO(&rset); FD_SET(hd->fd, &rset); tv.tv_sec = 2; tv.tv_usec = 0; if ((ret = select(hd->fd + 1, &rset, NULL, NULL, &tv)) == -1) { cl_log(LOG_ERR, "%s: select error [%d]" , __FUNCTION__, ret); rc = SA_ERR_LIBRARY; goto err_lib_exit; } else if (ret == 0) { cl_log(LOG_WARNING, "%s: select timeout", __FUNCTION__); rc = SA_ERR_TIMEOUT; goto err_lib_exit; } if ((ret = oc_ev_handle_event(hd->ev_token) != 0)) { cl_log(LOG_ERR, "%s: oc_ev_handle_event error [%d]" , __FUNCTION__, ret); rc = SA_ERR_LIBRARY; goto err_lib_exit; } } return SA_OK; err_lib_exit: g_hash_table_remove(__handle_hash, hash_key); g_free(hd); g_free(hash_key); err_nomem_exit: oc_ev_unregister(ev_token); return rc; } SaErrorT saClmSelectionObjectGet(const SaClmHandleT *clmHandle, SaSelectionObjectT *selectionObject) { __clm_handle_t *hd = GET_CLM_HANDLE(clmHandle); if (!hd){ return SA_ERR_BAD_HANDLE; } *selectionObject = hd->fd; return SA_OK; } #define MEMCHANGE(x) hd->nbuf[x].clusterChanges #define MEMNODE(x) hd->nbuf[x].clusterNode static void set_misc_node_info(SaClmClusterNodeT *cn) { cn->nodeAddress.length = 0; cn->nodeAddress.value[0] = '\0'; cn->nodeName.length = strlen((char*)cn->nodeName.value); cn->clusterName.length = 0; cn->clusterName.value[0] = '\0'; cn->bootTimestamp = 0; } static void retrieve_current_buffer(__clm_handle_t *hd) { uint i; char *p; const oc_ev_membership_t *oc = __ccm_data; for (i = 0; i < oc->m_n_member; i++) { MEMCHANGE(i) = SA_CLM_NODE_NO_CHANGE; MEMNODE(i).nodeId = oc->m_array[oc->m_memb_idx+i].node_id; MEMNODE(i).member = 1; p = oc->m_array[oc->m_memb_idx+i].node_uname; if (p) { strncpy((char *)MEMNODE(i).nodeName.value, p, SA_MAX_NAME_LENGTH - 1); MEMNODE(i).nodeName.value[SA_MAX_NAME_LENGTH-1] = '\0'; } else { MEMNODE(i).nodeName.value[0] = '\0'; } set_misc_node_info(&MEMNODE(i)); } } static void retrieve_changes_buffer(__clm_handle_t *hd) { uint i, j; int n; char *p; const oc_ev_membership_t *oc = __ccm_data; retrieve_current_buffer(hd); for (i = 0; i < oc->m_n_in; i++) { for (j = 0; j < oc->m_n_member; j++) { if (MEMNODE(j).nodeId == oc->m_array[oc->m_in_idx+i].node_id) { MEMCHANGE(j) = SA_CLM_NODE_JOINED; p = oc->m_array[oc->m_in_idx+i].node_uname; if (p) { 
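/* bounded copy; the explicit termination that follows is needed
 * because strncpy() does not NUL-terminate when the source fills
 * the buffer */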
strncpy((char*)MEMNODE(j).nodeName.value, p, SA_MAX_NAME_LENGTH-1); MEMNODE(j).nodeName.value \ [SA_MAX_NAME_LENGTH-1] = '\0'; } else { MEMNODE(j).nodeName.value[0] = '\0'; } break; } } assert(j < oc->m_n_member); /* must find new in all */ } for (j = 0, n = oc->m_n_member; j < oc->m_n_out; j++, n++) { MEMCHANGE(n) = SA_CLM_NODE_LEFT; MEMNODE(n).nodeId = oc->m_array[oc->m_out_idx+j].node_id; MEMNODE(n).member = 0; p = oc->m_array[oc->m_out_idx+j].node_uname; if (p) { strncpy((char*)MEMNODE(n).nodeName.value, p, SA_MAX_NAME_LENGTH - 1); MEMNODE(n).nodeName.value[SA_MAX_NAME_LENGTH-1] = '\0'; } else { MEMNODE(n).nodeName.value[0] = '\0'; } set_misc_node_info(&MEMNODE(n)); } } static void retrieve_changes_only_buffer(__clm_handle_t *hd) { uint i; int n; char *p; const oc_ev_membership_t *oc = __ccm_data; for (i = 0, n = 0; i < oc->m_n_in; i++, n++) { MEMCHANGE(n) = SA_CLM_NODE_JOINED; MEMNODE(n).nodeId = oc->m_array[oc->m_in_idx+i].node_id; MEMNODE(n).member = 1; p = oc->m_array[oc->m_in_idx+i].node_uname; if (p) { strncpy((char*)MEMNODE(n).nodeName.value, p, SA_MAX_NAME_LENGTH - 1); MEMNODE(n).nodeName.value[SA_MAX_NAME_LENGTH-1] = '\0'; } else { MEMNODE(n).nodeName.value[0] = '\0'; } set_misc_node_info(&MEMNODE(n)); } for (i = 0; i < oc->m_n_out; i++, n++) { MEMCHANGE(n) = SA_CLM_NODE_LEFT; MEMNODE(n).nodeId = oc->m_array[oc->m_out_idx+i].node_id; MEMNODE(n).member = 0; p = oc->m_array[oc->m_out_idx+i].node_uname; if (p) { strncpy((char *)MEMNODE(n).nodeName.value, p, SA_MAX_NAME_LENGTH - 1); MEMNODE(n).nodeName.value[SA_MAX_NAME_LENGTH-1] = '\0'; } else { MEMNODE(n).nodeName.value[0] = '\0'; } set_misc_node_info(&MEMNODE(n)); } } SaErrorT saClmDispatch(const SaClmHandleT *clmHandle, SaDispatchFlagsT dispatchFlags) { int ret; const oc_ev_membership_t *oc; uint itemnum; __clm_handle_t *hd = GET_CLM_HANDLE(clmHandle); if (!hd){ return SA_ERR_BAD_HANDLE; } if ((ret = oc_ev_handle_event(hd->ev_token)) != 0) { if (ret == EINVAL){ return SA_ERR_BAD_HANDLE; } /* else we must be evicted */ } /* We did not lock for read here because other writers will set it * with the same value (if there really exist some). Otherwise we * need to lock here. 
*/ if (__ccm_event == OC_EV_MS_EVICTED) { cl_log(LOG_WARNING , "This node is evicted from the current partition!"); return SA_ERR_LIBRARY; } if (__ccm_event == OC_EV_MS_NOT_PRIMARY || __ccm_event == OC_EV_MS_PRIMARY_RESTORED) { cl_log(LOG_DEBUG, "Received not interested event [%d]" , __ccm_event); return SA_OK; } if (!__ccm_data){ return SA_ERR_INIT; } oc = __ccm_data; if(CLM_TRACK_STOP == hd->trackflags){ return SA_OK; } /* SA_TRACK_CURRENT is cleared in saClmClusterTrackStart, hence we * needn't to deal with it now*/ if (hd->trackflags & SA_TRACK_CHANGES) { itemnum = oc->m_n_member + oc->m_n_out; if (itemnum > hd->itemnum) { hd->callbacks.saClmClusterTrackCallback(hd->nbuf , hd->itemnum, oc->m_n_member, oc->m_instance , SA_ERR_NO_SPACE); return SA_OK; } pthread_lock(); retrieve_changes_buffer(hd); pthread_unlock(); hd->callbacks.saClmClusterTrackCallback(hd->nbuf, itemnum , oc->m_n_member, oc->m_instance, SA_OK); } else if (hd->trackflags & SA_TRACK_CHANGES_ONLY) { itemnum = oc->m_n_in + oc->m_n_out; if (itemnum > hd->itemnum) { hd->callbacks.saClmClusterTrackCallback(hd->nbuf , hd->itemnum, oc->m_n_member, oc->m_instance , SA_ERR_NO_SPACE); return SA_OK; } pthread_lock(); retrieve_changes_only_buffer(hd); pthread_unlock(); hd->callbacks.saClmClusterTrackCallback(hd->nbuf, itemnum , oc->m_n_member, oc->m_instance, SA_OK); } else { assert(0); } /* unlock */ return SA_OK; } SaErrorT saClmFinalize(SaClmHandleT *clmHandle) { gpointer hd, oldkey; if (g_hash_table_lookup_extended(__handle_hash, clmHandle , &oldkey, &hd) == FALSE) { return SA_ERR_BAD_HANDLE; } oc_ev_unregister(((__clm_handle_t *)hd)->ev_token); /* TODO: unregister saClmClusterNodeGetCall here */ g_free(hd); g_free(oldkey); return SA_OK; } SaErrorT saClmClusterTrackStart(const SaClmHandleT *clmHandle, SaUint8T trackFlags, SaClmClusterNotificationT *notificationBuffer, SaUint32T numberOfItems) { __clm_handle_t *hd = GET_CLM_HANDLE(clmHandle); if (!hd){ return SA_ERR_BAD_HANDLE; } hd->trackflags = trackFlags; hd->itemnum = numberOfItems; hd->nbuf = notificationBuffer; if (trackFlags & SA_TRACK_CURRENT) { const oc_ev_membership_t *oc; SaUint32T itemnum; /* Clear SA_TRACK_CURRENT, it's no use since now. */ hd->trackflags &= ~SA_TRACK_CURRENT; if (__ccm_data == NULL) { return SA_ERR_LIBRARY; } oc = __ccm_data; itemnum = oc->m_n_member; if (itemnum > numberOfItems) { hd->callbacks.saClmClusterTrackCallback(hd->nbuf , hd->itemnum, oc->m_n_member, oc->m_instance , SA_ERR_NO_SPACE); return SA_OK; } pthread_lock(); retrieve_current_buffer(hd); pthread_unlock(); hd->callbacks.saClmClusterTrackCallback(hd->nbuf, itemnum , oc->m_n_member, oc->m_instance, SA_OK); return SA_OK; } return SA_OK; } SaErrorT saClmClusterTrackStop(const SaClmHandleT *clmHandle) { __clm_handle_t *hd = GET_CLM_HANDLE(clmHandle); if (!hd){ return SA_ERR_BAD_HANDLE; } /* This is ugly. But we currently depends on OCF interface, we have * no choice. This should be fixed in the next version after we remove * the dependency with OCF. 
*/ hd->trackflags = CLM_TRACK_STOP; return SA_OK; } static SaErrorT retrieve_node_buffer(SaClmNodeIdT nodeId, SaClmClusterNodeT *clusterNode) { const oc_ev_membership_t *oc; uint i; char *p; oc = (const oc_ev_membership_t *)__ccm_data; for (i = 0; i < oc->m_n_member; i++) { if (oc->m_array[oc->m_memb_idx+i].node_id == nodeId) { clusterNode->nodeId = nodeId; clusterNode->member = 1; p = oc->m_array[oc->m_memb_idx+i].node_uname; if (p) { strncpy((char *)clusterNode->nodeName.value, p, SA_MAX_NAME_LENGTH - 1); clusterNode->nodeName.value \ [SA_MAX_NAME_LENGTH-1] = '\0'; } else { clusterNode->nodeName.value[0] = '\0'; } goto found; } } for (i = 0; i < oc->m_n_out; i++) { if (oc->m_array[oc->m_out_idx+i].node_id == nodeId) { clusterNode->nodeId = nodeId; clusterNode->member = 0; p = oc->m_array[oc->m_out_idx+i].node_uname; if (p) { strncpy((char *)clusterNode->nodeName.value, p, SA_MAX_NAME_LENGTH - 1); clusterNode->nodeName.value \ [SA_MAX_NAME_LENGTH-1] = '\0'; } else { clusterNode->nodeName.value[0] = '\0'; } goto found; } } cl_log(LOG_WARNING, "%s: no record for nodeId [%lu]" , __FUNCTION__, nodeId); return SA_ERR_INVALID_PARAM; found: set_misc_node_info(clusterNode); return SA_OK; } SaErrorT saClmClusterNodeGet(SaClmNodeIdT nodeId, SaTimeT timeout, SaClmClusterNodeT *clusterNode) { int i; SaErrorT ret; if (!clusterNode) { cl_log(LOG_ERR, "Invalid parameter clusterNode <%p>" , clusterNode); return SA_ERR_INVALID_PARAM; } for (i = 0; i < timeout; i++) { if (__ccm_data){ break; } sleep(1); } if (i == timeout){ return SA_ERR_TIMEOUT; } pthread_lock(); ret = retrieve_node_buffer(nodeId, clusterNode); pthread_unlock(); return ret; } /* * This API is highly deprecated in version 1 implementation base on OCF. * It is actually _not_ an asynchronous call. TODO fix in version 2. */ SaErrorT saClmClusterNodeGetAsync(const SaClmHandleT *clmHandle, SaInvocationT invocation, SaClmNodeIdT nodeId, SaClmClusterNodeT *clusterNode) { int ret; __clm_handle_t *hd = GET_CLM_HANDLE(clmHandle); if (!hd){ return SA_ERR_BAD_HANDLE; } if (!clusterNode) { cl_log(LOG_ERR, "Invalid parameter clusterNode <%p>" , clusterNode); return SA_ERR_INVALID_PARAM; } if (!__ccm_data) { cl_log(LOG_ERR, "__ccm_data is NULL"); return SA_ERR_INIT; } pthread_lock(); if ((ret = retrieve_node_buffer(nodeId, clusterNode)) != SA_OK) { cl_log(LOG_ERR, "retrieve_node_buffer error [%d]", ret); pthread_unlock(); return ret; } pthread_unlock(); hd->callbacks.saClmClusterNodeGetCallback(invocation, clusterNode , SA_OK); return SA_OK; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmlib_eventapi.c0000644000000000000000000001766611576626513023404 0ustar00usergroup00000000000000/* * ccmlib_eventapi.c: OCF event API. * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * implements 0.2 version of proposed event api */ #define __CCM_LIBRARY__ #include void *cookie_construct(void (*f)(void *), void (*free_f)(void *), void *); void * cookie_get_data(void *ck); void * cookie_get_func(void *ck); void cookie_ref(void *ck); void cookie_unref(void *ck); static GHashTable *tokenhash = NULL; typedef struct oc_ev_cookie_s { void (*func) (void *); void (*freefunc) (void *); void *data; int refcount; } oc_ev_cookie_t; static guint token_counter=0; typedef struct oc_ev_s { int oc_flag; GHashTable *oc_eventclass; } __oc_ev_t; #define EVENT_INIT 1 /* * BEGIN OF FUNCTIONS DEALING WITH COOKIES */ void * cookie_construct(void (*f)(void *), void (*free_f)(void *), void *data) { oc_ev_cookie_t *cookie = g_malloc(sizeof(oc_ev_cookie_t)); cookie->func = f; cookie->data = data; cookie->freefunc = free_f; cookie->refcount = 1; return (void *)cookie; } void * cookie_get_data(void *ck) { oc_ev_cookie_t *cookie = (oc_ev_cookie_t *)ck; if (!cookie) { return NULL; } return cookie->data; } void * cookie_get_func(void *ck) { oc_ev_cookie_t *cookie = (oc_ev_cookie_t *)ck; if(!cookie) { return NULL; } return cookie->func; } void cookie_unref(void *ck) { oc_ev_cookie_t *cookie = (oc_ev_cookie_t *)ck; if(!cookie) { return; } if(--cookie->refcount == 0) { if(cookie->freefunc){ cookie->freefunc(cookie->data); } g_free(cookie); } return; } void cookie_ref(void *ck) { oc_ev_cookie_t *cookie = (oc_ev_cookie_t *)ck; if(!cookie) { return; } ++cookie->refcount; return; } /* * END OF FUNCTIONS DEALING WITH COOKIES */ static class_t * class_construct(oc_ev_class_t class_type, oc_ev_callback_t *fn) { class_t *t_class = NULL; switch(class_type) { case OC_EV_MEMB_CLASS: t_class = oc_ev_memb_class(fn); break; case OC_EV_CONN_CLASS: case OC_EV_GROUP_CLASS: default : break; } return t_class; } static void oc_ev_init(void) { static gboolean ocinit_flag = FALSE; if(ocinit_flag==FALSE) { tokenhash = g_hash_table_new(g_direct_hash, g_direct_equal); ocinit_flag = TRUE; } return; } static gboolean eventclass_remove_func(gpointer key, gpointer value, gpointer user_data) { class_t *class = (class_t *)value; class->unregister(class); g_free(class); return TRUE; } static gboolean token_invalid(const __oc_ev_t *token) { if(!token){ return TRUE; } if(token->oc_flag!= EVENT_INIT) { return TRUE; } return FALSE; } static void activate_func(gpointer key, gpointer value, gpointer user_data) { class_t *class = (class_t *)value; oc_ev_class_t class_type = (oc_ev_class_t) GPOINTER_TO_SIZE(key); int *fd = (int *) user_data; int tmp; tmp = class->activate(class); /* NOTE: the event API 0.2 is broken. 
* since membership class is the only event * class that is supported with this API, we * just return its file descriptor */ if(class_type == OC_EV_MEMB_CLASS) { *fd = tmp; } return; } static gboolean handle_func(gpointer key, gpointer value, gpointer user_data) { class_t *class = (class_t *)value; /* if handle event fails, remove this class */ if(!class->handle_event((void *)class)){ /* before we remove this class, we should unregister and free it first */ class->unregister(class); g_free(class); return TRUE; } /*do not remove this class*/ return FALSE; } int oc_ev_register(oc_ev_t **token) { __oc_ev_t *rettoken; oc_ev_init(); rettoken = (__oc_ev_t *)g_malloc(sizeof(__oc_ev_t)); *token = (oc_ev_t *)GUINT_TO_POINTER(token_counter++); if(!rettoken) { return ENOMEM; } rettoken->oc_flag = EVENT_INIT; rettoken->oc_eventclass = g_hash_table_new(g_direct_hash, g_direct_equal); g_hash_table_insert(tokenhash, *token, rettoken); return 0; } int oc_ev_unregister(oc_ev_t *tok) { __oc_ev_t *token = NULL; if(tokenhash) { token = g_hash_table_lookup(tokenhash, tok); } if(token == NULL || token_invalid(token)){ return EINVAL; } /* * delete all the event classes associated within * this handle */ g_hash_table_foreach_remove(token->oc_eventclass, eventclass_remove_func, NULL); g_hash_table_remove(tokenhash, tok); g_free(token); return 0; } /* a to configure any special parameters for * any of the classes. This function is not * part of the 0.2 event API. Is been added * to support setup of any special behaviour. */ void oc_ev_special(const oc_ev_t *tok, oc_ev_class_t class_type, int type) { class_t *class; const __oc_ev_t *token = (__oc_ev_t *) g_hash_table_lookup(tokenhash, tok); if(token == NULL || token_invalid(token)){ return; } /* if structure for the class already exists * just update the callback. Else allocate * a structure and update the callback */ if((class = g_hash_table_lookup(token->oc_eventclass, (void *)class_type)) == NULL){ return; } class->special(class, type); return; } int oc_ev_set_callback(const oc_ev_t *tok, oc_ev_class_t class_type, oc_ev_callback_t *fn, oc_ev_callback_t **prev_fn) { class_t *class; oc_ev_callback_t *pre_callback; const __oc_ev_t *token = (__oc_ev_t *) g_hash_table_lookup(tokenhash, tok); if(token == NULL || token_invalid(token)){ return EINVAL; } /* if structure for the class already exists * just update the callback. 
Else allocate * a structure and update the callback */ if((class = g_hash_table_lookup(token->oc_eventclass, (void *)class_type)) == NULL){ class = class_construct(class_type, NULL); g_hash_table_insert(token->oc_eventclass, (void *)GINT_TO_POINTER(class_type), class); } assert(class && class->set_callback); pre_callback = class->set_callback(class, fn); if(prev_fn) *prev_fn = pre_callback; return 0; } int oc_ev_activate(const oc_ev_t *tok, int *fd) { const __oc_ev_t *token = (__oc_ev_t *) g_hash_table_lookup(tokenhash, tok); *fd = -1; if(token == NULL || token_invalid(token)){ return EINVAL; } if(!g_hash_table_size(token->oc_eventclass)){ return EMFILE; } g_hash_table_foreach(token->oc_eventclass, activate_func, fd); if(*fd == -1){ return 1; } return 0; } int oc_ev_handle_event(const oc_ev_t *tok) { const __oc_ev_t *token = (__oc_ev_t *) g_hash_table_lookup(tokenhash, tok); if(token == NULL || token_invalid(token)){ return EINVAL; } if(!g_hash_table_size(token->oc_eventclass)){ return -1; } if(g_hash_table_size(token->oc_eventclass)) { g_hash_table_foreach_remove(token->oc_eventclass, handle_func, NULL); } return 0; } int oc_ev_callback_done(void *ck) { void (*f)(void *); oc_ev_cookie_t *cookie = (oc_ev_cookie_t *)ck; f = cookie_get_func(cookie); f(ck); return 0; } int oc_ev_is_my_nodeid(const oc_ev_t *tok, const oc_node_t *node) { class_t *class; const __oc_ev_t *token = (__oc_ev_t *) g_hash_table_lookup(tokenhash, tok); if(token == NULL || token_invalid(token) || !node){ return EINVAL; } class = g_hash_table_lookup(token->oc_eventclass, (void *)OC_EV_MEMB_CLASS); return class->is_my_nodeid(class, node); } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmlib_memapi.c0000644000000000000000000004406411576626513023031 0ustar00usergroup00000000000000/* * ccmlib_memapi.c: Consensus Cluster Membership API * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #define __CCM_LIBRARY__ #include #include /*#include */ #include /* structure to track the membership delivered to client */ typedef struct mbr_track_s { int quorum; int m_size; oc_ev_membership_t m_mem; } mbr_track_t; typedef struct mbr_private_s { int magiccookie; gboolean client_report; /* report to client */ oc_ev_callback_t *callback; /* the callback function registered by the client */ struct IPC_CHANNEL *channel; /* the channel to talk to ccm */ ccm_llm_t *llm; /* list of all nodes */ GHashTable *bornon; /* list of born time for all nodes */ void *cookie; /* the last known membership event cookie */ gboolean special; /* publish non primary membership. * This is a kludge to accomodate * special behaviour not provided * but desired from the 0.2 API. * By default this behaviour is * turned off. 
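 * It is switched on per event token via oc_ev_special(token,
 * OC_EV_MEMB_CLASS, type), which reaches mem_set_special() below;
 * once set, memberships that lack quorum are reported to the client
 * (tagged OC_EV_MS_INVALID) instead of being silently swallowed.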
*/ } mbr_private_t; static char event_strings[5][32]={ "OC_EV_MS_INVALID", "OC_EV_MS_NEW_MEMBERSHIP", "OC_EV_MS_NOT_PRIMARY", "OC_EV_MS_PRIMARY_RESTORED", "OC_EV_MS_EVICTED" }; #define EVENT_STRING(x) event_strings[x - OC_EV_MS_INVALID] #define OC_EV_SET_INSTANCE(m,trans) m->m_mem.m_instance=trans #define OC_EV_SET_N_MEMBER(m,n) m->m_mem.m_n_member=n #define OC_EV_SET_MEMB_IDX(m,idx) m->m_mem.m_memb_idx=idx #define OC_EV_SET_N_OUT(m,n) m->m_mem.m_n_out=n #define OC_EV_SET_OUT_IDX(m,idx) m->m_mem.m_out_idx=idx #define OC_EV_SET_N_IN(m,n) m->m_mem.m_n_in=n #define OC_EV_SET_IN_IDX(m,idx) m->m_mem.m_in_idx=idx #define OC_EV_SET_NODEID(m,idx,nodeid) m->m_mem.m_array[idx].node_id=nodeid #define OC_EV_SET_BORN(m,idx,born) m->m_mem.m_array[idx].node_born_on=born #define OC_EV_INC_N_MEMBER(m) m->m_mem.m_n_member++ #define OC_EV_INC_N_IN(m) m->m_mem.m_n_in++ #define OC_EV_INC_N_OUT(m) m->m_mem.m_n_out++ #define OC_EV_SET_SIZE(m,size) m->m_size=size #define OC_EV_SET_DONEFUNC(m,f) m->m_func=f #define OC_EV_GET_INSTANCE(m) m->m_mem.m_instance #define OC_EV_GET_N_MEMBER(m) m->m_mem.m_n_member #define OC_EV_GET_MEMB_IDX(m) m->m_mem.m_memb_idx #define OC_EV_GET_N_OUT(m) m->m_mem.m_n_out #define OC_EV_GET_OUT_IDX(m) m->m_mem.m_out_idx #define OC_EV_GET_N_IN(m) m->m_mem.m_n_in #define OC_EV_GET_IN(m) m->m_mem.m_in_idx #define OC_EV_GET_NODEARRY(m) m->m_mem.m_array #define OC_EV_GET_NODE(m,idx) m->m_mem.m_array[idx] #define OC_EV_GET_NODEID(m,idx) m->m_mem.m_array[idx].node_id #define OC_EV_GET_BORN(m,idx) m->m_mem.m_array[idx].node_born_on #define OC_EV_COPY_NODE_WITHOUT_UNAME(m1,idx1,m2,idx2) \ m1->m_mem.m_array[idx1]=m2->m_mem.m_array[idx2] #define OC_EV_COPY_NODE(m1,idx1,m2,idx2) \ m1->m_mem.m_array[idx1]=m2->m_mem.m_array[idx2]; \ m1->m_mem.m_array[idx1].node_uname = \ strdup(m2->m_mem.m_array[idx2].node_uname) #define OC_EV_GET_SIZE(m) m->m_size /* prototypes of external functions used in this file * Should be made part of some header file */ void *cookie_construct(void (*f)(void *), void (*free_f)(void *), void *); void * cookie_get_data(void *ck); void * cookie_get_func(void *ck); void cookie_ref(void *ck); void cookie_unref(void *ck); static const char *llm_get_Id_from_Uuid(ccm_llm_t *stuff, uint uuid); static void on_llm_msg(mbr_private_t *mem, struct IPC_MESSAGE *msg) { unsigned long len = msg->msg_len; if (mem->llm != NULL) { g_free(mem->llm); } mem->llm = (ccm_llm_t *)g_malloc(len); memcpy(mem->llm, msg->msg_body, len); return; } static void reset_bornon(mbr_private_t *private) { g_hash_table_destroy(private->bornon); private->bornon = NULL; } static void reset_llm(mbr_private_t *private) { g_free(private->llm); private->llm = NULL; } static int init_llm(mbr_private_t *private) { struct IPC_CHANNEL *ch; int sockfd, ret; struct IPC_MESSAGE *msg; if(private->llm) { return 0; } ch = private->channel; sockfd = ch->ops->get_recv_select_fd(ch); while(1) { if(ch->ops->waitin(ch) != IPC_OK){ ch->ops->destroy(ch); return -1; } ret = ch->ops->recv(ch,&msg); if(ret == IPC_BROKEN) { fprintf(stderr, "connection denied\n"); return -1; } if(ret == IPC_FAIL){ fprintf(stderr, "."); cl_shortsleep(); continue; } break; } on_llm_msg(private, msg); private->bornon = g_hash_table_new(g_direct_hash, g_direct_equal); private->cookie = NULL; private->client_report = TRUE; msg->msg_done(msg); return 0; } static gboolean class_valid(class_t *class) { mbr_private_t *private; if(class->type != OC_EV_MEMB_CLASS) { return FALSE; } private = (mbr_private_t *)class->private; if(!private || private->magiccookie != 0xabcdef){ 
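/* 0xabcdef is the magic value stamped into the private block by
 * oc_ev_memb_class(); anything else means we were handed a class
 * this library did not construct, or one that has been corrupted. */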
return FALSE; } return TRUE; } static gboolean already_present(oc_node_t *arr, uint size, oc_node_t node) { uint i; for ( i = 0 ; i < size ; i ++ ) { if(arr[i].node_id == node.node_id) { return TRUE; } } return FALSE; } static int compare(const void *value1, const void *value2) { const oc_node_t *t1 = (const oc_node_t *)value1; const oc_node_t *t2 = (const oc_node_t *)value2; if (t1->node_born_on < t2->node_born_on){ return -1; } if (t1->node_born_on > t2->node_born_on){ return 1; } if (t1->node_id < t2->node_id) { return -1; } if (t1->node_id > t2->node_id) { return 1; } return 0; } static const char * llm_get_Id_from_Uuid(ccm_llm_t *stuff, uint uuid) { uint lpc = 0; for (; lpc < stuff->n; lpc++) { if(stuff->node[lpc].Uuid == uuid){ return stuff->node[lpc].Id; } } return NULL; } static int get_new_membership(mbr_private_t *private, ccm_meminfo_t *mbrinfo, int len, mbr_track_t **mbr) { mbr_track_t *newmbr, *oldmbr; int trans, i, j, in_index, out_index, born; int n_members; int n_nodes = CLLM_GET_NODECOUNT(private->llm); int size = sizeof(oc_ev_membership_t) + 2*n_nodes*sizeof(oc_node_t); newmbr = *mbr = (mbr_track_t *)g_malloc(size + sizeof(mbr_track_t)-sizeof(newmbr->m_mem)); trans = OC_EV_SET_INSTANCE(newmbr,mbrinfo->trans); n_members = OC_EV_SET_N_MEMBER(newmbr,mbrinfo->n); OC_EV_SET_SIZE(newmbr, size); j = OC_EV_SET_MEMB_IDX(newmbr,0); for ( i = 0 ; i < n_members; i++ ) { const char *uname = NULL; int index; index = mbrinfo->member[i].index; uname = llm_get_Id_from_Uuid(private->llm, index); newmbr->m_mem.m_array[j].node_uname = strdup(uname); OC_EV_SET_NODEID(newmbr,j,index); /* gborn was an int to begin with - so this is safe */ born = mbrinfo->member[i].bornon; /* if there is already a born entry for the * node, use it. Otherwise create a born entry * for the node. * * NOTE: born==0 implies the entry has not been * initialized. 
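 * The m_array built here is subsequently qsort()ed with compare(),
 * i.e. ordered by (node_born_on, node_id).  Keeping the member list
 * in that canonical order is what lets membership_unchanged() detect
 * an identical membership with a simple element-by-element compare.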
*/ OC_EV_SET_BORN(newmbr,j, born); j++; } /* sort the m_arry */ qsort(OC_EV_GET_NODEARRY(newmbr), n_members, sizeof(oc_node_t), compare); in_index = OC_EV_SET_IN_IDX(newmbr,j); out_index = OC_EV_SET_OUT_IDX(newmbr,(j+n_nodes)); OC_EV_SET_N_IN(newmbr,0); OC_EV_SET_N_OUT(newmbr,0); oldmbr = (mbr_track_t *) cookie_get_data(private->cookie); if(oldmbr) { for ( i = 0 ; i < n_members; i++ ) { if(!already_present(OC_EV_GET_NODEARRY(oldmbr), OC_EV_GET_N_MEMBER(oldmbr), OC_EV_GET_NODE(newmbr,i))){ OC_EV_COPY_NODE_WITHOUT_UNAME(newmbr , in_index, newmbr, i); in_index++; OC_EV_INC_N_IN(newmbr); } } for ( i = 0 ; (uint)i < OC_EV_GET_N_MEMBER(oldmbr) ; i++ ) { if(!already_present(OC_EV_GET_NODEARRY(newmbr), OC_EV_GET_N_MEMBER(newmbr), OC_EV_GET_NODE(oldmbr,i))){ OC_EV_COPY_NODE(newmbr, out_index, oldmbr, i); out_index++; OC_EV_INC_N_OUT(newmbr); } } } else { OC_EV_SET_IN_IDX(newmbr,0); OC_EV_SET_N_IN(newmbr,OC_EV_GET_N_MEMBER(newmbr)); } return size; } static void mem_free_func(void *data) { unsigned lpc = 0; char * uname; mbr_track_t *mbr_track = (mbr_track_t *)data; if(mbr_track) { /* free m_n_member uname, m_n_in is actually the same ptr */ for (lpc = 0 ; lpc < OC_EV_GET_N_MEMBER(mbr_track); lpc++ ) { if ((uname = OC_EV_GET_NODE(mbr_track, lpc).node_uname)){ g_free(uname); } } /* free m_n_out uname */ for (lpc = OC_EV_GET_OUT_IDX(mbr_track) ; lpc < OC_EV_GET_OUT_IDX(mbr_track) + OC_EV_GET_N_OUT(mbr_track) ; lpc++) { if ((uname = OC_EV_GET_NODE(mbr_track, lpc).node_uname)){ g_free(uname); } } g_free(mbr_track); } return; } static void mem_callback_done(void *cookie) { cookie_unref(cookie); return; } static void update_bornons(mbr_private_t *private, mbr_track_t *mbr) { uint i,j; for(i=0; i < OC_EV_GET_N_MEMBER(mbr); i++) { g_hash_table_insert(private->bornon, GINT_TO_POINTER(OC_EV_GET_NODEID(mbr,i)), GINT_TO_POINTER(OC_EV_GET_BORN(mbr,i)+1)); } j=OC_EV_GET_OUT_IDX(mbr); for(i=OC_EV_GET_OUT_IDX(mbr); ibornon, GINT_TO_POINTER(OC_EV_GET_NODEID(mbr,i)), GINT_TO_POINTER(0)); } } static gboolean membership_unchanged(mbr_private_t *private, mbr_track_t *mbr) { uint i; mbr_track_t *oldmbr = (mbr_track_t *) cookie_get_data(private->cookie); if(!oldmbr) { return FALSE; } if(OC_EV_GET_N_MEMBER(mbr) != OC_EV_GET_N_MEMBER(oldmbr)){ return FALSE; } for(i=0; i < OC_EV_GET_N_MEMBER(mbr); i++) { if((OC_EV_GET_NODEID(mbr,i) != OC_EV_GET_NODEID(oldmbr,i)) || OC_EV_GET_BORN(mbr,i) != OC_EV_GET_BORN(oldmbr,i)) { return FALSE; } } return TRUE; } static gboolean mem_handle_event(class_t *class) { struct IPC_MESSAGE *msg; mbr_private_t *private; struct IPC_CHANNEL *ch; mbr_track_t *mbr_track; int size; int type; oc_memb_event_t oc_type; void *cookie; int ret; gboolean quorum; if(!class_valid(class)){ return FALSE; } private = (mbr_private_t *)class->private; ch = private->channel; if(init_llm(private)){ return FALSE; } while(ch->ops->is_message_pending(ch)){ /* receive the message and call the callback*/ ret=ch->ops->recv(ch,&msg); if(ret != IPC_OK){ /* If IPC is broken * the we return FALSE, which results in removing of * this class in handle function * This should only happen when ccm is shutdown before the client * */ cl_log(LOG_INFO, "mem_handle_func:IPC broken, ccm is dead before the client!"); return FALSE; } type = ((ccm_meminfo_t *)msg->msg_body)->ev; cookie= mbr_track = NULL; size=0; oc_type = OC_EV_MS_INVALID; switch(type) { case CCM_NEW_MEMBERSHIP :{ ccm_meminfo_t* cmi = (ccm_meminfo_t*)msg->msg_body; size = get_new_membership(private, cmi, msg->msg_len, &mbr_track); mbr_track->quorum = quorum = cmi->quorum; /* 
if no quorum, delete the bornon dates for lost * nodes, add bornon dates for the new nodes and * return * * however if special behaviour is being asked * for report the membership even when this node * has no quorum. */ if (!private->special && !quorum){ update_bornons(private, mbr_track); private->client_report = FALSE; mem_free_func(mbr_track); break; } private->client_report = TRUE; /* if quorum and old membership is same as the new * membership set type to OC_EV_MS_RESTORED , * pick the old membership and deliver it. * Do not construct a new membership */ if (membership_unchanged(private, mbr_track)){ mbr_track_t* old_mbr_track; old_mbr_track = (mbr_track_t *) cookie_get_data(private->cookie); if (mbr_track->quorum == old_mbr_track->quorum){ oc_type = OC_EV_MS_PRIMARY_RESTORED; }else { cl_log(LOG_DEBUG, "membership unchanged but quorum changed"); oc_type = quorum? OC_EV_MS_NEW_MEMBERSHIP: OC_EV_MS_INVALID; } } else { oc_type = quorum? OC_EV_MS_NEW_MEMBERSHIP: OC_EV_MS_INVALID; /* NOTE: OC_EV_MS_INVALID overloaded to * mean that the membership has no quorum. * This is returned only when special behaviour * is asked for. In normal behaviour case * (as per 0.2 version of the api), * OC_EV_MS_INVALID is never returned. * I agree this is a kludge!! */ if(!private->special) { assert(oc_type == OC_EV_MS_NEW_MEMBERSHIP); } } update_bornons(private, mbr_track); cookie_unref(private->cookie); cookie = cookie_construct(mem_callback_done, mem_free_func, mbr_track); private->cookie = cookie; size = OC_EV_GET_SIZE(mbr_track); break; } case CCM_EVICTED: oc_type = OC_EV_MS_EVICTED; private->client_report = TRUE; size = 0; mbr_track = NULL; if (private->cookie){ cookie_unref(private->cookie); } cookie= cookie_construct(mem_callback_done, NULL,NULL); if ( cookie == NULL){ cl_log(LOG_ERR, "mem_handle_event: coookie construction failed"); abort(); } private->cookie=cookie; break; case CCM_INFLUX: if(type==CCM_INFLUX){ oc_type = OC_EV_MS_NOT_PRIMARY; } cookie = private->cookie; if(cookie) { mbr_track = (mbr_track_t *) cookie_get_data(cookie); size=mbr_track? OC_EV_GET_SIZE(mbr_track): 0; } else { /* if no cookie exists, create one. * This can happen if no membership * has been delivered. 
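 * More generally, CCM_INFLUX never builds a new member list: when a
 * cached membership exists it is simply handed back to the callback
 * tagged OC_EV_MS_NOT_PRIMARY, so the client keeps seeing the last
 * primary membership while the cluster is in transition; otherwise a
 * fresh, empty cookie is created just below and no membership data
 * accompanies the event.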
*/ mbr_track=NULL; size=0; cookie = private->cookie = cookie_construct(mem_callback_done, NULL,NULL); } break; case CCM_LLM: on_llm_msg(private, msg); } cl_log(LOG_INFO, "%s: Got an event %s from ccm" , __FUNCTION__ , EVENT_STRING(oc_type)); #define ALAN_DEBUG 1 #ifdef ALAN_DEBUG if (!mbr_track) { cl_log(LOG_INFO, "%s: no mbr_track info" , __FUNCTION__); }else{ cl_log(LOG_INFO , "%s: instance=%d, nodes=%d, new=%d, lost=%d" ", n_idx=%d, new_idx=%d, old_idx=%d" , __FUNCTION__ , mbr_track->m_mem.m_instance , mbr_track->m_mem.m_n_member , mbr_track->m_mem.m_n_in , mbr_track->m_mem.m_n_out , mbr_track->m_mem.m_memb_idx , mbr_track->m_mem.m_in_idx , mbr_track->m_mem.m_out_idx); } #endif if(private->callback && private->client_report && cookie){ cookie_ref(cookie); private->callback(oc_type, (uint *)cookie, size, mbr_track?&(mbr_track->m_mem):NULL); } if(ret==IPC_OK) { msg->msg_done(msg); } else { return FALSE; } if(type == CCM_EVICTED) { /* clean up the dynamic information in the * private structure */ reset_llm(private); reset_bornon(private); cookie_unref(private->cookie); private->cookie = NULL; } } return TRUE; } static int mem_activate(class_t *class) { mbr_private_t *private; struct IPC_CHANNEL *ch; int sockfd; if(!class_valid(class)) { return -1; } /* if already activated */ private = (mbr_private_t *)class->private; if(private->llm){ return -1; } ch = private->channel; if(!ch || ch->ops->initiate_connection(ch) != IPC_OK) { return -1; } ch->ops->set_recv_qlen(ch, 0); sockfd = ch->ops->get_recv_select_fd(ch); return sockfd; } static void mem_unregister(class_t *class) { mbr_private_t *private; struct IPC_CHANNEL *ch; private = (mbr_private_t *)class->private; ch = private->channel; /* TOBEDONE * call all instances, of message done * on channel ch. 
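 * Note on lifetimes: each delivery above takes an extra reference on
 * the cookie (cookie_ref) before invoking the client callback, and the
 * client is expected to hand that cookie to oc_ev_callback_done() when
 * it has finished with the membership data; that call runs
 * mem_callback_done(), dropping the reference so the cached membership
 * is freed once nobody uses it any more.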
*/ ch->ops->destroy(ch); g_free(private->llm); g_free(private); } static oc_ev_callback_t * mem_set_callback(class_t *class, oc_ev_callback_t f) { mbr_private_t *private; oc_ev_callback_t *ret_f; if(!class_valid(class)){ return NULL; } private = (mbr_private_t *)class->private; ret_f = private->callback; private->callback = f; return ret_f; } /* this function is a kludge, to accomodate special behaviour not * supported by 0.2 version of the API */ static void mem_set_special(class_t *class, int type) { mbr_private_t *private; if(!class_valid(class)) { return; } private = (mbr_private_t *)class->private; private->special = 1; /* turn on the special behaviour not supported by 0.2 version of the API */ return; } static gboolean mem_is_my_nodeid(class_t *class, const oc_node_t *node) { mbr_private_t *private; if(!class_valid(class)){ return FALSE; } private = (mbr_private_t *)class->private; if (node->node_id == CLLM_GET_MYUUID(private->llm)){ return TRUE; } return FALSE; } class_t * oc_ev_memb_class(oc_ev_callback_t *fn) { mbr_private_t *private; class_t *memclass; struct IPC_CHANNEL *ch; GHashTable * attrs; static char path[] = IPC_PATH_ATTR; static char ccmfifo[] = CCMFIFO; memclass = g_malloc(sizeof(class_t)); if (!memclass){ return NULL; } private = (mbr_private_t *)g_malloc0(sizeof(mbr_private_t)); if (!private) { g_free(memclass); return NULL; } memclass->type = OC_EV_MEMB_CLASS; memclass->set_callback = mem_set_callback; memclass->handle_event = mem_handle_event; memclass->activate = mem_activate; memclass->unregister = mem_unregister; memclass->is_my_nodeid = mem_is_my_nodeid; memclass->special = mem_set_special; memclass->private = (void *)private; private->callback = fn; private->magiccookie = 0xabcdef; private->client_report = FALSE; private->special = 0; /* no special behaviour */ private->llm = NULL; attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, ccmfifo); ch = ipc_channel_constructor(IPC_DOMAIN_SOCKET, attrs); g_hash_table_destroy(attrs); if(!ch) { g_free(memclass); g_free(private); return NULL; } private->channel = ch; return memclass; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmllm.c0000644000000000000000000002133711576626513021515 0ustar00usergroup00000000000000/* * ccmllm.c: Low Level membership routines. * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include "ccm.h" int llm_get_nodecount(llm_info_t* llm){ if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return -1; } return llm->nodecount; } int llm_get_live_nodecount(llm_info_t *llm) { int count = 0; int i; if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return -1; } for ( i = 0 ; i < llm->nodecount; i++ ) { const char* status = llm->nodes[i].status; if (STRNCMP_CONST(status, DEADSTATUS) != 0){ count++; } } return count; } const char * llm_get_nodename(llm_info_t *llm, const int index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return ""; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return ""; } return llm->nodes[index].nodename; } char * llm_get_nodestatus(llm_info_t* llm, const int index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return NULL; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return NULL; } return llm->nodes[index].status; } int llm_node_cmp(llm_info_t *llm, int indx1, int indx2) { return strncmp(llm_get_nodename(llm, indx1), llm_get_nodename(llm, indx2), NODEIDSIZE); } void llm_display(llm_info_t *llm) { unsigned int i; ccm_debug2(LOG_DEBUG, "total node number is %d", llm->nodecount); for (i = 0 ;i < llm->nodecount; i++){ ccm_debug2(LOG_DEBUG, "node %d =%s, status=%s", i, llm->nodes[i].nodename, llm->nodes[i].status); } } int llm_get_myindex(llm_info_t* llm) { if (llm == NULL){ ccm_log(LOG_ERR, "NULL pointer"); return -1; } return llm->myindex; } const char* llm_get_mynodename(llm_info_t* llm) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return NULL; } if (llm->myindex < 0){ ccm_log(LOG_ERR, "%s: mynode is not set", __FUNCTION__); return NULL; } return llm->nodes[llm->myindex].nodename; } int llm_get_index(llm_info_t *llm, const char *node) { int low,high,mid; int value; /*binary search */ low = 0; high = llm->nodecount - 1; do { mid = (low+high+1)/2; value = strncmp(llm_get_nodename(llm, mid), node, NODEIDSIZE); if(value==0) { return mid; } if(high == low) { break; } if(value > 0) { high=mid-1; } else { low=mid+1; } } while(high>=low); return -1; } int llm_status_update(llm_info_t *llm, const char *node, const char *status, char* oldstatus) { int i; i = llm_get_index(llm, node); if(i == -1){ return HA_FAIL; } if (oldstatus){ strncpy(oldstatus, llm->nodes[i].status, STATUSSIZE); } strncpy(llm->nodes[i].status, status, STATUSSIZE); return HA_OK; } int llm_is_valid_node(llm_info_t *llm, const char *node) { if(llm_get_index(llm, node) == -1 ) { return FALSE; } return TRUE; } int llm_init(llm_info_t *llm) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } llm->nodecount = 0; llm->myindex = -1; return HA_OK; } int llm_del(llm_info_t* llm, const char* node) { int i; int j; for ( i = 0 ;i < llm->nodecount; i++){ if (strncmp(llm->nodes[i].nodename, node, NODEIDSIZE)==0){ break; } } if (i == llm->nodecount){ ccm_log(LOG_ERR, "%s: Node %s not found in llm", __FUNCTION__, node); return HA_FAIL; } if (llm->myindex > i){ llm->myindex --; }else if (llm->myindex ==i){ ccm_log(LOG_ERR, "%s: deleting myself in ccm is not allowed", __FUNCTION__); return HA_FAIL; } for ( j = i; j< 
llm->nodecount - 1; j++){ strncpy(llm->nodes[j].nodename, llm->nodes[j+1].nodename, NODEIDSIZE); strncpy(llm->nodes[j].status, llm->nodes[j+1].status, STATUSSIZE); } llm->nodecount --; return HA_OK; } int llm_add(llm_info_t *llm, const char *node, const char *status, const char *mynode, const char *site, int weight) { int nodecount; int i, j; nodecount = llm->nodecount; if (nodecount < 0 || nodecount > MAXNODE ){ ccm_log(LOG_ERR, "nodecount out of range(%d)", nodecount); return HA_FAIL; } for ( i = 0 ; i < nodecount ; i++ ) { int value = strncmp(llm_get_nodename(llm, i), node, NODEIDSIZE); if (value == 0){ ccm_log(LOG_ERR, "%s: adding same node(%s) twice(?)", __FUNCTION__, node); return HA_FAIL; } if (value > 0) { break; } } for ( j = nodecount; j > i; j-- ) { llm->nodes[j] = llm->nodes[j - 1]; } llm->nodes[i].join_request = FALSE; strncpy(llm->nodes[i].nodename, node,NODEIDSIZE); strncpy(llm->nodes[i].status, status, STATUSSIZE); strncpy(llm->nodes[i].site, site, PATH_MAX); llm->nodes[i].weight = weight; llm->nodecount++; if (llm->myindex >= i) { llm->myindex++; } if (llm->myindex < 0 && strncmp(mynode, node, NODEIDSIZE) == 0) { llm->myindex = i; } if (llm->myindex >= llm->nodecount){ ccm_log(LOG_ERR, "%s: myindex(%d) out of range," "llm->nodecount =%d", __FUNCTION__, llm->myindex, llm->nodecount); return HA_FAIL; } return HA_OK; } int llm_set_joinrequest(llm_info_t* llm, int index, gboolean value, int major_trans) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return HA_FAIL; } llm->nodes[index].join_request = value; llm->nodes[index].join_request_major_trans = major_trans; return HA_OK; } gboolean llm_get_joinrequest(llm_info_t* llm, int index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return FALSE; } return llm->nodes[index].join_request; } gboolean llm_get_joinrequest_majortrans(llm_info_t* llm, int index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return FALSE; } return llm->nodes[index].join_request_major_trans; } int llm_set_change(llm_info_t* llm, int index, gboolean value) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return HA_FAIL; } llm->nodes[index].receive_change_msg = value; return HA_OK; } gboolean llm_get_change(llm_info_t* llm, int index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return FALSE; } return llm->nodes[index].receive_change_msg; } int llm_set_uptime(llm_info_t* llm, int index, int uptime) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return FALSE; } if (uptime < 0){ ccm_log(LOG_ERR, "%s: Negative uptime %d for node %d [%s]", __FUNCTION__, uptime, index, llm_get_nodename(llm,index)); return FALSE; } llm->nodes[index].uptime = uptime; return HA_OK; } int llm_get_uptime(llm_info_t* llm, int 
index) { if (llm == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return -1; } if (index < 0 || index > MAXNODE){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return -1; } return llm->nodes[index].uptime; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmain.c0000644000000000000000000001542111576626513021652 0ustar00usergroup00000000000000/* * ccm.c: Consensus Cluster Service Program * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include "ccm.h" #include "ccmmisc.h" #include #define SECOND 1000 #define OPTARGS "dv" GMainLoop* mainloop = NULL; /* * hearbeat event source. * */ static gboolean hb_input_dispatch(IPC_Channel *, gpointer); static void hb_input_destroy(gpointer); static gboolean hb_timeout_dispatch(gpointer); static gboolean hb_input_dispatch(IPC_Channel * channel, gpointer user_data) { if (channel && (channel->ch_status == IPC_DISCONNECT)) { ccm_log(LOG_ERR, "Lost connection to heartbeat service. Need to bail out."); return FALSE; } return ccm_take_control(user_data); } static void hb_input_destroy(gpointer user_data) { /* close connections to all the clients */ client_delete_all(); g_main_quit(mainloop); return; } static gboolean hb_timeout_dispatch(gpointer user_data) { if(debug_level > 0) { ccm_check_memoryleak(); } return hb_input_dispatch(0, user_data); } /* * client messaging events sources... * */ static gboolean clntCh_input_dispatch(IPC_Channel * , gpointer); static void clntCh_input_destroy(gpointer ); static gboolean clntCh_input_dispatch(IPC_Channel *client, gpointer user_data) { if(client->ch_status == IPC_DISCONNECT){ ccm_log(LOG_INFO, "client (pid=%d) removed from ccm", client->farside_pid); client_delete(client); return FALSE; } return TRUE; /* TOBEDONE */ } static void clntCh_input_destroy(gpointer user_data) { return; } /* * client connection events source.. * */ static gboolean waitCh_input_dispatch(IPC_Channel *, gpointer); static void waitCh_input_destroy(gpointer); static gboolean waitCh_input_dispatch(IPC_Channel *newclient, gpointer user_data) { client_add(newclient); /* Some ccm clients may not consume our messages fast enough, * but will have serious trouble if we kick them out for that reason. * Worse, still, if send_message() "live locks" on one client, we won't * be able to drain requests from other clients, possibly causing them * to close their connection to us, or even causing the heartbeat MCP * to kick _us_ out. Kick out the ccm! We really want to avoid that. * Chose a "generous" queue length. * And consider kicking out clients that still manage to have their * queue fill up. 
(TOBEDONE) */ newclient->ops->set_send_qlen(newclient, 1024); G_main_add_IPC_Channel(G_PRIORITY_LOW, newclient, FALSE, clntCh_input_dispatch, newclient, clntCh_input_destroy); return TRUE; } static void waitCh_input_destroy(gpointer user_data) { IPC_WaitConnection *wait_ch = (IPC_WaitConnection *)user_data; wait_ch->ops->destroy(wait_ch); return; } static IPC_WaitConnection * wait_channel_init(void) { IPC_WaitConnection *wait_ch; mode_t mask; char path[] = IPC_PATH_ATTR; char ccmfifo[] = CCMFIFO; char domainsocket[] = IPC_DOMAIN_SOCKET; GHashTable * attrs = g_hash_table_new(g_str_hash,g_str_equal); g_hash_table_insert(attrs, path, ccmfifo); mask = umask(0); wait_ch = ipc_wait_conn_constructor(domainsocket, attrs); if (wait_ch == NULL){ cl_perror("Can't create wait channel"); exit(1); } mask = umask(mask); g_hash_table_destroy(attrs); return wait_ch; } static void usage(const char *cmd) { fprintf(stderr, "\nUsage: %s [-v]\n", cmd); } /* */ /* debug facilitator. */ /* */ static void usr_signal_handler(int signum) { switch(signum) { case SIGUSR1: debug_level++; break; case SIGUSR2: debug_level--; if (debug_level < 0) { debug_level = 0; } break; } if(debug_level > 0){ cl_log_enable_stderr(TRUE); } else { cl_log_enable_stderr(FALSE); } ccm_log(LOG_INFO, "set debug_level to %d", debug_level); } static gboolean ccm_shutdone(int sig, gpointer userdata) { ccm_t* ccm = (ccm_t*)userdata; ccm_info_t * info = (ccm_info_t*)ccm->info; ccm_log(LOG_INFO, "received SIGTERM, going to shut down"); if (info == NULL){ ccm_log(LOG_ERR, "ccm_shutdone: invalid arguments"); return FALSE; } ccm_reset(info); g_main_quit(mainloop); return TRUE; } /* */ /* The main function! */ /* */ int main(int argc, char **argv) { IPC_WaitConnection *wait_ch; char *cmdname; char *tmp_cmdname; int flag; ccm_t* ccm; tmp_cmdname = g_strdup(argv[0]); if ((cmdname = strrchr(tmp_cmdname, '/')) != NULL) { ++cmdname; } else { cmdname = tmp_cmdname; } cl_log_set_entity(cmdname); cl_log_set_facility(HA_LOG_FACILITY); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'v': /* Debug mode, more logs*/ ++debug_level; break; default: usage(cmdname); return 1; } } if(debug_level > 0) cl_log_enable_stderr(TRUE); CL_SIGNAL(SIGUSR1, usr_signal_handler); CL_SIGNAL(SIGUSR2, usr_signal_handler); CL_IGNORE_SIG(SIGPIPE); cl_inherit_logging_environment(0); /* initialize the client tracking system */ client_init(); /* * heartbeat is the main source of events. * This source must be listened * at high priority */ ccm = ccm_initialize(); if(ccm == NULL){ ccm_log(LOG_ERR, "Initialization failed. Exit"); exit(1); } G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGTERM, ccm_shutdone, ccm, NULL); /* we want hb_input_dispatch to be called when some input is * pending on the heartbeat fd, and every 1 second */ G_main_add_IPC_Channel(G_PRIORITY_HIGH, ccm_get_ipcchan(ccm), FALSE, hb_input_dispatch, ccm, hb_input_destroy); Gmain_timeout_add_full(G_PRIORITY_HIGH, SECOND, hb_timeout_dispatch, ccm, hb_input_destroy); /* the clients wait channel is the other source of events. * This source delivers the clients connection events. * listen to this source at a relatively lower priority. 
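 * Concretely: the heartbeat IPC channel and the one-second
 * hb_timeout_dispatch() timer are registered at G_PRIORITY_HIGH, while
 * the wait channel below and each accepted client channel run at
 * G_PRIORITY_LOW, so client traffic is only serviced when no
 * membership events from heartbeat are pending.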
*/ wait_ch = wait_channel_init(); G_main_add_IPC_WaitConnection(G_PRIORITY_LOW, wait_ch, NULL, FALSE, waitCh_input_dispatch, wait_ch, waitCh_input_destroy); mainloop = g_main_loop_new(NULL, FALSE); g_main_run(mainloop); g_main_destroy(mainloop); g_free(tmp_cmdname); /*this program should never terminate,unless killed*/ return(1); } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmem.c0000644000000000000000000001300711576626513021502 0ustar00usergroup00000000000000 /* * ccmmem.c: membership routine * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include "ccm.h" #include "ccmmisc.h" void ccm_mem_reset(ccm_info_t* info) { info->memcount = 0; return; } int ccm_mem_add(ccm_info_t* info, int index) { int count; if (info == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (index < 0 || index > llm_get_nodecount(&info->llm)){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return HA_FAIL; } count = info->memcount; info->ccm_member[count] = index; info->memcount++; if (info->memcount >= MAXNODE){ ccm_log(LOG_ERR, "%s: membership count(%d) out of range", __FUNCTION__, info->memcount); return HA_FAIL; } return HA_OK; } int ccm_get_memcount(ccm_info_t* info){ if (info == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return -1; } return info->memcount; } int ccm_mem_bitmapfill(ccm_info_t *info, const char *bitmap) { llm_info_t *llm; uint i; llm = &info->llm; ccm_mem_reset(info); for ( i = 0 ; i < llm_get_nodecount(llm); i++ ) { if(bitmap_test(i, bitmap, MAXNODE)){ if (ccm_mem_add(info, i) != HA_OK){ ccm_log(LOG_ERR, "%s: adding node(%s)" "to member failed", __FUNCTION__, llm_get_nodename(llm, i)); return HA_FAIL; } } } return HA_OK; } int ccm_mem_strfill(ccm_info_t *info, const char *memlist) { char *bitmap = NULL; int ret; bitmap_create(&bitmap, MAXNODE); if (bitmap == NULL){ ccm_log(LOG_ERR, "%s:bitmap creation failure", __FUNCTION__); return HA_FAIL; } if (ccm_str2bitmap((const char *) memlist, strlen(memlist), bitmap) < 0){ ccm_log(LOG_ERR, "%s: string(%s) to bitmap conversion failed", __FUNCTION__, memlist); return HA_FAIL; } ret = ccm_mem_bitmapfill(info, bitmap); bitmap_delete(bitmap); return ret; } gboolean node_is_member(ccm_info_t* info, const char* node) { int i,indx; llm_info_t *llm = &info->llm; if (info == NULL || node == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } for ( i = 0 ; i < ccm_get_memcount(info) ; i++ ) { indx = info->ccm_member[i]; if(strncmp(llm_get_nodename(llm, indx), node, NODEIDSIZE) == 0){ return TRUE; } } return FALSE; } gboolean i_am_member(ccm_info_t* info) { llm_info_t* llm; int i; if (info == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } llm = &info->llm; if (llm->myindex <0){ 
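/* myindex is set only by llm_add(), and only when the node being added
 * matches the local node name; a negative value here therefore means
 * the local node was never entered into the low-level membership. */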
ccm_log(LOG_ERR, "%s: myindex in llm is not set", __FUNCTION__); return FALSE; } for (i = 0; i < info->memcount; i++){ if (info->ccm_member[i] == llm->myindex){ return TRUE; } } return FALSE; } int ccm_mem_delete(ccm_info_t* info, int index) { int i; int memcount; if (info == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return HA_FAIL; } if (index < 0 || index > llm_get_nodecount(&info->llm)){ ccm_log(LOG_ERR, "%s: index(%d) out of range", __FUNCTION__, index); return HA_FAIL; } memcount = info->memcount; for (i =0; i < info->memcount; i++){ if (info->ccm_member[i] == index){ info->ccm_member[i] = info->ccm_member[memcount-1]; info->memcount--; return HA_OK; } } ccm_log(LOG_ERR, "%s: node index(%d) not found in membership", __FUNCTION__, index); return HA_FAIL; } int ccm_mem_update(ccm_info_t *info, const char *node, enum change_event_type change_type) { llm_info_t *llm = &info->llm; if (change_type == NODE_LEAVE){ return ccm_mem_delete(info, llm_get_index(llm ,node)); }else{ return ccm_mem_add(info, llm_get_index(llm, node)); } } int ccm_mem_filluptime(ccm_info_t* info, int* uptime_list, int uptime_size) { int i; if (uptime_size != info->memcount){ ccm_log(LOG_ERR, "%s: uptime_list size (%d) != memcount(%d)", __FUNCTION__, uptime_size, info->memcount); return HA_FAIL; } for (i = 0; i < info->memcount; i++){ llm_set_uptime(&info->llm, info->ccm_member[i], uptime_list[i]); } return HA_OK; } int am_i_member_in_memlist(ccm_info_t *info, const char *memlist) { char *bitmap = NULL; int numBytes, myindex; llm_info_t *llm; if (info == NULL || memlist == NULL){ ccm_log(LOG_ERR, "%s: NULL pointer", __FUNCTION__); return FALSE; } llm = &info->llm; bitmap_create(&bitmap, MAXNODE); if (bitmap == NULL){ ccm_log(LOG_ERR ,"%s: bitmap creatation failed", __FUNCTION__); return FALSE; } numBytes = ccm_str2bitmap(memlist, strlen(memlist), bitmap); myindex = llm_get_myindex(llm); if (bitmap_test(myindex, bitmap, numBytes*BitsInByte)){ bitmap_delete(bitmap); return TRUE; } bitmap_delete(bitmap); return FALSE; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmisc.c0000644000000000000000000001433011576626513021657 0ustar00usergroup00000000000000/* * ccmmisc.c: Miscellaneous Consensus Cluster Service functions * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #ifdef HAVE_MALLINFO #include #endif #include "ccmmisc.h" #if 0 int ccm_bitmap2str(const char *bitmap, char* memlist, int size) { int num_member = 0; char* p; int i; if (bitmap == NULL || memlist == NULL || size <= 0){ ccm_log(LOG_ERR, "invalid arguments"); return -1; } p =memlist; for ( i = 0 ; i < MAXNODE; i++ ) { if(bitmap_test(i, bitmap, MAXNODE)){ num_member++; p += sprintf(p, "%d ", i); } } return strnlen(memlist, size); } int ccm_str2bitmap(const char *_memlist, int size, char *bitmap) { char memlist[MAX_MEMLIST_STRING]; char* p; int num_members = 0; if (memlist == NULL || size <= 0 || size >= MAX_MEMLIST_STRING || bitmap == NULL){ ccm_log(LOG_ERR, "invalid arguments"); return -1; } memset(memlist, 0, MAX_MEMLIST_STRING); memcpy(memlist, _memlist, size); p = strtok(memlist, " "); while ( p != NULL){ int i = atoi(p); bitmap_mark(i, bitmap, MAXNODE); num_members ++; p = strtok(NULL, " "); } return num_members; } #else int ccm_str2bitmap(const char *memlist, int size, char *bitmap) { int outbytes = B64_maxbytelen(size); if (size == 0) { return 0; } outbytes = base64_to_binary(memlist, size, bitmap, outbytes); return outbytes; } int ccm_bitmap2str(const char *bitmap, char *memlist, int size) { int maxstrsize; int bytes; bytes = MAXNODE / BitsInByte; if (MAXNODE%BitsInByte != 0){ bytes++; } maxstrsize = B64_stringlen(bytes)+1; if (maxstrsize > MAX_MEMLIST_STRING){ ccm_log(LOG_ERR, "MAX_MEMLIST_STRING is too small(%d), sized required %d", MAX_MEMLIST_STRING, maxstrsize); return -1; } return binary_to_base64(bitmap, bytes, memlist, size); } #endif longclock_t ccm_get_time(void) { return time_longclock(); } /* * given two times, and a timeout interval(in milliseconds), * return true if the timeout has occured, else return * false. * NOTE: 'timeout' is in milliseconds. */ int ccm_timeout(longclock_t t1, longclock_t t2, unsigned long timeout) { longclock_t t1cl; t1cl = add_longclock(t1 , msto_longclock(timeout)); if(cmp_longclock(t1cl, t2) < 0) { return TRUE; } return FALSE; } void ccm_check_memoryleak(void) { #ifdef HAVE_MALLINFO /* check for memory leaks */ struct mallinfo i; static int arena=0; static int count = 0; ++count; /* Mallinfo is surprisingly expensive */ if (count >= 60) { count = 0; i = mallinfo(); if(arena==0) { arena = i.arena; } else if(arena < i.arena) { ccm_debug(LOG_WARNING, "leaking memory? previous arena=%d " "present arena=%d", arena, i.arena); arena=i.arena; } } #endif } /* * When ccm running on a node leaves the cluster voluntarily it * sends a leave message to the other nodes in the cluster. * Similarly whenever ccm running on some node of the cluster, * dies the local heartbeat delivers a leave message to ccm. * And whenever some node in the cluster dies, local heartbeat * informs the death through a callback. * In all these cases, ccm is informed about the loss of the node, * asynchronously, in some context where immidiate processing of * the message is not possible. * The following set of routines act as a cache that keep track * of message leaves and facilitates the delivery of these * messages at a convinient time. 
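 * In practice (illustrative summary): leave_init() creates the bitmap
 * once at startup, the asynchronous notification paths record a
 * departed node with leave_cache(index), and the state machine later
 * polls leave_any() and drains pending entries one at a time with
 * leave_get_next(), which clears each bit as it returns it.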
* */ static char *leave_bitmap=NULL; void leave_init(void) { int numBytes; assert(!leave_bitmap); numBytes = bitmap_create(&leave_bitmap, MAXNODE); memset(leave_bitmap, 0, numBytes); } void leave_reset(void) { int numBytes = bitmap_size(MAXNODE); if(!leave_bitmap) { return; } memset(leave_bitmap, 0, numBytes); return; } void leave_cache(int i) { assert(leave_bitmap); bitmap_mark(i, leave_bitmap, MAXNODE); } int leave_get_next(void) { int i; assert(leave_bitmap); for ( i = 0 ; i < MAXNODE; i++ ) { if(bitmap_test(i,leave_bitmap,MAXNODE)) { bitmap_clear(i,leave_bitmap,MAXNODE); return i; } } return -1; } int leave_any(void) { if(bitmap_count(leave_bitmap,MAXNODE)){ return TRUE; } return FALSE; } gboolean part_of_cluster(int state) { if (state >= CCM_STATE_END || state < 0){ ccm_log(LOG_ERR, "part_of_cluster:wrong state(%d)", state); return FALSE; } if (state == CCM_STATE_VERSION_REQUEST || state == CCM_STATE_NONE){ return FALSE; } return TRUE; } /* the ccm strings tokens communicated aross the wire. * these are the values for the F_TYPE names. */ #define TYPESTRSIZE 32 char ccm_type_str[CCM_TYPE_LAST + 1][TYPESTRSIZE] = { "CCM_TYPE_PROTOVERSION", "CCM_TYPE_PROTOVERSION_RESP", "CCM_TYPE_JOIN", "CCM_TYPE_REQ_MEMLIST", "CCM_TYPE_RES_MEMLIST", "CCM_TYPE_FINAL_MEMLIST", "CCM_TYPE_ABORT", "CCM_TYPE_LEAVE", "CCM_TYPE_TIMEOUT", "CCM_TYPE_NODE_LEAVE_NOTICE", "CCM_TYPE_NODE_LEAVE", "CCM_TYPE_MEM_LIST", "CCM_TYPE_ALIVE", "CCM_TYPE_NEW_NODE", "CCM_TYPE_STATE_INFO", "CCM_TYPE_RESTART", "CCM_TYPE_LAST" }; int ccm_string2type(const char *type) { int i; for ( i = CCM_TYPE_PROTOVERSION; i <= CCM_TYPE_LAST; i++ ) { if (strncmp(ccm_type_str[i], type, TYPESTRSIZE) == 0){ return i; } } /* this message is not any type of ccm state messages * but some other message from heartbeat */ return -1; } char * ccm_type2string(enum ccm_type type) { return ccm_type_str[type]; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmisc.h0000644000000000000000000000351511576626513021667 0ustar00usergroup00000000000000 /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _CCM_MISC_H_ #define _CCM_MISC_H_ #define MAX_MEMLIST_STRING 512 int ccm_bitmap2str(const char *bitmap, char* memlist, int size); int ccm_str2bitmap(const char *_memlist, int size, char *bitmap); void leave_init(void); void leave_reset(void); void leave_cache(int i); int leave_get_next(void); int leave_any(void); int ccm_mem_bitmapfill(ccm_info_t *info, const char *bitmap); int ccm_mem_strfill(ccm_info_t *info, const char *memlist); gboolean node_is_member(ccm_info_t* info, const char* node); gboolean part_of_cluster(int state); int ccm_string2type(const char *type); char* ccm_type2string(enum ccm_type type); void ccm_mem_reset(ccm_info_t* info); int ccm_mem_add(ccm_info_t*, int index); int ccm_get_memcount(ccm_info_t* info); int ccm_mem_delete(ccm_info_t* info, int index); int ccm_mem_update(ccm_info_t *info, const char *node, enum change_event_type change_type); int ccm_mem_filluptime(ccm_info_t* info, int* uptime_list, int uptime_size); gboolean i_am_member(ccm_info_t* info); int am_i_member_in_memlist(ccm_info_t *info, const char *memlist); #endif Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmsg.c0000644000000000000000000003430311576626513021514 0ustar00usergroup00000000000000/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "ccm.h" #include "ccmmsg.h" #include "ccmmisc.h" #include #include #include #include #include static void dump_sending_msg(struct ha_msg* msg, const char* node); /* FIXEM dead code alert: unused?? */ int ccm_send_cluster_msg(ll_cluster_t* hb, struct ha_msg* msg) { int rc; dump_sending_msg(msg, NULL); rc = hb->llc_ops->sendclustermsg(hb, msg); if (rc != HA_OK){ ccm_log(LOG_ERR, "sending out message failed"); ccm_message_debug2(LOG_DEBUG, msg); return rc; } return HA_OK; } /* FIXEM dead code alert: unused?? 
*/ int ccm_send_node_msg(ll_cluster_t* hb, struct ha_msg* msg, const char* node) { int rc; dump_sending_msg(msg, node); rc = hb->llc_ops->sendnodemsg(hb, msg, node); if (rc != HA_OK){ ccm_log(LOG_ERR, "sending out message failed"); ccm_message_debug2(LOG_DEBUG, msg); return rc; } return HA_OK; } static struct ha_msg* ccm_create_minimum_msg(ccm_info_t * info, int type) { struct ha_msg *m; char majortrans[15]; snprintf(majortrans, sizeof(majortrans), "%d", info->ccm_transition_major); if ((m=ha_msg_new(0)) == NULL) { ccm_log(LOG_ERR, "%s: creating a new message failed", __FUNCTION__); return NULL; } if( ha_msg_add(m, F_TYPE, ccm_type2string(type)) == HA_FAIL ||(ha_msg_add(m, CCM_MAJORTRANS, majortrans) == HA_FAIL) ||ha_msg_add_int(m, F_NUMNODES, info->llm.nodecount) == HA_FAIL){ ccm_log(LOG_ERR, "%s: adding fields to an message failed", __FUNCTION__); ha_msg_del(m); return NULL; } return m; } struct ha_msg* ccm_create_msg(ccm_info_t * info, int type) { struct ha_msg *m = ccm_create_minimum_msg(info, type); char majortrans[15]; char minortrans[15]; char joinedtrans[15]; char *cookie; if (m == NULL) { ccm_log(LOG_ERR, "%s: creating a new message failed", __FUNCTION__); return NULL; } snprintf(majortrans, sizeof(majortrans), "%d", info->ccm_transition_major); snprintf(minortrans, sizeof(minortrans), "%d", info->ccm_transition_minor); snprintf(joinedtrans, sizeof(joinedtrans), "%d", info->ccm_joined_transition); cookie = info->ccm_cookie; assert(cookie && *cookie); if((ha_msg_add(m, CCM_COOKIE, cookie) == HA_FAIL) ||(ha_msg_add(m, CCM_MAJORTRANS, majortrans) == HA_FAIL) ||(ha_msg_add(m, CCM_UPTIME, joinedtrans) == HA_FAIL) ||(ha_msg_add(m, CCM_MINORTRANS, minortrans) == HA_FAIL)){ ccm_log(LOG_ERR, "%s: adding fields to an message failed", __FUNCTION__); ha_msg_del(m); return NULL; } return m; } static int ccm_mod_msg(struct ha_msg* msg, ccm_info_t * info, int type) { struct ha_msg *m; char majortrans[15]; /* 10 is the maximum number of digits in UINT_MAX , adding a buffer of 5 */ char minortrans[15]; /* ditto */ char joinedtrans[15]; /* ditto */ char *cookie; llm_info_t* llm = &info->llm; if (msg == NULL){ ccm_log(LOG_ERR, "NULL message"); return HA_FAIL; } m = msg; snprintf(majortrans, sizeof(majortrans), "%d", info->ccm_transition_major); snprintf(minortrans, sizeof(minortrans), "%d", info->ccm_transition_minor); snprintf(joinedtrans, sizeof(joinedtrans), "%d", info->ccm_joined_transition); cookie = info->ccm_cookie; if (cookie == NULL){ abort(); } if(ha_msg_mod(m, F_TYPE, ccm_type2string(type)) == HA_FAIL || ha_msg_mod(m, CCM_COOKIE, cookie) == HA_FAIL || ha_msg_mod(m, CCM_MAJORTRANS, majortrans) == HA_FAIL || ha_msg_mod(m, CCM_UPTIME, joinedtrans) == HA_FAIL || ha_msg_mod(m, CCM_MINORTRANS, minortrans) == HA_FAIL || ha_msg_mod_int(m, F_NUMNODES, llm->nodecount) == HA_FAIL){ ccm_log(LOG_ERR, "%s: moding fields to an message failed", __FUNCTION__); return HA_FAIL; } return HA_OK; } int ccm_send_standard_clustermsg(ll_cluster_t* hb, ccm_info_t* info, int type) { struct ha_msg *m = ccm_create_msg(info, type); int rc; if (m == NULL){ ccm_log(LOG_ERR, "creating message failed"); return HA_FAIL; } dump_sending_msg(m, NULL); rc = hb->llc_ops->sendclustermsg(hb, m); ha_msg_del(m); return(rc); } static int ccm_send_minimum_clustermsg(ll_cluster_t* hb, ccm_info_t* info, int type) { struct ha_msg *m = ccm_create_minimum_msg(info, type); int rc; if (m == NULL) { ccm_log(LOG_ERR, "creating a new message failed"); return(HA_FAIL); } dump_sending_msg(m, NULL); rc = hb->llc_ops->sendclustermsg(hb, m); 
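/* All CCM wire messages built here share the minimal header added in
 * ccm_create_minimum_msg(): F_TYPE, CCM_MAJORTRANS and F_NUMNODES.
 * The full ccm_create_msg() variant additionally carries CCM_COOKIE,
 * CCM_MINORTRANS and CCM_UPTIME (the joined transition).  The helpers
 * below differ only in which extra field/value pair they append and in
 * whether the result is broadcast to the cluster or sent to one node. */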
ha_msg_del(m); return(rc); } static int ccm_send_extra_clustermsg(ll_cluster_t* hb, ccm_info_t* info, int type, const char* fieldname, const char* fieldvalue ) { struct ha_msg *m = ccm_create_msg(info, type); int rc; if (fieldname == NULL || fieldvalue == NULL){ ccm_log(LOG_ERR, "NULL argument"); return HA_FAIL; } if (m == NULL){ ccm_log(LOG_ERR, "message creating failed"); return HA_FAIL; } if ( ha_msg_add(m, fieldname, fieldvalue) == HA_FAIL){ ccm_log(LOG_ERR, "Adding a field failed"); ha_msg_del(m); return HA_FAIL; } dump_sending_msg(m, NULL); rc = hb->llc_ops->sendclustermsg(hb, m); ha_msg_del(m); return(rc); } static int ccm_send_extra_nodemsg(ll_cluster_t* hb, ccm_info_t* info, int type, const char* fieldname, const char* fieldvalue, const char* nodename ) { struct ha_msg *m = ccm_create_msg(info, type); int rc; if (fieldname == NULL || fieldvalue == NULL){ ccm_log(LOG_ERR, "NULL argument"); return HA_FAIL; } if (m == NULL){ ccm_log(LOG_ERR, "message creating failed"); return HA_FAIL; } if ( ha_msg_add(m, fieldname, fieldvalue) == HA_FAIL){ ccm_log(LOG_ERR, "Adding a field failed"); ha_msg_del(m); return HA_FAIL; } dump_sending_msg(m, nodename); rc = hb->llc_ops->sendnodemsg(hb, m, nodename); ha_msg_del(m); return(rc); } int ccm_send_protoversion(ll_cluster_t *hb, ccm_info_t *info) { return ccm_send_minimum_clustermsg(hb, info, CCM_TYPE_PROTOVERSION); } int ccm_send_join(ll_cluster_t *hb, ccm_info_t *info) { return ccm_send_standard_clustermsg(hb, info, CCM_TYPE_JOIN); } int ccm_send_memlist_request(ll_cluster_t *hb, ccm_info_t *info) { return ccm_send_standard_clustermsg(hb, info, CCM_TYPE_REQ_MEMLIST); } int ccm_send_memlist_res(ll_cluster_t *hb, ccm_info_t *info, const char *nodename, const char *memlist) { struct ha_msg *m = ccm_create_msg(info, CCM_TYPE_RES_MEMLIST); char maxtrans[15]; int rc; snprintf(maxtrans, sizeof(maxtrans), "%d", info->ccm_max_transition); if (!memlist) { memlist= ""; } if ( (ha_msg_add(m, CCM_MAXTRANS, maxtrans) == HA_FAIL) || (ha_msg_add(m, CCM_MEMLIST, memlist) == HA_FAIL)) { ccm_log(LOG_ERR, "ccm_send_memlist_res: Cannot create " "RES_MEMLIST message"); rc = HA_FAIL; ha_msg_del(m); return HA_FAIL; } dump_sending_msg(m, nodename); rc = hb->llc_ops->sendnodemsg(hb, m, nodename); ha_msg_del(m); return(rc); } int ccm_send_final_memlist(ll_cluster_t *hb, ccm_info_t *info, char *newcookie, char *finallist, uint32_t max_tran) { struct ha_msg *m = ccm_create_msg(info, CCM_TYPE_FINAL_MEMLIST); char activeproto[3]; char maxtrans[15]; int rc; if (m == NULL){ ccm_log(LOG_ERR, "msg creation failure"); return HA_FAIL; } snprintf(activeproto, sizeof(activeproto), "%d", info->ccm_active_proto); snprintf(maxtrans, sizeof(maxtrans), "%d", max_tran); assert(finallist); info->has_quorum = ccm_calculate_quorum(info); if (ha_msg_add(m, CCM_MAXTRANS, maxtrans) == HA_FAIL || ha_msg_add(m, CCM_MEMLIST, finallist) == HA_FAIL || ha_msg_add_int(m, CCM_QUORUM, info->has_quorum) == HA_FAIL ||(!newcookie?FALSE:(ha_msg_add(m,CCM_NEWCOOKIE,newcookie) ==HA_FAIL))) { ccm_log(LOG_ERR, "ccm_send_final_memlist: Cannot create " "FINAL_MEMLIST message"); rc = HA_FAIL; } else { dump_sending_msg(m, NULL); rc = hb->llc_ops->sendclustermsg(hb, m); } ha_msg_del(m); return(rc); } int ccm_send_one_join_reply(ll_cluster_t *hb, ccm_info_t *info, const char *joiner) { struct ha_msg *m; char activeproto[3]; char clsize[5]; int rc; /*send the membership information to all the nodes of the cluster*/ m=ccm_create_msg(info, CCM_TYPE_PROTOVERSION_RESP); if (m == NULL){ ccm_log(LOG_ERR, "%s: creating a 
message failed", __FUNCTION__); return(HA_FAIL); } snprintf(activeproto, sizeof(activeproto), "%d", info->ccm_active_proto); snprintf(clsize, sizeof(clsize), "%d", info->memcount); if ( ha_msg_add(m, CCM_PROTOCOL, activeproto) == HA_FAIL || ha_msg_add(m, CCM_CLSIZE, clsize) == HA_FAIL){ ccm_log(LOG_ERR, "ccm_send_one_join_reply: Cannot create JOIN " "reply message"); rc = HA_FAIL; } else { dump_sending_msg(m, joiner); rc = hb->llc_ops->sendnodemsg(hb, m, joiner); } ha_msg_del(m); return(rc); } int ccm_send_abort(ll_cluster_t *hb, ccm_info_t *info, const char *dest, const int major, const int minor) { struct ha_msg *m = ccm_create_msg(info, CCM_TYPE_ABORT); int rc; char majortrans[15]; char minortrans[15]; if (m == NULL){ return HA_FAIL; } snprintf(majortrans, sizeof(majortrans), "%d", info->ccm_transition_major); snprintf(minortrans, sizeof(minortrans), "%d", info->ccm_transition_minor); if (ha_msg_mod(m,CCM_MAJORTRANS ,majortrans) != HA_OK || ha_msg_mod(m,CCM_MINORTRANS ,majortrans) != HA_OK){ ccm_log(LOG_ERR, "modifying fields failed"); ha_msg_del(m); return HA_FAIL; } dump_sending_msg(m, dest); rc = hb->llc_ops->sendnodemsg(hb, m, dest); ha_msg_del(m); return(rc); } /* Fake up a leave message. * This is generally done when heartbeat informs ccm of the crash of * a cluster member. */ struct ha_msg * ccm_create_leave_msg(ccm_info_t *info, int uuid) { struct ha_msg *m = ccm_create_msg(info, CCM_TYPE_LEAVE); llm_info_t *llm; const char *nodename; if (m == NULL){ ccm_log(LOG_ERR, "message creating failed"); return NULL; } /* find the name of the node at index */ llm = &(info->llm); nodename = llm_get_nodename(llm, uuid); if(ha_msg_add(m, F_ORIG, nodename) == HA_FAIL) { ccm_log(LOG_ERR, "adding field failed"); ha_msg_del(m); return NULL; } return(m); } int timeout_msg_init(ccm_info_t *info) { return HA_OK; } static struct ha_msg * timeout_msg = NULL; struct ha_msg * timeout_msg_mod(ccm_info_t *info) { struct ha_msg *m = timeout_msg; char *hname; if (m !=NULL){ if (ccm_mod_msg(m, info, CCM_TYPE_TIMEOUT) != HA_OK){ ccm_log(LOG_ERR, "mod message failed"); ha_msg_del(timeout_msg); timeout_msg = NULL; return NULL; } return m; } m = ccm_create_minimum_msg(info, CCM_TYPE_TIMEOUT); if (m == NULL){ ccm_log(LOG_ERR, "creating a message failed"); return NULL; } hname = info->llm.nodes[info->llm.myindex].nodename; if (ha_msg_add(m, F_ORIG, hname) == HA_FAIL || ha_msg_add(m, CCM_COOKIE, " ") == HA_FAIL || ha_msg_add(m, CCM_MAJORTRANS, "0") == HA_FAIL ||(ha_msg_add(m, CCM_MINORTRANS, "0") == HA_FAIL)){ ccm_log(LOG_ERR, "Adding field to a message failed"); ha_msg_del(m); return NULL; } timeout_msg = m; return m; } /*broadcast CCM_TYPE_NODE_LEAVE_NOTICE */ int ccm_bcast_node_leave_notice(ll_cluster_t* hb, ccm_info_t* info, const char* node) { return ccm_send_extra_clustermsg(hb, info, CCM_TYPE_NODE_LEAVE_NOTICE, F_NODE, node); } int send_node_leave_to_leader(ll_cluster_t *hb, ccm_info_t *info, const char *node) { return ccm_send_extra_nodemsg(hb, info, CCM_TYPE_NODE_LEAVE, F_NODE, node, node); } int ccm_send_to_all(ll_cluster_t *hb, ccm_info_t *info, char *memlist, char *newcookie, int *uptime_list, size_t uptime_size) { struct ha_msg *m = ccm_create_msg(info, CCM_TYPE_MEM_LIST); char activeproto[3]; int rc; if (m == NULL){ ccm_log(LOG_ERR, "creating msg failed"); return HA_FAIL; } snprintf(activeproto, sizeof(activeproto), "%d", info->ccm_active_proto); info->has_quorum = ccm_calculate_quorum(info); if ( ha_msg_add(m, CCM_MEMLIST, memlist) == HA_FAIL || ha_msg_add_int(m, CCM_QUORUM, info->has_quorum) == 
HA_FAIL || cl_msg_add_list_int(m, CCM_UPTIMELIST, uptime_list, uptime_size) == HA_FAIL || !newcookie? FALSE: (ha_msg_add(m, CCM_NEWCOOKIE, newcookie) ==HA_FAIL)) { ccm_log(LOG_ERR, "ccm_send_final_memlist: Cannot create " "FINAL_MEMLIST message"); ha_msg_del(m); return HA_FAIL; } dump_sending_msg(m, NULL); rc = hb->llc_ops->sendclustermsg(hb, m); ha_msg_del(m); return(rc); } int ccm_send_alive_msg(ll_cluster_t *hb, ccm_info_t *info) { return ccm_send_standard_clustermsg(hb, info, CCM_TYPE_ALIVE); } int ccm_send_newnode_to_leader(ll_cluster_t *hb, ccm_info_t *info, const char *node) { return ccm_send_extra_nodemsg(hb, info, CCM_TYPE_NEW_NODE, F_NODE, node, info->llm.nodes[info->ccm_cluster_leader].nodename); } /* send a message to node * the message contains my state informaton */ int ccm_send_state_info(ll_cluster_t* hb, ccm_info_t* info, const char* node) { return ccm_send_extra_nodemsg(hb, info, CCM_TYPE_STATE_INFO, F_STATE, state2string(info->state), node); } int ccm_send_restart_msg(ll_cluster_t* hb, ccm_info_t* info) { return ccm_send_minimum_clustermsg(hb, info, CCM_TYPE_RESTART); } static void dump_sending_msg(struct ha_msg* msg, const char* node) { const char* type; const char* status; type = ha_msg_value(msg, F_TYPE); status = ha_msg_value(msg, F_STATUS); ccm_debug(LOG_DEBUG, "send msg %s to %s, status:%s" , type, node==NULL?"cluster":node, (status ? status : "[null]")); ccm_message_debug2(LOG_DEBUG, msg); } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmmsg.h0000644000000000000000000000523211576626513021520 0ustar00usergroup00000000000000/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
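 *
 * [Editorial annotation, not part of the original source.]  The senders
 * declared below, and implemented in ccmmsg.c above, share one pattern:
 * build an ha_msg stamped with the membership cookie and the major/minor
 * transition numbers via ccm_create_msg() (or via ccm_create_minimum_msg(),
 * which omits the cookie, for the PROTOVERSION, TIMEOUT and RESTART cases),
 * add any message-specific fields such as CCM_MEMLIST or F_NODE, log the
 * result with dump_sending_msg(), hand it to heartbeat through
 * hb->llc_ops->sendclustermsg() or sendnodemsg(), and finally release it
 * with ha_msg_del().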
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _CCMMSG_H_ #define _CCMMSG_H_ #include #include #include int ccm_send_cluster_msg(ll_cluster_t* hb, struct ha_msg* msg); int ccm_send_node_msg(ll_cluster_t* hb, struct ha_msg* msg, const char* node); struct ha_msg* ccm_create_msg(ccm_info_t * info, int type); int ccm_send_protoversion(ll_cluster_t *hb, ccm_info_t *info); int ccm_send_one_join_reply(ll_cluster_t *hb, ccm_info_t *info, const char *joiner); int ccm_send_standard_clustermsg(ll_cluster_t* hb, ccm_info_t* info, int type); int ccm_send_join(ll_cluster_t *hb, ccm_info_t *info); int ccm_send_memlist_request(ll_cluster_t *hb, ccm_info_t *info); int ccm_send_memlist_res(ll_cluster_t *hb, ccm_info_t *info, const char *nodename, const char *memlist); int ccm_send_final_memlist(ll_cluster_t *hb, ccm_info_t *info, char *newcookie, char *finallist, uint32_t max_tran); int ccm_send_abort(ll_cluster_t *hb, ccm_info_t *info, const char *dest, const int major, const int minor); struct ha_msg * ccm_create_leave_msg(ccm_info_t *info, int uuid); int timeout_msg_init(ccm_info_t *info); struct ha_msg* timeout_msg_mod(ccm_info_t *info); int ccm_bcast_node_leave_notice(ll_cluster_t* hb, ccm_info_t* info, const char* node); int send_node_leave_to_leader(ll_cluster_t *hb, ccm_info_t *info, const char *node); int ccm_send_to_all(ll_cluster_t *hb, ccm_info_t *info, char *memlist, char *newcookie, int *uptime_list, size_t uptime_size); int ccm_send_alive_msg(ll_cluster_t *hb, ccm_info_t *info); int ccm_send_newnode_to_leader(ll_cluster_t *hb, ccm_info_t *info, const char *node); int ccm_send_state_info(ll_cluster_t* hb, ccm_info_t* info, const char* node); int ccm_send_restart_msg(ll_cluster_t* hb, ccm_info_t* info); #endif Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmupdate.c0000644000000000000000000002433511576626513022214 0ustar00usergroup00000000000000/* * update.c: functions that track the votes during the voting protocol * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include "ccm.h" #include "ccmmisc.h" /* * add the node 'node' to the list of cluster leaders requesting * for membership information. 
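 *
 * [Editorial annotation, not part of the original source.]  A ccm_update_t
 * table tracks one voting round: each slot pairs an index into the
 * llm_info_t node table with the uptime that node reported, update_add()
 * and update_remove() fill and clear the slots, and the leader slot is
 * recomputed as votes arrive.  Independently of the vote slots, cl_head
 * caches membership-list requests from would-be cluster leaders;
 * update_add_memlist_request() below de-duplicates those requests by llm
 * index and keeps the larger reported uptime when the same leader asks
 * twice.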
*/ void update_add_memlist_request(ccm_update_t *tab, llm_info_t *llm, const char *node, const int uptime) { int idx = llm_get_index(llm, node); update_t *obj; int i=0; while((obj = (update_t *) g_slist_nth_data(UPDATE_GET_CLHEAD(tab),i++)) != NULL){ if(idx == obj->index) { if(uptime > obj->uptime) { ccm_debug(LOG_WARNING , "WARNING:update_add_memlist_request" " %s already added(updating)", node); obj->uptime = uptime; } return; } } obj = g_malloc(sizeof(update_t)); obj->index = idx; obj->uptime = uptime; UPDATE_SET_CLHEAD(tab, g_slist_append(UPDATE_GET_CLHEAD(tab), obj)); return; } /* * free all the members in the list. */ void update_free_memlist_request(ccm_update_t *tab) { uint i; update_t *obj; for (i = 0; i < g_slist_length(UPDATE_GET_CLHEAD(tab)); i++) { obj = (update_t *)g_slist_nth_data(UPDATE_GET_CLHEAD(tab),i); if(obj) { g_free(obj); } } g_slist_free(UPDATE_GET_CLHEAD(tab)); UPDATE_SET_CLHEAD(tab, NULL); } /* * set up the context to traverse the list of * cluster leaders. */ void * update_initlink(ccm_update_t *tab) { GSList **track = (GSList **)g_malloc(sizeof(GSList *)); *track = UPDATE_GET_CLHEAD(tab); return (void *)track; } /* * return name of the cluster leader in the next element in the list. */ const char * update_next_link(ccm_update_t *tab, llm_info_t *llm, void *tr, uint *uptime) { update_t *node; GSList **track = (GSList **)tr; node = (update_t *)g_slist_nth_data((*track),0); if(node==NULL) { return NULL; } *uptime = node->uptime; *track = g_slist_next(*track); return (llm_get_nodename(llm, node->index)); } /* * free the context used for cluster leader link traversal. */ void update_freelink(ccm_update_t *tab, void *track) { g_free(track); return; } /* * clear all the information that we are tracking. */ void update_reset(ccm_update_t *tab) { int i; UPDATE_SET_LEADER(tab, -1); UPDATE_SET_NODECOUNT(tab, 0); for ( i = 0 ; i < MAXNODE; i++ ) { UPDATE_SET_INDEX(tab, i, -1); UPDATE_SET_UPTIME(tab, i, -1); } UPDATE_SET_INITTIME(tab, ccm_get_time()); update_free_memlist_request(tab); } /* * initialize our datastructures. */ void update_init(ccm_update_t *tab) { UPDATE_SET_CLHEAD(tab, NULL); update_reset(tab); } /* * return TRUE if sufficient time has expired since update messages * were exchanged. */ int update_timeout_expired(ccm_update_t *tab, unsigned long timeout) { return(ccm_timeout(UPDATE_GET_INITTIME(tab), ccm_get_time(), timeout)); } /* * given two members return the leader. */ static uint update_compute_leader(ccm_update_t *tab, uint j, llm_info_t *llm) { update_t *entry; update_t *leader_entry; int value; int leader = tab->leader; if(leader == -1) { return j; } entry = &(tab->update[j]); leader_entry = &(tab->update[leader]); if (leader_entry->uptime == entry->uptime){ goto namecompare; } if (leader_entry->uptime ==0){ return j; } if (entry->uptime == 0){ return leader; } if (leader_entry->uptime < entry->uptime) { return leader; } if (leader_entry->uptime > entry->uptime) { return j; } namecompare: value = llm_node_cmp(llm, leader_entry->index, entry->index); if (value == 0){ ccm_log(LOG_ERR, "update_compute_leader:same id comparsion?"); abort(); } if (value > 0) { return leader; } return j; } void update_display(int pri,llm_info_t* llm, ccm_update_t* tab) { int i; ccm_debug(pri, "diplaying update information: "); ccm_debug(pri, "leader=%d(%s) nodeCount=%d", tab -> leader, (tab->leader<0 || tab->leader >= (int)llm_get_nodecount(llm))? 
"":llm_get_nodename(llm, tab->update[tab->leader].index), tab->nodeCount); for ( i = 0; i < llm_get_nodecount(llm); i++){ if (tab->update[i].index >=0){ ccm_debug(pri, "%d:%s uptime=%d", i, llm_get_nodename(llm, tab->update[i].index), tab->update[i].uptime); } } } /* * given the current members, choose the leader. * set the leader and return the leader as well * */ static int update_find_leader(ccm_update_t *tab, llm_info_t *llm) { int i, leader, j; for ( i = 0 ; i < llm_get_nodecount(llm); i++ ){ if (UPDATE_GET_INDEX(tab, i) != -1) { break; } } if (i == llm_get_nodecount(llm)){ UPDATE_SET_LEADER(tab,-1); return -1; } leader = i; UPDATE_SET_LEADER(tab,leader); for ( j = i+1 ; j < llm_get_nodecount(llm); j++ ){ if (UPDATE_GET_INDEX(tab, j) == -1){ continue; } if(update_compute_leader(tab, j, llm) == j){ UPDATE_SET_LEADER(tab,j); leader = j; } } return leader; } /* return the index of the node in the update table. */ static int update_get_position(ccm_update_t *tab, llm_info_t *llm, const char *nodename) { int i; uint j; i = llm_get_index(llm, nodename); if ( i == -1 ){ return -1; } /* search for the index in the update table */ for ( j = 0 ; j < llm_get_nodecount(llm); j++ ){ if (UPDATE_GET_INDEX(tab,j) == i ){ break; } } if ( j == llm_get_nodecount(llm)){ return -1; } return j; } int update_get_uptime(ccm_update_t *tab, llm_info_t *llm, int idx) { uint count=0, j; int i; for ( j = 0 ; j < llm_get_nodecount(llm); j++ ){ i = UPDATE_GET_INDEX(tab,j); if (i == -1){ continue; } if (i == idx) { return UPDATE_GET_UPTIME(tab,j); } count++; if(count >= UPDATE_GET_NODECOUNT(tab)){ return -1; } } return -1; } /* * return TRUE if the node had participated in the update voting round. * */ int update_is_node_updated(ccm_update_t *tab, llm_info_t *llm, const char *node) { if(update_get_position(tab, llm, node) == -1 ){ return FALSE; } return TRUE; } /* * Update the vote of the node in the update table. */ void update_add(ccm_update_t *tab, llm_info_t *llm, const char *nodename, int uptime, gboolean leader_flag) { int i; uint j; i = llm_get_index(llm, nodename); if( i == -1 ) { ccm_log(LOG_ERR, "ccm_update_table:Internal Logic error i=%d", i); exit(1); } /* find a free location in the 'table' table to fill the new * entry. A free entry should be found within llm_get_nodecount * entries. */ for ( j = 0 ; j < llm_get_nodecount(llm); j++ ){ if (UPDATE_GET_INDEX(tab,j) == -1 ){ break; } if(i == UPDATE_GET_INDEX(tab,j)){ ccm_log(LOG_ERR, "ccm_update_table:duplicate entry %s", nodename); return; } } if( j == llm_get_nodecount(llm) ) { ccm_log(LOG_ERR, "ccm_update_table:Internal Logic error j=%d", j); exit(1); } UPDATE_SET_INDEX(tab,j,i); UPDATE_SET_UPTIME(tab,j,uptime); UPDATE_INCR_NODECOUNT(tab); if(leader_flag) { UPDATE_SET_LEADER(tab, update_compute_leader(tab, j, llm)); } return; } /* * remove the vote of a node from the update table. 
*/ void update_remove(ccm_update_t *tab, llm_info_t *llm, const char *nodename) { int j, idx; update_t *obj; int i=0; j = update_get_position(tab, llm, nodename); if( j == -1 ) { return; } UPDATE_SET_UPTIME(tab, j, 0); UPDATE_SET_INDEX(tab, j, -1); UPDATE_DECR_NODECOUNT(tab); /* remove any request cached in our queue from this node */ idx = llm_get_index(llm, nodename); while((obj = (update_t *)g_slist_nth_data(tab->cl_head,i)) != NULL) { if(obj->index == idx){ tab->cl_head = g_slist_remove(tab->cl_head, obj); } else { i++; } } /* recalculate the new leader if leader's entry is being removed*/ if (UPDATE_GET_LEADER(tab) != j) { return; } UPDATE_SET_LEADER(tab,update_find_leader(tab, llm)); return; } /* * return TRUE if I am the leader among the members that have * voted in this round of update exchanges. */ int update_am_i_leader(ccm_update_t *tab, llm_info_t *llm) { int leader = UPDATE_GET_LEADER(tab); if (llm_get_myindex(llm) == UPDATE_GET_INDEX(tab,leader)) { return TRUE; } return FALSE; } /* * return the name of the cluster leader. */ const char * update_get_cl_name(ccm_update_t *tab, llm_info_t *llm) { int leader = UPDATE_GET_LEADER(tab); return(llm_get_nodename(llm,UPDATE_GET_INDEX(tab,leader))); } /* * return the uuid of the next member who has voted in the update * message transfer round. */ int update_get_next_index(ccm_update_t *tab, llm_info_t *llm, int *nextposition) { uint pos; if (*nextposition < -1 || *nextposition >= (int)llm_get_nodecount(llm)) { return -1; } pos = (*nextposition == -1 ? 0 : *nextposition); *nextposition = pos + 1; while (UPDATE_GET_INDEX(tab,pos) == -1 && pos < llm_get_nodecount(llm)){ pos++; } if (pos == llm_get_nodecount(llm)) { return -1; } return UPDATE_GET_INDEX(tab, pos); } /* * create a string that represents the members of the update * round, and return it through the memlist parameter. * also return the size of the string. */ int update_strcreate(ccm_update_t *tab, char *memlist, llm_info_t *llm) { uint i; int indx; char *bitmap; int str_len; bitmap_create(&bitmap, MAXNODE); for ( i = 0 ; i < llm_get_nodecount(llm); i ++ ) { indx = UPDATE_GET_INDEX(tab,i); if (indx == -1){ continue; } bitmap_mark(indx, bitmap, MAXNODE); } str_len = ccm_bitmap2str(bitmap, memlist, MAX_MEMLIST_STRING); bitmap_delete(bitmap); return str_len; } Heartbeat-3-0-7e3a82377fa8/membership/ccm/ccmversion.c0000644000000000000000000000545511576626513022421 0ustar00usergroup00000000000000/* * ccmversion.c: routines that handle information while in the version * request state * * Copyright (c) International Business Machines Corp., 2002 * Author: Ram Pai (linuxram@us.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
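 *
 * [Editorial annotation, not part of the original source.]  The helpers in
 * this file track the state of an outstanding version (CCM_TYPE_PROTOVERSION)
 * request.  version_reset() stamps the request time and clears the retry and
 * response counters; version_retry() returns VER_NO_CHANGE while the timeout
 * has not yet expired, VER_TRY_AGAIN (restamping the timer) for up to
 * MAXTRIES (3) retries, and VER_TRY_END once the retries are exhausted; and
 * version_some_activity() clears the retry counter when the caller sees
 * other cluster traffic, so a busy cluster keeps the joining node trying.
 * A caller's polling step might look roughly like the following sketch (the
 * surrounding variable and field names are assumptions, not code from this
 * file):
 *
 *     switch (version_retry(&info->ccm_version, version_timeout)) {
 *     case VER_TRY_AGAIN:
 *             ccm_send_protoversion(hb, info);    // resend the request
 *             break;
 *     case VER_TRY_END:
 *             // give up and fall back to restarting the join
 *             break;
 *     default:
 *             break;                              // VER_NO_CHANGE: keep waiting
 *     }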
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #define MAXTRIES 3 #define VERSION_GET_TIMER(ver) (ver->time) #define VERSION_SET_TIMER(ver, t) ver->time = t #define VERSION_GET_TRIES(ver) ver->numtries #define VERSION_RESET_TRIES(ver) ver->numtries = 0 #define VERSION_INC_TRIES(ver) (ver->numtries)++ #define VERSION_INC_NRESP(ver) (ver->n_resp)++ #define VERSION_SET_NRESP(ver,val) ver->n_resp = val #define VERSION_GET_NRESP(ver) ver->n_resp /* */ /* return true if we have waited long enough for a response */ /* for our version request. */ /* */ static int version_timeout_expired(ccm_version_t *ver, longclock_t timeout) { return(ccm_timeout(VERSION_GET_TIMER(ver), ccm_get_time(), timeout)); } /* */ /* reset all the data structures used to track the version request */ /* state. */ /* */ void version_reset(ccm_version_t *ver) { VERSION_SET_TIMER(ver,ccm_get_time()); VERSION_RESET_TRIES(ver); VERSION_SET_NRESP(ver,0); } /* */ /* return true if version request has message has to be resent. */ /* else return false. */ /* */ int version_retry(ccm_version_t *ver, longclock_t timeout) { if(version_timeout_expired(ver, timeout)) { ccm_debug2(LOG_DEBUG, "version_retry:%d tries left" , 3-VERSION_GET_TRIES(ver)); if(VERSION_GET_TRIES(ver) == MAXTRIES) { return VER_TRY_END; } else { VERSION_INC_TRIES(ver); VERSION_SET_TIMER(ver,ccm_get_time()); return VER_TRY_AGAIN; } } return VER_NO_CHANGE; } /* */ /* The caller informs us: */ /* "please note that there is some activity going on in the cluster. */ /* Probably you may want to try for some more time" */ /* */ void version_some_activity(ccm_version_t *ver) { VERSION_RESET_TRIES(ver); } void version_inc_nresp(ccm_version_t *ver) { VERSION_INC_NRESP(ver); } void version_set_nresp(ccm_version_t *ver, int val) { VERSION_SET_NRESP(ver, val); } unsigned int version_get_nresp(ccm_version_t *ver) { return VERSION_GET_NRESP(ver); } Heartbeat-3-0-7e3a82377fa8/membership/ccm/clmtest.c0000644000000000000000000001425211576626513021717 0ustar00usergroup00000000000000/* * clmtest.c: AIS membership service client application * * Copyright (c) 2003 Intel Corp. * Author: Zhu Yi (yi.zhu@intel.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
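 *
 * [Editorial annotation, not part of the original source.]  This test client
 * exercises the AIS membership (CLM) API: it registers track and node-get
 * callbacks through saClmInitialize(), fetches node 0 synchronously with
 * saClmClusterNodeGet() and node 1 asynchronously with
 * saClmClusterNodeGetAsync(), then select()s on the descriptor returned by
 * saClmSelectionObjectGet() and calls saClmDispatch() with SA_DISPATCH_ALL
 * as events arrive.  Tracking (SA_TRACK_CHANGES) is started at launch, and
 * the SIGUSR1/SIGUSR2 handlers restart and stop it at run time, so an
 * interactive session might look like:
 *
 *     $ ./clmtest &
 *     $ kill -USR2 %1     # stop membership tracking
 *     $ kill -USR1 %1     # start tracking again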
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "lha_internal.h" #include #include #include #include #include #include #include #include #include #define MAX_ITEMS 5 /* for a max 5-nodes cluster */ /* global variables */ static SaClmHandleT hd; static SaClmClusterNotificationT nbuf[MAX_ITEMS]; static SaClmClusterNodeT cn; static void track_start(int sig) { int ret; SaUint8T flag = SA_TRACK_CHANGES; signal(SIGUSR1, &track_start); printf("-------------------------------------------------\n"); fprintf(stderr, "Start to Track Cluster Membership\n"); if ((ret = saClmClusterTrackStart(&hd, flag, nbuf , MAX_ITEMS)) != SA_OK) { fprintf(stderr, "saClmClusterTrackStart error, errno [%d]\n" , ret); exit(1); } } static void track_stop(int sig) { int ret; signal(SIGUSR2, &track_start); fprintf(stderr, "Stop to Track Cluster Membership\n"); if ((ret = saClmClusterTrackStop(&hd)) != SA_OK) { fprintf(stderr, "saClmClusterTrackStop error, errno [%d]\n" , ret); exit(1); } } static void track_callback(SaClmClusterNotificationT *nbuf, SaUint32T nitem , SaUint32T nmem, SaUint64T nview, SaErrorT error) { uint i; if (error != SA_OK) { fprintf(stderr, "Track Callback failed [%d]\n", error); exit(1); } printf("-------------------------------------------------\n"); printf("SA CLM Track Callback BEGIN\n"); printf("viewNumber = %llu\n", nview); printf("numberOfItems = %lu\n", nitem); printf("numberOfMembers = %lu\n", nmem); for (i = 0; i < nitem; i++) { printf("\n"); printf("\tclusterChanges = %s [%d]\n" , nbuf[i].clusterChanges == 1 ? "SA_CLM_NODE_NO_CHANGE" : nbuf[i].clusterChanges == 2 ? "SA_CLM_NODE_JOINED" : nbuf[i].clusterChanges == 3 ? "SA_CLM_NODE_LEFT":"ERROR" , nbuf[i].clusterChanges); printf("\tnodeId = %ld\n", nbuf[i].clusterNode.nodeId); printf("\tnodeAddress = %s\n" , nbuf[i].clusterNode.nodeAddress.length > 0 ? (char *)nbuf[i].clusterNode.nodeAddress.value : "N/A"); printf("\tnodeName = %s\n" , nbuf[i].clusterNode.nodeName.length > 0 ? (char *)nbuf[i].clusterNode.nodeName.value : "N/A"); printf("\tclusterName = %s\n" , nbuf[i].clusterNode.clusterName.length > 0 ? (char *)nbuf[i].clusterNode.clusterName.value : "N/A"); printf("\tmember = %d\n", nbuf[i].clusterNode.member); printf("\tbootTimestamp = %lld\n" , nbuf[i].clusterNode.bootTimestamp); } printf("\nSA CLM Track Callback END\n"); } static void dump_nodeinfo(SaClmClusterNodeT *cn) { printf("Dump information from SaClmClusterNodeGet\n"); printf("\n"); printf("nodeId = %ld\n", cn->nodeId); printf("nodeAddress = %s\n" , cn->nodeAddress.length > 0 ? (char *)cn->nodeAddress.value : "N/A"); printf("nodeName = %s\n" , cn->nodeName.length > 0 ? (char *)cn->nodeName.value : "N/A"); printf("clusterName = %s\n" , cn->clusterName.length > 0 ? 
(char *)cn->clusterName.value : "N/A"); printf("member = %d\n", cn->member); printf("bootTimestamp = %lld\n", cn->bootTimestamp); printf("\n"); } static void getnode_callback(SaInvocationT invocation, SaClmClusterNodeT *clusterNode , SaErrorT error) { if (error != SA_OK) { fprintf(stderr, "Get Node Callback failed [%d]\n", error); exit(1); } fprintf(stderr, "Invocation [%d]\n", invocation); dump_nodeinfo(clusterNode); } int main(void) { SaSelectionObjectT st; SaErrorT ret; SaClmNodeIdT nid; SaClmCallbacksT my_callbacks = { .saClmClusterTrackCallback = (SaClmClusterTrackCallbackT)track_callback, .saClmClusterNodeGetCallback = (SaClmClusterNodeGetCallbackT)getnode_callback }; if ((ret = saClmInitialize(&hd, &my_callbacks, NULL)) != SA_OK) { fprintf(stderr, "saClmInitialize error, errno [%d]\n",ret); return 1; } if ((ret = saClmSelectionObjectGet(&hd, &st)) != SA_OK) { fprintf(stderr, "saClmSelectionObjectGet error, errno [%d]\n" , ret); return 1; } nid = 0; /* Synchronously get nodeId information */ printf("-------------------------------------------------\n"); printf("Get nodeId [%lu] info by SaClmClusterNodeGet\n", nid); if ((ret = saClmClusterNodeGet(nid, 10, &cn)) != SA_OK) { if (ret == SA_ERR_INVALID_PARAM) { fprintf(stderr, "NodeId [%lu] record not found!\n",nid); } else { fprintf(stderr , "saClmClusterNodeGet error, errno [%d]\n" , ret); return 1; } } else { dump_nodeinfo(&cn); } /* Asynchronously get my nodeId information */ nid = 1; printf("-------------------------------------------------\n"); printf("Get nodeId [%lu] info by SaClmClusterNodeGetAsync\n", nid); if ((ret = saClmClusterNodeGetAsync(&hd, 1234, nid, &cn)) != SA_OK) { if (ret == SA_ERR_INVALID_PARAM) { fprintf(stderr, "NodeId [%lu] record not found!\n",nid); } else { fprintf(stderr , "saClmClusterNodeGet error, errno [%d]\n" , ret); return 1; } } /* Start to track cluster membership changes events */ track_start(SIGUSR1); signal(SIGUSR2, &track_stop); for (;;) { fd_set rset; FD_ZERO(&rset); FD_SET(st, &rset); if (select(st + 1, &rset, NULL, NULL, NULL) == -1) { /* TODO should we use pselect here? */ if (errno == EINTR) continue; else { perror("select"); return 1; } } if ((ret = saClmDispatch(&hd, SA_DISPATCH_ALL)) != SA_OK) { if (ret == SA_ERR_LIBRARY) { fprintf(stderr, "I am evicted!\n"); return 1; } fprintf(stderr, "saClmDispatch error, errno [%d]\n" , ret); return 1; } } return 0; } Heartbeat-3-0-7e3a82377fa8/membership/quorumd/Makefile.am0000644000000000000000000000274611576626513023073 0ustar00usergroup00000000000000# # Author: Huang Zhen # Copyright (c) 2006 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
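#
# [Editorial annotation, not part of the original source.]  This Makefile.am
# builds two installed programs, quorumd and quorumdtest, from quorumd.c and
# quorumdtest.c.  Both link against libplumb and GLib via $(COMMONLIBS) and
# pick up GnuTLS through $(GNUTLSLIBS)/$(GNUTLSHEAD), since the quorum
# daemon and its test client talk to each other over TLS; both are installed
# into $(libdir)/@HB_PKG@ (halibdir).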
# MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir) -I$(top_srcdir) COMMONLIBS = -lplumb \ $(GLIBLIB) halibdir = $(libdir)/@HB_PKG@ halib_PROGRAMS = quorumd quorumdtest quorumd_SOURCES = quorumd.c quorumd_LDADD = $(COMMONLIBS) quorumd_LDFLAGS = $(GNUTLSLIBS) quorumd_CFLAGS = $(INCLUDES) $(GNUTLSHEAD) quorumdtest_SOURCES = quorumdtest.c quorumdtest_LDADD = $(COMMONLIBS) quorumdtest_LDFLAGS = $(GNUTLSLIBS) quorumdtest_CFLAGS = $(INCLUDES) $(GNUTLSHEAD) Heartbeat-3-0-7e3a82377fa8/membership/quorumd/quorumd.c0000644000000000000000000003311511576626513022671 0ustar00usergroup00000000000000/* * Linux HA quorum daemon * * Author: Huang Zhen * Copyright (c) 2006 International Business Machines * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_SYS_SOCKIO_H #include #endif #include #include #include #include #include #include #include #ifdef HAVE_STRING_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* x.509 related */ #define SERVERKEY HA_HBCONF_DIR"/server-key.pem" #define SERVERCERT HA_HBCONF_DIR"/server-cert.pem" #define CACERT HA_HBCONF_DIR"/ca-cert.pem" #define CACRL HA_HBCONF_DIR"/ca-crl.pem" #define DH_BITS 1024 static int verify_certificate (gnutls_session session, char* CN); static gnutls_session initialize_tls_session (int sd, char* CN); static void initialize_tls_global(void); static gnutls_dh_params dh_params; static gnutls_certificate_credentials x509_cred; /* Message types */ #define ENV_PREFIX "HA_" #define KEY_LOGDAEMON "use_logd" #define HADEBUGVAL "HA_debug" #define OPTARGS "skrhvt" #define PID_FILE HA_VARRUNDIR"/quorumd.pid" #define QUORUMD "quorumd" #define PORT 5561 static gboolean sig_handler(int nsig, gpointer user_data); static void usage(const char* cmd, int exit_status); static int init_start(void); static int init_stop(const char *pid_file); static int init_status(const char *pid_file, const char *client_name); static void shutdown_quorumd(void); static gboolean sigterm_action(int nsig, gpointer unused); static gboolean on_listen(GIOChannel *ch , GIOCondition condition , gpointer data); static struct hb_quorumd_fns* get_protocol(const char* version); static void _load_config_file(gpointer key, gpointer value, gpointer user_data); static void _dump_data(gpointer key, gpointer value, gpointer user_data); extern int debug_level; static GMainLoop* mainloop = NULL; static GHashTable* protocols = NULL; int main(int argc, char ** argv) { int req_restart = FALSE; int req_status = FALSE; int req_stop = FALSE; int argerr = 0; int flag; char * inherit_debuglevel; while ((flag = getopt(argc, argv, OPTARGS)) != 
EOF) { switch(flag) { case 'h': /* Help message */ usage(QUORUMD, LSB_EXIT_OK); break; case 'v': /* Debug mode, more logs*/ ++debug_level; break; case 's': /* Status */ req_status = TRUE; break; case 'k': /* Stop (kill) */ req_stop = TRUE; break; case 'r': /* Restart */ req_restart = TRUE; break; default: ++argerr; break; } } if (optind > argc) { quorum_log(LOG_ERR,"WHY WE ARE HERE?"); ++argerr; } if (argerr) { usage(QUORUMD, LSB_EXIT_GENERIC); } inherit_debuglevel = getenv(HADEBUGVAL); if (inherit_debuglevel != NULL) { debug_level = atoi(inherit_debuglevel); if (debug_level > 2) { debug_level = 2; } } cl_log_set_entity(QUORUMD); cl_log_enable_stderr(FALSE); cl_log_set_facility(HA_LOG_FACILITY); cl_inherit_logging_environment(0); if (req_status){ return init_status(PID_FILE, QUORUMD); } if (req_stop){ return init_stop(PID_FILE); } if (req_restart) { init_stop(PID_FILE); } return init_start(); } int init_status(const char *pid_file, const char *client_name) { long pid = cl_read_pidfile(pid_file); if (pid > 0) { fprintf(stderr, "%s is running [pid: %ld]\n" , client_name, pid); return LSB_STATUS_OK; } fprintf(stderr, "%s is stopped.\n", client_name); return LSB_STATUS_STOPPED; } int init_stop(const char *pid_file) { long pid; int rc = LSB_EXIT_OK; if (pid_file == NULL) { quorum_log(LOG_ERR, "No pid file specified to kill process"); return LSB_EXIT_GENERIC; } pid = cl_read_pidfile(pid_file); if (pid > 0) { if (CL_KILL((pid_t)pid, SIGTERM) < 0) { rc = (errno == EPERM ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); fprintf(stderr, "Cannot kill pid %ld\n", pid); }else{ quorum_log(LOG_INFO, "Signal sent to pid=%ld," " waiting for process to exit", pid); while (CL_PID_EXISTS(pid)) { sleep(1); } } } return rc; } static const char usagemsg[] = "[-srkhv]\n\ts: status\n\tr: restart" "\n\tk: kill\n\th: help\n\tv: debug\n"; void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s %s", cmd, usagemsg); fflush(stream); exit(exit_status); } gboolean sigterm_action(int nsig, gpointer user_data) { shutdown_quorumd(); return TRUE; } void shutdown_quorumd(void) { quorum_log(LOG_INFO,"quorumd is shutting down"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); }else { exit(LSB_EXIT_OK); } } static void register_pid(gboolean do_fork, gboolean (*shutdown)(int nsig, gpointer userdata)) { int j; umask(022); for (j = 0; j < 3; ++j) { close(j); (void)open("/dev/null", j == 0 ? O_RDONLY : O_RDONLY); } CL_IGNORE_SIG(SIGINT); CL_IGNORE_SIG(SIGHUP); G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGTERM , shutdown, NULL, NULL); cl_signal_set_interrupt(SIGTERM, 1); cl_signal_set_interrupt(SIGCHLD, 1); /* At least they are harmless, I think. ;-) */ cl_signal_set_interrupt(SIGINT, 0); cl_signal_set_interrupt(SIGHUP, 0); } gboolean sig_handler(int nsig, gpointer user_data) { switch (nsig) { case SIGUSR1: debug_level++; if (debug_level > 2) { debug_level = 0; } quorum_log(LOG_INFO, "set debug_level to %d", debug_level); break; case SIGUSR2: g_hash_table_foreach(protocols, _dump_data, GINT_TO_POINTER(LOG_INFO)); break; case SIGHUP: g_hash_table_foreach(protocols, _load_config_file, NULL); break; default: quorum_log(LOG_WARNING, "sig_handler: Received an " "unexpected signal(%d). Something wrong?.",nsig); } return TRUE; } /* main loop of the daemon*/ int init_start () { int ssock; struct sockaddr_in saddr; GIOChannel* sch; /* register pid */ if (cl_lock_pidfile(PID_FILE) < 0) { quorum_log(LOG_ERR, "already running: [pid %d]." 
, cl_read_pidfile(PID_FILE)); quorum_log(LOG_ERR, "Startup aborted (already running)." "Shutting down."); exit(100); } register_pid(FALSE, sigterm_action); /* enable coredumps */ quorum_log(LOG_DEBUG, "Enabling coredumps"); cl_cdtocoredir(); cl_enable_coredumps(TRUE); cl_set_all_coredump_signal_handlers(); /* initialize gnutls */ initialize_tls_global(); /* enable dynamic up/down debug level */ G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGUSR1, sig_handler, NULL, NULL); G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGUSR2, sig_handler, NULL, NULL); G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGHUP, sig_handler, NULL, NULL); /* create the mainloop */ mainloop = g_main_new(FALSE); /* create the protocal table */ protocols = g_hash_table_new(g_str_hash, g_str_equal); /* create server socket */ ssock = socket(AF_INET, SOCK_STREAM, 0); if (ssock == -1) { quorum_log(LOG_ERR, "Can not create server socket." "Shutting down."); exit(100); } /* bind server socket*/ memset(&saddr, '\0', sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = INADDR_ANY; saddr.sin_port = htons(PORT); if (bind(ssock, (struct sockaddr*)&saddr, sizeof(saddr)) == -1) { quorum_log(LOG_ERR, "Can not bind server socket." "Shutting down."); exit(100); } if (listen(ssock, 10) == -1) { quorum_log(LOG_ERR, "Can not start listen." "Shutting down."); exit(100); } /* create source for server socket and add to the mainloop */ sch = g_io_channel_unix_new(ssock); g_io_add_watch(sch, G_IO_IN|G_IO_ERR|G_IO_HUP, on_listen, NULL); /* run the mainloop */ quorum_log(LOG_DEBUG, "main: run the loop..."); quorum_log(LOG_INFO, "Started."); g_main_run(mainloop); /* exit, clean the pid file */ if (cl_unlock_pidfile(PID_FILE) == 0) { quorum_log(LOG_DEBUG, "[%s] stopped", QUORUMD); } return 0; } gboolean on_listen(GIOChannel *ch, GIOCondition condition, gpointer data) { int ssock, csock; unsigned laddr; struct sockaddr_in addr; char buf[MAXMSG]; char CN[MAX_DN_LEN]; ssize_t len; gnutls_session session; struct hb_quorumd_fns *fns; if (condition & G_IO_IN) { /* accept the connection */ ssock = g_io_channel_unix_get_fd(ch); laddr = sizeof(addr); csock = accept(ssock, (struct sockaddr*)&addr, &laddr); if (csock == -1) { quorum_log(LOG_ERR, "%s accept socket failed", __FUNCTION__); return TRUE; } memset(CN, 0, MAX_DN_LEN); session = initialize_tls_session(csock, CN); if (session == NULL) { quorum_log(LOG_ERR, "%s tls handshake failed", __FUNCTION__); close(csock); return TRUE; } memset(buf,0,MAXMSG); len = gnutls_record_recv(session, buf, MAXMSG); if (len <= 0) { quorum_log(LOG_ERR, "can't get version info"); gnutls_bye (session, GNUTLS_SHUT_WR); gnutls_deinit (session); close(csock); return TRUE; } quorum_debug(LOG_DEBUG, "version:%s(%d)",buf,(int)len); fns = get_protocol(buf); if(fns != NULL) { fns->on_connect(csock,session,CN); } else { quorum_log(LOG_WARNING, "version %s is not supported", buf); gnutls_bye (session, GNUTLS_SHUT_WR); gnutls_deinit (session); close(csock); } } return TRUE; } struct hb_quorumd_fns* get_protocol(const char* version) { struct hb_quorumd_fns* protocol; protocol = g_hash_table_lookup(protocols, version); if (protocol == NULL) { protocol = cl_load_plugin("quorumd", version); if (protocol != NULL) { if (protocol->init() != -1) { g_hash_table_insert(protocols, strdup(version), protocol); } else { protocol = NULL; } } } return protocol; } void _load_config_file(gpointer key, gpointer value, gpointer user_data) { struct hb_quorumd_fns* protocol = (struct hb_quorumd_fns*) value; protocol->load_config_file(); } void 
_dump_data(gpointer key, gpointer value, gpointer user_data) { struct hb_quorumd_fns* protocol = (struct hb_quorumd_fns*) value; protocol->dump_data(GPOINTER_TO_INT(user_data)); } int verify_certificate (gnutls_session session, char* CN) { unsigned int cert_list_size; const gnutls_datum *cert_list; int ret; char dn[MAX_DN_LEN]; size_t dn_len = MAX_DN_LEN; gnutls_x509_crt cert; ret = gnutls_certificate_verify_peers(session); if (ret < 0) { quorum_debug(LOG_DEBUG,"gnutls_certificate_verify_peers2 returns error"); return -1; } if (gnutls_certificate_type_get (session) != GNUTLS_CRT_X509) { quorum_debug(LOG_DEBUG,"The certificate is not a x.509 cert"); return -1; } if (gnutls_x509_crt_init (&cert) < 0) { quorum_debug(LOG_DEBUG,"error in gnutls_x509_crt_init"); return -1; } cert_list = gnutls_certificate_get_peers (session, &cert_list_size); if (cert_list == NULL) { quorum_debug(LOG_DEBUG,"No certificate was found!"); return -1; } if (gnutls_x509_crt_import (cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) { quorum_debug(LOG_DEBUG,"error parsing certificate"); return -1; } if (gnutls_x509_crt_get_expiration_time (cert) < time (0)) { quorum_debug(LOG_DEBUG,"The certificate has expired"); return -1; } if (gnutls_x509_crt_get_activation_time (cert) > time (0)) { quorum_debug(LOG_DEBUG,"The certificate is not yet activated"); return -1; } memset(dn, 0, MAX_DN_LEN); gnutls_x509_crt_get_dn(cert, dn, &dn_len); strncpy(CN, strstr(dn, "CN=")+3, MAX_DN_LEN); CN[MAX_DN_LEN-1]= '\0'; quorum_debug(LOG_DEBUG,"The certificate cn:%s",CN); gnutls_x509_crt_deinit (cert); return 0; } gnutls_session initialize_tls_session (int sd, char* CN) { int ret; gnutls_session session; gnutls_init (&session, GNUTLS_SERVER); gnutls_set_default_priority (session); gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, x509_cred); gnutls_certificate_server_set_request (session, GNUTLS_CERT_REQUIRE); gnutls_dh_set_prime_bits (session, DH_BITS); gnutls_transport_set_ptr (session, (gnutls_transport_ptr) GINT_TO_POINTER(sd)); ret = gnutls_handshake (session); if (ret < 0) { close (sd); gnutls_deinit (session); quorum_log(LOG_WARNING,"handshake failed"); return NULL; } if (verify_certificate(session,CN) < 0) { return NULL; } return session; } void initialize_tls_global(void) { gnutls_global_init (); gnutls_certificate_allocate_credentials (&x509_cred); gnutls_certificate_set_x509_trust_file (x509_cred, CACERT, GNUTLS_X509_FMT_PEM); gnutls_certificate_set_x509_crl_file (x509_cred, CACRL, GNUTLS_X509_FMT_PEM); gnutls_certificate_set_x509_key_file (x509_cred, SERVERCERT, SERVERKEY, GNUTLS_X509_FMT_PEM); gnutls_dh_params_init (&dh_params); gnutls_dh_params_generate2 (dh_params, DH_BITS); gnutls_certificate_set_dh_params (x509_cred, dh_params); } Heartbeat-3-0-7e3a82377fa8/membership/quorumd/quorumd.conf0000644000000000000000000000027111576626513023371 0ustar00usergroup00000000000000cluster mycluster version 2_0_8 interval 1000 timeout 5000 takeover 3000 giveup 2000 nodenum 3 weight 300 cluster yourcluster version 2_0_7 timeout 5000 data 300 data 400 Heartbeat-3-0-7e3a82377fa8/membership/quorumd/quorumdtest.c0000644000000000000000000001325411576626513023573 0ustar00usergroup00000000000000/* * Test client for Linux HA quormd daemon test client * * Author: Huang Zhen * Copyright (c) 2006 International Business Machines * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your 
option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #define DH_BITS 1024 #define MAX_BUF 1024 #define CACERT "/etc/ha.d/ca-cert.pem" #define CLIENTKEY "/etc/ha.d/client-key.pem" #define CLIENTCERT "/etc/ha.d/client-cert.pem" static int verify_certificate (gnutls_session session); static gnutls_session initialize_tls_session (int sd); static void initialize_tls_global(void); static gnutls_certificate_credentials xcred; int sock = 0; int main (int argc, char* argv[]) { struct sockaddr_in addr; struct ha_msg* msg = NULL; struct ha_msg* ret = NULL; const char* version = "2_0_8"; struct hostent* hp; int i; int quorum; size_t len; char* s = NULL; char buf[MAXMSG]; gnutls_session session; int t_interval; /* initialize gnutls */ initialize_tls_global(); /* create socket */ sock=socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock == -1 ) { return -1; } /* connect to server*/ memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; hp = gethostbyname("pluto"); memcpy(&addr.sin_addr, hp->h_addr_list[0], sizeof(struct in_addr)); addr.sin_port = htons(5561); if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) == -1) { close(sock); return -1; } session = initialize_tls_session(sock); if (session == NULL) { return -1; } gnutls_record_send(session, version, strlen(version)+1); msg = ha_msg_new(10); ha_msg_add(msg, "t","init"); ha_msg_add(msg, "cl_name","mycluster"); s = msg2wirefmt(msg, &len); gnutls_record_send(session, s, len); len = gnutls_record_recv(session, buf, MAXMSG); ret = wirefmt2msg(buf, len, FALSE); printf("result:%s\n",ha_msg_value(ret, "result")); ha_msg_value_int(ret, "interval", &t_interval); for (i = 0; i < 20; i++ ) { msg = ha_msg_new(10); ha_msg_add(msg, "t","quorum"); ha_msg_add_int(msg, "nodenum", 2); ha_msg_add_int(msg, "weight", 200); s = msg2wirefmt(msg, &len); gnutls_record_send(session, s, len); len = gnutls_record_recv(session, buf, MAXMSG); ret = wirefmt2msg(buf, len, FALSE); printf("result:%s\n",ha_msg_value(ret, "result")); ha_msg_value_int(ret, "quorum", &quorum); printf("quorum:%d\n",quorum); ha_msg_del(ret); ha_msg_del(msg); sleep(t_interval/1000); } gnutls_bye (session, GNUTLS_SHUT_WR); gnutls_deinit (session); close(sock); return 0; } void initialize_tls_global(void) { gnutls_global_init (); gnutls_certificate_allocate_credentials (&xcred); gnutls_certificate_set_x509_trust_file (xcred, CACERT, GNUTLS_X509_FMT_PEM); gnutls_certificate_set_x509_key_file (xcred, CLIENTCERT, CLIENTKEY, GNUTLS_X509_FMT_PEM); } gnutls_session initialize_tls_session (int sd) { int ret; gnutls_session session; const int cert_type_priority[2] = { GNUTLS_CRT_X509,0}; gnutls_init (&session, GNUTLS_CLIENT); gnutls_set_default_priority (session); gnutls_certificate_type_set_priority (session, cert_type_priority); gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, xcred); gnutls_transport_set_ptr (session, (gnutls_transport_ptr) GINT_TO_POINTER(sd)); ret = gnutls_handshake (session); if (ret < 0) { close (sd); gnutls_deinit (session); fprintf (stderr, "*** 
Handshake failed\n"); gnutls_perror (ret); return NULL; } verify_certificate(session); return session; } int verify_certificate (gnutls_session session) { unsigned int cert_list_size; const gnutls_datum *cert_list; int ret; gnutls_x509_crt cert; ret = gnutls_certificate_verify_peers (session); if (ret < 0) { printf("gnutls_certificate_verify_peers2 returns error.\n"); return -1; } /* if (status & GNUTLS_CERT_INVALID) { printf("The certificate is not trusted.\n"); return -1; } if (status & GNUTLS_CERT_SIGNER_NOT_FOUND) { printf("The certificate hasn't got a known issuer.\n"); return -1; } if (status & GNUTLS_CERT_REVOKED) { printf("The certificate has been revoked.\n"); return -1; } */ if (gnutls_certificate_type_get (session) != GNUTLS_CRT_X509) { printf("The certificate is not a x.509 cert\n"); return -1; } if (gnutls_x509_crt_init (&cert) < 0) { printf("error in gnutls_x509_crt_init\n"); return -1; } cert_list = gnutls_certificate_get_peers (session, &cert_list_size); if (cert_list == NULL) { printf("No certificate was found!\n"); return -1; } if (gnutls_x509_crt_import (cert, &cert_list[0], GNUTLS_X509_FMT_DER) < 0) { printf("error parsing certificate\n"); return -1; } if (gnutls_x509_crt_get_expiration_time (cert) < time (0)) { printf("The certificate has expired\n"); return -1; } if (gnutls_x509_crt_get_activation_time (cert) > time (0)) { printf("The certificate is not yet activated\n"); return -1; } gnutls_x509_crt_deinit (cert); return 0; } Heartbeat-3-0-7e3a82377fa8/pkg/InfoFiles/pkginfo.in0000644000000000000000000000026211576626513021623 0ustar00usergroup00000000000000PKG=@PKGNAME@ ARCH=@host_cpu@ VERSION=@VERSION@ NAME=Open-HA software for multi-machine resilience CATEGORY=system CLASSES=none build BASEDIR=/ EMAIL=linux-ha@lists.linux-ha.org Heartbeat-3-0-7e3a82377fa8/pkg/InfoFiles/postinstall.in0000644000000000000000000000020511576626513022537 0ustar00usergroup00000000000000#!/bin/sh # # Solaris pkg post-install script. # Nothing yet. Might start heartbeat. # # (c) 2004 David Lee Heartbeat-3-0-7e3a82377fa8/pkg/InfoFiles/preinstall.in0000644000000000000000000000147711576626513022354 0ustar00usergroup00000000000000#!/bin/sh # # Solaris pkg pre-install script. # Keep in step with "%pre" section of "heartbeat.spec". # # (c) 2004 David Lee # # License: GNU General Public License (GPL) # if getent group @HA_APIGROUP@ >/dev/null then : OK group @HA_APIGROUP@ already present else GROUPOPT="-g @HA_APIGID@" if /usr/sbin/groupadd $GROUPOPT @HA_APIGROUP@ 2>/dev/null then : OK we were able to add group @HA_APIGROUP@ else /usr/sbin/groupadd @HA_APIGROUP@ fi fi if getent passwd @HA_CCMUSER@ >/dev/null then : OK user @HA_CCMUSER@ already present else USEROPT="-g @HA_APIGROUP@ -u @HA_CCMUID@ -d @HA_COREDIR@/@HA_CCMUSER@" if /usr/sbin/useradd $USEROPT @HA_CCMUSER@ 2>/dev/null then : OK we were able to add user @HA_CCMUSER@ else /usr/sbin/useradd @HA_CCMUSER@ fi fi Heartbeat-3-0-7e3a82377fa8/pkg/Makefile.am0000644000000000000000000001023511576626513020015 0ustar00usergroup00000000000000# $Id: Makefile.am,v 1.14 2005/11/15 13:11:17 davidlee Exp $ # # "pkg" Makefile for Solaris etc. # # Copyright (C) 2001, 2004 David Lee # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Overview of principle: # 1. "make install" as a pseudo-install to a temporary location $(TMPINSTDIR) # 2. "pkgmk" from whatever is in $(TMPINSTDIR) into a pkg area $(PKGDIR) MAINTAINERCLEANFILES = Makefile.in PKGINFO = $(INFOFILES)/prototype \ $(INFOFILES)/pkginfo \ $(INFOFILES)/preinstall \ $(INFOFILES)/postinstall EXTRA_DIST = $(INFOFILES)/pkginfo.in \ $(INFOFILES)/preinstall.in \ $(INFOFILES)/postinstall.in CLEANFILES = stamp-tmpinst $(INFOFILES)/prototype DISTCLEANFILES = $(PKGINFO) PKGDIR = packages INFOFILES = InfoFiles # A pseudo-install directory, so that we know what files are generated. # This allows libtool to maintain its flexibility. TMPINSTDIR = tmpinst # Main target pkg: $(PKGDIR)/$(PKGNAME)/pkgmap # Build the "pkg" from the pseudo-install $(PKGDIR)/$(PKGNAME)/pkgmap: $(PKGINFO) -rm -rf $(PKGDIR) -mkdir $(PKGDIR) cd $(INFOFILES) && pkgmk -d ../$(PKGDIR) -r ../$(TMPINSTDIR) # pseudo-install into $(TMPINSTDIR) stamp-tmpinst: cd $(top_builddir) \ && (umask 022 && $(MAKE) install DESTDIR=$(PWD)/$(TMPINSTDIR)) echo timestamp > stamp-tmpinst ## The awk in here is hacky. Its intention is to tweak owner:group information ## for the files, directories, named pipes, etc. Most should end up as ## "root:sys", but a few should have either HA_CCMUSER or HA_APIGROUP or both. ## ## Note that there is the wider problem of maintaining this information ## (owner, group, etc.) across the various operating system package mechanisms. $(INFOFILES)/prototype: stamp-tmpinst -mkdir $(INFOFILES) (cd $(TMPINSTDIR) && pkgproto .) \ | $(AWK) ' \ $$1 !~ /^[ils]$$/ \ { $$(NF-1) = "root" ; $$(NF) = "sys" } \ "/" $$3 ~ /^(\/etc|\/etc\/init.d|\/opt|\/var|\/var\/run)$$/ \ { $$4 = $$5 = $$6 = "?" } \ "/" $$3 ~ /^(\/usr|\/usr\/lib|\/usr\/lib\/ocf)$$/ \ { $$4 = $$5 = $$6 = "?" } \ "/" $$3 == "@HA_VARRUNDIR@/@HB_PKG@/ccm" \ { $$5 = "@HA_CCMUSER@" ; $$6 = "@HA_APIGROUP@" } \ "/" $$3 == "@HA_VARRUNDIR@/@HB_PKG@/crm" \ { $$5 = "@HA_CCMUSER@" ; $$6 = "@HA_APIGROUP@" } \ "/" $$3 == "@HA_VARLIBDIR@/@HB_PKG@/crm" \ { $$5 = "@HA_CCMUSER@" ; $$6 = "@HA_APIGROUP@" } \ "/" $$3 == "@HA_COREDIR@/@HA_CCMUSER@" \ { $$5 = "@HA_CCMUSER@" } \ "/" $$3 == "@HA_COREDIR@/nobody" \ { $$5 = "nobody" } \ "/" $$3 == "@bindir@/cl_status" \ { $$5 = "@HA_CCMUSER@" ; $$6 = "@HA_APIGROUP@" } \ { print ; } \ ' > $@ echo "d none etc/rc2.d ? ? ?" >> $@ echo "d none etc/rc0.d ? ? ?" >> $@ echo "d none etc/rc1.d ? ? ?" >> $@ echo "d none etc/rcS.d ? ? ?" 
>> $@ echo "s none etc/rc2.d/S@HB_INITSTARTPRI@heartbeat=@INITDIR@/heartbeat@INIT_EXT@" >> $@ echo "s none etc/rc0.d/K@HB_INITSTOPPRI@heartbeat=@INITDIR@/heartbeat@INIT_EXT@" >> $@ echo "s none etc/rc1.d/K@HB_INITSTOPPRI@heartbeat=@INITDIR@/heartbeat@INIT_EXT@" >> $@ echo "s none etc/rcS.d/K@HB_INITSTOPPRI@heartbeat=@INITDIR@/heartbeat@INIT_EXT@" >> $@ echo "s none etc/rc2.d/S@LOGD_INITSTARTPRI@logd=@INITDIR@/logd@INIT_EXT@" >> $@ echo "s none etc/rc0.d/K@LOGD_INITSTOPPRI@logd=@INITDIR@/logd@INIT_EXT@" >> $@ echo "s none etc/rc1.d/K@LOGD_INITSTOPPRI@logd=@INITDIR@/logd@INIT_EXT@" >> $@ echo "s none etc/rcS.d/K@LOGD_INITSTOPPRI@logd=@INITDIR@/logd@INIT_EXT@" >> $@ echo "i pkginfo" >> $@ echo "i preinstall" >> $@ echo "i postinstall" >> $@ clean-local: rm -rf $(TMPINSTDIR) $(PKGDIR) Heartbeat-3-0-7e3a82377fa8/port/Makefile.am0000644000000000000000000000405211576626513020220 0ustar00usergroup00000000000000# $Id: Makefile.am,v 1.9 2006/04/13 09:54:19 andrew Exp $ # # Open-HA "port" Makefile for FreeBSD # # Copyright (C) 2005 Matthew Soffen # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Overview of principle: # 1. "make install" as a pseudo-install to a temporary location $(TMPINSTDIR) # 2. "pkgmk" from whatever is in $(TMPINSTDIR) into a pkg area $(PKGDIR) MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = \ portMakefile.in \ heartbeat/pkg-deinstall.in \ heartbeat/pkg-descr.in \ heartbeat/pkg-install.in \ heartbeat/pkg-plist.in CLEANFILES = DISTCLEANFILES = $(PKGINFO) PKGNAME = OPENHA TARFILE = @TARFILE@ MD5 = @MD5@ GZIP_PROG = @GZIP_PROG@ # A pseudo-install directory, so that we know what files are generated. # This allows libtool to maintain its flexibility. TMPINSTDIR = tmpinst # Main target portfile: rm -f $(TARFILE) heartbeat.tar heartbeat.tar.gz ln -s ../$(TARFILE) $(TARFILE) $(MD5) $(TARFILE) > heartbeat/distinfo # Create the DistInfo file for the port file. ls -l ../$(TARFILE) | $(AWK) '{print "SIZE \($(TARFILE)\) = "$$5}' >> heartbeat/distinfo cp portMakefile heartbeat/Makefile $(TAR) -cvf heartbeat.tar heartbeat/Makefile heartbeat/distinfo heartbeat/pkg-deinstall heartbeat/pkg-descr heartbeat/pkg-install heartbeat/pkg-plist $(GZIP_PROG) heartbeat.tar clean: rm -f $(TARFILE) heartbeat.tar port/distinfo port/Makefile clean-local: rm -rf $(TMPINSTDIR) $(PKGDIR) Heartbeat-3-0-7e3a82377fa8/port/heartbeat/pkg-deinstall.in0000644000000000000000000000313611576626513023213 0ustar00usergroup00000000000000#! /bin/sh # # License: GNU General Public License (GPL) # ask() { local question default answer question=$1 default=$2 if [ -z "${PACKAGE_BUILDING}" ]; then read -p "${question} [${default}]? 
" answer fi if [ x${answer} = x ]; then answer=${default} fi echo ${answer} } yesno() { local dflt question answer question=$1 dflt=$2 while :; do answer=$(ask "${question}" "${dflt}") case "${answer}" in [Yy]*) return 0;; [Nn]*) return 1;; esac echo "Please answer yes or no." done } delete_account() { local u g home u=$1 g=$2 if yesno "Do you want me to remove group \"${g}\"" y; then pw groupdel -n ${g} echo "Done." fi if yesno "Do you want me to remove user \"${u}\"" y; then eval home=~${u} pw userdel -n ${u} echo "Done." if [ -d "${home}" ]; then echo "Please remember to remove the home directory \"${home}\" as" echo "well as the mirrored files." fi fi } if [ x$2 != xDEINSTALL ]; then exit fi export PATH=/bin:/usr/bin:/usr/sbin base=${PKG_PREFIX} if ps -axc | grep -q heartbeat; then if yesno "There are some heartbeat processes running. Shall I kill them" y then killall heartbeat sleep 2 else echo "OK ... I hope you know what you are doing." fi fi delete_account hacluster haclient echo "Removing runtime files" if [ -d ${base}/var/lib/heartbeat ]; then rm -r ${base}/var/lib/heartbeat fi if [ -f ${base}/var/lock/subsys/heartbeat ]; then rm ${base}/var/lock/subsys/heartbeat fi if [ -f ${base}/var/run/heartbeat.pid ]; then rm ${base}/var/run/heartbeat.pid fi Heartbeat-3-0-7e3a82377fa8/port/heartbeat/pkg-descr.in0000644000000000000000000000172511576626513022336 0ustar00usergroup00000000000000heartbeat is a basic high-availability subsystem. It will run scripts at initialization, and when machines go up or down. This version will also perform IP address takeover using gratuitous ARPs. It implements the following kinds of heartbeats: - Bidirectional Serial Rings ("raw" serial ports) - UDP/IP broadcast (ethernet, etc) - Unicast heartbeats - "ping" heartbeats (for routers, switches, etc.) (to be used for breaking ties in 2-node systems) ldirectord is a stand-alone daemon to monitor services. The STONITH module (a.k.a. STOMITH) provides an extensible interface for remotely powering down a node in the cluster. PILS is an generalized and portable open source Plugin and Interface Loading System. PILS manages both plugins (loadable objects), and the interfaces these plugins implement. PILS is designed to support any number of plugins implementing any number of interfaces. WWW: http://linux-ha.org/wiki/Main_Page Heartbeat-3-0-7e3a82377fa8/port/heartbeat/pkg-install.in0000644000000000000000000000620611576626513022703 0ustar00usergroup00000000000000#! /bin/sh # # License: GNU General Public License (GPL) # base=${PKG_PREFIX} ask() { local question default answer question=$1 default=$2 if [ -z "${PACKAGE_BUILDING}" ]; then read -p "${question} [${default}]? " answer fi if [ x${answer} = x ]; then answer=${default} fi echo ${answer} } yesno() { local dflt question answer question=$1 dflt=$2 while :; do answer=$(ask "${question}" "${dflt}") case "${answer}" in [Yy]*) return 0;; [Nn]*) return 1;; esac echo "Please answer yes or no." done } make_account() { local u g gcos homeopt home u=$1 g=$2 gcos=$3 homeopt=${4:+"-d $4"} if pw group show "${g}" >/dev/null 2>&1; then echo "You already have a group \"${g}\", so I will use it." else echo "You need a group \"${g}\"." if which -s pw && yesno "Would you like me to create it" y; then pw groupadd ${g} -g @HA_APIGID@ || exit echo "Done." else echo "Please create it, and try again." if ! grep -q "^${u}:" /etc/passwd; then echo "While you're at it, please create a user \"${u}\" too," echo "with a default group of \"${g}\"." 
fi exit 1 fi fi if pw user show "${u}" >/dev/null 2>&1; then echo "You already have a user \"${u}\", so I will use it." else echo "You need a user \"${u}\"." if which -s pw && yesno "Would you like me to create it" y; then pw useradd ${u} -g ${g} -u @HA_CCMUID@ -h - ${homeopt} \ -s /nonexistent -c "${gcos}" || exit echo "Done." else echo "Please create it, and try again." exit 1 fi fi if [ x"$homeopt" = x ]; then eval home=~${u} if [ ! -d "${home}" ]; then if yesno \ "Would you like me to create ${u}'s home directory (${home})" y then mkdir -p ${home}/.cvsup || exit touch ${home}/.cvsup/auth || exit chown -R ${u}:${g} ${home} || exit chmod -R go= ${home} || exit else echo "Please create it, and try again." exit 1 fi fi fi } case $2 in POST-INSTALL) if which -s pw && which -s lockf; then : else cat </dev/null || true @unexec rmdir %D/var/lock/subsys 2>/dev/null || true @unexec rmdir %D/var/lock 2>/dev/null || true @unexec rmdir %D/var/lib/heartbeat/ckpt 2>/dev/null || true @unexec rmdir %D/var/lib/heartbeat/ccm 2>/dev/null || true @unexec rmdir %D/var/lib/heartbeat 2>/dev/null || true @unexec rmdir %D/var/lib 2>/dev/null || true @unexec rmdir %D/var 2>/dev/null || true @dirrm share/doc/heartbeat-@VERSION@ @dirrm lib/stonith/plugins/stonith @dirrm lib/stonith/plugins @dirrm lib/stonith @dirrm lib/pils/plugins/InterfaceMgr @dirrm lib/pils/plugins @dirrm lib/pils @dirrm lib/heartbeat/plugins/test @dirrm lib/heartbeat/plugins/HBcomm @dirrm lib/heartbeat/plugins/HBauth @dirrm lib/heartbeat/plugins/AppHBNotification @dirrm lib/heartbeat/plugins @dirrm lib/heartbeat/cts @dirrm lib/heartbeat @dirrm include/stonith @dirrm include/saf @dirrm include/pils @dirrm include/ocf @dirrm include/heartbeat @dirrm include/clplumbing @dirrm etc/ha.d/resource.d @dirrm etc/ha.d/rc.d @unexec rmdir %D/etc/ha.d/conf 2>/dev/null || true @unexec rmdir %D/etc/ha.d 2>/dev/null || true Heartbeat-3-0-7e3a82377fa8/port/portMakefile.in0000644000000000000000000000206611576626513021141 0ustar00usergroup00000000000000# New ports collection makefile for: hearbeat # Date created: 18 March 2005 # Added to HA Project from initial work by # Scott Kleihege # # License: GNU General Public License (GPL) # # $FreeBSD$ # PORTNAME= heartbeat PORTVERSION= @VERSION@ CATEGORIES= sysutils MASTER_SITES= http://linux-ha.org/wiki/Download MAINTAINER= sirgeek-ha@mrsucko.org COMMENT= Subsystem for High-Availability Clustering BUILD_DEPENDS= ${LOCALBASE}/include/libnet.h:${PORTSDIR}/net/libnet-devel \ ${LOCALBASE}/include/glib12/glib.h:${PORTSDIR}/devel/glib12 RUN_DEPENDS= ${BUILD_DEPENDS} USE_PERL5= yes GNU_CONFIGURE= yes USE_GMAKE= yes USE_AUTOTOOLS= yes CONFIGURE_ARGS= --prefix=/usr/local --sysconfdir=/usr/local/etc --localstatedir=/var --enable-all --with-group-id=90 --with-ccmuser-id=90 MAN1= cl_status.1 MAN8= apphbd.8 heartbeat.8 ldirectord.8 meatclient.8 \ stonith.8 supervise-ldirectord-config.8 pre-install: PATH=${PATH}:${LOCALBASE}/bin post-install: PKG_PREFIX=${LOCALBASE} ${SH} ${PKGINSTALL} ${PKGNAME} POST-INSTALL .include Heartbeat-3-0-7e3a82377fa8/replace/Makefile.am0000644000000000000000000000202311576626513020643 0ustar00usergroup00000000000000# # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ -I$(top_srcdir)/linux-ha -I$(top_builddir)/linux-ha QUIET_LIBTOOL_OPTS = @QUIET_LIBTOOL_OPTS@ LIBTOOL = @LIBTOOL@ @QUIET_LIBTOOL_OPTS@ noinst_LTLIBRARIES = libreplace.la libreplace_la_SOURCES = libreplace_la_LIBADD = @LTLIBOBJS@ Heartbeat-3-0-7e3a82377fa8/replace/NoSuchFunctionName.c0000644000000000000000000000206611576626513022470 0ustar00usergroup00000000000000/* * Copyright (C) 2002 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ void nosuchfunctionname(void); /* * This is a completely useless function put here only to make OpenBSD make * procedures happy. I hope no one ever makes such a function ;-) */ void nosuchfunctionname(void) { return; } Heartbeat-3-0-7e3a82377fa8/replace/alphasort.c0000644000000000000000000000270311576626513020755 0ustar00usergroup00000000000000/* * * alphasort - replacement for alphasort functions. * * Matt Soffen * Copyright (C) 2001 Matt Soffen * * Taken from the FreeBSD file (with copyright notice) * /usr/src/gnu/lib/libdialog/dir.c *************************************************************************** * Program: dir.c * Author: Marc van Kempen * desc: Directory routines, sorting and reading * * Copyright (c) 1995, Marc van Kempen * * All rights reserved. * * This software may be used, modified, copied, distributed, and * sold, in both source and binary form provided that the above * copyright and these terms are retained, verbatim, as the first * lines of this file. Under no circumstances is the author * responsible for the proper functioning of this software, nor does * the author assume any responsibility for damages incurred with * its use. * *************************************************************************** */ #include #include #include #include /* XXX for _POSIX_VERSION ifdefs */ #if HAVE_STRINGS_H #include #endif #if !defined sgi && !defined _POSIX_VERSION #include #endif #include #include #include #include int alphasort(const void *dirent1, const void *dirent2) { return(strcmp((*(const struct dirent **)dirent1)->d_name, (*(const struct dirent **)dirent2)->d_name)); } Heartbeat-3-0-7e3a82377fa8/replace/daemon.c0000644000000000000000000000536211576626513020227 0ustar00usergroup00000000000000/*- * * daemon - replacement for daemon function. 
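 * (Summary of the replacement below: it follows the classic BSD
 * daemon(nochdir, noclose) contract -- fork() and _exit() in the parent,
 * setsid() to detach from the controlling terminal, chdir("/") unless
 * nochdir is set, and stdin/stdout/stderr redirected to /dev/null unless
 * noclose is set; it returns 0 on success and -1 on failure.)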
* * Matt Soffen * Copyright (C) 2004 Matt Soffen * * Taken from the FreeBSD file (with copyright notice) * ------------------------------------------------------------ * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD: src/lib/libc/gen/daemon.c,v 1.3 2000/01/27 23:06:14 jasone Exp $ * */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)daemon.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include #include #include int daemon(nochdir, noclose) int nochdir, noclose; { int fd; switch (fork()) { case -1: return (-1); case 0: break; default: _exit(0); } if (setsid() == -1) return (-1); if (!nochdir) (void)chdir("/"); if (!noclose && (fd = _open("/dev/null", O_RDWR, 0)) != -1) { (void)dup2(fd, STDIN_FILENO); (void)dup2(fd, STDOUT_FILENO); (void)dup2(fd, STDERR_FILENO); if (fd > 2) (void)_close(fd); } return (0); } Heartbeat-3-0-7e3a82377fa8/replace/inet_pton.c0000644000000000000000000001321211576626513020754 0ustar00usergroup00000000000000/* * Copyright (c) 1996,1999 by Internet Software Consortium. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. 
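 * (Usage sketch for this replacement: inet_pton(AF_INET, "10.0.0.170", &buf)
 * writes the 4-byte network-order address into buf and returns 1; a malformed
 * string returns 0 and leaves buf untouched; AF_INET6 support is compiled in
 * only when HAVE_IPV6 is defined, and any other address family fails with -1
 * and errno set to EAFNOSUPPORT.)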
*/ /* Chris Wright June 22, 2001 * Merged contents of inet_pton.c from Apache2.0.16 and BIND8 * The Apache base is more portable within heartbeat's envrionment, * however, the BIND8 version has two small logic changes that are * newer. */ #include #if HAVE_SYS_TYPES_H #include #endif #if HAVE_SYS_SOCKET_H #include #endif #if HAVE_NETINET_IN_H #include #endif #if HAVE_ARPA_INET_H #include #endif #include #include #ifndef IN6ADDRSZ #define IN6ADDRSZ 16 #endif #ifndef INT16SZ #define INT16SZ sizeof(short) #endif #ifndef INADDRSZ #define INADDRSZ 4 #endif #ifndef __P #define __P(x) x #endif /* * WARNING: Don't even consider trying to compile this on a system where * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. */ static int inet_pton4 __P((const char *src, unsigned char *dst)); #if HAVE_IPV6 static int inet_pton6 __P((const char *src, unsigned char *dst)); #endif /* int * inet_pton(af, src, dst) * convert from presentation format (which usually means ASCII printable) * to network format (which is usually some kind of binary format). * return: * 1 if the address was valid for the specified address family * 0 if the address wasn't valid (`dst' is untouched in this case) * -1 if some other error occurred (`dst' is untouched in this case, too) * author: * Paul Vixie, 1996. */ int inet_pton(int af, const char *src, void *dst) { switch (af) { case AF_INET: return (inet_pton4(src, dst)); #if HAVE_IPV6 case AF_INET6: return (inet_pton6(src, dst)); #endif default: errno = EAFNOSUPPORT; return (-1); } /* NOTREACHED */ } /* int * inet_pton4(src, dst) * like inet_aton() but without all the hexadecimal and shorthand. * return: * 1 if `src' is a valid dotted quad, else 0. * notice: * does not touch `dst' unless it's returning 1. * author: * Paul Vixie, 1996. */ static int inet_pton4(const char *src, unsigned char *dst) { static const char digits[] = "0123456789"; int saw_digit, octets, ch; unsigned char tmp[INADDRSZ], *tp; saw_digit = 0; octets = 0; *(tp = tmp) = 0; while ((ch = *src++) != '\0') { const char *pch; if ((pch = strchr(digits, ch)) != NULL) { unsigned int new = *tp * 10 + (pch - digits); if (new > 255) return (0); *tp = new; if (! saw_digit) { if (++octets > 4) return (0); saw_digit = 1; } } else if (ch == '.' && saw_digit) { if (octets == 4) return (0); *++tp = 0; saw_digit = 0; } else return (0); } if (octets < 4) return (0); memcpy(dst, tmp, INADDRSZ); return (1); } #if HAVE_IPV6 /* int * inet_pton6(src, dst) * convert presentation level address to network order binary form. * return: * 1 if `src' is a valid [RFC1884 2.2] address, else 0. * notice: * (1) does not touch `dst' unless it's returning 1. * (2) :: in a full address is silently ignored. * credit: * inspired by Mark Andrews. * author: * Paul Vixie, 1996. */ static int inet_pton6(const char *src, unsigned char *dst) { static const char xdigits_l[] = "0123456789abcdef", xdigits_u[] = "0123456789ABCDEF"; unsigned char tmp[IN6ADDRSZ], *tp, *endp, *colonp; const char *xdigits, *curtok; int ch, saw_xdigit; unsigned int val; memset((tp = tmp), '\0', IN6ADDRSZ); endp = tp + IN6ADDRSZ; colonp = NULL; /* Leading :: requires some special handling. 
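 * (e.g. "::1", "::" or "::ffff:192.168.0.1")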
*/ if (*src == ':') if (*++src != ':') return (0); curtok = src; saw_xdigit = 0; val = 0; while ((ch = *src++) != '\0') { const char *pch; if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) pch = strchr((xdigits = xdigits_u), ch); if (pch != NULL) { val <<= 4; val |= (pch - xdigits); if (val > 0xffff) return (0); saw_xdigit = 1; continue; } if (ch == ':') { curtok = src; if (!saw_xdigit) { if (colonp) return (0); colonp = tp; continue; } else if (*src == '\0') { return (0); } if (tp + INT16SZ > endp) return (0); *tp++ = (unsigned char) (val >> 8) & 0xff; *tp++ = (unsigned char) val & 0xff; saw_xdigit = 0; val = 0; continue; } if (ch == '.' && ((tp + INADDRSZ) <= endp) && inet_pton4(curtok, tp) > 0) { tp += INADDRSZ; saw_xdigit = 0; break; /* '\0' was seen by inet_pton4(). */ } return (0); } if (saw_xdigit) { if (tp + INT16SZ > endp) return (0); *tp++ = (unsigned char) (val >> 8) & 0xff; *tp++ = (unsigned char) val & 0xff; } if (colonp != NULL) { /* * Since some memmove()'s erroneously fail to handle * overlapping regions, we'll do the shift by hand. */ const int n = tp - colonp; int i; if (tp == endp) return (0); for (i = 1; i <= n; i++) { endp[- i] = colonp[n - i]; colonp[n - i] = 0; } tp = endp; } if (tp != endp) return (0); memcpy(dst, tmp, IN6ADDRSZ); return (1); } #endif /* HAVE_IPV6 */ Heartbeat-3-0-7e3a82377fa8/replace/scandir.c0000644000000000000000000001647011576626513020411 0ustar00usergroup00000000000000/* scandir: Scan a directory, collecting all (selected) items into a an array. * * This code borrowed from 'libit', which can be found here: * * http://www.iro.umontreal.ca/~pinard/libit/dist/scandir/ * * The original author put this code in the public domain. * It has been modified slightly to get rid of warnings, etc. * * Below is the email I received from pinard@iro.umontreal.ca (François Pinard) * when I sent him an email asking him about the license, etc. of this * code which I obtained from his site. * * I think the correct spelling of his name is Rich Salz. I think he's now * rsalz@datapower.com... * -- * Rich Salz, Chief Security Architect * DataPower Technology http://www.datapower.com * XS40 XML Security Gateway http://www.datapower.com/products/xs40.html * * Copyright(C): none (public domain) * License: none (public domain) * Author: Rich Salz * * * * -- Alan Robertson * alanr@unix.sh * ************************************************************************** * * Subject: Re: Scandir replacement function * Date: 18 May 2001 12:00:48 -0400 * From: pinard@iro.umontreal.ca (François Pinard) * To: Alan Robertson * References: 1 * * * [Alan Robertson] * * > Hi, I'd like to use your scandir replacement function found here: * > http://www.iro.umontreal.ca/~pinard/libit/dist/scandir/ But, it does * > not indicate authorship or licensing terms in it. Could you tell me * > who wrote this code, under what license you distribute it, and whether * > and under what terms I may further distribute it? * * Hello, Alan. These are (somewhat) explained in UNSHAR.HDR found in the * same directory. The routines have been written by Rick Saltz (I'm not * completely sure of the spelling) a long while ago. I think that nowadays, * Rick is better known as the main author of the nice INN package. * ************************************************************************** * * I spent a little time verifying this with Rick Salz. 
* The results are below: * ************************************************************************** * * Date: Tue, 20 Sep 2005 21:52:09 -0400 (EDT) * From: Rich Salz * To: Alan Robertson * Subject: Re: Verifying permissions/licenses/etc on some old code of yours - * scandir.c * In-Reply-To: <433071CA.8000107@unix.sh> * Message-ID: * Content-Type: TEXT/PLAIN; charset=US-ASCII * * yes, it's most definitely in the public domain. * * I'm glad you find it useful. I'm surprised it hasn't been replaced by, * e.g,. something in GLibC. Ii'm impressed you tracked me down. * * /r$ * * -- * Rich Salz Chief Security Architect * DataPower Technology http://www.datapower.com * XS40 XML Security Gateway http://www.datapower.com/products/xs40.html * ----------------------------------------------------------------------> * Subject: scandir, ftw REDUX * Date: 1 Jan 88 00:47:01 GMT * From: rsalz@pebbles.bbn.com * Newsgroups: comp.sources.misc * * * Forget my previous message -- I just decided for completeness's sake to * implement the SysV ftw(3) routine, too. * * To repeat, these are public-domain implementations of the SystemV ftw() * routine, the BSD scandir() and alphasort() routines, and documentation for * same. The FTW manpage could be more readable, but so it goes. * * Anyhow, feel free to post these, and incorporate them into your existing * packages. I have readdir() routiens for MSDOS and the Amiga if anyone * wants them, and should have them for VMS by the end of January; let me * know if you want copies. * * Yours in filesystems, * /r$ * * Anyhow, feel free to post * ----------------------------------------------------------------------< * */ #include #include #include #include #include #include #ifndef NULL # define NULL ((void *) 0) #endif /* Initial guess at directory allocated. */ #define INITIAL_ALLOCATION 20 int scandir (const char *directory_name, struct dirent ***array_pointer, int (*select_function) (const struct dirent *), #ifdef USE_SCANDIR_COMPARE_STRUCT_DIRENT /* This is what the Linux man page says */ int (*compare_function) (const struct dirent**, const struct dirent**) #else /* This is what the Linux header file says ... */ int (*compare_function) (const void *, const void *) #endif ); int scandir (const char *directory_name, struct dirent ***array_pointer, int (*select_function) (const struct dirent *), #ifdef USE_SCANDIR_COMPARE_STRUCT_DIRENT /* This is what the linux man page says */ int (*compare_function) (const struct dirent**, const struct dirent**) #else /* This is what the linux header file says ... */ int (*compare_function) (const void *, const void *) #endif ) { DIR *directory; struct dirent **array; struct dirent *entry; struct dirent *copy; int allocated = INITIAL_ALLOCATION; int counter = 0; /* Get initial list space and open directory. */ if (directory = opendir (directory_name), directory == NULL) return -1; if (array = (struct dirent **) malloc (allocated * sizeof (struct dirent *)), array == NULL) return -1; /* Read entries in the directory. */ while (entry = readdir (directory), entry) if (select_function == NULL || (*select_function) (entry)) { /* User wants them all, or he wants this one. Copy the entry. */ /* * On some OSes the declaration of "entry->d_name" is a minimal-length * placeholder. Example: Solaris: * /usr/include/sys/dirent.h: * "char d_name[1];" * man page "dirent(3)": * The field d_name is the beginning of the character array * giving the name of the directory entry. This name is * null terminated and may have at most MAXNAMLEN chars. 
* So our malloc length may need to be increased accordingly. * sizeof(entry->d_name): space (possibly minimal) in struct. * strlen(entry->d_name): actual length of the entry. * * John Kavadias * David Lee */ int namelength = strlen(entry->d_name) + 1; /* length with NULL */ int extra = 0; if (sizeof(entry->d_name) <= namelength) { /* allocated space <= required space */ extra += namelength - sizeof(entry->d_name); } if (copy = (struct dirent *) malloc (sizeof (struct dirent) + extra), copy == NULL) { closedir (directory); free (array); return -1; } copy->d_ino = entry->d_ino; copy->d_reclen = entry->d_reclen; strcpy (copy->d_name, entry->d_name); /* Save the copy. */ if (counter + 1 == allocated) { allocated <<= 1; array = (struct dirent **) realloc ((char *) array, allocated * sizeof (struct dirent *)); if (array == NULL) { closedir (directory); free (array); free (copy); return -1; } } array[counter++] = copy; } /* Close things off. */ array[counter] = NULL; *array_pointer = array; closedir (directory); /* Sort? */ if (counter > 1 && compare_function) qsort ((char *) array, counter, sizeof (struct dirent *) , (int (*)(const void *, const void *))(compare_function)); return counter; } Heartbeat-3-0-7e3a82377fa8/replace/setenv.c0000644000000000000000000000257411576626513020272 0ustar00usergroup00000000000000/* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include /* * Small replacement function for setenv() */ int setenv(const char *name, const char * value, int why) { int rc = -1; if ( name && value ) { char * envp = NULL; envp = malloc(strlen(name)+strlen(value)+2); if (envp) { /* * Unfortunately, the putenv API guarantees memory leaks when * changing environment variables repeatedly... :-( */ sprintf(envp, "%s=%s", name, value); /* Cannot free envp (!) */ rc = putenv(envp); } } return(rc); } Heartbeat-3-0-7e3a82377fa8/replace/strerror.c0000644000000000000000000000217211576626513020642 0ustar00usergroup00000000000000/* * Copyright (C) 2002 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of version 2.1 of the GNU Lesser General Public * License as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include extern const char * sys_err[]; extern int sys_nerr; char * strerror(int errnum) { static char whaterr[32]; if (errnum < 0) { return "negative errno"; } if (errnum >= sys_nerr) { snprintf(whaterr, sizeof(whaterr),"error %d", errnum); return whaterr; } return sys_err[sys_nerr]; } Heartbeat-3-0-7e3a82377fa8/replace/strlcat.c0000644000000000000000000000220511576626513020431 0ustar00usergroup00000000000000#include #include /* * Copyright (C) 2007 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ size_t strlcat(char *dest, const char * src, size_t maxlen) { size_t curlen = strlen(dest); size_t addlen = strlen(src); size_t appendlen = (maxlen-1) - curlen; if (appendlen > 0) { strlcpy(dest+curlen, src, maxlen-curlen); } return curlen + addlen; } Heartbeat-3-0-7e3a82377fa8/replace/strlcpy.c0000644000000000000000000000207111576626513020456 0ustar00usergroup00000000000000#include #include /* * Copyright (C) 2007 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ size_t strlcpy(char *dest, const char * src, size_t maxlen) { size_t srclen = strlen(src); if (maxlen > 0) { strncpy(dest, src, maxlen); dest[maxlen-1]=EOS; } return srclen; } Heartbeat-3-0-7e3a82377fa8/replace/strndup.c0000644000000000000000000000224511576626513020460 0ustar00usergroup00000000000000#include #include #include /* * Copyright (C) 2004 Matt Soffen * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* Taken from the GlibC implementation of strndup */ char *strndup(const char *str, size_t len) { size_t n = strnlen(str,len); char *new = (char *) malloc (len+1); if (NULL == new) { return NULL; } new[n] = '\0'; return (char *)memcpy (new, str, len); } Heartbeat-3-0-7e3a82377fa8/replace/strnlen.c0000644000000000000000000000205211576626513020442 0ustar00usergroup00000000000000#include #include /* * Copyright (C) 2003 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ size_t strnlen(const char *s, size_t maxlen) { const char * eospos; eospos = memchr(s, (int)'\0', maxlen); return (eospos == NULL ? maxlen : (size_t)(eospos-s)); } Heartbeat-3-0-7e3a82377fa8/replace/unsetenv.c0000644000000000000000000000264211576626513020631 0ustar00usergroup00000000000000/* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include #include #include #define __environ environ #ifndef HAVE_ENVIRON_DECL extern char **environ; #endif int unsetenv (const char *name) { const size_t len = strlen (name); char **ep; for (ep = __environ; *ep; ++ep) { if (!strncmp (*ep, name, len) && (*ep)[len] == '=') { /* Found it. */ /* Remove this pointer by moving later ones back. */ char **dp = ep; do dp[0] = dp[1]; while (*dp++); /* Continue the loop in case NAME appears again. */ } } return 0; } Heartbeat-3-0-7e3a82377fa8/replace/uuid_parse.c0000644000000000000000000002564011576626513021125 0ustar00usergroup00000000000000/* * uuid: emulation of e2fsprogs interface if implementation lacking. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Original uuid implementation: copyright (C) Theodore Ts'o * * This importation into heartbeat: * Copyright (C) 2004 David Lee * */ #include #include #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include #include #include /* * Local "replace" implementation of uuid functions. */ #include #include #include /* UUID Variant definitions */ #define UUID_VARIANT_NCS 0 #define UUID_VARIANT_DCE 1 #define UUID_VARIANT_MICROSOFT 2 #define UUID_VARIANT_OTHER 3 /* UUID Type definitions */ #define UUID_TYPE_DCE_TIME 1 #define UUID_TYPE_DCE_RANDOM 4 /* For uuid_compare() */ #define UUCMP(u1,u2) if (u1 != u2) return((u1 < u2) ? -1 : 1); /************************************ * Private types ************************************/ #define longlong long long /* * Offset between 15-Oct-1582 and 1-Jan-70 */ #define TIME_OFFSET_HIGH 0x01B21DD2 #define TIME_OFFSET_LOW 0x13814000 #if (SIZEOF_INT == 4) typedef unsigned int __u32; #elif (SIZEOF_LONG == 4) typedef unsigned long __u32; #endif #if (SIZEOF_INT == 2) typedef int __s16; typedef unsigned int __u16; #elif (SIZEOF_SHORT == 2) typedef short __s16; typedef unsigned short __u16; #endif typedef unsigned char __u8; struct uuid { __u32 time_low; __u16 time_mid; __u16 time_hi_and_version; __u16 clock_seq; __u8 node[6]; }; /************************************ * internal routines ************************************/ static void uuid_pack(const struct uuid *uu, uuid_t ptr) { __u32 tmp; unsigned char *out = ptr; tmp = uu->time_low; out[3] = (unsigned char) tmp; tmp >>= 8; out[2] = (unsigned char) tmp; tmp >>= 8; out[1] = (unsigned char) tmp; tmp >>= 8; out[0] = (unsigned char) tmp; tmp = uu->time_mid; out[5] = (unsigned char) tmp; tmp >>= 8; out[4] = (unsigned char) tmp; tmp = uu->time_hi_and_version; out[7] = (unsigned char) tmp; tmp >>= 8; out[6] = (unsigned char) tmp; tmp = uu->clock_seq; out[9] = (unsigned char) tmp; tmp >>= 8; out[8] = (unsigned char) tmp; memcpy(out+10, uu->node, 6); } static void uuid_unpack(const uuid_t in, struct uuid *uu) { const __u8 *ptr = in; __u32 tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; tmp = (tmp << 8) | *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_low = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_mid = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_hi_and_version = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->clock_seq = tmp; memcpy(uu->node, ptr, 6); } /************************************ * Main routines, except uuid_generate*() ************************************/ void uuid_clear(uuid_t uu) { memset(uu, 0, 16); } int uuid_compare(const uuid_t uu1, const uuid_t uu2) { struct uuid uuid1, uuid2; uuid_unpack(uu1, &uuid1); uuid_unpack(uu2, &uuid2); UUCMP(uuid1.time_low, uuid2.time_low); UUCMP(uuid1.time_mid, uuid2.time_mid); UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version); UUCMP(uuid1.clock_seq, uuid2.clock_seq); return memcmp(uuid1.node, uuid2.node, 6); } void uuid_copy(uuid_t dst, const uuid_t src) { unsigned char *cp1; const unsigned char *cp2; int i; for (i=0, cp1 = dst, cp2 = src; i < 16; i++) *cp1++ = 
*cp2++; } /* if uu is the null uuid, return 1 else 0 */ int uuid_is_null(const uuid_t uu) { const unsigned char *cp; int i; for (i=0, cp = uu; i < 16; i++) if (*cp++) return 0; return 1; } /* 36byte-string=>uuid */ int uuid_parse(const char *in, uuid_t uu) { struct uuid uuid; int i; const char *cp; char buf[3]; if (strlen(in) != 36) return -1; for (i=0, cp = in; i <= 36; i++,cp++) { if ((i == 8) || (i == 13) || (i == 18) || (i == 23)) { if (*cp == '-') continue; else return -1; } if (i== 36) if (*cp == 0) continue; if (!isxdigit((int) *cp)) return -1; } uuid.time_low = strtoul(in, NULL, 16); uuid.time_mid = strtoul(in+9, NULL, 16); uuid.time_hi_and_version = strtoul(in+14, NULL, 16); uuid.clock_seq = strtoul(in+19, NULL, 16); cp = in+24; buf[2] = 0; for (i=0; i < 6; i++) { buf[0] = *cp++; buf[1] = *cp++; uuid.node[i] = strtoul(buf, NULL, 16); } uuid_pack(&uuid, uu); return 0; } /* uuid=>36byte-string-with-null */ void uuid_unparse(const uuid_t uu, char *out) { struct uuid uuid; uuid_unpack(uu, &uuid); sprintf(out, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, uuid.node[0], uuid.node[1], uuid.node[2], uuid.node[3], uuid.node[4], uuid.node[5]); } /************************************ * Main routines: uuid_generate*() ************************************/ #include #include #include #ifdef HAVE_SYS_IOCTL_H #include #endif #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_SYS_SOCKIO_H #include #endif #ifdef HAVE_NET_IF_H #include #endif #ifdef HAVE_NETINET_IN_H #include #endif #ifdef HAVE_SRANDOM #define srand(x) srandom(x) #define rand() random() #endif static int get_random_fd(void) { struct timeval tv; static int fd = -2; int i; if (fd == -2) { gettimeofday(&tv, 0); fd = open("/dev/urandom", O_RDONLY); if (fd == -1) fd = open("/dev/random", O_RDONLY | O_NONBLOCK); srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec); } /* Crank the random number generator a few times */ gettimeofday(&tv, 0); for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--) rand(); return fd; } /* * Generate a series of random bytes. Use /dev/urandom if possible, * and if not, use srandom/random. */ static void get_random_bytes(void *buf, int nbytes) { int i, n = nbytes, fd = get_random_fd(); int lose_counter = 0; unsigned char *cp = (unsigned char *) buf; if (fd >= 0) { while (n > 0) { i = read(fd, cp, n); if (i <= 0) { if (lose_counter++ > 16) break; continue; } n -= i; cp += i; lose_counter = 0; } } /* * We do this all the time, but this is the only source of * randomness if /dev/random/urandom is out to lunch. */ for (cp = buf, i = 0; i < nbytes; i++) *cp++ ^= (rand() >> 7) & 0xFF; return; } /* * Get the ethernet hardware address, if we can find it... */ static int get_node_id(unsigned char *node_id) { #ifdef HAVE_NET_IF_H int sd; struct ifreq ifr, *ifrp; struct ifconf ifc; char buf[1024]; int n, i; unsigned char *a; /* * BSD 4.4 defines the size of an ifreq to be * max(sizeof(ifreq), sizeof(ifreq.ifr_name)+ifreq.ifr_addr.sa_len * However, under earlier systems, sa_len isn't present, so the size is * just sizeof(struct ifreq) */ #ifdef HAVE_SA_LEN #ifndef max #define max(a,b) ((a) > (b) ? 
(a) : (b)) #endif #define ifreq_size(i) max(sizeof(struct ifreq),\ sizeof((i).ifr_name)+(i).ifr_addr.sa_len) #else #define ifreq_size(i) sizeof(struct ifreq) #endif /* HAVE_SA_LEN*/ sd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); if (sd < 0) { return -1; } memset(buf, 0, sizeof(buf)); ifc.ifc_len = sizeof(buf); ifc.ifc_buf = buf; if (ioctl (sd, SIOCGIFCONF, (char *)&ifc) < 0) { close(sd); return -1; } n = ifc.ifc_len; for (i = 0; i < n; i+= ifreq_size(*ifr) ) { ifrp = (struct ifreq *)((char *) ifc.ifc_buf+i); strncpy(ifr.ifr_name, ifrp->ifr_name, IFNAMSIZ); #ifdef SIOCGIFHWADDR if (ioctl(sd, SIOCGIFHWADDR, &ifr) < 0) continue; a = (unsigned char *) &ifr.ifr_hwaddr.sa_data; #else #ifdef SIOCGENADDR if (ioctl(sd, SIOCGENADDR, &ifr) < 0) continue; a = (unsigned char *) ifr.ifr_enaddr; #else /* * XXX we don't have a way of getting the hardware * address */ close(sd); return 0; #endif /* SIOCGENADDR */ #endif /* SIOCGIFHWADDR */ if (!a[0] && !a[1] && !a[2] && !a[3] && !a[4] && !a[5]) continue; if (node_id) { memcpy(node_id, a, 6); close(sd); return 1; } } close(sd); #endif return 0; } /* Assume that the gettimeofday() has microsecond granularity */ #define MAX_ADJUSTMENT 10 static int get_clock(__u32 *clock_high, __u32 *clock_low, __u16 *ret_clock_seq) { static int adjustment = 0; static struct timeval last = {0, 0}; static __u16 clock_seq; struct timeval tv; unsigned longlong clock_reg; try_again: gettimeofday(&tv, 0); if ((last.tv_sec == 0) && (last.tv_usec == 0)) { get_random_bytes(&clock_seq, sizeof(clock_seq)); clock_seq &= 0x1FFF; last = tv; last.tv_sec--; } if ((tv.tv_sec < last.tv_sec) || ((tv.tv_sec == last.tv_sec) && (tv.tv_usec < last.tv_usec))) { clock_seq = (clock_seq+1) & 0x1FFF; adjustment = 0; last = tv; } else if ((tv.tv_sec == last.tv_sec) && (tv.tv_usec == last.tv_usec)) { if (adjustment >= MAX_ADJUSTMENT) goto try_again; adjustment++; } else { adjustment = 0; last = tv; } clock_reg = tv.tv_usec*10 + adjustment; clock_reg += ((unsigned longlong) tv.tv_sec)*10000000; clock_reg += (((unsigned longlong) 0x01B21DD2) << 32) + 0x13814000; *clock_high = clock_reg >> 32; *clock_low = clock_reg; *ret_clock_seq = clock_seq; return 0; } /* create a new uuid, based on randomness */ void uuid_generate_random(uuid_t out) { uuid_t buf; struct uuid uu; get_random_bytes(buf, sizeof(buf)); uuid_unpack(buf, &uu); uu.clock_seq = (uu.clock_seq & 0x3FFF) | 0x8000; uu.time_hi_and_version = (uu.time_hi_and_version & 0x0FFF) | 0x4000; uuid_pack(&uu, out); } /* create a new uuid, based on time */ static void uuid_generate_time(uuid_t out) { static unsigned char node_id[6]; static int has_init = 0; struct uuid uu; __u32 clock_mid; if (!has_init) { if (get_node_id(node_id) <= 0) { get_random_bytes(node_id, 6); /* * Set multicast bit, to prevent conflicts * with IEEE 802 addresses obtained from * network cards */ node_id[0] |= 0x80; } has_init = 1; } get_clock(&clock_mid, &uu.time_low, &uu.clock_seq); uu.clock_seq |= 0x8000; uu.time_mid = (__u16) clock_mid; uu.time_hi_and_version = (clock_mid >> 16) | 0x1000; memcpy(uu.node, node_id, 6); uuid_pack(&uu, out); } void uuid_generate(uuid_t out) { if (get_random_fd() >= 0) { uuid_generate_random(out); }else{ uuid_generate_time(out); } } Heartbeat-3-0-7e3a82377fa8/resources/Makefile.am0000644000000000000000000000153511576626513021251 0ustar00usergroup00000000000000# Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public 
License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = heartbeat Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/AudibleAlarm.in0000644000000000000000000000143311576626513024023 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA AudibleAlarm, based on original heartbeat RA. # See OCF RA AudibleAlarm for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # Source function library. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "Usage: $0 [node1 node2 ... ] $LEGAL_ACTIONS" } if [ $# = 0 ]; then usage fi # Get operation name, which is the last argument we are passed. for arg in "$@"; do op=$arg done nodelist=`echo $* | sed 's%'$op'$%%'` OCF_TYPE=AudibleAlarm OCF_RESOURCE_INSTANCE=AudibleAlarm export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_nodelist=$nodelist; export OCF_RESKEY_nodelist ra_execocf $op Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/Delay.in0000644000000000000000000000160411576626513022537 0ustar00usergroup00000000000000#!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script is a test resource for introducing delay. # # usage: $0 {start|stop|status|monitor} # usage: $0 delay {start|stop|status|monitor} # usage: $0 startdelay stopdelay {start|stop|status|monitor} # # This is really a test resource script. # usage() { echo "usage: $0 [delay [stopdelay]] $LEGAL_ACTIONS" exit 1 } . @HB_RA_DIR@/hto-mapfuncs case $# in 1) op=$1;; 2) OCF_RESKEY_startdelay=$1; export OCF_RESKEY_startdelay; op=$2;; 3) OCF_RESKEY_startdelay=$1; OCF_RESKEY_stopdelay=$2; export OCF_RESKEY_startdelay OCF_RESKEY_stopdelay; op=$3;; *) usage;; esac if [ -z $OCF_RESKEY_startdelay ]; then OCF_RESKEY_startdelay=10 fi OCF_TYPE=Delay OCF_RESOURCE_INSTANCE=${OCF_TYPE}_${OCF_RESKEY_startdelay} export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $op Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/Filesystem.in0000755000000000000000000000354511576626513023636 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA Filesystem, based on original heartbeat RA. # See OCF RA Filesystem for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # usage: ./Filesystem [ []] {start|stop|status} # # : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification, # or a samba share # : the mount point for the filesystem # : optional name of the filesystem type. e.g. 
ext2 # : options to be given to the mount command via -o # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 Filesystem::/dev/sda1::/data1::ext2 # or # node1 10.0.0.170 Filesystem::-Ldata1::/data1::ext2 # or # node1 10.0.0.170 Filesystem::server:/data1::/data1::nfs::ro # # This assumes you want to manage a filesystem on a shared (scsi) bus. # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 [ []] $LEGAL_ACTIONS" exit 1 } # Check the arguments passed to this script if [ $# -lt 3 ]; then usage fi if [ "x$2" != "x" ]; then OCF_RESKEY_device="$1"; shift export OCF_RESKEY_device fi if [ "x$2" != "x" ]; then OCF_RESKEY_directory="$1"; shift export OCF_RESKEY_directory fi if [ "x$2" != "x" ]; then OCF_RESKEY_fstype=$1; shift export OCF_RESKEY_fstype fi if [ "x$2" != "x" ]; then OCF_RESKEY_options="$1"; shift export OCF_RESKEY_options fi OCF_TYPE=Filesystem OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$OCF_RESKEY_device export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $1 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/ICP.in0000755000000000000000000000165511576626513022125 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA ICP, based on original heartbeat RA. # See OCF RA ICP for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname # # Notice that you will need to get the utility "icpclucon" from the ICP # support to use this. # # See usage() function below for more details... # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# != 3 ]; then usage exit 1 fi OCF_TYPE=ICP OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_driveid=$1 OCF_RESKEY_device=$2 export OCF_RESKEY_device OCF_RESKEY_driveid ra_execocf $3 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/IPaddr.in0000644000000000000000000000432411576626513022646 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA IPaddr, based on original heartbeat RA. # See OCF RA IPaddr for more information. # # Author: Xun Sun # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # Copyright: (C) 2005 International Business Machines # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 {start|stop|status|monitor} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# != 2 ]; then usage fi # We need to split the argument into pieces that IPaddr OCF RA can # recognize, sed is prefered over Bash specific builtin functions # for portability. BASEIP=`echo $1 | sed 's%/.*%%'` OCF_RESKEY_ip=$BASEIP; export OCF_RESKEY_ip str=`echo $1 | sed 's%^'$BASEIP'/*%%'` if [ ! 
-z "$str" ]; then NETMASK=`echo $str | sed 's%/.*%%'` OCF_RESKEY_cidr_netmask=$NETMASK; export OCF_RESKEY_cidr_netmask str=`echo $str | sed 's%^'$NETMASK'/*%%'` NIC=`echo $str | sed 's%/.*%%'` case $NIC in [0-9]*) BROADCAST=$NIC OCF_RESKEY_broadcast=$BROADCAST; export OCF_RESKEY_broadcast NIC= ;; "") ;; *) BROADCAST=`echo $str | sed -e 's%^'$NIC'/*%%' -e 's%/.*%%'` OCF_RESKEY_nic=$NIC; export OCF_RESKEY_nic OCF_RESKEY_broadcast=$BROADCAST; export OCF_RESKEY_broadcast ;; esac fi # # Determine if this IP address is really being served, or not. # Note that we don't distinguish if *we're* serving it locally... # ip_monitor() { PINGARGS="`pingargs $BASEIP`" for j in 1 2 3 do # for R1 style clusters, CTS runs this on the test monitor node # so we cannot check to see if the IP address is served locally # This means that the ARP spoofing is also tested # But we can't tell for sure which node is serving the IP if @PING@ $PINGARGS >/dev/null 2>&1 then exit 0 fi done exit 1 } case $2 in monitor) ip_monitor ;; *) ;; esac OCF_TYPE=IPaddr OCF_RESKEY_lvs_support=0 OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$BASEIP export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_lvs_support ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/IPaddr2.in0000644000000000000000000000342711576626513022733 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA IPaddr2, based on original heartbeat RA. # See OCF RA IPaddr2 for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 ip-address[/netmaskbits[/interface[:label][/broadcast]]] \ # {start|stop|status|monitor} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # unset LANG; export LANG LC_ALL=C export LC_ALL . @HB_RA_DIR@/hto-mapfuncs # We need to split the argument into pieces that IPaddr OCF RA can # recognize, sed is prefered over Bash specific builtin functions # for portability. usage() { echo "usage: $0 ip-address[/netmaskbits[/interface[:label][/broadcast]]] $LEGAL_ACTIONS" } if [ $# != 2 ]; then usage exit 1 fi BASEIP=`echo $1 | sed 's%/.*%%'` OCF_RESKEY_ip=$BASEIP; export OCF_RESKEY_ip str=`echo $1 | sed 's%^'$BASEIP'/*%%'` if [ ! -z "$str" ]; then NETMASK=`echo $str | sed 's%/.*%%'` OCF_RESKEY_cidr_netmask=$NETMASK; export OCF_RESKEY_cidr_netmask str=`echo $str | sed 's%^'$NETMASK'/*%%'` NIC=`echo $str | sed 's%/.*%%'` case $NIC in [0-9]*) BROADCAST=$NIC OCF_RESKEY_broadcast=$BROADCAST; export OCF_RESKEY_broadcast NIC= ;; "") ;; *) BROADCAST=`echo $str | sed -e 's%^'$NIC'/*%%' -e 's%/.*%%'` OCF_RESKEY_nic=$NIC; export OCF_RESKEY_nic OCF_RESKEY_broadcast=$BROADCAST; export OCF_RESKEY_broadcast ;; esac fi OCF_TYPE=IPaddr2 OCF_RESKEY_lvs_support=1 OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$BASEIP export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_lvs_support ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/IPsrcaddr.in0000644000000000000000000000254511576626513023361 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA IPsrcaddr, based on original heartbeat RA. # See OCF RA IPsrcaddr for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # It can add a preferred source address, or remove one. 
# # usage: IPsrcaddr ip-address {start|stop|status|monitor} # # The "start" arg adds a preferred source address. # # Surprisingly, the "stop" arg removes it. :-) # # NOTES: # # 1) There must be exactly one default route! # 2) The script depends on Alexey Kuznetsov's ip utility from the # iproute aka iproute2 package. # 3) No checking is done to see if the passed in IP address can # reasonably be associated with the interface on which the default # route exists. So unless you want to deliberately spoof your source IP, # check it! Normally, I would expect that your haresources looks # something like: # # nodename ip1 ip2 ... ipN IPsrcaddr::ipX # # where ipX is one of the ip1 to ipN. # . @HB_RA_DIR@/hto-mapfuncs USAGE="usage: $0 $LEGAL_ACTIONS"; usage() { echo $USAGE >&2 exit 1 } if [ $# != 2 ]; then usage fi OCF_TYPE=IPsrcaddr OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_ipaddress=$1; export OCF_RESKEY_ipaddress ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/IPv6addr.in0000644000000000000000000000220311576626513023114 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA IPv6addr. See OCF RA IPv6addr for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # Source function library. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# != 2 ]; then usage fi OCF_TYPE=IPv6addr OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE # We need to split the argument into pieces that IPv6addr OCF RA can # recognize, sed is prefered over Bash specific builtin functions # for portability. BASEIP=`echo $1 | sed 's%/.*%%'` OCF_RESKEY_ipv6addr=$BASEIP; export OCF_RESKEY_ipv6addr str=`echo $1 | sed 's%^'$BASEIP'*%%'` if [ ! -z "$str" ]; then NETMASK=`echo ${str#/} | sed 's%/.*%%'` OCF_RESKEY_cidr_netmask=$NETMASK; export OCF_RESKEY_cidr_netmask str=`echo $str | sed 's%^/'$NETMASK'/*%%'` NIC=`echo $str | sed 's%/.*%%'` case $NIC in "") ;; *) OCF_RESKEY_nic=$NIC; export OCF_RESKEY_nic ;; esac fi ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/LVM.in0000755000000000000000000000141411576626513022141 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA LVM, based on original heartbeat RA. # See OCF RA LVM for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU Lesser General Public License (LGPL) # Copyright: (C) 2005 International Business Machines, Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# != 2 ]; then usage fi OCF_TYPE=LVM OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_volgrpname=$1; export OCF_RESKEY_volgrpname ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/LinuxSCSI.in0000755000000000000000000000207111576626513023264 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA LinuxSCSI, based on original heartbeat RA. # See OCF RA LinuxSCSI for more information. 
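# (In practice this wrapper, like the others in resource.d, only maps its
#  positional argument onto the OCF environment: "LinuxSCSI 0:0:11 start"
#  becomes OCF_TYPE=LinuxSCSI, OCF_RESKEY_scsi=0:0:11 and
#  OCF_RESOURCE_INSTANCE=LinuxSCSI_0:0:11 before the operation is handed
#  to ra_execocf.)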
# # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # CAVEATS: See the usage message for some important warnings # # usage: ./LinuxSCSI ::[:] {start|stop|status} # #: Host adapter number of the SCSI device to query #: SCSI channel #: Target ID of the SCSI device under consideration #: LUN of the SCSI device under consideration # (optional) # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI:0:0:11 # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 ::[:] $LEGAL_ACTIONS" exit 1 } if [ $# != 2 ]; then usage fi OCF_TYPE=LinuxSCSI OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_scsi=$1; export OCF_RESKEY_scsi ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/MailTo.in0000644000000000000000000000213311576626513022664 0ustar00usergroup00000000000000#!/bin/sh # # Description: wrapper of OCF RA MailTo, based on original heartbeat RA. # See OCF RA MailTo for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # This can be given in the haresources file as: # MailTo::alanr@unix.sh::BigImportantWebServer # MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer # # This will then be put into the message subject and body. # Source function library. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "Usage: $0 [] $LEGAL_ACTIONS" exit 1 } case "$#" in 0|1) echo "At least 1 Email address has to be given!" usage ;; 2) cmd=$2 OCF_RESKEY_email=$1 export OCF_RESKEY_email ;; 3) cmd=$3 OCF_RESKEY_email=$1 OCF_RESKEY_subject=$2 export OCF_RESKEY_email OCF_RESKEY_subject ;; *) echo "Additional parameters found: $# but max. 3 are allowed!" usage;; esac OCF_TYPE=MailTo OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $cmd Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/Makefile.am0000644000000000000000000000223311576626513023204 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
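# hard_SCRIPTS below lists the heartbeat V1 (haresources-style) wrapper
# resource agents; they are installed into $(sysconfdir)/ha.d/resource.d
# (harddir) and most of them simply map their arguments onto the matching
# OCF resource agent via hto-mapfuncs.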
# MAINTAINERCLEANFILES = Makefile.in harddir = $(sysconfdir)/ha.d/resource.d hard_SCRIPTS = IPv6addr \ apache \ AudibleAlarm \ db2 \ Delay \ Filesystem \ hto-mapfuncs \ ICP \ ids \ IPaddr \ IPaddr2 \ IPsrcaddr \ LinuxSCSI \ LVM \ MailTo \ OCF \ portblock \ ServeRAID \ SendArp \ Raid1 \ WAS \ WinPopup \ Xinetd Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/OCF.in0000644000000000000000000000556411576626513022121 0ustar00usergroup00000000000000#!/bin/sh # # Copyright: (C) 2003 International Business Machines Corporation # Author: Alan Robertson # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # # OCF resource agent wrapper script for "original" heartbeat # resource system # # This allows heartbeat to use OCF resource agents. # # They are configured in /etc/ha.d/haresources like this: # # OCF::resource-type::resource-instance-name # # Note that resource-type must be a script in the ${OCF_AGENTS} directory # and that resource-instance-name must be a configuration file in # ${HA_DIR}/conf/OCF/OCF-resource-name/ # HA_HBCONF_DIR=@HA_HBCONF_DIR@ # Source function library. . $HA_HBCONF_DIR/shellfuncs # ${HA_DIR} is where things show up # ${HA_DIR}/conf/OCF is where we find our resource settings. # One directory per resource type # # ${OCF_AGENTS} is a directory above where OCF resource agent scripts # are found # # usage() { cat <<-! >&2 usage: $0 OCF-resource-name OCF-instance-name operation-name OCF-resource-name is the name of an OCF-compliant resource agent script found under an immediate subdirectory of ${OCF_AGENTS} OCF-instance-name is the name of an instance of OCF-resource-name. It corresponds to an instance conf file named ${HA_DIR}/conf/OCF/OCF-resource-name/OCF-instance-name operation-name is one of: start stop status monitor ! exit 1 } # # Official OCF environment variable names # OCF_RES_VERSION_MAJOR=1 OCF_RES_VERSION_MAJOR=0 OCF_RES_NAME=$2 export OCF_RES_VERSION_MAJOR OCF_RES_VERSION_MAJOR OCF_RES_NAME ocf_catvars() { cat $Resource_Inst_File | sed -e 's%#.*%%' -e 's%^[ ]*%%' | grep '.' } ocf_varnames() { ocf_catvars | grep '=' | sed 's%=.*%%' } ocf_setvars() { . $Resource_Inst_File for j in `ocf_varnames` do eval export $j done } ocf_op() { ( ocf_setvars $Resource_Script $1 ) } rname() { echo "OCF Resource $Resource_Name::$Resource_Instance" } ocf_status() { statoutput=`OCF_STATUS_QOS=0 ocf_op status` rc=$? case $statoutput in "") ;; *) ha_log "info: $statoutput" esac if [ $rc -eq 0 ] then case $statoutput in *OK*|*running*) echo $statoutput;; *) echo "`rname` is running" ;; esac return 0 else case $statoutput in *OK*|*running*) echo "`rname` is stopped" ;; *) echo "$statoutput";; esac return $rc fi } ocf_monitor() { OCF_STATUS_QOS=10 ocf_op status } if [ $# -ne 3 ] then usage fi Resource_Name=$1 Resource_Instance=$2 Operation=$3 Resource_Inst_File="${HA_DIR}/conf/OCF/${Resource_Name}/${Resource_Instance}" Resource_Script=`ls ${OCF_AGENTS}/*/${Resource_Name} 2>/dev/null | head -n 1` if [ ! -f "$Resource_Script" -o ! -x "$Resource_Script" -o \ ! -f "$Resource_Inst_File" ] then usage fi case $Operation in status) ocf_status;; monitor) ocf_monitor;; *) ocf_op $Operation;; esac Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/Raid1.in0000755000000000000000000000217611576626513022451 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA Raid1, based on original heartbeat RA. # See OCF RA Raid1 for more information. 
# # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # usage: $0 {start|stop|status} # # in /etc/ha.d/haresources, use a line such as: # nodea 10.0.0.170 Raid1::/etc/raidtab.md0::/dev/md0 Filesystem::/dev/md0::/data1::ext2 # # The "start" arg starts up the raid device # The "stop" arg stops it. NOTE: all filesystems must be unmounted # and no processes should be accessing the device. # The "status" arg just prints out whether the device is running or not # # Source function library. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" } if [ $# != 3 ]; then usage exit 1 fi OCF_TYPE=Raid1 OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_raidconf=$1 OCF_RESKEY_raiddev=$2 export OCF_RESKEY_raiddev OCF_RESKEY_raidconf ra_execocf $3 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/SendArp.in0000644000000000000000000000302111576626513023030 0ustar00usergroup00000000000000#!/bin/sh # # # 2006, Huang Zhen # convert it to calling OCF counterpart. # # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # usage: $0 ip-address[/netmaskbits[/interface[:label][/broadcast]]] \ # {start|stop|status|monitor} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # # set -e unset LANG; export LANG LC_ALL=C export LC_ALL . @HB_RA_DIR@/hto-mapfuncs # We need to split the argument into pieces that IPaddr OCF RA can # recognize, sed is prefered over Bash specific builtin functions # for portability. usage() { echo "usage: $0 ip-address/interface $LEGAL_ACTIONS" } if [ $# != 2 ]; then usage exit 1 fi BASEIP=`echo $1 | sed "s%/.*%%"` INTERFACE=`echo $1 | sed "s%${BASEIP}/%%"` OCF_TYPE=SendArp OCF_RESKEY_ip=$BASEIP OCF_RESKEY_nic=$INTERFACE OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$BASEIP export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_ip OCF_RESKEY_nic ra_execocf $2 # EOF - end of file Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/ServeRAID.in0000755000000000000000000000175211576626513023234 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA ServeRAID, based on original heartbeat RA. # See OCF RA ServeRAID for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # usage: ./ServeRAID {start|stop|status} # #: Adapter number of the ServeRAID adapter #: MergeGroup # of the logical drive under consideration. # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 # Source function library. . 
@HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# != 3 ]; then usage fi OCF_TYPE=ServeRAID OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_serveraid=$1 OCF_RESKEY_mergegroup=$2 export OCF_RESKEY_mergegroup OCF_RESKEY_serveraid ra_execocf $3 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/WAS.in0000755000000000000000000000240411576626513022135 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA WAS, based on original heartbeat RA. # See OCF RA WAS for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml # # See usage() function below for more details... # # Source function library. . @HB_RA_DIR@/hto-mapfuncs unset LC_ALL; export LC_ALL unset LANGUAGE; export LANGUAGE usage() { cat <<-! >&1 For the single server edition of WAS: usage: $0 [] $LEGAL_ACTIONS For the advanced edition of WAS: usage: $0 [] $LEGAL_ACTIONS ! exit 1 } case $# in 1) op=$1;; 2) op=$2 if [ -f $1 ]; then OCF_RESKEY_config=$1; export OCF_RESKEY_config else case $1 in *[!0-9]*) echo "ERROR: $1 is neither a WAS configuration file nor a valid port number!" usage;; *) OCF_RESKEY_port=$1; export OCF_RESKEY_port port=$1;; esac fi;; *) usage;; esac OCF_TYPE=WAS OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $op Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/WinPopup.in0000644000000000000000000000220411576626513023257 0ustar00usergroup00000000000000#!/bin/sh # # Resource script for sending WinPopups using smbclient # derived from Alan Robertson's MailTo script # # Author: Sandro Poppi # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Description: sends WinPopups to a sysadmin's workstation # whenever a takeover occurs. # # Note: This command requires an argument, unlike normal init scripts. # # This can be given in the haresources file as: # # WinPopup::hosts # # where "hosts" is a file containing the IPs/Workstation names # one by line to be sent WinPopups # # Source function library. . @HB_RA_DIR@/hto-mapfuncs usage() { echo "Usage: $0 [workstationfile] $LEGAL_ACTIONS" exit 1 } # max. 2 parameters allowed if [ $# -gt 2 ] ; then echo "Additional parameters found: $# but only 2 are allowed!" usage fi # See how we were called. if [ $# -eq 2 ] ; then # optional parameter found cmd=$2 hostfile=$1 else cmd=$1 hostfile="hosts" fi OCF_RESKEY_hostfile=$hostfile export OCF_RESKEY_hostfile OCF_TYPE=WinPopup OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $cmd Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/Xinetd.in0000644000000000000000000000122011576626513022726 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA Xinetd, based on original heartbeat RA. # See OCF RA Xinetd for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # Source function library. . 
@HB_RA_DIR@/hto-mapfuncs xup_usage () { echo "Usage: $0 $LEGAL_ACTIONS" exit 1 } if [ $# -ne 2 ]; then xup_usage fi OCF_TYPE=Xinetd OCF_RESOURCE_INSTANCE=${OCF_TYPE} export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_service=$1; export OCF_RESKEY_service ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/apache.in0000755000000000000000000000146211576626513022727 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA apache, based on original heartbeat RA. See # OCF RA apache for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf # node1 10.0.0.170 IBMhttpd # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 [config-file-pathname] $LEGAL_ACTIONS" exit 1 } case $# in 1) op=$1 ;; 2) OCF_RESKEY_configfile=$1; export OCF_RESKEY_configfile op=$2 ;; *) usage ;; esac OCF_TYPE=apache OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$OCF_RESKEY_configfile export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $op Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/db2.in0000755000000000000000000000135611576626513022157 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA db2, based on original heartbeat RA. # See OCF RA db2 for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines, Inc. # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 db2::db2inst1 # # See usage() function below for more details... # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 db2-database-owner-id $LEGAL_ACTIONS" exit 1 } if [ $# != 2 ]; then usage fi OCF_TYPE=db2 OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1 export OCF_TYPE OCF_RESOURCE_INSTANCE OCF_RESKEY_instance=$1; export OCF_RESKEY_instance ra_execocf $2 Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/hto-mapfuncs.in0000644000000000000000000000442111576626513024105 0ustar00usergroup00000000000000#!/bin/sh # # Author: Zhao Kai # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # #set -x . @HA_HBCONF_DIR@/shellfuncs OCF_ROOT_DIR=@OCF_ROOT_DIR@ OCF_RA_DIR=@OCF_RA_DIR@ OCF_RA_VERSION_MAJOR=1 ; export OCF_RA_VERSION_MAJOR OCF_RA_VERSION_MINOR=0 ; export OCF_RA_VERSION_MINOR OCF_ROOT=$OCF_ROOT_DIR ; export OCF_ROOT LEGAL_ACTIONS="(start|stop|status|usage|meta-data)" log_and_print(){ ha_log "$*" echo "$*" } # # rsc_id=rsc1 rsc_type=IPaddr2 provide=heartbeat start ip=192.168.0.1 ..... # ra_execocf(){ if [ "x${1}" = "x" ]; then log_and_print "ERROR: No action specfied." usage exit 1 fi . ${OCF_RA_DIR}/heartbeat/.ocf-shellfuncs __ACTION=$1 __SCRIPT_NAME="${OCF_RA_DIR}/heartbeat/${OCF_TYPE}" if [ "x${OCF_TYPE}" = "x" ]; then log_and_print "ERROR: Internal error. No value for OCF_TYPE specified" exit 1 fi if [ ! -x $__SCRIPT_NAME ] then log_and_print "ERROR: $__SCRIPT_NAME is not an executable file " exit 1 fi # execute ocf ra and get return value case $__ACTION in start) $__SCRIPT_NAME start;; stop) $__SCRIPT_NAME stop ;; monitor) $__SCRIPT_NAME monitor;; status) $__SCRIPT_NAME monitor;; # Mapping this to monitor is a bug usage|help) usage;; *) log_and_print "ERROR: Unknown operation: $__ACTION" usage exit 1;; esac ocf_return_status=$? 
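# At this point ra_execocf has run ${OCF_RA_DIR}/heartbeat/${OCF_TYPE} with the
# requested action (the calling wrapper script exported the OCF_RESKEY_*
# variables it needs).  The case statement below translates the standard OCF
# return codes ($OCF_SUCCESS, $OCF_NOT_RUNNING and the $OCF_ERR_* family, as
# provided by .ocf-shellfuncs sourced above) into heartbeat V1 style log
# messages, then passes the code back to the caller unchanged.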
case $ocf_return_status in $OCF_SUCCESS) case $__ACTION in monitor|status) log_and_print "INFO: $rsc_type Running OK";; *) log_and_print "INFO: $rsc_type Success";; esac;; $OCF_ERR_GENERIC) log_and_print "ERROR: $rsc_type Generic error";; $OCF_ERR_ARGS) log_and_print "ERROR: $rsc_type Illegal argument";; $OCF_ERR_UNIMPLEMENTED) log_and_print "ERROR: $rsc_type Function unimplemented";; $OCF_ERR_PERM) log_and_print "ERROR: $rsc_type User had insufficient privilege";; $OCF_ERR_INSTALLED) log_and_print "ERROR: $rsc_type Program is not installed";; $OCF_ERR_CONFIGURED) log_and_print "ERROR: $rsc_type Program is not configured";; $OCF_NOT_RUNNING) log_and_print "INFO: $rsc_type Resource is stopped";; *) log_and_print "ERROR: $rsc_type Unknown error: $ocf_return_status" exit 1 ;; esac return $ocf_return_status } Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/ids.in0000644000000000000000000000652611576626513022270 0ustar00usergroup00000000000000#!/bin/sh # # # ids # # Description: # # Wrapper script for the ids OCF resource agent that # manages an IBM Informix Dynamic Server (IDS) instance # as an High-Availability resource. #### # # Author: Lars D. Forseth, or # Created: May 25th 2007 # Last Modified: July 30th 2007 # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL), Version 2 or later # Copyright: (c) 2002 - 2007 International Business Machines, Inc. # # This code is inspired by the db2 and Filesystem wrapper # resource scripts both written by Xun Sun, #### # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. #### # # Example usage as it would appear in /etc/ha.d/haresources: # node1 192.168.0.1 ids::/informix::ids1::onconfig.ids1 # # # --> Note that passing dbname and sqltestquery in heartbeat version 1 style is not supported! # # See usage() function below for more details... #### # # Include variables and functions needed to wrap # from heartbeat V1 resource agent style (haresources, no crm) # to heartbeat V2 resource agent style (crm, OCF) # . @HB_RA_DIR@/hto-mapfuncs # # Function that displays the usage of this script. # usage() { echo "usage: $0 [ ] $LEGAL_ACTIONS" exit 1 } # # Check if number of parameters is valid. 
# Valid hereby are: # - 1 parameter (only the action to perform) # - or 4 parameters (variables needed plus the action to perform) # if [ $# -ne 1 -a $# -ne 4 ]; then usage fi # # Set OCF parameter variables, if supplied # # Set informixdir only if there follows at least one more unempty parameter if [ -n "$2" ]; then OCF_RESKEY_informixdir="$1"; shift export OCF_RESKEY_informixdir fi # Set informixserver only if there follows at least one more unempty parameter if [ -n "$2" ]; then OCF_RESKEY_informixserver="$1"; shift export OCF_RESKEY_informixserver fi # Set onconfig only if there follows at least one more unempty parameter if [ -n "$2" ]; then OCF_RESKEY_onconfig="$1"; shift export OCF_RESKEY_onconfig fi # # Set general OCF variables # OCF_TYPE=ids OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$OCF_RESKEY_informixserver export OCF_TYPE OCF_RESOURCE_INSTANCE # # Finally call OCF resource agent we are wrapping here... # ra_execocf $1 ############################################################################### Heartbeat-3-0-7e3a82377fa8/resources/heartbeat/portblock.in0000755000000000000000000000133411576626513023503 0ustar00usergroup00000000000000#!/bin/sh # # # Description: wrapper of OCF RA portblock, based on original heartbeat RA. # See OCF RA portblock for more information. # # Author: Xun Sun # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2005 International Business Machines # . @HB_RA_DIR@/hto-mapfuncs usage() { echo "usage: $0 {udp|tcp} portno,portno {block|unblock} $LEGAL_ACTIONS" exit 1 } if [ $# != 3 ]; then usage fi OCF_RESKEY_protocol=$1 OCF_RESKEY_portno=$2 OCF_RESKEY_action=$3 export OCF_RESKEY_action OCF_RESKEY_portno OCF_RESKEY_action OCF_TYPE=portblock OCF_RESOURCE_INSTANCE=${OCF_TYPE}_$1_$2_$3 export OCF_TYPE OCF_RESOURCE_INSTANCE ra_execocf $4 Heartbeat-3-0-7e3a82377fa8/telecom/Makefile.am0000644000000000000000000000155611576626513020672 0ustar00usergroup00000000000000# # heartbeat telecom directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = apphbd DIST_SUBDIRS = apphbd Heartbeat-3-0-7e3a82377fa8/telecom/apphbd/Makefile.am0000644000000000000000000000341311576626513022122 0ustar00usergroup00000000000000# # heartbeat: Linux-HA telecom code: application heartbeat daemon # # Copyright (C) 2002 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha hadir = $(sysconfdir)/ha.d halibdir = $(libdir)/@HB_PKG@ commmoddir = $(halibdir)/modules/comm havarlibdir = $(localstatedir)/lib/@HB_PKG@ PIDFILE = $(localstatedir)/run/apphbd.pid COMMONLIBS = -lplumb \ $(GLIBLIB) \ -lpils LIBRT = @LIBRT@ AM_CFLAGS = @CFLAGS@ \ -DPIDFILE='"$(PIDFILE)"' ## binary progs halib_PROGRAMS = apphbd apphbtest ## SOURCES apphbd_SOURCES = apphbd.c # # We don't really need libpils/libltdl (yet...) # apphbd_LDADD = $(COMMONLIBS) \ -lpils \ @LIBLTDL@ $(LIBRT) apphbd_LDFLAGS = @LIBADD_DL@ @LIBLTDL@ -export-dynamic @DLOPEN_FORCE_FLAGS@ ## SOURCES apphbtest_SOURCES = apphbtest.c apphbtest_LDADD = $(COMMONLIBS) \ $(top_builddir)/lib/apphb/libapphb.la apphbtest_LDFLAGS = Heartbeat-3-0-7e3a82377fa8/telecom/apphbd/apphbd.c0000644000000000000000000007424011576626513021476 0ustar00usergroup00000000000000/* * apphbd: application heartbeat daemon * * This daemon implements an application heartbeat server. * * Clients register with it and are expected to check in from time to time * If they don't, we complain ;-) * * More details can be found in the header file. * * Copyright(c) 2002 Alan Robertson * ********************************************************************* * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * General strategy: We use the IPC abstraction library for all our * client-server communications. We use the glib 'mainloop' paradigm * for all our event processing. * * The IPC connection socket is one event source. * Each socket connecting us to our clients are more event sources. * Each heartbeat timeout is also an event source. * * The only limit we have on the number of clients we can support is the * number of file descriptors we can have open. It's been tested to * several hundred at a time. * * We use the Gmain_timeout timeouts instead of native glib mainloop * timeouts because they aren't affected by changes in the time of day * on the system. They have identical semantics - except for working * correctly ;-) * * * TODO list: * * - Consider merging all the timeouts into some kind of single * timeout source. This would probably more efficient for * large numbers of clients. But, it may not matter ;-) * * - Implement a reload option for config file? 
* * */ #include #include #include #include #include #include #include #include #include #include #include #include #define time footime #define index fooindex #include #undef time #undef index #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef PIDFILE # define PIDFILE HA_VARRUNDIR "/apphbd.pid" #endif /* Start: Mirrored from ipcsocket.c */ #ifdef SO_PEERCRED # define USE_SO_PEERCRED #elif HAVE_GETPEEREID # define USE_GETPEEREID #elif defined(SCM_CREDS) # define USE_SCM_CREDS #else # define USE_DUMMY_CREDS /* This will make it compile, but attempts to authenticate * will fail. This is a stopgap measure ;-) */ #endif /* End: Mirrored from ipcsocket.c */ static const char *cmdname = "apphbd"; #define DBGMIN 1 #define DBGDETAIL 2 static int usenormalpoll = TRUE; static int watchdogfd = -1; #define CONFIG_FILE "./apphbd.cf" #define EOS '\0' #define DEFAULT_DEBUG_LEVEL "3" #define DEFAULT_WDT_DEV NULL #define DEFAULT_WDT_INTERVAL_MS "1000" #define APPHB_DEFAULT_REALTIME "yes" #define DEFAULT_DEBUGFILE NULL #define DEFAULT_LOGFILE NULL typedef struct apphb_client apphb_client_t; /* * Per-client data structure. */ struct apphb_client { char * appname; /* application name */ char * appinst; /* application name */ char * curdir; /* application starting directory */ pid_t pid; /* application pid */ uid_t uid; /* application UID */ gid_t gid; /* application GID */ guint timerid; /* timer source id */ unsigned long timerms; /* heartbeat timeout in ms */ longclock_t lasthb; /* Last HB time */ unsigned long warnms; /* heartbeat warntime in ms */ gboolean cause_reboot; /* True if client probs should * cause a crash*/ gboolean missinghb; /* True if missing a hb */ GCHSource* source; IPC_Channel* ch; struct IPC_MESSAGE rcmsg; /* return code msg */ struct apphb_rc rc; /* last return code */ gboolean deleteme; /* Delete after next call */ }; /* Probably ought to eventually make this configurable, but it's a start */ static uid_t critical_uid_list[] = {0, HA_CCMUID}; #define MAXNOTIFYPLUGIN 100 static AppHBNotifyOps *NotificationPlugins[MAXNOTIFYPLUGIN]; static int n_Notification_Plugins; static void apphb_notify(apphb_client_t* client, apphb_event_t event); static void make_daemon(void); static int init_start(void); static int init_stop(void); static int init_status(void); static gboolean load_notification_plugin(const char *optarg); static gboolean open_watchdog(const char * dev); static void tickle_watchdog(void); static void close_watchdog(void); static void usage(const char* cmd, int exit_status); static void apphb_client_remove(gpointer client); static void apphb_putrc(apphb_client_t* client, int rc); static gboolean apphb_timer_popped(gpointer data); static gboolean tickle_watchdog_timer(gpointer data); static apphb_client_t* apphb_client_new(struct IPC_CHANNEL* ch); static int apphb_client_register(apphb_client_t* client, void* Msg, size_t len); static gboolean apphb_read_msg(apphb_client_t* client); static int apphb_client_hb(apphb_client_t* client, void * msg, size_t msgsize); void apphb_process_msg(apphb_client_t* client, void* msg, size_t length); static int authenticate_client(void * clienthandle, uid_t * uidlist , gid_t* gidlist, int nuid, int ngid); /* "event source" functions for client communication */ static gboolean apphb_dispatch(IPC_Channel* src, gpointer user); /* "event source" functions for new client connections */ static gboolean apphb_new_dispatch(IPC_Channel* src, gpointer 
user); /* Functions for apphbd configure */ static void init_config(const char* cfgfile); static gboolean parse_config(const char* cfgfile); static int get_dir_index(const char* directive); static int set_debug_level(const char* option); static int set_watchdog_device(const char* option); static int set_watchdog_interval(const char* option); static int set_realtime(const char* option); static int set_notify_plugin(const char* option); static int set_debugfile(const char* option); static int set_logfile(const char* option); static struct { int debug_level; char wdt_dev[MAXLINE]; int wdt_interval_ms; int realtime; char debugfile[MAXLINE]; char logfile[MAXLINE]; } apphbd_config; static struct directive { const char* name; int (*add_func)(const char*); } Directives[] = { {"debug_level", set_debug_level} , {"watchdog_device", set_watchdog_device} , {"watchdog_interval_ms", set_watchdog_interval} , {"realtime", set_realtime} , {"notify_plugin", set_notify_plugin} , {"debugfile", set_debugfile} , {"logfile", set_logfile} }; /* Send return code from current operation back to client... */ static void apphb_putrc(apphb_client_t* client, int rc) { client->rc.rc = rc; if (client->ch->ops->send(client->ch, &client->rcmsg) != IPC_OK) { client->deleteme = TRUE; } } /* Oops! Client heartbeat timer expired! -- Bad client! */ static gboolean apphb_timer_popped(gpointer data) { apphb_client_t* client = data; if (!client->deleteme) { apphb_notify(client, APPHB_NOHB); } client->missinghb = TRUE; client->timerid = 0; return FALSE; } /* gmainloop "event source" dispatch function */ static gboolean apphb_dispatch(IPC_Channel* src, gpointer Client) { apphb_client_t* client = Client; if (apphbd_config.debug_level >= DBGDETAIL) { cl_log(LOG_DEBUG, "apphb_dispatch: client: %ld" , (long)client->pid); } while (!client->deleteme && client->ch->ops->is_message_pending(client->ch)) { if (client->ch->ch_status == IPC_DISCONNECT) { apphb_notify(client, APPHB_HUP); client->deleteme = TRUE; }else{ if (!apphb_read_msg(client)) { break; } } } return !client->deleteme; } #define DEFAULT_TO (10*60*1000) /* Create new client (we don't know appname or pid yet) */ static apphb_client_t* apphb_client_new(struct IPC_CHANNEL* ch) { apphb_client_t* ret; ret = g_new(apphb_client_t, 1); memset(ret, 0, sizeof(*ret)); ret->appname = NULL; ret->appinst = NULL; ret->ch = ch; ret->timerid = 0; ret->pid = 0; ret->deleteme = FALSE; ret->missinghb = FALSE; ret->cause_reboot = FALSE; /* Create the standard result code (errno) message to send client * NOTE: this disallows multiple outstanding calls from a client * (IMHO this is not a problem) */ ret->rcmsg.msg_buf = NULL; ret->rcmsg.msg_body = &ret->rc; ret->rcmsg.msg_len = sizeof(ret->rc); ret->rcmsg.msg_done = NULL; ret->rcmsg.msg_private = NULL; ret->rc.rc = 0; if (apphbd_config.debug_level >= DBGMIN) { cl_log(LOG_DEBUG, "apphb_client_new: channel: 0x%x" " pid=%ld" , GPOINTER_TO_UINT(ch) , (long)ch->farside_pid); } ret->source = G_main_add_IPC_Channel(G_PRIORITY_DEFAULT , ch, FALSE, apphb_dispatch, (gpointer)ret , apphb_client_remove); if (!ret->source) { memset(ret, 0, sizeof(*ret)); ret=NULL; return ret; } /* Set timer for this client... 
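 * (DEFAULT_TO, i.e. 10 minutes, applies until the client registers and
 * sends SETINTERVAL to establish its real heartbeat period.)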
*/ ret->timerid = Gmain_timeout_add(DEFAULT_TO, apphb_timer_popped, ret); ret->timerms = DEFAULT_TO; ret->warnms = 0; ret->lasthb = time_longclock(); /* Set up "real" input message source for this client */ return ret; } /* Process client registration message */ static int apphb_client_register(apphb_client_t* client, void* Msg, size_t length) { struct apphb_signupmsg* msg = Msg; size_t namelen = 0; uid_t uidlist[1]; gid_t gidlist[1]; IPC_Auth* clientauth; int j; if (client->appname) { return EEXIST; } if (length < sizeof(*msg) || (namelen = strnlen(msg->appname, sizeof(msg->appname))) < 1 || namelen >= sizeof(msg->appname) || strnlen(msg->appinstance, sizeof(msg->appinstance)) >= sizeof(msg->appinstance)) { return EINVAL; } #ifndef USE_SO_PEERCRED if(client->ch->farside_pid == -1) { client->ch->farside_pid = msg->pid; } #endif if (msg->pid < 2 || (CL_KILL(msg->pid, 0) < 0 && errno != EPERM) || (client->ch->farside_pid != msg->pid)) { return EINVAL; } client->pid = msg->pid; /* Make sure client is who they claim to be... */ uidlist[0] = msg->uid; gidlist[0] = msg->gid; clientauth = ipc_set_auth(uidlist, gidlist, 1, 1); if (client->ch->ops->verify_auth(client->ch, clientauth) != IPC_OK) { ipc_destroy_auth(clientauth); return EINVAL; } ipc_destroy_auth(clientauth); client->appname = g_strdup(msg->appname); client->appinst = g_strdup(msg->appinstance); client->curdir = g_strdup(msg->curdir); client->uid = msg->uid; client->gid = msg->gid; if (apphbd_config.debug_level >= DBGMIN) { cl_log(LOG_DEBUG , "apphb_client_register: client: [%s]/[%s] pid %ld" " (uid,gid) = (%ld,%ld)\n" , client->appname , client->appinst , (long)client->pid , (long)client->uid , (long)client->gid); } /* Tell the plugins something happened */ for (j=0; j < n_Notification_Plugins; ++j) { NotificationPlugins[j]->cregister(client->pid , client->appname, client->appinst, client->curdir , client->uid, client->gid, client); } return 0; } /* Shut down the requested client */ static void apphb_client_remove(gpointer Client) { apphb_client_t* client = Client; cl_log(LOG_INFO, "apphb_client_remove: client: %ld" , (long)client->pid); if (apphbd_config.debug_level >= DBGMIN) { cl_log(LOG_DEBUG, "apphb_client_remove: client pid: %ld" , (long)client->pid); } if (client->timerid) { g_source_remove(client->timerid); client->timerid=0; } G_main_del_IPC_Channel(client->source); g_free(client->appname); g_free(client->appinst); g_free(client->curdir); memset(client, 0, sizeof(*client)); } /* Client requested disconnect */ static int apphb_client_disconnect(apphb_client_t* client , void * msg, size_t msgsize) { /* We can't delete it right away... 
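 * We are still running inside this client's dispatch callback, so we only
 * mark it; apphb_dispatch() then returns FALSE and the mainloop destroys the
 * IPC source, which calls apphb_client_remove() for the real cleanup.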
*/ client->deleteme=TRUE; apphb_notify(client, APPHB_HBUNREG); return 0; } /* Client requested new timeout interval */ static int apphb_client_set_timeout(apphb_client_t* client, void * Msg, size_t msgsize) { struct apphb_msmsg* msg = Msg; if (msgsize < sizeof(*msg)) { return EINVAL; } client->timerms = msg->ms; return apphb_client_hb(client, Msg, msgsize); } /* Client requested new warntime interval */ static int apphb_client_set_warntime(apphb_client_t* client, void * Msg, size_t msgsize) { struct apphb_msmsg* msg = Msg; if (msgsize < sizeof(*msg)) { return EINVAL; } client->warnms = msg->ms; client->lasthb = time_longclock(); return 0; } /* Client requested to set/reset 'reboot' attribute */ static int apphb_client_set_reboot(apphb_client_t* client, void * Msg, size_t msgsize) { struct apphb_msmsg* msg = Msg; gboolean tf = (gboolean)msg->ms; if (msgsize < sizeof(*msg)) { return EINVAL; } /* * Only authorized clients can request system reboots if they fail, * since this is a perfect way to cause a denial of service. */ if (tf && !authenticate_client(client, critical_uid_list , NULL, DIMOF(critical_uid_list), 0)) { return EPERM; } client->cause_reboot = tf; client->lasthb = time_longclock(); return 0; } /* Client heartbeat received */ static int apphb_client_hb(apphb_client_t* client, void * Msg, size_t msgsize) { if (client->missinghb) { apphb_notify(client, APPHB_HBAGAIN); client->missinghb = FALSE; } if (client->timerid) { g_source_remove(client->timerid); client->timerid = 0; } if (client->timerms > 0) { client->timerid = Gmain_timeout_add(client->timerms , apphb_timer_popped, client); } if (client->warnms > 0) { longclock_t now = time_longclock(); unsigned long elapsedms; elapsedms=longclockto_ms(sub_longclock(now, client->lasthb)); client->lasthb = now; if (elapsedms > client->warnms) { cl_log(LOG_INFO, "apphb client '%s' / '%s' (pid %ld) " "late heartbeat: %lu ms" , client->appname, client->appinst , (long)client->pid, elapsedms); } } return 0; } /* Read and process a client request message */ static gboolean apphb_read_msg(apphb_client_t* client) { struct IPC_MESSAGE* msg = NULL; switch (client->ch->ops->recv(client->ch, &msg)) { case IPC_OK: apphb_process_msg(client, msg->msg_body, msg->msg_len); if (msg->msg_done) { msg->msg_done(msg); } return TRUE; break; case IPC_BROKEN: client->deleteme = TRUE; return FALSE; break; case IPC_FAIL: return FALSE; break; } return FALSE; } /* * Mappings between commands and strings */ struct hbcmd { const char * msg; gboolean senderrno; int (*fun)(apphb_client_t* client, void* msg, size_t len); }; /* * Put HEARTBEAT message first - it is by far the most common message... */ static struct hbcmd hbcmds[] = { {HEARTBEAT, FALSE, apphb_client_hb}, {REGISTER, TRUE, apphb_client_register}, {SETINTERVAL, TRUE, apphb_client_set_timeout}, {SETWARNTIME, TRUE, apphb_client_set_warntime}, {SETREBOOT, TRUE, apphb_client_set_reboot}, {UNREGISTER, TRUE, apphb_client_disconnect}, }; /* Process a message from an app heartbeat client process */ void apphb_process_msg(apphb_client_t* client, void* Msg, size_t length) { struct apphb_msg * msg = Msg; const int sz1 = sizeof(msg->msgtype)-1; int rc = EINVAL; gboolean sendrc = TRUE; int j; if (length < sizeof(*msg)) { return; } msg->msgtype[sz1] = EOS; /* Which command are we processing? 
*/ if (apphbd_config.debug_level >= DBGDETAIL) { cl_log(LOG_DEBUG, "apphb_process_msg: client: 0x%x" " type=%s" , GPOINTER_TO_UINT(client) , msg->msgtype); } for (j=0; j < DIMOF(hbcmds); ++j) { if (strcmp(msg->msgtype, hbcmds[j].msg) == 0) { sendrc = hbcmds[j].senderrno; if (client->appname == NULL && hbcmds[j].fun != apphb_client_register) { rc = ESRCH; break; } rc = hbcmds[j].fun(client, Msg, length); } } if (sendrc) { if (apphbd_config.debug_level >= DBGMIN) { cl_log(LOG_DEBUG, "apphb_process_msg: client: 0x%x" " type=%s, rc=%d" , GPOINTER_TO_UINT(client) , msg->msgtype, rc); } apphb_putrc(client, rc); } } /* gmainloop client connection "dispatch" function */ /* This is where we accept connections from a new client */ static gboolean apphb_new_dispatch(IPC_Channel* src, gpointer user) { if (apphbd_config.debug_level >= DBGMIN) { cl_log(LOG_DEBUG, "apphb_new_dispatch: IPC_channel: 0x%x" " pid=%ld" , GPOINTER_TO_UINT(src) , (long)src->farside_pid); } if (src != NULL) { /* This sets up comm channel w/client * Ignoring the result value is OK, because * the client registers itself w/event system. */ (void)apphb_client_new(src); }else{ cl_perror("accept_connection failed!"); sleep(1); } return TRUE; } /* * This function is called whenever a heartbeat event occurs. * It could be replaced by a function which called the appropriate * set of plugins to distribute the notification along to whoever * is interested in whatever way is desired. */ static void apphb_notify(apphb_client_t* client, apphb_event_t event) { int logtype = LOG_WARNING; const char * msg; int j; gboolean eventisbad = FALSE; const char * word = ""; switch(event) { case APPHB_HUP: msg = "hangup"; logtype = LOG_WARNING; eventisbad = TRUE; break; case APPHB_NOHB: msg = "failed to heartbeat"; logtype = LOG_WARNING; eventisbad = TRUE; break; case APPHB_HBAGAIN: msg = "resumed heartbeats"; logtype = LOG_INFO; break; case APPHB_HBUNREG: msg = "unregistered"; logtype = LOG_INFO; break; default: return; } if (eventisbad && client->cause_reboot) { logtype = LOG_EMERG; word="Critical "; } if (event != APPHB_HBUNREG) { cl_log(logtype, "%sapphb client '%s' / '%s' (pid %ld) %s" , word, client->appname, client->appinst , (long)client->pid, msg); } if (eventisbad && client->cause_reboot) { /* Uh, Oh... Time to go! 
*/ tickle_watchdog(); cl_reboot(apphbd_config.wdt_interval_ms, client->appname); } /* Tell the plugins something happened */ for (j=0; j < n_Notification_Plugins; ++j) { NotificationPlugins[j]->status(client->appname , client->appinst, client->curdir, client->pid , client->uid, client->gid, event); } } extern pid_t getsid(pid_t); static void init_config(const char* cfgfile) { /* Set default configure */ set_debug_level(DEFAULT_DEBUG_LEVEL); set_watchdog_device(DEFAULT_WDT_DEV); set_watchdog_interval(DEFAULT_WDT_INTERVAL_MS); set_realtime(APPHB_DEFAULT_REALTIME); /* Read configure file */ if (cfgfile) { if (!parse_config(cfgfile)) { exit(LSB_EXIT_NOTCONFIGED); } }else{ exit(LSB_EXIT_NOTCONFIGED); } } /* Adapted from parse_config in config.c */ static gboolean parse_config(const char* cfgfile) { FILE* f; char buf[MAXLINE]; char* bp; char* cp; char directive[MAXLINE]; int dirlength; int optionlength; char option[MAXLINE]; int dir_index; gboolean ret = TRUE; if ((f = fopen(cfgfile, "r")) == NULL){ cl_log(LOG_ERR, "Cannot open config file:[%s]", cfgfile); return(FALSE); } while(fgets(buf, MAXLINE, f) != NULL){ bp = buf; /* Skip over white space*/ bp += strspn(bp, " \t\n\r\f"); /* comments */ if ((cp = strchr(bp, '#')) != NULL){ *cp = EOS; } if (*bp == EOS){ continue; } dirlength = strcspn(bp, " \t\n\f\r"); strncpy(directive, bp, dirlength); directive[dirlength] = EOS; if ((dir_index = get_dir_index(directive)) == -1){ cl_log(LOG_ERR, "Illegal directive [%s] in %s" , directive, cfgfile); ret = FALSE; continue; } bp += dirlength; /* skip delimiters */ bp += strspn(bp, " ,\t\n\f\r"); /* Set option */ optionlength = strcspn(bp, " ,\t\n\f\r"); strncpy(option, bp, optionlength); option[optionlength] = EOS; if (!(*Directives[dir_index].add_func)(option)) { ret = FALSE; } }/*while*/ fclose(f); return ret; } static int get_dir_index(const char* directive) { int j; for(j=0; j < DIMOF(Directives); j++){ if (strcmp(directive, Directives[j].name) == 0){ return j; } } return -1; } static int set_debug_level(const char* option) { char * ep; long lval; if (!option) { return FALSE; } errno = 0; lval = strtol(option, &ep, 10); if (errno == 0 && option[0] != EOS && *ep == EOS){ apphbd_config.debug_level = (int) lval; return TRUE; }else{ cl_log(LOG_ERR, "invalid debug_level [%s] specified" , option); return FALSE; } } static int set_watchdog_device(const char* option) { if (!option) { apphbd_config.wdt_dev[0] = EOS; return FALSE; } strncpy(apphbd_config.wdt_dev, option, MAXLINE); return TRUE; } static int set_watchdog_interval(const char* option) { char * ep; long lval; if (!option) { return FALSE; } errno = 0; lval = strtol(option, &ep, 10); if (errno == 0 && option[0] != EOS && *ep == EOS && lval >= 10) { apphbd_config.wdt_interval_ms = (int) lval; return TRUE; }else{ cl_log(LOG_ERR, "invalid watchdog_interval_ms [%s] specified" , option); return FALSE; } } static int set_realtime(const char* option) { if (!option) { return FALSE; } if (strcmp(option, "yes") == 0){ apphbd_config.realtime = 1; return TRUE; }else if (strcmp(option, "no") == 0){ apphbd_config.realtime = 0; return TRUE; } return FALSE; } static int set_notify_plugin(const char* option) { if (option && load_notification_plugin(option)) { cl_log(LOG_INFO, "Plugin [%s] loaded", option); return TRUE; } return FALSE; } static int set_debugfile(const char* option) { if (!option){ apphbd_config.debugfile[0] = EOS; return FALSE; } strncpy(apphbd_config.debugfile, option, MAXLINE); return TRUE; } static int set_logfile(const char* option) { if (!option){ 
apphbd_config.logfile[0] = EOS; return FALSE; } strncpy(apphbd_config.logfile, option, MAXLINE); return TRUE; } /* * Main program for monitoring application heartbeats... */ static GMainLoop *mainloop; static void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); fprintf(stream, "\t-d\tsets debug level\n"); fprintf(stream, "\t-s\tgets daemon status\n"); fprintf(stream, "\t-r\trestarts daemon\n"); fprintf(stream, "\t-k\tstops daemon\n"); fprintf(stream, "\t-h\thelp message\n"); fflush(stream); exit(exit_status); } #define OPTARGS "srdkhc:" int main(int argc, char ** argv) { int flag; int req_restart = FALSE; int req_status = FALSE; int req_stop = FALSE; int argerr = 0; const char* cfgfile = CONFIG_FILE; cl_cdtocoredir(); cl_enable_coredumps(TRUE); cl_log_set_entity(cmdname); cl_log_enable_stderr(TRUE); cl_log_set_facility(LOG_USER); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 's': /* Status */ req_status = TRUE; break; case 'k': /* Stop (kill) */ req_stop = TRUE; break; case 'r': /* Restart */ req_restart = TRUE; break; case 'h': /* Help message */ usage(cmdname, LSB_EXIT_OK); break; case 'c': /* Configure file */ cfgfile = optarg; break; case 'd': ++apphbd_config.debug_level; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(cmdname, LSB_EXIT_GENERIC); } if (req_status){ return init_status(); } if (req_stop){ return init_stop(); } if (req_restart) { init_stop(); } init_config(cfgfile); return init_start(); } static void shutdown(int nsig) { static int shuttingdown = 0; CL_SIGNAL(nsig, shutdown); if (!shuttingdown) { /* Let the watchdog get us if we can't shut down */ tickle_watchdog(); shuttingdown = 1; } if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); }else{ exit(LSB_EXIT_OK); } } static gboolean cpu_limit_timer(gpointer unused) { (void)unused; cl_cpu_limit_update(); return TRUE; } static int init_start(void) { char path[] = IPC_PATH_ATTR; char commpath[] = APPHBSOCKPATH; long pid; struct IPC_WAIT_CONNECTION* wconn; GHashTable* wconnattrs; if ((pid = cl_read_pidfile(PIDFILE)) > 0) { cl_log(LOG_CRIT, "already running: [pid %ld].", pid); exit(LSB_EXIT_OK); } if (apphbd_config.debug_level) { if (apphbd_config.logfile[0] != EOS) { cl_log_set_logfile(apphbd_config.logfile); } if (apphbd_config.debugfile[0] != EOS) { cl_log_set_debugfile(apphbd_config.debugfile); } } if (apphbd_config.realtime == 1){ cl_enable_realtime(); }else if (apphbd_config.realtime == 0){ cl_disable_realtime(); } if (!usenormalpoll) { g_main_set_poll_func(cl_glibpoll); ipc_set_pollfunc(cl_poll); } make_daemon(); /* Create a "waiting for connection" object */ wconnattrs = g_hash_table_new(g_str_hash, g_str_equal); g_hash_table_insert(wconnattrs, path, commpath); wconn = ipc_wait_conn_constructor(IPC_ANYTYPE, wconnattrs); if (wconn == NULL) { cl_log(LOG_CRIT, "Unable to create wcon of type %s", IPC_ANYTYPE); cl_log(LOG_CRIT, "UhOh! Failed to create wconn!"); exit(LSB_EXIT_GENERIC); } /* Create a source to handle new connection requests */ G_main_add_IPC_WaitConnection(G_PRIORITY_HIGH, wconn , NULL, FALSE, apphb_new_dispatch, wconn, NULL); if (apphbd_config.debug_level >= DBGMIN) { int ms_interval; cl_cpu_limit_setpercent(20); ms_interval = cl_cpu_limit_ms_interval(); Gmain_timeout_add(ms_interval, cpu_limit_timer, NULL); } /* Create the mainloop and run it... 
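 * Before entering the loop: switch to soft real-time scheduling, open and
 * periodically tickle the watchdog (if one is configured), and drop root
 * privileges for the duration of the run.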
*/ mainloop = g_main_new(FALSE); cl_log(LOG_INFO, "Starting %s", cmdname); cl_make_realtime(SCHED_RR, 5, 64, 64); if (apphbd_config.wdt_dev[0] != EOS) { open_watchdog(apphbd_config.wdt_dev); } if (watchdogfd >= 0) { Gmain_timeout_add(apphbd_config.wdt_interval_ms , tickle_watchdog_timer, NULL); } drop_privs(0, 0); /* Become nobody */ g_main_run(mainloop); return_to_orig_privs(); close_watchdog(); wconn->ops->destroy(wconn); if (unlink(PIDFILE) == 0) { cl_log(LOG_INFO, "[%s] stopped", cmdname); } return 0; } static void make_daemon(void) { int j; long pid; #ifndef NOFORK pid = fork(); if (pid < 0) { cl_log(LOG_CRIT, "cannot start daemon."); exit(LSB_EXIT_GENERIC); }else if (pid > 0) { exit(LSB_EXIT_OK); } #endif if (cl_lock_pidfile(PIDFILE) < 0) { cl_log(LOG_CRIT, "already running: [pid %d].", cl_read_pidfile(PIDFILE)); exit(LSB_EXIT_OK); } umask(022); getsid(0); if (!apphbd_config.debug_level) { cl_log_enable_stderr(FALSE); } for (j=0; j < 3; ++j) { close(j); (void)open("/dev/null", j == 0 ? O_RDONLY : O_RDONLY); } CL_IGNORE_SIG(SIGINT); CL_IGNORE_SIG(SIGHUP); CL_SIGNAL(SIGTERM, shutdown); } static int init_stop(void) { long pid; int rc = LSB_EXIT_OK; pid = cl_read_pidfile(PIDFILE); if (pid > 0) { if (CL_KILL((pid_t)pid, SIGTERM) < 0) { rc = (errno == EPERM ? LSB_EXIT_EPERM : LSB_EXIT_GENERIC); fprintf(stderr, "Cannot kill pid %ld\n", pid); }else{ while (CL_PID_EXISTS(pid)) { sleep(1); } } } return rc; } static int init_status(void) { long pid = cl_read_pidfile(PIDFILE); if (pid > 0) { fprintf(stderr, "%s is running [pid: %ld]\n" , cmdname, pid); return LSB_STATUS_OK; } fprintf(stderr, "%s is stopped.\n", cmdname); return LSB_STATUS_STOPPED; } /* * Notification plugin imported functions: * authenticate_client(void * handle, uidlist, gidlist) * This returns TRUE if the app at apphandle * properly authenticates according to the gidlist * and the uidlist. 
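 * Internally it builds an IPC_AUTH object from the supplied uid/gid lists
 * and asks the client's IPC channel to verify the connecting peer's
 * credentials against it.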
*/ static gboolean authenticate_client(void * clienthandle, uid_t * uidlist, gid_t* gidlist , int nuid, int ngid) { struct apphb_client* client = clienthandle; struct IPC_AUTH* auth; struct IPC_CHANNEL* ch; gboolean rc = FALSE; if ((auth = ipc_set_auth(uidlist, gidlist, nuid, ngid)) == NULL) { return FALSE; } if (client != NULL && (ch = client->ch) != NULL) { rc = ch->ops->verify_auth(ch, auth) == IPC_OK; } ipc_destroy_auth(auth); return rc; } static gboolean open_watchdog(const char * dev) { if (watchdogfd >= 0) { cl_log(LOG_WARNING, "Watchdog device already open."); return FALSE; } if (!dev) { cl_log(LOG_WARNING, "Bad watchdog device name."); return FALSE; } watchdogfd = open(dev, O_WRONLY); if (watchdogfd >= 0) { if (fcntl(watchdogfd, F_SETFD, FD_CLOEXEC)) { cl_log(LOG_WARNING, "Error setting the " "close-on-exec flag for watchdog"); } cl_log(LOG_NOTICE, "Using watchdog device: %s" , dev); tickle_watchdog(); return TRUE; }else{ cl_log(LOG_ERR, "Cannot open watchdog device: %s" , dev); } return FALSE; } static void close_watchdog(void) { if (watchdogfd >= 0) { if (write(watchdogfd, "V", 1) != 1) { cl_log(LOG_CRIT , "Watchdog write magic character failure:" " closing watchdog!\n"); } close(watchdogfd); watchdogfd=-1; } } static void tickle_watchdog(void) { if (watchdogfd >= 0) { if (write(watchdogfd, "", 1) != 1) { cl_log(LOG_CRIT , "Watchdog write failure: closing watchdog!\n"); close_watchdog(); watchdogfd=-1; } } } static gboolean tickle_watchdog_timer(gpointer data) { tickle_watchdog(); return TRUE; } static PILPluginUniv* pisys = NULL; static GHashTable* Notifications = NULL; static AppHBNotifyImports piimports = { authenticate_client }; static PILGenericIfMgmtRqst RegistrationRqsts [] = { {"AppHBNotification", &Notifications, &piimports, NULL, NULL}, {NULL, NULL, NULL, NULL, NULL} }; static gboolean load_notification_plugin(const char * pluginname) { PIL_rc rc; void* exports; if (pisys == NULL) { pisys = NewPILPluginUniv(HA_LIBHBDIR "/heartbeat/plugins"); if (pisys == NULL) { return FALSE; } if ((rc = PILLoadPlugin(pisys, "InterfaceMgr", "generic" , &RegistrationRqsts)) != PIL_OK) { cl_log(LOG_ERR , "cannot load generic interface manager" " [%s/%s]: %s" , "InterfaceMgr", "generic" , PIL_strerror(rc)); return FALSE; } } rc = PILLoadPlugin(pisys, "AppHBNotification" , pluginname, NULL); if (rc != PIL_OK) { cl_log(LOG_ERR, "cannot load plugin %s", pluginname); return FALSE; } if ((exports = g_hash_table_lookup(Notifications, pluginname)) == NULL) { cl_log(LOG_ERR, "cannot find plugin %s", pluginname); return FALSE; } NotificationPlugins[n_Notification_Plugins] = exports; n_Notification_Plugins ++; return TRUE; } Heartbeat-3-0-7e3a82377fa8/telecom/apphbd/apphbtest.c0000644000000000000000000002257511576626513022236 0ustar00usergroup00000000000000/* * apphbtest: application heartbeat test program * * This program tests apphbd. It registers with the application heartbeat * server and issues heartbeats from time to time... * * Copyright(c) 2002 Alan Robertson * ********************************************************************* * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include static int debug; void doafailtest(void); void multi_hb_test(int child_proc_num, int hb_intvl_ms, int hb_num , int delaysecs, int dofailuretests); void hb_normal(int hb_intvl_ms, int delaysecs, int hb_num); void apphb_setwarn_test(int warnhb_ms, int hb_ms); void dup_reg_test(void); #define APPNAME_LEN 256 #define OPTARGS "n:p:i:l:dFh" #define USAGE_STR "Usage: [-n heartbeat number] \ [-p process number] \ [-l delay seconds] \ [-i heartbeat interval(ms)] \ [-d](debug information) \ [-F](enable failure cases) \ [-h](print help message)" int main(int argc,char ** argv) { int flag; int hb_num = 10; int child_proc_num = 1; int hb_intvl_ms = 1000; int dofailuretests = FALSE; int delaysecs = -1; while (( flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'n': /* Number of heartbeat */ hb_num = atoi(optarg); break; case 'p': /* Number of heartbeat processes */ child_proc_num = atoi(optarg); break; case 'i': /* Heartbeat interval */ hb_intvl_ms = atoi(optarg); break; case 'l': /* Delay before starting multiple clients */ delaysecs = atoi(optarg); break; case 'd': /* Debug */ debug += 1; break; case 'F': /* Enable failure cases */ dofailuretests = TRUE; break; case 'h': default: fprintf(stderr , "%s "USAGE_STR"\n", argv[0]); return(1); } } cl_log_set_entity(argv[0]); cl_log_enable_stderr(TRUE); cl_log_set_facility(LOG_USER); if (delaysecs < 0) delaysecs = child_proc_num; multi_hb_test(child_proc_num, hb_intvl_ms, hb_num, delaysecs , dofailuretests); if (dofailuretests) { /* run these fail cases if you want */ /* apphb_setwarn_test(2000, 1000); apphb_setwarn_test(1000, 2000); dup_reg_test(); */ } return(0); } void doafailtest(void) { int j; int rc; char app_name[] = "failtest"; char app_instance[APPNAME_LEN]; snprintf(app_instance, sizeof(app_instance) , "%s_%ld", app_name, (long)getpid()); cl_log(LOG_INFO, "Client %s registering", app_instance); rc = apphb_register(app_name, app_instance); if (rc < 0) { cl_perror("%s registration failure", app_instance); exit(1); } if (debug) { cl_log(LOG_INFO, "Client %s registered", app_instance); } cl_log(LOG_INFO, "Client %s setting 2 second heartbeat period" , app_instance); rc = apphb_setinterval(2000); if (rc < 0) { cl_perror("%s setinterval failure", app_instance); exit(2); } for (j=0; j < 10; ++j) { sleep(1); if (debug) fprintf(stderr, "+"); if (j == 8) { apphb_setwarn(500); } rc = apphb_hb(); if (rc < 0) { cl_perror("%s apphb_hb failure", app_instance); exit(3); } } if (debug) { fprintf(stderr, "\n"); } sleep(3); if (debug) fprintf(stderr, "!"); rc = apphb_hb(); if (rc < 0) { cl_perror("%s late apphb_hb failure", app_instance); exit(4); } cl_log(LOG_INFO, "Client %s unregistering", app_instance); rc = apphb_unregister(); if (rc < 0) { cl_perror("%s apphb_unregister failure", app_instance); exit(5); } rc = apphb_register(app_instance, "HANGUP"); if (rc < 0) { cl_perror("%s second registration failure", app_instance); exit(1); } /* Now we leave without further adieu -- HANGUP */ cl_log(LOG_INFO, "Client %s HANGUP!", app_instance); } void hb_normal(int hb_intvl_ms, int delaysecs, int hb_num) { int j; int rc; struct timespec time_spec; char app_name[] = "apphb_normal"; 
char app_instance[APPNAME_LEN]; struct timeval tmp; snprintf(app_instance, sizeof(app_instance) , "%s_%ld", app_name, (long)getpid()); if (delaysecs) { /* sleep randomly for a while */ gettimeofday(&tmp, NULL); srandom((unsigned int)tmp.tv_usec); delaysecs = random() % delaysecs; if (delaysecs) { cl_log(LOG_INFO, "%s sleep randomly for %d secs" , app_instance, delaysecs); time_spec.tv_sec = delaysecs; time_spec.tv_nsec = 0; nanosleep(&time_spec, NULL); } } cl_log(LOG_INFO, "Client %s registering", app_instance); rc = apphb_register(app_name, app_instance); if (rc < 0) { cl_perror("%s registration failure", app_instance); exit(1); } if (debug) { cl_log(LOG_INFO, "Client %s registered", app_instance); } cl_log(LOG_INFO, "Client %s setting %d ms heartbeat interval" , app_instance, hb_intvl_ms); rc = apphb_setinterval(hb_intvl_ms); if (rc < 0) { cl_perror("%s setinterval failure", app_instance); exit(2); } /* Sleep for half of the heartbeat interval */ time_spec.tv_sec = hb_intvl_ms / 2000; time_spec.tv_nsec = (hb_intvl_ms % 2000) * 500000; for (j=0; j < hb_num; ++j) { nanosleep(&time_spec, NULL); if(debug >= 1) fprintf(stderr, "%ld:+\n", (long)getpid()); rc = apphb_hb(); if (rc < 0) { cl_perror("%s apphb_hb failure", app_instance); exit(3); } } cl_log(LOG_INFO, "Client %s unregistering", app_instance); rc = apphb_unregister(); if (rc < 0) { cl_perror("%s apphb_unregister failure", app_instance); exit(4); } if (debug) { cl_log(LOG_INFO, "Client %s unregistered", app_instance); } } void multi_hb_test(int child_proc_num, int hb_intvl_ms, int hb_num, int delaysecs , int dofailuretests) { int j; cl_log(LOG_INFO, "----Start %d client(s) with hb interval %d ms----" , child_proc_num, hb_intvl_ms); for (j=0; j < child_proc_num; ++j) { switch(fork()){ case 0: hb_normal(hb_intvl_ms, delaysecs ,hb_num); exit(0); break; case -1: cl_perror("Can't fork!"); exit(1); break; default: /* In the parent. */ break; } } /* Wait for all our child processes to exit*/ while(wait(NULL) > 0); errno = 0; if (dofailuretests) { cl_log(LOG_INFO, "----Start %d client(s) doing fail test----" , child_proc_num); for (j = 0; j < child_proc_num; ++j) { switch(fork()){ case 0: doafailtest(); exit(0); break; case -1: cl_perror("Can't fork!"); exit(1); break; default: break; } } /* Wait for all our child processes to exit*/ while(wait(NULL) > 0); errno = 0; } } void apphb_setwarn_test(int warnhb_ms, int hb_ms) { /* apphb_setwarn() sets the warning period. 
* if interval between two heartbeats is longer than the * warning period, apphbd will warn: 'late heartbeat' */ int rc; struct timespec time_spec; char app_name[] = "apphb_setwarn_test"; char app_instance[APPNAME_LEN]; snprintf(app_instance, sizeof(app_instance) , "%s_%ld", app_name, (long)getpid()); cl_log(LOG_INFO, "----Start test apphb_setwarn----"); cl_log(LOG_INFO, "Client %s registering", app_instance); rc = apphb_register(app_name, app_instance); if (rc < 0) { cl_perror("%s register failure", app_instance); exit(1); } cl_log(LOG_INFO, "Client %s setwarn for %d ms", app_instance, warnhb_ms); rc = apphb_setwarn(warnhb_ms); if (rc < 0) { cl_perror("%s setwarn failure", app_instance); exit(3); } cl_log(LOG_INFO, "Client %s setinterval for %d ms", app_instance, hb_ms); rc = apphb_setinterval(hb_ms); if (rc < 0) { cl_perror("%s setinterval failure", app_instance); exit(2); } rc = apphb_hb(); if (rc < 0) { cl_perror("%s first apphb_hb failure", app_instance); exit(4); } time_spec.tv_sec = hb_ms/1000; time_spec.tv_nsec = (hb_ms % 1000) * 1000000; nanosleep(&time_spec, NULL); rc = apphb_hb(); if (rc < 0) { cl_perror("%s second apphb_hb failure", app_instance); exit(4); } cl_log(LOG_INFO, "Client %s unregistering", app_instance); rc = apphb_unregister(); if (rc < 0) { cl_perror("%s apphb_unregister failure", app_instance); exit(5); } errno = 0; } void dup_reg_test(void) { /* apphbd should not allow a process register two times */ int rc; char app_instance[APPNAME_LEN]; char app_name[] = "dup_reg_test"; snprintf(app_instance, sizeof(app_instance) , "%s_%ld", app_name, (long)getpid()); cl_log(LOG_INFO, "----Client %s trying to register twice----" , app_instance); cl_log(LOG_INFO, "Client %s registering", app_instance); rc = apphb_register(app_name, app_instance); if (rc < 0) { cl_perror("%s first register fail", app_instance); exit(1); } sleep(3); cl_log(LOG_INFO, "Client %s registering again", app_instance); rc = apphb_register(app_name, app_instance); if (rc < 0) { cl_perror("%s second register fail", app_instance); exit(1); } errno = 0; } Heartbeat-3-0-7e3a82377fa8/tools/1node2heartbeat0000755000000000000000000002447311576626513021247 0ustar00usergroup00000000000000#!/usr/bin/python # # Program to determine current list of enabled services for init state 3 # and create heartbeat CRM configuration for heartbeat to manage them # __copyright__=''' Author: Alan Robertson Copyright (C) 2006 International Business Machines ''' # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,re # # Here's the plan: # Find out the default run level # Find out what (additional?) services are enabled in that run level # Figure out which of them start after the network (or heartbeat?) 
# Ignore heartbeat :-) # Figure out which services supply the $services # Look to see if the SUSE /etc/insserv.conf file exists # If so, then scan it for who provides the $services # defined by the LSB # If we're on Red Hat, then make some Red Hat type assumptions # (whatever those might be) # If we're not, then make some generic assumptions... # Scan the init scripts for their dependencies... # Eliminate anything at or before 'network'. # Create resources corresponding to all active services # Include monitor actions for those services # that can be started after 'network' # Add the start-after dependencies # # Things to consider doing in the future: # Constrain them to only run on the local system? # Put them all in a convenience group (no colocation, no ordering) # Add start and stop timeouts ServiceKeywords = {} ServiceMap = {} ProvidesMap = {} RequiresMap = {} SkipMap = {'heartbeat': None, 'random': None} NoMonitor = {'microcode': None} PreReqs = ['network'] IgnoreList = [] sysname = os.uname()[1] InitDir = "/etc/init.d" def service_is_hb_compatible(service): scriptname = os.path.join(InitDir, service) command=scriptname + " status >/dev/null 2>&1"; rc = os.system(command) return rc == 0 def find_ordered_services(dir): allscripts = os.listdir(dir) allscripts.sort() services = [] for entry in allscripts: matchobj = re.match("S[0-9]+(.*)", entry) if not matchobj: continue service = matchobj.group(1) if SkipMap.has_key(service): continue if service_is_hb_compatible(service): services.append(service) else: IgnoreList.append(service) return services def register_services(initdir, services): for service in services: if not ServiceMap.has_key(service): ServiceMap[service] = os.path.join(initdir, service) for service in services: script_dependency_scan(service, os.path.join(initdir, service), ServiceMap) # # From the LSB version 3.1: "Comment Conventions for Init Scripts" # ### BEGIN INIT INFO ### END INIT INFO # # The delimiter lines may contain trailing whitespace, which shall be ignored. # All lines inside the block shall begin with a hash character '#' in the # first column, so the shell interprets them as comment lines which do not # affect operation of the script. The lines shall be of the form: # {keyword}: arg1 [arg2...] # with exactly one space character between the '#' and the keyword, with a # single exception. In lines following a line containing the Description # keyword, and until the next keyword or block ending delimiter is seen, # a line where the '#' is followed by more than one space or a tab # character shall be treated as a continuation of the previous line. # # Make this a class to avoid recompiling it for each script we scan. 
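#
# For illustration only -- a hypothetical init script header of the kind the
# scanner below looks for (the service name and keyword values here are
# invented, not taken from any script in this tree):
#
#	### BEGIN INIT INFO
#	# Provides: mydaemon
#	# Required-Start: $network $syslog
#	# Should-Start: $named
#	# Description: Example daemon; this second line is treated as a
#	#              continuation of the Description value.
#	### END INIT INFO
#
# For such a block, script_keyword_scan() returns a dictionary along the
# lines of {'Provides': 'mydaemon', 'Required-Start': '$network $syslog', ...},
# with continuation lines folded into 'Description' and the bookkeeping
# entries '_PATHNAME_' and '_RESOURCENAME_' added at the end.
#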
class pats: begin=re.compile("###\s+BEGIN\s+INIT\s+INFO") end=re.compile("###\s+END\s+INIT\s+INFO") desc=re.compile("# Description:\s*(.*)", re.IGNORECASE) desc_continue=re.compile("#( +|\t)\s*(.*)") keyword=re.compile("# ([^\s:]+):\s*(.*)\s*\Z") def script_keyword_scan(filename, servicename): keywords = {} ST_START=0 ST_INITINFO=1 ST_DESCRIPTION=1 description="" state=ST_START try: fd = open(filename) except IOError: return keywords while 1: line = fd.readline() if not line: break if state == ST_START: if pats.begin.match(line): state = ST_INITINFO continue if pats.end.match(line): break if state == ST_DESCRIPTION: match = pats.desc_continue.match(line) if match: description += ("\n" + match.group(2)) continue state = ST_INITINFO match = pats.desc.match(line) if match: state = ST_DESCRIPTION description = match.group(1) continue match = pats.keyword.match(line) if match: keywords[match.group(1)] = match.group(2) # Clean up and return fd.close() if description != "": keywords["Description"] = description keywords["_PATHNAME_"] = filename keywords["_RESOURCENAME_"] = "R_" + sysname + "_" + servicename return keywords def script_dependency_scan(service, script, servicemap): keywords=script_keyword_scan(script, service) ServiceKeywords[service] = keywords SysServiceGuesses = { '$local_fs': ['boot.localfs'], '$network': ['network'], '$named': ['named'], '$portmap': ['portmap'], '$remote_fs': ['nfs'], '$syslog': ['syslog'], '$netdaemons': ['portmap', 'inetd'], '$time': ['ntp'], } # # For specific versions of Linux, there are often better ways # to do this... # # (e.g., for SUSE Linux, one should look at /etc/insserv.conf file) # def map_sys_services(servicemap): sysservicemap = {} for sysserv in SysServiceGuesses.keys(): servlist = SysServiceGuesses[sysserv] result = [] for service in servlist: if servicemap.has_key(service): result.append(service) sysservicemap[sysserv] = result return sysservicemap # # # def create_service_dependencies(servicekeywords, systemservicemap): dependencies = {} for service in servicekeywords.keys(): if not dependencies.has_key(service): dependencies[service] = {} for key in ('Required-Start', 'Should-Start'): if not servicekeywords[service].has_key(key): continue for depserv in servicekeywords[service][key].split(): if systemservicemap.has_key(depserv): sysserv = systemservicemap[depserv] for serv in sysserv: dependencies[service][serv] = None else: if servicekeywords.has_key(depserv): dependencies[service][depserv] = None if len(dependencies[service]) == 0: del dependencies[service] return dependencies # # Modify the service name map to include all the mappings from # 'Provides' services to real service script names... 
# def map_script_services(sysservmap, servicekeywords): for service in servicekeywords.keys(): if not servicekeywords[service].has_key('Provides'): continue for provided in servicekeywords[service]['Provides'].split(): if not sysservmap.has_key(provided): sysservmap[provided] = [] sysservmap[provided].append(service) return sysservmap def create_cib_update(keywords, depmap): services = keywords.keys() services.sort() result = "" # Create the XML for the resources result += '\n' result += '\n' result += '\n' result += '\n' result += '\n' groupname="G_" + sysname + "_localinit" result += ' \n' for service in services: rid = keywords[service]["_RESOURCENAME_"] monid = "OPmon_" + sysname + '_' + service result += \ ' \n' + \ ' \n' + \ ' \n' if not NoMonitor.has_key(service): result += \ ' \n' result += \ ' \n' \ ' \n' result += ' \n' result += '\n' services = depmap.keys() services.sort() result += '\n' for service in services: rid = keywords[service]["_RESOURCENAME_"] deps = depmap[service].keys() deps.sort() for dep in deps: if not keywords.has_key(dep): continue depid = keywords[dep]["_RESOURCENAME_"] orderid='O_' + sysname + '_' + service + '_' + dep result += ' \n' loc_id="Loc_" + sysname + "_localinit" rule_id="LocRule_" + sysname + "_localinit" expr_id="LocExp_" + sysname + "_localinit" result += ' \n' result += ' \n' result += ' \n' result += ' \n' result += ' \n' result += '\n' result += '\n' result += '\n' result += '\n' return result def remove_a_prereq(service, servicemap, keywords, deps): if deps.has_key(service): parents = deps[service].keys() del deps[service] else: parents = [] if servicemap.has_key(service): del servicemap[service] if keywords.has_key(service): del keywords[service] for parent in parents: if not deps.has_key(parent): continue remove_a_prereq(parent, servicemap, keywords, deps) def remove_important_prereqs(prereqs, servicemap, keywords, deps): # Find everything these important prereqs need and get rid of them... for service in prereqs: remove_a_prereq(service, servicemap, keywords, deps) ServiceList = find_ordered_services(os.path.join(InitDir, "rc3.d")) register_services(InitDir, ServiceList) SysServiceMap = map_sys_services(ServiceMap) map_script_services(SysServiceMap, ServiceKeywords) ServiceDependencies = create_service_dependencies(ServiceKeywords,SysServiceMap) remove_important_prereqs(PreReqs, SysServiceMap, ServiceKeywords, ServiceDependencies) print create_cib_update(ServiceKeywords, ServiceDependencies) Heartbeat-3-0-7e3a82377fa8/tools/Makefile.am0000644000000000000000000000334311576626513020376 0ustar00usergroup00000000000000# # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl EXTRA_DIST = apigid = @HA_APIGID@ habindir = @bindir@ halibdir = $(libdir)/@HB_PKG@ hanoarchdir = @HA_NOARCHDATAHBDIR@ gliblib = @GLIBLIB@ habin_PROGRAMS = cl_status cl_respawn halib_PROGRAMS = cl_status_SOURCES = cl_status.c cl_status_LDADD = $(top_builddir)/lib/hbclient/libhbclient.la \ -lplumb \ $(gliblib) \ $(top_builddir)/replace/libreplace.la cl_respawn_SOURCES = cl_respawn.c cl_respawn_LDADD = -lplumb \ $(top_builddir)/lib/apphb/libapphb.la \ $(gliblib) \ $(top_builddir)/replace/libreplace.la install-data-hook: # install-exec-hook doesn't work (!) -chgrp $(apigid) $(DESTDIR)/$(habindir)/cl_status -chmod g+s,a-w $(DESTDIR)/$(habindir)/cl_status .PHONY: install-exec-hook Heartbeat-3-0-7e3a82377fa8/tools/README.dopd0000644000000000000000000000504111576626513020144 0ustar00usergroup00000000000000 DRBD Outdate Peer Daemon What is this about? This heartbeat plugin (dopd), using the heartbeat communication channels, is one implementation of the "alternative communication paths" mentioned below in an excerpt from the drbd.conf manpage. Using this helps to reduce the risk of going online with outdated data in multiple failure scenarios, since an "outdated" drbd refuses to become Primary. Unless you force it to. From the drbd.conf (drbd8) manpage: fencing Under fencing we understand preventive action to avoid situations where both nodes are primary and disconnected (AKA split brain). Valid fencing policies are: dont-care This is the default policy. No fencing actions are undertaken. Eventual situations are left for the Cluster Manager or Operator to sort out. resource-only If a node becomes a disconnected primary, it tries to outdate the peer's disk. This is done by calling the outdate-peer handler. The handler is supposed to reach the other node over alternative communication paths, and call 'drbdadm outdate res' there. resource-and-stonith If a node becomes a disconnected primary it freezes all its IO operations and calls its outdate-peer handler. The outdate-peer handler is supposed to reach the peer over alternative communication paths and call 'drbdadm outdate res' there. In case it can not reach the peer it should stonith the peer. IO is resumed as soon as the situation is resolved. In case your handler fails you can resume IO with the resume-io command. Additional preparation steps: * prepare drbdmeta to be setuid root (because it will be called as hacluster:haclient, but needs to be root to manipulate drbd meta data) cd /sbin chgrp haclient drbdmeta chmod o-x drbdmeta chmod u+s drbdmeta chgrp haclient drbdsetup chmod o-x drbdsetup chmod u+s drbdsetup * Configure Heartbeat to run the DRBD outdate peer daemon: Add these lines to /etc/ha.d/ha.cf --- respawn hacluster /usr/lib/heartbeat/dopd apiauth dopd gid=haclient uid=hacluster --- * Configure DRBD to use this facility resource rX { ... handlers { ... outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater"; } disk { ... fencing resource-only; # or # fencing resource-and-stonith; # in case you configure heartbeat to use stonith } ...
} BUGS Complaints about dopd go to drbd-user@lists.linbit.com Heartbeat-3-0-7e3a82377fa8/tools/cl_respawn.c0000644000000000000000000004014111576626513020640 0ustar00usergroup00000000000000/* * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307, USA. * ****************************************************************************** * TODO: * 1) Man page * 2) Add the "cl_respawn recover" function, for combining with recovery * manager. But what's its strategy ? * The pid will passed by environment * 3) Add the function for "-l" option ? ****************************************************************************** * * File: cl_respawn.c * Description: * A small respawn tool which will start a program as a child process, and * unless it exits with the "magic" exit code, will restart the program again * if it exits(dies). It is intended that this respawn program should be usable * in resource agent scripts and other places. The respawn tool should properly * log all restarts, and all exits which it doesn't respawn, and run itself as a * client of the apphb application heartbeating program, so that it can be * restarted if the program it is monitoring dies. * * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines */ #include #include #include #include #include #include #ifdef HAVE_GETOPT_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static const char * Simple_helpscreen = "Usage cl_respawn [] [] [] ...\n" "Options are as below:\n" "-m magic_exit_code\n" " When monitored_program exit as this magic_exit_code, then cl_respawn\n" " will not try to respawn it.\n" "-i interval\n" " Set the interval(ms) of application hearbeat or plumbing its client.\n" "-w warntime\n" " Set the warning time (ms) of application heartbeat.\n" "-p pidfile\n" " Set the name of a pid file to use.\n" "-r Recover itself from crash. 
Only called by other monitor programs like" " recovery manager.\n" "-l List the program monitored by cl_respawn.\n" " Notice: donnot support yet.\n" "-h Display this simple help.\n"; static void become_daemon(void); static int run_client_as_child(char * client_argv[]); static gboolean plumb_client_and_emit_apphb(gpointer data); static gboolean cl_respawn_quit(int signo, gpointer user_data); static void separate_argv(int * argc_p, char *** argv_p, char *** client_argv); static int cmd_str_to_argv(char * cmd_str, char *** argv); static void free_argv(char ** argv); /* Functions for handling the child quit/abort event */ static void monitoredProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged); static void monitoredProcessRegistered(ProcTrack* p); static const char * monitoredProcessName(ProcTrack* p); static ProcTrack_ops MonitoredProcessTrackOps = { monitoredProcessDied, monitoredProcessRegistered, monitoredProcessName }; static const int INSTANCE_NAME_LEN = 20, APPHB_INTVL_DETLA = 30; /* Avoid the incorrect warning message */ static const unsigned long DEFAULT_APPHB_INTERVAL = 2000, /* MS */ DEFAULT_APPHB_WARNTIME = 6000; /* MS */ static int MAGIC_EXIT_CODE = 100; static const char * app_name = "cl_respawn"; static gboolean REGTO_APPHBD = FALSE; static char * pidfile = NULL; /* * This pid will equal to the PID of the process who was ever the child of * that dead cl_respawn. */ static pid_t monitored_PID = 0; static const char * optstr = "rm:i:w:p:lh"; static GMainLoop * mainloop = NULL; static gboolean IS_RECOVERY = FALSE; static gboolean shutting_down = FALSE; int main(int argc, char * argv[]) { char app_instance[INSTANCE_NAME_LEN]; int option_char; int interval = DEFAULT_APPHB_INTERVAL; int apphb_warntime = DEFAULT_APPHB_WARNTIME; char ** client_argv = NULL; pid_t child_tmp = 0; cl_log_set_entity(app_name); cl_log_enable_stderr(TRUE); cl_log_set_facility(HA_LOG_FACILITY); if (argc == 1) { /* no arguments */ printf("%s\n", Simple_helpscreen); exit(LSB_EXIT_EINVAL); } /* * Try to separate the option parameter between myself and the client. * Maybe rewrite the argc and argv. */ separate_argv(&argc, &argv, &client_argv); /* code for debug */ #if 0 { int j; cl_log(LOG_INFO, "client_argv: 0x%08lx", (unsigned long) client_argv); cl_log(LOG_INFO, "Called arg"); for (j=0; argv[j] != NULL; ++j) { cl_log(LOG_INFO, "argv[%d]: %s", j, argv[j]); } for (j=0; client_argv && client_argv[j] != NULL; ++j) { if (ANYDEBUG) { cl_log(LOG_INFO, "client_argv[%d]: %s", j, client_argv[j]); } } } #endif do { option_char = getopt(argc, argv, optstr); if (option_char == -1) { break; } switch (option_char) { case 'r': IS_RECOVERY = TRUE; break; case 'm': if (optarg) { MAGIC_EXIT_CODE = atoi(optarg); } break; case 'i': if (optarg) { interval = atoi(optarg); } else { printf("error.\n"); } break; case 'p': if (optarg) { pidfile = optarg; } break; case 'w': if (optarg) { apphb_warntime = atoi(optarg); } break; case 'l': break; /* information */ return LSB_EXIT_OK; case 'h': printf("%s\n",Simple_helpscreen); return LSB_EXIT_OK; default: cl_log(LOG_ERR, "getopt returned" "character code %c.", option_char); printf("%s\n",Simple_helpscreen); return LSB_EXIT_EINVAL; } } while (1); /* * Now I suppose recovery program only pass the client name via * environment variables. 
*/ if ( (IS_RECOVERY == FALSE) && (client_argv == NULL) ) { cl_log(LOG_ERR, "Please give the program name which will be " "run as a child process of cl_respawn."); printf("%s\n", Simple_helpscreen); exit(LSB_EXIT_EINVAL); } if ((IS_RECOVERY == TRUE ) && ( client_argv == NULL)) { /* * Here the client_argv must be NULL. At least now just * suppose so. */ /* * From the environment variables to acquire the necessary * information set by other daemons like recovery manager. * RSP_PID: the PID of the process which need to be monitored. * RSP_CMD: the command line to restart the program, which is * the same as the input in command line as above. */ if ( getenv("RSP_PID") == NULL ) { cl_log(LOG_ERR, "cannot get monitored PID from the " "environment variable which should be set by " "the recovery program."); exit(LSB_EXIT_EINVAL); } else { monitored_PID = atoi(getenv("RSP_PID")); } /* * client_argv == NULL" indicates no client program passed as * a parameter by others such as a recovery manager, so expect * it will be passed by environment variable RSP_CMD, see as * below. If cannot get it, quit. */ if (client_argv == NULL) { if (getenv("RSP_CMD") == NULL) { cl_log(LOG_ERR, "cannot get the argument of the " "monitored program from the environment " "variable, which should be set by the " "recovery program."); } if (0!=cmd_str_to_argv(getenv("RSP_CMD"), &client_argv)) { cl_log(LOG_ERR, "Failed to transfer the CLI " "string to the argv[] style."); exit(LSB_EXIT_EINVAL); } } } /* Not use the API 'daemon' since it's not a POSIX's */ become_daemon(); /* Code for debug int k = 0; do { cl_log(LOG_INFO,"%s", execv_argv[k]); } while (execv_argv[++k] != NULL); */ set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); if (( IS_RECOVERY == FALSE )) { child_tmp = run_client_as_child(client_argv); if (child_tmp > 0 ) { cl_log(LOG_NOTICE, "started the monitored program %s, " "whose PID is %d", client_argv[0], child_tmp); } else { exit(LSB_EXIT_GENERIC); } } snprintf(app_instance, INSTANCE_NAME_LEN, "%s_%ldd" , app_name, (long)getpid()); if (apphb_register(app_name, app_instance) != 0) { cl_log(LOG_WARNING, "Failed to register with apphbd."); cl_log(LOG_WARNING, "Maybe apphd isn't running."); REGTO_APPHBD = FALSE; } else { REGTO_APPHBD = TRUE; cl_log(LOG_INFO, "Registered with apphbd."); apphb_setinterval(interval); apphb_setwarn(apphb_warntime); /* To avoid the warning when app_interval is very small. 
*/ apphb_hb(); } Gmain_timeout_add(interval - APPHB_INTVL_DETLA , plumb_client_and_emit_apphb, client_argv); mainloop = g_main_new(FALSE); g_main_run(mainloop); if ( REGTO_APPHBD == TRUE ) { apphb_hb(); apphb_unregister(); } return LSB_EXIT_OK; } static int run_client_as_child(char * execv_argv[]) { long pid; int i; if (execv_argv[0] == NULL) { cl_log(LOG_ERR, "Null pointer to program name which need to" "be executed."); return LSB_EXIT_EINVAL; } pid = fork(); if (pid < 0) { cl_log(LOG_ERR, "cannot start monitor program %s.", execv_argv[0]); return -1; } else if (pid > 0) { /* in the parent process */ NewTrackedProc( pid, 1, PT_LOGVERBOSE , execv_argv, &MonitoredProcessTrackOps); monitored_PID = pid; return pid; } /* Now in child process */ execvp(execv_argv[0], execv_argv); /* if go here, there must be something wrong */ cl_log(LOG_ERR, "%s",strerror(errno)); cl_log(LOG_ERR, "execving monitored program %s failed.", execv_argv[0]); i = 0; do { free(execv_argv[i]); } while (execv_argv[++i] != NULL); /* Since parameter error, donnot need to be respawned */ exit(MAGIC_EXIT_CODE); } /* * Notes: Since the work dir is changed to "/", the client name should include * pathname or it's located in the system PATH */ static void become_daemon(void) { int j; if (pidfile) { int runningpid; if ((runningpid=cl_read_pidfile(pidfile)) > 0) { cl_log(LOG_WARNING, "pidfile [%s] says we're already running as pid [%d]" , pidfile, runningpid); exit(LSB_EXIT_OK); } if (cl_lock_pidfile(pidfile) != 0) { cl_log(LOG_ERR, "Cannot create pidfile [%s]" , pidfile); exit(LSB_EXIT_GENERIC); } } #if 0 pid_t pid; pid = fork(); if (pid < 0) { cl_log(LOG_ERR, "cannot start daemon."); exit(LSB_EXIT_GENERIC); } else if (pid > 0) { exit(LSB_EXIT_OK); } #endif if (chdir("/") < 0) { cl_log(LOG_ERR, "cannot chroot to /."); exit(LSB_EXIT_GENERIC); } umask(022); setsid(); for (j=0; j < 3; ++j) { close(j); (void)open("/dev/null", j == 0 ? 
O_RDONLY : O_RDWR); } CL_IGNORE_SIG(SIGINT); CL_IGNORE_SIG(SIGHUP); G_main_add_SignalHandler(G_PRIORITY_DEFAULT, SIGTERM, cl_respawn_quit, NULL, NULL); } static gboolean plumb_client_and_emit_apphb(gpointer data) { pid_t new_pid; char ** client_argv = (char **) data; if ( REGTO_APPHBD == TRUE ) { apphb_hb(); } if (shutting_down) { return TRUE; } /* cl_log(LOG_NOTICE,"donnot emit hb for test."); */ if ( IS_RECOVERY == TRUE && !(CL_PID_EXISTS(monitored_PID)) ) { cl_log(LOG_INFO, "process %d exited.", monitored_PID); new_pid = run_client_as_child(client_argv); if (new_pid > 0 ) { cl_log(LOG_NOTICE, "restart the monitored program %s," " whose PID is %d", client_argv[0], new_pid); } else { /* * donnot let recovery manager restart me again, avoid * infinite loop */ cl_log(LOG_ERR, "Failed to restart the monitored " "program %s, will exit.", client_argv[0]); cl_respawn_quit(SIGTERM, NULL); } } return TRUE; } static gboolean cl_respawn_quit(int signo, gpointer user_data) { shutting_down = TRUE; if (monitored_PID != 0) { cl_log(LOG_INFO, "Killing pid [%d] with SIGTERM" , monitored_PID); /* DisableProcLogging(); */ if (kill(monitored_PID, SIGTERM) < 0) { monitored_PID=0; }else{ return TRUE; } } if (mainloop != NULL && g_main_is_running(mainloop)) { DisableProcLogging(); g_main_quit(mainloop); } else { apphb_unregister(); DisableProcLogging(); exit(LSB_EXIT_OK); } return TRUE; } static void separate_argv(int * argc_p, char *** argv_p, char *** client_argv_p) { /* Search the first no-option parameter */ int i,j; struct stat buf; *client_argv_p = NULL; for (i=1; i < *argc_p; i++) { if ( ((*argv_p)[i][0] != '-') && (0 == stat((*argv_p)[i], &buf)) ) { if ( S_ISREG(buf.st_mode) && ((S_IXUSR| S_IXGRP | S_IXOTH) & buf.st_mode) ) { break; } } } /* * Cannot find a valid program name which will be run as a child * process of cl_respawn, may be a recovery. 
*/ if (*argc_p == i) { return; } *client_argv_p = calloc(*argc_p - i + 1, sizeof(char*)); if (*client_argv_p == NULL) { cl_perror("separate_argv:calloc: "); exit(1); } for (j=i; j < *argc_p; j++) { (*client_argv_p)[j-i] = (*argv_p)[j]; } (*argv_p)[i] = NULL; *argc_p = i; return; } static int cmd_str_to_argv(char * cmd_str, char *** client_argv_p) { const int MAX_NUM_OF_PARAMETER = 80; char *pre, *next; int index = 0; int i, len_tmp; if (cmd_str == NULL) { return LSB_EXIT_EINVAL; } *client_argv_p = calloc(MAX_NUM_OF_PARAMETER, sizeof(char *)); if (*client_argv_p == NULL) { cl_perror("cmd_str_to_argv:calloc: "); return LSB_EXIT_GENERIC; } pre = cmd_str; do { next = strchr(pre,' '); if (next == NULL) { len_tmp = strnlen(pre, 80); (*client_argv_p)[index] = calloc(len_tmp+1, sizeof(char)); if (((*client_argv_p)[index]) == NULL ) { cl_perror("cmd_str_to_argv:calloc: "); return LSB_EXIT_GENERIC; } strncpy((*client_argv_p)[index], pre, len_tmp); break; } (*client_argv_p)[index] = calloc(next-pre+1, sizeof(char)); if (((*client_argv_p)[index]) == NULL ) { cl_perror("cmd_str_to_argv:calloc: "); return LSB_EXIT_GENERIC; } strncpy((*client_argv_p)[index], pre, next-pre); /* remove redundant spaces between parametes */ while ((char)(*next)==' ') { next++; } pre = next; if (++index >= MAX_NUM_OF_PARAMETER - 1) { break; } } while (1==1); if (index >= MAX_NUM_OF_PARAMETER - 1) { for (i = 0; i < MAX_NUM_OF_PARAMETER; i++) { free((*client_argv_p)[i]); } free(*client_argv_p); return LSB_EXIT_EINVAL; } (*client_argv_p)[index+1] = NULL; return 0; } static void monitoredProcessDied(ProcTrack* p, int status, int signo , int exitcode, int waslogged) { pid_t new_pid; char ** client_argv = (char **) p->privatedata; const char * pname = p->ops->proctype(p); if (shutting_down) { cl_respawn_quit(SIGTERM, NULL); p->privatedata = NULL; return; } if ( exitcode == MAGIC_EXIT_CODE) { cl_log(LOG_INFO, "Don't restart the monitored program" " %s [%d], since we got the magic exit code." , pname, p->pid); free_argv(client_argv); cl_respawn_quit(SIGTERM, NULL); /* Does NOT always exit */ return; } cl_log(LOG_INFO, "process %s[%d] exited, and its exit code is %d" , pname, p->pid, exitcode); if ( 0 < (new_pid = run_client_as_child(client_argv)) ) { cl_log(LOG_NOTICE, "restarted the monitored program, whose PID " " is %d", new_pid); } else { cl_log(LOG_ERR, "Failed to restart the monitored program %s ," "will exit.", pname ); free_argv(client_argv); cl_respawn_quit(SIGTERM, NULL); /* Does NOT always exit */ return; } p->privatedata = NULL; } static void monitoredProcessRegistered(ProcTrack* p) { cl_log(LOG_INFO, "Child process [%s] started [ pid: %d ]." , p->ops->proctype(p), p->pid); } static const char * monitoredProcessName(ProcTrack* p) { char ** argv = p->privatedata; return argv[0]; } static void free_argv(char ** argv) { int i = 0; if ( argv == NULL ) { return; } do { if (argv[i] != NULL) { free(argv[i++]); } else { free(argv); return; } } while (1==1); } Heartbeat-3-0-7e3a82377fa8/tools/cl_status.c0000644000000000000000000004732711576626513020521 0ustar00usergroup00000000000000/* File: cl_status.c * Description: * A small tool for acquire the state information of heartbeat cluster. * TODO: Map string output to return value? 
* * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines * Referred to the following tools * api_test Copyright (C) 2000 Alan Robertson * hbinfo Copyright (C) 2004 Mike Neuhauser * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include #ifdef HAVE_GETOPT_H #include #endif #include #include #include #include #include #include /* exit code */ static const int OK = 0, NORMAL_FAIL = 1, /* such as the local node is down */ PARAMETER_ERROR = 11, TIMEOUT = 12, UNKNOWN_ERROR = 13; /* error due to unkown causes */ /* * The exit values under some situations proposed by Alan. * nodestatus fail when the node is down * clientstatus fail when client not accessible (offline?) * hbstatus fail when heartbeat not running locally * hblinkstatus fail if the given heartbeat link is down * hbparameter fail if the given parameter is not set to any value */ /* * Important * General return value for the following functions, and it is actually * as this program cl_status' return value: * 0(OK): on success, including the node status is ok. * <>0: on fail 1(NORMAL_FAIL): for a "normal" failure (like node is down) * 2(PARAMETER_ERROR): * 3(OTHER_ERROR): error due to unkown causes */ /* * Description: * Detect if heartbeat is running. * * Parameters: * Obvious. ;-) * * Return Value: * OK: In local machine, heartbeat is running. * NORMAL_FAIL: In local machine, heartbeat is stopped. */ static int hbstatus(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * OK: In local machine, this operation succeed. * NORMAL_FAIL: In local machine, heartbeat is stopped. */ static int listnodes(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * OK: the node is active. * NORMAL_FAIL: the node is down */ static int nodestatus(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * the weight of the node */ static int nodeweight(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * the site of the node */ static int nodesite(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0: normal * 1: ping * 3: unknown type * Notes: not map string std_output to return value yet */ static int nodetype(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0(OK): sucess * 1(NORMAL_FAIL): the node is down. 
*/ static int listhblinks(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0(OK): the link is up * 1(NORMAL_FAIL): the link is down */ static int hblinkstatus(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0(OK): online * 1(NORMAL_FAIL): offline * 2: join * 3: leave * * When sucess and without -m option, at the meantime on stdout print one * of the following string to reflect the status of the client: * online, offline, join, leave */ static int clientstatus(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0(OK): on success. * 1(NORMAL_FAIL): the node is down. * * When sucess and without -m option, on stdout print one of the following * string to reflect the status of the resource: * none, local, foreign, all */ static int rscstatus(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* * Return Value: * 0: success * 1: fail if the given parameter is not set to any value */ static int hbparameter(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); /* miscellaneous functions */ static int test(ll_cluster_t *hb, int argc, char ** argv, const char * optstr); static int general_simple_opt_deal(int argc, char ** argv, const char * optstr); typedef struct { const char * name; int (*func)(ll_cluster_t *hb, int, char **, const char *); const char * optstr; gboolean needsignon; } cmd_t; static const size_t CMDS_MAX_LENGTH = 16; static gboolean FOR_HUMAN_READ = FALSE; static const cmd_t cmds[] = { { "hbstatus", hbstatus, "m" , FALSE}, { "listnodes", listnodes, "mpn", TRUE}, { "nodestatus", nodestatus, "m", TRUE}, { "nodeweight", nodeweight, "m", TRUE}, { "nodesite", nodesite, "m", TRUE}, { "nodetype", nodetype, "m", TRUE }, { "listhblinks", listhblinks, "m", TRUE }, { "hblinkstatus", hblinkstatus, "m", TRUE }, { "clientstatus", clientstatus, "m", TRUE }, { "rscstatus", rscstatus, "m", TRUE}, { "hbparameter", hbparameter, "mp:, TRUE"}, { "test", test, NULL, TRUE}, { NULL, NULL, NULL }, }; static const char * simple_help_screen = "Usage: cl_status [] []\n" "\n" "Sub-commands:\n" "clientstatus []\n" " Show the status of heartbeat clients.\n" "hblinkstatus \n" " Show the status of a heartbeat link\n" "hbstatus\n" " Indicate if heartbeat is running on the local system.\n" "listhblinks \n" " List the network interfaces used as hearbeat links.\n" "listnodes [