==> PAF-2.3.0/.gitignore <==

/blib/
/.build/
_build/
cover_db/
inc/
Build
!Build/
Build.bat
.last_cover_stats
/Makefile
/Makefile.old
/MANIFEST.bak
/META.yml
/META.json
/MYMETA.*
nytprof.out
/pm_to_blib
*.o
*.bs
*.tmp
/_eumm/
_site
.vagrant/
vagrant.yml

==> PAF-2.3.0/Build.PL <==

#!/usr/bin/env perl

use strict;
use warnings;

use Module::Build;
use Pod::Man;

my $lib_ocf_root = '/usr/lib/ocf';
my $ocft_confs   = '/usr/share/resource-agents/ocft/configs';
my $lib_ocf_dirs;
my $man7_dir;
my $ocf_dirs;
my %ocf_dirs;

my $build = Module::Build->new(
    module_name        => 'PAF',
    license            => 'bsd',
    requires           => { 'perl' => '5.8.0' },
    create_packlist    => 1,
    #create_readme      => 1,
    create_makefile_pl => 'traditional',
    dist_version       => '2.3.0',
    release_status     => 'stable', # stable or testing
    dist_abstract      => 'PostgreSQL multistate OCF resource agent for Pacemaker',
    dist_author        => [
        'Mael Rimbault ',
        'Jehan-Guillaume '
    ],
    script_files       => [ 'script/pgsqlms' ],
    PL_files           => { 'lib/OCF_Directories.pm.PL' => 'lib/OCF_Directories.pm' },
    tests_files        => { 't/pgsqlms' => 'tests/pgsqlms' },
    man7_files         => { 'ocf_heartbeat_pgsqlms.7' => 'man7/ocf_heartbeat_pgsqlms.7' },
    get_options        => {
        with_ocf_root   => { type => '=s', store => \$lib_ocf_root },
        with_ocft_confs => { type => '=s', store => \$ocft_confs }
    },
    add_to_cleanup     => [ 'lib/OCF_Directories.pm', 'ocf_heartbeat_pgsqlms.7' ],
    meta_merge         => {
        resources => {
            'homepage'   => 'http://clusterlabs.github.io/PAF/',
            'repository' => 'https://github.com/ClusterLabs/PAF',
            'bugtracker' => 'https://github.com/ClusterLabs/PAF/issues'
        }
    },
);

# The pgsqlms man page must be generated in man7, not man1.
# We create a new man7 install path, and disable the man1 generation.

# build man7 page
Pod::Man->new(
    'release' => $build->dist_version,
    'section' => 7,
    'center'  => 'OCF resource agents',
    'name'    => 'OCF_HEARTBEAT_PGSQLMS'
)->parse_from_file( 'script/pgsqlms', 'ocf_heartbeat_pgsqlms.7' );

$man7_dir = $build->install_destination( 'bindoc' );
$man7_dir =~ s@man1@man7@;

$build->install_path( 'man7' => $man7_dir );
$build->add_build_element('man7');

# we set the bindoc dirs list to an empty list so Module::Build cannot find
# script/pgsqlms and create a man1 page
$build->bindoc_dirs( [] );

# Check given ocf_root or default values
for my $dir ( $lib_ocf_root, '/usr/lib/ocf', '/usr/lib32/ocf', '/usr/lib64/ocf' ) {
    if ( -d $dir and -s "$dir/lib/heartbeat/ocf-directories" ) {
        print STDERR "Found OCF_ROOT: $dir\n" if $build->verbose;
        $lib_ocf_root = $dir;
        last;
    }
}

$lib_ocf_dirs = "$lib_ocf_root/lib/heartbeat/ocf-directories";

die "Couldn't find OCF shell functions in «OCF_ROOT/lib/heartbeat»!\n"
   ."Try to build using the --with_ocf_root argument.\n"
    if ! -f $lib_ocf_dirs;

$ocf_dirs = qx{ . "$lib_ocf_dirs" 2> /dev/null
    echo "\$INITDIR"
    echo "\$HA_DIR"
    echo "\$HA_RCDIR"
    echo "\$HA_CONFDIR"
    echo "\$HA_CF"
    echo "\$HA_VARLIB"
    echo "\$HA_RSCTMP"
    echo "\$HA_RSCTMP_OLD"
    echo "\$HA_FIFO"
    echo "\$HA_BIN"
    echo "\$HA_SBIN_DIR"
    echo "\$HA_DATEFMT"
    echo "\$HA_DEBUGLOG"
    echo "\$HA_RESOURCEDIR"
    echo "\$HA_DOCDIR"
    echo "\$HA_VARRUN"
    echo "\$HA_VARLOCK"
    echo "\$prefix"
    echo "\$exec_prefix"
};

@ocf_dirs{
    'INITDIR', 'HA_DIR', 'HA_RCDIR', 'HA_CONFDIR', 'HA_CF', 'HA_VARLIB',
    'HA_RSCTMP', 'HA_RSCTMP_OLD', 'HA_FIFO', 'HA_BIN', 'HA_SBIN_DIR',
    'HA_DATEFMT', 'HA_DEBUGLOG', 'HA_RESOURCEDIR', 'HA_DOCDIR', 'HA_VARRUN',
    'HA_VARLOCK', 'prefix', 'exec_prefix'
} = split /\n/ => $ocf_dirs;

$build->install_path( 'lib'    => "$lib_ocf_root/lib/heartbeat" );
$build->install_path( 'script' => "$lib_ocf_root/resource.d/heartbeat" );

for my $dir ( $ocft_confs, "$ocf_dirs{'prefix'}/share/resource-agents/ocft/configs" ) {
    if ( -d $dir ) {
        print STDERR "Found OCFT_CONFS: $dir\n" if $build->verbose;
        $ocft_confs = $dir;
        $build->add_build_element('tests');
        $build->install_path( 'tests' => "$ocft_confs" );
        last;
    }
}

$build->notes('ocf_dirs', \%ocf_dirs);

$build->create_build_script;

==> PAF-2.3.0/CHANGELOG.md <==

# PAF v2.3.0

Release date: 2020-03-09

__WARNING__: This version is only compatible with at least Pacemaker 1.1.13
using a corosync 2.x stack.

Changelog since 2.2.1:

* new: support PostgreSQL 12
* fix: wrong log message during switchover failure
* fix: race condition during election process, gh #131 reported by dud225
* change: with PostgreSQL 12, setting `recovery_template` is forbidden
* misc: debian package uses `/var/run/postgresql` as `pghost` default value
* misc: various small code cleanups

# PAF v2.2.1

Release date: 2019-01-31

__WARNING__: This version is only compatible with at least Pacemaker 1.1.13
using a corosync 2.x stack.

This is a bugfix release with no new features.

* fix: fix checking for notify=true, YanChii
* fix: forbid pg_isready to timeout before the action itself, ioguix
* misc: avoid flooding log files when system_user is not postgres, patch by TBP2k18 on github
* misc: avoid flooding log files with warnings about unused nodes, suggested by timdeluxe on github
* misc: a few typo fixes, YanChii

# PAF v2.2.0

Release date: 2017-09-12

__WARNING__: This version is only compatible with at least Pacemaker 1.1.13
using a corosync 2.x stack.

Changelog since 2.1:

* new: support PostgreSQL 10
* new: add the maxlag parameter to exclude lagging slaves from promotion, Thomas Reiss
* new: support for multiple pgsqlms resources in the same cluster
* new: provide comprehensive error messages to crm_mon
* fix: follow the resource agent man page naming policy and section
* fix: add documentation to the pgsqlms man page
* fix: do not rely on crm_failcount, suggested on the clusterlabs lists
* misc: improve the RPM packaging
* misc: check Pacemaker compatibility and resource setup
* misc: enforce the election process by including timelines comparison
* misc: various code cleanup, factorization and module improvement

# PAF v2.1.0

Release date: 2016-12-23

__WARNING__: This version is only compatible with at least Pacemaker 1.1.13
using a corosync 2.x stack.
Changelog since 2.0:

* fix: compatibility with PostgreSQL 9.6, YanChii
* change: better handling of Pacemaker timeout, ioguix with a fix from YanChii
* change: avoid fatal error during validate action when possible, Benoit Lobréau & ioguix
* new: parameter "datadir" to cope with PostgreSQL GUC data_directory, Benoit Lobréau & ioguix
* misc: improve debian packaging, Christoph Berg

# PAF v2.0.0

Release date: 2016-09-16

__WARNING__: This version is only compatible with at least Pacemaker 1.1.13
using a corosync 2.x stack.

* 2.0.0 major release
* fix: do not use crm_node --partition to discover resources
* fix: unknown argument --query when calling crm_master
* fix: perl warning when master score has never been set on the master
* fix: remove wrong info message during post-promote notify
* fix: race condition when setting attributes during actions
* fix: bug where pgport and pghost were ignored in _query
* fix: use same role name as the system_user to connect
* fix: wrap crm_master calls in a sub to make them synchronous
* fix: fixed a bug related to setgid in _runas
* fix: check on application_name in validate_all
* change: do not start standby with a master score of 1
* change: choose the clone to promote when no master score exists
* new: detect and deal with master/slave recovery transition
* new: detect and enforce reliability of a switchover
* new: set next best secondaries based on their lag
* misc: code cleanup and refactoring
* misc: various log messages cleanup and enhancement

# PAF v1.0.2

Release date: 2016-05-25

* 1.0.2 minor release
* fix: unknown argument --query when calling crm_master
* fix: perl warning when master score has never been set on the master
* change: remove misleading message in log file

# PAF v1.0.1

Release date: 2016-04-27

* 1.0.1 minor release
* fix: forbid the master to decrease its own score (gh #19)
* fix: bad LSN decimal conversion (gh #20)
* fix: support PostgreSQL 9.5 controldata output (gh #12)
* fix: set group id of given system_user before executing commands (gh #11)
* fix: use long arguments of external commands when possible
* fix: bad header leading to wrong manpage section
* fix: OCF tests when PostgreSQL does not listen in /tmp
* change: do not update score outside of a monitor action (gh #18)
* new: add parameter 'start_opts', useful for Debian and derivatives (gh #11)
* new: add specific timeout for master and slave roles in meta-data (gh #14)
* new: add debian packaging related files

# PAF v1.0.0

Release date: 2016-03-02

* First public release

==> PAF-2.3.0/CONTRIBUTORS.md <==

# People behind PAF

* Mael Rimbault: original author
* Jehan-Guillaume de Rorthais: original author
* Benoit Lobréau: intensive tests, feedback, help, documentation, released PAF v1.1
* Thomas Reiss: the `maxlag` parameter
* Jan Poctavek: fix for PostgreSQL 9.6, documentation, intensive tests and feedback
* Adrian Vondendriesch: patches related to the debian packaging
* Christoph Berg: patches related to the debian packaging
* Damien Clochard: documentation fix
* Pierre Giraud: documentation fix

# Companies sponsoring PAF

* Dalibo: sponsors some R&D time on PAF for Maël Rimbault and Jehan-Guillaume de Rorthais

==> PAF-2.3.0/INSTALL.md <==

# How to install this resource agent

This agent is written in Perl. Its installation process follows the common
Perl method, but its installation paths follow the usual paths used for OCF
libraries and scripts.

## Prerequisite

The popular Perl method to install packages uses Module::Build. Depending on
the system, you might need to install a package:

* under Debian and derivatives, you need ``libmodule-build-perl``
* under RHEL and derivatives, you need ``perl-Module-Build``

Moreover, this module assumes you have already installed Pacemaker's resource
agents. Under Debian, RHEL and their derivatives, you need the
``resource-agents`` package.
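For example, the dependencies can be installed with the distribution's package
manager (a sketch, using the package names listed above):

```
# Debian and derivatives
apt-get install libmodule-build-perl resource-agents

# RHEL and derivatives
yum install perl-Module-Build resource-agents
```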
## Quick install

The quick installation process is:

```
./Build.PL
./Build
sudo ./Build install
```

This process is supposed to detect the root of your OCF files (aka. OCF_ROOT)
and install the following files there:

* $OCF_ROOT/lib/heartbeat/OCF_ReturnCodes.pm
* $OCF_ROOT/lib/heartbeat/OCF_Functions.pm
* $OCF_ROOT/lib/heartbeat/OCF_Directories.pm
* $OCF_ROOT/resource.d/heartbeat/pgsqlms

Moreover, if the build process finds an ocft config folder (usually
``/usr/share/resource-agents/ocft/configs/``), it will install the "pgsqlms"
config file there. The ``ocft`` tool allows running unit tests on OCF
resource agents. See ``t/README`` for more information about it.

## Build.PL arguments

The first installation step (the call to Build.PL) accepts two arguments:

* ``--with_ocf_root=PATH``: give the location of OCF_ROOT to the Build process
* ``--with_ocft_confs=PATH``: give the location of the ocft config files

They are usually not required as Build.PL should detect their location by
itself.
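For example, if autodetection fails, both locations can be passed explicitly
(a sketch; the paths shown are the usual defaults, adjust them to your
system):

```
./Build.PL --with_ocf_root=/usr/lib/ocf \
           --with_ocft_confs=/usr/share/resource-agents/ocft/configs
```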
## Testing

See t/README to learn more about ocft tests.

==> PAF-2.3.0/LICENSE <==

Copyright (c) 2016-2020, Jehan-Guillaume de Rorthais, Mael Rimbault.

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose, without fee, and without a written agreement is
hereby granted, provided that the above copyright notice and this paragraph
and the following two paragraphs appear in all copies.

IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

THE AUTHOR AND DISTRIBUTORS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

==> PAF-2.3.0/MANIFEST <==

Build.PL
CONTRIBUTORS.md
LICENSE
MANIFEST			This list of files
README.md
INSTALL.md
CHANGELOG.md
lib/OCF_Directories.pm.PL
lib/OCF_Functions.pm
lib/OCF_ReturnCodes.pm
script/pgsqlms
resource-agents-paf.spec
debian/changelog
debian/compat
debian/control
debian/copyright
debian/docs
debian/rules
debian/watch
debian/source/format
t/pgsqlms
t/README

==> PAF-2.3.0/MANIFEST.SKIP <==

#!start included /usr/share/perl/5.20/ExtUtils/MANIFEST.SKIP

# Avoid version control files.
\bRCS\b
\bCVS\b
\bSCCS\b
,v$
\B\.svn\b
\B\.git\b
\B\.gitignore\b
\b_darcs\b
\B\.cvsignore$

# Avoid VMS specific MakeMaker generated files
\bDescrip.MMS$
\bDESCRIP.MMS$
\bdescrip.mms$

# Avoid Makemaker generated and utility files.
\bMANIFEST\.bak
\bMakefile$
\bblib/
\bMakeMaker-\d
\bpm_to_blib\.ts$
\bpm_to_blib$
\bblibdirs\.ts$  # 6.18 through 6.25 generated this

# Avoid Module::Build generated and utility files.
\bBuild$
\b_build/
\bBuild.bat$
\bBuild.COM$
\bBUILD.COM$
\bbuild.com$

# Avoid temp and backup files.
~$
\.old$
\#$
\b\.#
\.bak$
\.tmp$
\.#
\.rej$

# Avoid OS-specific files/dirs
# Mac OSX metadata
\B\.DS_Store
# Mac OSX SMB mount metadata files
\B\._

# Avoid Devel::Cover and Devel::CoverX::Covered files.
\bcover_db\b
\bcovered\b

# Avoid MYMETA files
^MYMETA\.

#!end included /usr/share/perl/5.20/ExtUtils/MANIFEST.SKIP

# Avoid configuration metadata file
^MYMETA\.

# Avoid Module::Build generated and utility files.
\bBuild$
\bBuild.bat$
\b_build
\bBuild.COM$
\bBUILD.COM$
\bbuild.com$

^MANIFEST\.SKIP

# Avoid archives of this distribution
\bPAF-[\d\.\_]+

# Avoid developers doc
^devel\/.*

# Avoid extra folder
^extra\/.*

# avoid manual page generated during the build process
ocf_heartbeat_pgsqlms.7

==> PAF-2.3.0/README.md <==

# PostgreSQL Automatic Failover

High-Availability for Postgres, based on the industry references Pacemaker
and Corosync.

## Description

Pacemaker is nowadays the industry reference for High Availability. In the
same fashion as for systemd, all Linux distributions moved (or are moving) to
this unique Pacemaker+Corosync stack, removing all other existing high
availability stacks (CMAN, RGManager, OpenAIS, ...). It is able to detect
failures on various services and automatically decides to fail the failing
resource over to another node when possible.

To be able to manage a specific service resource, Pacemaker interacts with it
through a so-called "Resource Agent". Resource agents must comply with the OCF
specification, which defines what they must implement (start, stop, promote,
etc.), how they should behave and how they inform Pacemaker of their results.

PostgreSQL Automatic Failover is a new OCF resource agent dedicated to
PostgreSQL. Its original goal is to keep a clear limit between the Pacemaker
administration and the PostgreSQL one, to keep things simple, documented and
yet powerful.

Once your PostgreSQL cluster is built using internal streaming replication,
PAF is able to expose to Pacemaker the current status of the PostgreSQL
instance on each node: master, slave, stopped, catching up, etc. Should a
failure occur on the master, Pacemaker will try to recover it by default.
Should the failure be non-recoverable, PAF allows the slaves to elect the best
of them (the closest one to the old master) and promote it as the new master.
All of this thanks to the robust, feature-rich and most importantly
experienced project: Pacemaker.

For information about how to install this agent, see `INSTALL.md`.

## Setup and requirements

PAF supports PostgreSQL 9.3 and higher. It has been extensively tested under
CentOS 6 and 7 in various scenarios.

PAF has been written to give the administrator maximum control over their
PostgreSQL configuration and architecture. Thus, you are 100% responsible for
the master/slave creations and their setup. The agent will NOT edit your
setup. It only requires you to follow these prerequisites:

* slaves __must__ be in hot_standby (accept read-only connections);
* the following parameters __must__ be configured in the appropriate place:
  * `standby_mode = on` (for PostgreSQL 11 and before)
  * `recovery_target_timeline = 'latest'`
  * `primary_conninfo` with `application_name` set to the node name as seen
    in Pacemaker.
* with PostgreSQL 12, these last parameters have been merged into the instance
  configuration file. For PostgreSQL 11 and before, you __must__ provide a
  `recovery.conf` template file.
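As an illustration, a minimal `recovery.conf.pcmk` template for PostgreSQL 11
and before could look like the following sketch. The connection string is an
assumption: replace the host with the (virtual) IP address of your master and
`srv1` with the local node name as seen in Pacemaker:

```
standby_mode = on
recovery_target_timeline = 'latest'
primary_conninfo = 'host=192.168.122.50 user=postgres application_name=srv1'
```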
When setting up the resource in Pacemaker, here are the available parameters
you can set:

* `bindir`: location of the PostgreSQL binaries (default: `/usr/bin`)
* `pgdata`: location of the PGDATA of your instance (default:
  `/var/lib/pgsql/data`)
* `datadir`: path to the directory set in `data_directory` from your
  postgresql.conf file. This parameter has the same default as PostgreSQL
  itself: the `pgdata` parameter value. Unless you have a special PostgreSQL
  setup and you understand this parameter, __ignore it__
* `pghost`: the socket directory or IP address to use to connect to the
  local instance (default: `/tmp` or `/var/run/postgresql` for DEBIAN)
* `pgport`: the port to connect to the local instance (default: `5432`)
* `recovery_template`: __only__ for PostgreSQL 11 and before. The local
  template that will be copied as the `PGDATA/recovery.conf` file. This file
  must not exist on any node for PostgreSQL 12 and after.
  (default: `$PGDATA/recovery.conf.pcmk`)
* `start_opts`: additional arguments given to the postgres process on
  startup. See "postgres --help" for available options. Useful when the
  postgresql.conf file is not in the data directory (PGDATA), eg.:
  `-c config_file=/etc/postgresql/9.3/main/postgresql.conf`
* `system_user`: the system owner of your instance's process (default:
  `postgres`)
* `maxlag`: maximum lag allowed on a standby before we set a negative master
  score on it. The calculation is based on the difference between the current
  xlog location on the master and the write location on the standby.
  (default: 0, which disables this feature)
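As a sketch, these parameters are passed when creating the resource. The
example below is an assumption based on a PostgreSQL 11 setup managed with
`pcs` (the resource name `pgsqld`, the paths and the timeouts are
placeholders to adapt, and the exact `pcs` syntax depends on its version; see
the demonstration link below for a complete walk-through):

```
pcs resource create pgsqld ocf:heartbeat:pgsqlms \
    bindir=/usr/pgsql-11/bin \
    pgdata=/var/lib/pgsql/11/data \
    recovery_template=/var/lib/pgsql/recovery.conf.pcmk \
    op start timeout=60s \
    op stop timeout=60s \
    op promote timeout=30s \
    op demote timeout=120s \
    op monitor interval=15s timeout=10s role="Master" \
    op monitor interval=16s timeout=10s role="Slave" \
    op notify timeout=60s \
    --master notify=true
```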
For a demonstration about how to set up a cluster, see
[http://clusterlabs.github.io/PAF/documentation.html](http://clusterlabs.github.io/PAF/documentation.html).

==> PAF-2.3.0/debian/changelog <==

resource-agents-paf (2.3.0-1) unstable; urgency=low

  * 2.3.0 major release

 -- Jehan-Guillaume (ioguix) de Rorthais  Mon, 09 Mar 2020 23:30:00 +0100

resource-agents-paf (2.3~rc2-1) unstable; urgency=low

  * 2.3_rc2 release candidate

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 11 Feb 2020 22:00:00 +0100

resource-agents-paf (2.3~rc1-1) unstable; urgency=low

  * 2.3_rc1 release candidate

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 28 Nov 2019 15:00:00 +0100

resource-agents-paf (2.2.1-1) unstable; urgency=low

  * 2.2.1 minor release

 -- Jehan-Guillaume (ioguix) de Rorthais  Thu, 31 Jan 2019 14:00:00 +0100

resource-agents-paf (2.2.1~rc1-1) unstable; urgency=low

  * 2.2.1_rc1 release candidate

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 22 Jan 2019 22:00:00 +0100

resource-agents-paf (2.2.0-2) unstable; urgency=low

  * add CHANGELOG.md to the package

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 12 Sep 2017 22:40:00 +0200

resource-agents-paf (2.2.0-1) unstable; urgency=low

  * 2.2.0 major release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 12 Sep 2017 10:00:00 +0200

resource-agents-paf (2.2~rc1-1) unstable; urgency=low

  * 2.2_rc1 release candidate
  * no change since beta1

 -- Jehan-Guillaume (ioguix) de Rorthais  Tue, 29 Aug 2017 23:15:00 +0200

resource-agents-paf (2.2~beta1-3) unstable; urgency=medium

  * d/control:
    - Bump Standards-Version to 4.0.0 (no change required)
    - Add $misc:Depends binary dependencies
  * d/watch: Add watch file to look for new releases on github.

 -- Adrian Vondendriesch  Mon, 26 Jun 2017 16:55:28 +0200

resource-agents-paf (2.2~beta1-2) unstable; urgency=low

  * add dependencies on Pacemaker and Corosync versions

 -- Jehan-Guillaume (ioguix) de Rorthais  Mon, 26 Jun 2017 16:50:00 +0100

resource-agents-paf (2.2~beta1-1) unstable; urgency=low

  * 2.2_beta1 beta release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Mon, 26 Jun 2017 15:00:00 +0100

resource-agents-paf (2.1.0-1) unstable; urgency=low

  * 2.1.0 major release
  * no changes since 2.1_rc2.
 -- Jehan-Guillaume (ioguix) de Rorthais  Fri, 23 Dec 2016 13:20:00 +0100

resource-agents-paf (2.1~rc2-1) unstable; urgency=low

  * 2.1_rc2 beta release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Sat, 17 Dec 2016 14:10:00 +0100

resource-agents-paf (2.1~rc1-1) unstable; urgency=low

  * 2.1_rc1 beta release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Sun, 11 Dec 2016 23:30:00 +0100

resource-agents-paf (2.1~beta1-1) unstable; urgency=low

  * 2.1_beta1 beta release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Sun, 04 Dec 2016 15:00:00 +0100

resource-agents-paf (2.0.0-1) unstable; urgency=low

  * 2.0.0 major release
  * see CHANGELOG.md

 -- Jehan-Guillaume (ioguix) de Rorthais  Fri, 16 Sep 2016 16:05:16 +0200

resource-agents-paf (2.0~rc1-1) unstable; urgency=low

  * 2.0_rc1 first release candidate

 -- Jehan-Guillaume (ioguix) de Rorthais  Wed, 3 Aug 2016 18:17:35 +0200

resource-agents-paf (2.0~beta2-1) unstable; urgency=low

  * 2.0_beta2 beta release
  * fix: bug in switchover with 9.5 and 9.6

 -- Jehan-Guillaume (ioguix) de Rorthais  Fri, 1 Jul 2016 16:35:35 +0200

resource-agents-paf (2.0~beta1-1) unstable; urgency=low

  * 2.0_beta1 beta release
  * fix: do not use crm_node --partition to discover resources
  * fix: unknown argument --query when calling crm_master
  * fix: perl warning when master score has never been set on the master
  * fix: remove wrong info message during post-promote notify
  * fix: race condition when setting attributes during actions
  * fix: bug where pgport and pghost were ignored in _query
  * fix: use same role name as the system_user to connect
  * fix: wrap crm_master calls in a sub to make them synchronous
  * fix: fixed a bug related to setgid in _runas
  * fix: check on application_name in validate_all
  * change: do not start standby with a master score of 1
  * change: choose the clone to promote when no master score exists
  * new: detect and deal with master/slave recovery transition
  * new: detect and enforce reliability of a switchover
  * new: set next best secondaries based on their lag
  * misc: code cleanup and refactoring
  * misc: various log messages cleanup and enhancement

 -- Jehan-Guillaume (ioguix) de Rorthais  Wed, 15 Jun 2016 23:19:58 +0200

resource-agents-paf (1.0.1-1) unstable; urgency=low

  * 1.0.1 minor release
  * fix: forbid the master to decrease its own score (gh #19)
  * fix: bad LSN decimal conversion (gh #20)
  * fix: support PostgreSQL 9.5 controldata output (gh #12)
  * fix: set group id of given system_user before executing commands (gh #11)
  * fix: use long arguments of external commands when possible
  * fix: bad header leading to wrong manpage section
  * fix: OCF tests when PostgreSQL does not listen in /tmp
  * change: do not update score outside of a monitor action (gh #18)
  * new: add parameter 'start_opts', useful for Debian and derivatives (gh #11)
  * new: add specific timeout for master and slave roles in meta-data (gh #14)
  * new: add debian packaging related files

 -- Jehan-Guillaume (ioguix) de Rorthais  Wed, 27 Apr 2016 13:22:50 +0200

resource-agents-paf (1.0.0-1) unstable; urgency=low

  * Initial release

 -- Jehan-Guillaume (ioguix) de Rorthais  Fri, 08 Apr 2016 13:52:29 +0200

==> PAF-2.3.0/debian/compat <==

9

==> PAF-2.3.0/debian/control <==

Source: resource-agents-paf
Section: admin
Priority: optional
Maintainer: Jehan-Guillaume (ioguix) de Rorthais
Build-Depends: debhelper (>= 9),
 libmodule-build-perl,
 resource-agents,
Standards-Version: 4.0.0
Homepage: http://clusterlabs.github.io/PAF/

Package: resource-agents-paf
Architecture: all
Depends: ${misc:Depends},
 resource-agents,
 perl,
 pacemaker (>= 1.1.13) | pacemaker-remote (>= 1.1.13),
 corosync (>= 2.0.0)
Description: PostgreSQL resource agent for Pacemaker
 PostgreSQL Automatic Failover (aka. PAF) is a new OCF resource Agent
 dedicated to PostgreSQL. Its original wish is to keep a clear limit between
 the Pacemaker administration and the PostgreSQL one, to keep things simple,
 documented and yet powerful.
 .
 It only supports multi-state (Master-Slave) PostgreSQL clusters.

==> PAF-2.3.0/debian/copyright <==

Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: paf
Source: https://github.com/ClusterLabs/PAF/

Files: *
Copyright: 2016-2020 Jehan-Guillaume de Rorthais
           2016-2020 Mael Rimbault
License: PostgreSQL
 Permission to use, copy, modify, and distribute this software and its
 documentation for any purpose, without fee, and without a written agreement
 is hereby granted, provided that the above copyright notice and this
 paragraph and the following two paragraphs appear in all copies.
 .
 IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
 DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
 LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
 EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.
 .
 THE AUTHOR AND DISTRIBUTORS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
 AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
 SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

==> PAF-2.3.0/debian/docs <==

README.md

==> PAF-2.3.0/debian/patches/10-default-host.patch <==

diff --git a/script/pgsqlms b/script/pgsqlms
index 2480fc3..d894393 100755
--- a/script/pgsqlms
+++ b/script/pgsqlms
@@ -48,7 +48,7 @@ my %OCF_NOTIFY_ENV = ocf_notify_env() if $__OCF_ACTION eq 'notify';
 my $system_user_default = "postgres";
 my $bindir_default      = "/usr/bin";
 my $pgdata_default      = "/var/lib/pgsql/data";
-my $pghost_default      = "/tmp";
+my $pghost_default      = "/var/run/postgresql";
 my $pgport_default      = 5432;
 my $start_opts_default  = "";
 my $maxlag_default      = "0";
@@ -1025,7 +1025,7 @@ Location of the PGDATA of your instance
 
 The socket directory or IP address to use to connect to the local instance
 
-(optional, string, default "/tmp")
+(optional, string, default "/var/run/postgresql")
 
 =item B

==> PAF-2.3.0/debian/patches/series <==

10-default-host.patch

==> PAF-2.3.0/debian/rules <==

#!/usr/bin/make -f

# See debhelper(7) (uncomment to enable)
# output every command that modifies files on the build system.
DH_VERBOSE = 1

# main packaging script based on dh7 syntax
%:
	dh $@

==> PAF-2.3.0/debian/source/format <==

3.0 (quilt)

==> PAF-2.3.0/debian/watch <==

version=3
opts=filenamemangle=s/.+\/v?(\d\S+)\.tar\.gz/PAF-$1\.tar\.gz/,uversionmangle=s/_beta/~beta/;s/_alpha/~alpha/;s/_rc/~rc/ \
  https://github.com/ClusterLabs/PAF/releases .*/v?(\d\S+)\.tar\.gz

==> PAF-2.3.0/devel/Vagrantfile <==

#ENV['VAGRANT_NO_PARALLEL'] = 'yes' # uncomment to forbid parallel execution

ENV['LANG'] = 'C'
ENV['LC_ALL'] = 'C'

Vagrant.configure(2) do |config|

    # don't mind about insecure ssh key
    config.ssh.insert_key = false

    # hardware and host settings
    config.vm.provider 'libvirt' do |lv|
        lv.cpus = 1
        lv.memory = 512
        lv.default_prefix = 'paf_pkg'
    end

    config.vm.synced_folder '..', '/vagrant', type: 'rsync'

    config.vm.define 'deb' do |debvm|
        debvm.vm.box = 'debian/stretch64'
        debvm.vm.provision 'DEB builder', type: 'shell', path:'deb-builder.bash'
        debvm.vm.post_up_message = <<-HEREDOC
            If no error appeared, you can get the package using the following command:
            vagrant ssh --no-tty deb -c 'sudo ls /root/|grep deb$|xargs sudo tar c -C /root/'| tar xv
        HEREDOC
    end

    config.vm.define 'rpm' do |rpmvm|
        rpmvm.vm.box = 'centos/7'
        rpmvm.vm.provision 'RPM builder', type: 'shell', path:'rpm-builder.bash'
        rpmvm.vm.post_up_message = <<-HEREDOC
            If no error appeared, you can get the package using the following command:
            vagrant ssh --no-tty rpm -c 'sudo tar c -C /root/rpmbuild/RPMS/noarch .'| tar vx
        HEREDOC
    end
end

==> PAF-2.3.0/devel/deb-builder.bash <==

#!/usr/bin/env bash

set -o errexit
set -o nounset
set -o pipefail

cd /root

DEBIAN_FRONTEND=noninteractive apt-get -qq install dh-make devscripts libmodule-build-perl resource-agents

uscan --check-dirname-level=0 --destdir=/root --force-download /vagrant

mkdir resource-agents-paf
tar zxf resource-agents-paf_*.orig.tar.gz -C "resource-agents-paf" --strip-components=1

cd resource-agents-paf
debuild --check-dirname-level=0 -i -us -uc -b

==> PAF-2.3.0/devel/doc/action-monitor.dia <==

[binary file: Dia diagram of the pgsqlms monitor action; content not representable as text]
==> PAF-2.3.0/devel/doc/action-monitor.pdf <==

[binary file: PDF export of the monitor action diagram; content not representable as text]
==> PAF-2.3.0/devel/doc/releasing.md <==

# Releasing

## Source code

Edit the variable `$VERSION` in the following files:

* `script/pgsqlms`
* `lib/OCF_Directories.pm.PL`
* `lib/OCF_Functions.pm`
* `lib/OCF_ReturnCodes.pm`

In `Build.PL`, search and edit the following lines:

```
dist_version   => '1.0.0',
release_status => 'stable',
```

For a beta or rc release, set `release_status => 'testing'`.

In `resource-agents-paf.spec`:

* update the tag in the `_tag` variable (first line)
* update the version in `Version:`. For a beta or rc release, use X.Y~betaZ
  or X.Y~rcZ
* edit the changelog
  * date format: `LC_TIME=C date +"%a %b %d %Y"`
* take care of the `Release` field if there are multiple versions of the
  package for the same version of PAF

In `debian/`, edit the `changelog` file.

Edit the `CHANGELOG.md` file.

## Commit the changes

Check that every issue related to this release has been closed!

```
git commit -m 'vX.Y.0 release'
```

For a beta or rc release use `vX.Y_betaN` or `vX.Y_rcN`, eg. `v2.2_beta1`.

## Tagging and building the tar file

```
TAG=v1.0.0
git tag $TAG
git push --tags
git archive --prefix=PAF-$TAG/ -o /tmp/PAF-$TAG.tgz $TAG
```

For a beta or rc release use `vX.Y_betaN` or `vX.Y_rcN`, eg. `v2.2_beta1`.

## Release on github

- go to https://github.com/ClusterLabs/PAF/tags
- edit the release notes for the new tag
- set "PAF $VERSION" as title, eg. "PAF 1.0.0"
- here is the format of the release note itself:

      YYYY-MM-DD - Version X.Y.Z

      Changelog:

      * item 1
      * item 2
      * ...

      See http://clusterlabs.github.io/PAF/documentation.html

- upload the tar file
- save

## Building the RPM file

### Installation

```
yum group install "Development Tools"
yum install rpmdevtools
useradd makerpm
```

### Building the package

```
su - makerpm
rpmdev-setuptree
git clone https://github.com/ClusterLabs/PAF.git
spectool -R -g PAF/resource-agents-paf.spec
rpmbuild -ba PAF/resource-agents-paf.spec
```

Don't forget to upload the package to the GitHub release page.

## Building the deb file

### Installation

Packages to install on your Debian host to build the package:

```
apt-get install dh-make devscripts libmodule-build-perl resource-agents
```

### Building the package

```
VER=1.0.0
wget "https://github.com/ClusterLabs/PAF/archive/v${VER/\~/_}.tar.gz" -O resource-agents-paf_${VER}.orig.tar.gz
mkdir resource-agents-paf-$VER
tar zxf resource-agents-paf_${VER}.orig.tar.gz -C "resource-agents-paf-$VER" --strip-components=1
cd resource-agents-paf-${VER}
debuild -i -us -uc -b
```

For a beta or rc release, use `VER=X.Y~betaN` or `VER=X.Y~rcN`.

Don't forget to upload the package to the GitHub release page.

## Documentation

Update the "quick start" documentation pages with the links to the new
packages.

## Community

* if this is a first beta or a release:
  - submit a news item on the postgresql.org website
  - submit a mail to the pgsql-announce mailing list
* submit a mail to the users@clusterlabs.org mailing list
* tweet, blog, ...
==> PAF-2.3.0/devel/doc/sub_controldata.dia <==

[binary file: Dia diagram (sub_controldata); content not representable as text]

==> PAF-2.3.0/devel/doc/sub_pg_ctl_status.dia <==

[binary file: Dia diagram (sub_pg_ctl_status); content not representable as text]

==> PAF-2.3.0/devel/doc/sub_query.dia <==

[binary file: Dia diagram (sub_query); content not representable as text]
==> PAF-2.3.0/devel/rpm-builder.bash <==

#!/usr/bin/env bash

set -o errexit
set -o nounset
set -o pipefail

yum --nogpgcheck --quiet -y -e 0 install git rpmdevtools perl-Module-Build resource-agents rpmlint

rpmdev-setuptree

rpmlint /vagrant/resource-agents-paf.spec

cd /vagrant
TAG=$(awk '/^%global _tag/{print $NF}' /vagrant/resource-agents-paf.spec)
git archive --prefix="PAF-${TAG}/" --format=tar.gz v${TAG} > /root/rpmbuild/SOURCES/v${TAG}.tar.gz

rpmbuild --quiet -ba /vagrant/resource-agents-paf.spec

rpmlint /root/rpmbuild/RPMS/noarch/resource-agents-paf-*.noarch.rpm

==> PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/Makefile <==

export VAGRANT_BOX_UPDATE_CHECK_DISABLE=1
export VAGRANT_CHECKPOINT_DISABLE=1

.PHONY: all create_vm pgsql-primary pgsql-replicas qdevice pacemaker cts prov clean validate

all: create_vm pgsql-replicas pacemaker

create_vm:
	vagrant up

pgsql-replicas: pcmk-stop pgsql-primary
	vagrant up --provision-with=pgsql-replicas

qdevice:
	vagrant up --provision-with=qdevice

pacemaker: qdevice
	vagrant up --provision-with=pacemaker

pgsql-primary: pcmk-stop
	vagrant up --provision-with=pgsql-primary

prov:
	vagrant up --provision

clean:
	vagrant destroy -f

check: validate

validate:
	@vagrant validate
	@if which shellcheck >/dev/null ;\
	then shellcheck provision/*bash ;\
	else echo "WARNING: shellcheck is not in PATH, not checking bash syntax" ;\
	fi

cts:
	vagrant up --provision-with=cts

pcmk-stop:
	vagrant ssh -c 'if [ -f "/etc/corosync/corosync.conf" ]; then sudo pcs cluster stop --all --wait; fi'
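A typical session with this Makefile might be (an illustrative sketch, using
the targets defined above):

    make all       # create the VMs, build the replicas and set up Pacemaker
    make validate  # validate the Vagrantfile and shellcheck the provision scripts
    make clean     # destroy every VM of the cluster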
==> PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/Vagrantfile <==

require 'ipaddr'
require 'yaml'

#ENV['VAGRANT_NO_PARALLEL'] = 'yes' # uncomment to forbid parallel execution

ENV["LANG"] = "C"
ENV["LC_ALL"] = "C"

boxname = 'centos/7'        # vagrant box to use
pgver = '11'                # pg version to use
hapass = 'hapass'           # password for sys user hacluster
ssh_login = 'root'          # ssh login to connect to the host when fencing a VM.
                            # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys"
master_ip = '10.20.30.105'  # vIP assigned to master
pg_nodes = 'srv1', 'srv2'   # first will be primary
qd_node = 'qd'              # name of the quorum device node
log_node = 'log-sink'       # name of the node receiving logs
vm_prefix = 'paf_2nqd'      # VM prefix in libvirt
rhel_user = ''              # RHEL user account
rhel_pass = ''              # RHEL user account password

if File.file?('vagrant.yml') and ( custom = YAML.load_file('vagrant.yml') )
    boxname   = custom['boxname']   if custom.has_key?('boxname')
    pgver     = custom['pgver']     if custom.has_key?('pgver')
    hapass    = custom['hapass']    if custom.has_key?('hapass')
    ssh_login = custom['ssh_login'] if custom.has_key?('ssh_login')
    master_ip = custom['master_ip'] if custom.has_key?('master_ip')
    pg_nodes  = custom['pg_nodes']  if custom.has_key?('pg_nodes')
    log_node  = custom['log_node']  if custom.has_key?('log_node')
    vm_prefix = custom['vm_prefix'] if custom.has_key?('vm_prefix')
    rhel_user = custom['rhel_user'] if custom.has_key?('rhel_user')
    rhel_pass = custom['rhel_pass'] if custom.has_key?('rhel_pass')
end

Vagrant.configure(2) do |config|

    pgdata = "/var/lib/pgsql/#{pgver}/data"
    next_ip = IPAddr.new(master_ip).succ
    host_ip = (IPAddr.new(master_ip) & "255.255.255.0").succ.to_s
    nodes_ips = {}

    ( pg_nodes + [ qd_node, log_node ] ).each do |node|
        nodes_ips[node] = next_ip.to_s
        next_ip = next_ip.succ
    end

    # don't mind about insecure ssh key
    config.ssh.insert_key = false

    # https://vagrantcloud.com/search.
    config.vm.box = boxname

    # hardware and host settings
    config.vm.provider 'libvirt' do |lv|
        lv.cpus = 1
        lv.memory = 512
        lv.watchdog model: 'i6300esb'
        lv.default_prefix = vm_prefix
        lv.qemu_use_session = false
    end

    # disable default share
    config.vm.synced_folder ".", "/vagrant", disabled: true

    config.vm.synced_folder "../../..", "/vagrant", type: "rsync",
        rsync__exclude: [ ".git/" ]

    # system setup for all nodes
    (pg_nodes + [qd_node, log_node]).each do |node|
        config.vm.define node do |conf|
            conf.vm.network 'private_network', ip: nodes_ips[node]
            conf.vm.provision 'system-setup', type: 'shell',
                path: 'provision/system.bash',
                args: [ node, rhel_user, rhel_pass ] +
                    nodes_ips.keys.map {|n| "#{n}=#{nodes_ips[n]}"},
                preserve_order: true
        end
    end

    # setup rsyslog to collect logs from other node on log-sink
    config.vm.define log_node do |conf|
        conf.vm.provision 'rsyslog-setup', type: 'shell',
            path: 'provision/log_sink.bash'
    end

    # common postgresql+pacemaker installation and setup
    pg_nodes.each do |node|
        config.vm.define node do |conf|
            conf.vm.provision 'cluster-common', type: 'shell',
                path: 'provision/cluster-common.bash',
                args: [ pgver, hapass, master_ip ],
                preserve_order: true
        end
    end

    # build primary pgsql instance
    config.vm.define pg_nodes.first, primary:true do |conf|
        conf.vm.provision 'pgsql-primary', type: 'shell',
            path: 'provision/pgsql-primary.bash',
            args: [ pgver, pgdata, master_ip, pg_nodes.first ],
            run: 'never'
    end

    # replicas setup. Use "vagrant up --provision-with=pgsql-replicas"
    pg_nodes[1..-1].each do |node|
        config.vm.define node do |conf|
            conf.vm.provision 'pgsql-replicas', type: 'shell',
                path: 'provision/pgsql-replicas.bash',
                args: [ pgver, pgdata, master_ip, node ],
                run: 'never'
        end
    end

    # quorum device setup. Use "vagrant up --provision-with=qdevice"
    config.vm.define qd_node do |conf|
        conf.vm.provision 'qdevice', type: 'shell',
            path: 'provision/qdevice.bash',
            args: [ hapass ],
            run: 'never'
    end

    # cluster setup. Use "vagrant up --provision-with=pacemaker"
    pg_nodes.each do |node|
        config.vm.define node do |conf|
            conf.vm.provision 'pacemaker', type: 'shell',
                path: 'provision/pacemaker.bash',
                args: [ pgver, hapass, master_ip, ssh_login, vm_prefix,
                        host_ip, pgdata, qd_node ] + pg_nodes,
                run: 'never'
        end
    end

    # cluster test suite setup. Use "vagrant up --provision-with=cts"
    config.vm.provision 'cts', type: 'shell',
        path: 'provision/cts.bash',
        run: 'never'
end
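A minimal `vagrant.yml` consumed by the override block above might look like
this sketch (every key is optional and falls back to the defaults set in the
Vagrantfile; the values shown are placeholders):

    boxname: centos/7
    pgver: "12"
    hapass: secret
    pg_nodes:
      - srv1
      - srv2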
==> PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/cluster-common.bash <==

#!/usr/bin/env bash

set -o errexit
set -o nounset
set -o pipefail

PGVER="$1"
HAPASS="$2"
MASTER_IP="$3"

# shellcheck disable=SC1091
source "/etc/os-release"
OS_ID="$ID"
OS_VER="$VERSION_ID"
YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0"

# install required packages
if [ "$OS_ID" = "rhel" ]; then
    # use yum instead of dnf for compatibility between EL 7 and 8
    yum-config-manager --enable "*highavailability-rpms"
fi

if ! rpm --quiet -q "pgdg-redhat-repo"; then
    if [ "${OS_VER:0:2}" = "8." ]; then
        $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm"
    else
        $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm"
    fi
fi

# disable postgresql upstream module conflicting with pgdg packages in RHEL8
if [ "${OS_VER:0:2}" = "8." ]; then
    yum -qy module disable postgresql
fi

PACKAGES=(
    pacemaker corosync-qdevice pcs resource-agents fence-agents-virsh sbd
    perl-Module-Build
    "postgresql${PGVER}"
    "postgresql${PGVER}-server"
    "postgresql${PGVER}-contrib"
)

$YUM_INSTALL "${PACKAGES[@]}"

# firewall setup
firewall-cmd --quiet --permanent --add-service=high-availability
firewall-cmd --quiet --permanent --add-service=postgresql
firewall-cmd --quiet --reload

# cluster stuffs
systemctl --quiet --now enable pcsd
echo "${HAPASS}"|passwd --stdin hacluster > /dev/null 2>&1

cp /etc/sysconfig/pacemaker /etc/sysconfig/pacemaker.dist
cat<<'EOF' > /etc/sysconfig/pacemaker
PCMK_debug=yes
PCMK_logpriority=debug
EOF

# cleanup master ip everywhere
HAS_MASTER_IP=$(ip -o addr show to "${MASTER_IP}"|wc -l)

if [ "$HAS_MASTER_IP" -gt 0 ]; then
    DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eom1 'dev \w+')
    ip addr del "${MASTER_IP}/24" dev "${DEV/dev }"
fi

# send logs to log-sinks
cat <<'EOF' >/etc/rsyslog.d/fwd_log_sink.conf
*.* action(type="omfwd"
queue.type="LinkedList"
queue.filename="log_sink_fwd"
action.resumeRetryCount="-1"
queue.saveonshutdown="on"
target="log-sink" Port="514" Protocol="tcp")
EOF

systemctl --quiet restart rsyslog

# cleanup pre-existing IP address
ip -o addr show to "${MASTER_IP}" | if grep -q "${MASTER_IP}"
then
    DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eo 'dev \w+')
    ip addr del "${MASTER_IP}/24" dev "${DEV/dev }"
fi

# install PAF
cd /vagrant
[ -f Build ] && perl Build distclean
sudo -u vagrant perl Build.PL --quiet >/dev/null 2>&1
sudo -u vagrant perl Build --quiet
perl Build --quiet install
<<'EOF'>/etc/rsyslog.d/rateLimit.conf $imjournalRatelimitInterval 0 $imjournalRatelimitBurst 0 EOF systemctl --quiet restart rsyslog # make journald logs persistent mkdir -p /var/log/journal # do not drop any log messages from journald mkdir -p /etc/systemd/journald.conf.d cat <<'EOF'>/etc/systemd/journald.conf.d/rateLimit.conf RateLimitInterval=0 RateLimitBurst=0 EOF systemctl --quiet restart systemd-journald # shellcheck disable=SC1091 source "/etc/os-release" OS_VER="$VERSION_ID" if [ "${OS_VER:0:2}" != "7." ]; then exit; fi # fix bug in the log watcher for EL7 cat <<'EOF' | patch /usr/lib64/python2.7/site-packages/cts/watcher.py *** /tmp/watcher.py.orig 2019-02-07 16:25:32.836265277 +0100 --- /tmp/watcher.py 2019-02-07 16:27:03.296926885 +0100 *************** *** 124,130 **** self.offset = "EOF" if host == None: ! host = "localhost" def __str__(self): if self.host: --- 124,130 ---- self.offset = "EOF" if host == None: ! self.host = "localhost" def __str__(self): if self.host: *************** *** 155,179 **** class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! SearchObj.__init__(self, filename, host, name) ! ! if host is not None: ! if not host in has_log_watcher: ! global log_watcher ! global log_watcher_bin ! self.debug("Installing %s on %s" % (log_watcher_file, host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (host, log_watcher_bin)) ! has_log_watcher[host] = 1 ! os.system("rm -f %s" %(log_watcher_file)) ! self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: --- 155,176 ---- class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! global log_watcher ! global log_watcher_bin ! SearchObj.__init__(self, filename, host, name) ! self.debug("Installing %s on %s" % (log_watcher_file, self.host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (self.host, log_watcher_bin)) ! has_log_watcher[self.host] = 1 ! os.system("rm -f %s" %(log_watcher_file)) ! 
self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: EOF PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/id_rsa000066400000000000000000000032131363154243400235330ustar00rootroot00000000000000-----BEGIN RSA PRIVATE KEY----- MIIEogIBAAKCAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzI w+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoP kcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2 hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NO Td0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcW yLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQIBIwKCAQEA4iqWPJXtzZA68mKd ELs4jJsdyky+ewdZeNds5tjcnHU5zUYE25K+ffJED9qUWICcLZDc81TGWjHyAqD1 Bw7XpgUwFgeUJwUlzQurAv+/ySnxiwuaGJfhFM1CaQHzfXphgVml+fZUvnJUTvzf TK2Lg6EdbUE9TarUlBf/xPfuEhMSlIE5keb/Zz3/LUlRg8yDqz5w+QWVJ4utnKnK iqwZN0mwpwU7YSyJhlT4YV1F3n4YjLswM5wJs2oqm0jssQu/BT0tyEXNDYBLEF4A sClaWuSJ2kjq7KhrrYXzagqhnSei9ODYFShJu8UWVec3Ihb5ZXlzO6vdNQ1J9Xsf 4m+2ywKBgQD6qFxx/Rv9CNN96l/4rb14HKirC2o/orApiHmHDsURs5rUKDx0f9iP cXN7S1uePXuJRK/5hsubaOCx3Owd2u9gD6Oq0CsMkE4CUSiJcYrMANtx54cGH7Rk EjFZxK8xAv1ldELEyxrFqkbE4BKd8QOt414qjvTGyAK+OLD3M2QdCQKBgQDtx8pN CAxR7yhHbIWT1AH66+XWN8bXq7l3RO/ukeaci98JfkbkxURZhtxV/HHuvUhnPLdX 3TwygPBYZFNo4pzVEhzWoTtnEtrFueKxyc3+LjZpuo+mBlQ6ORtfgkr9gBVphXZG YEzkCD3lVdl8L4cw9BVpKrJCs1c5taGjDgdInQKBgHm/fVvv96bJxc9x1tffXAcj 3OVdUN0UgXNCSaf/3A/phbeBQe9xS+3mpc4r6qvx+iy69mNBeNZ0xOitIjpjBo2+ dBEjSBwLk5q5tJqHmy/jKMJL4n9ROlx93XS+njxgibTvU6Fp9w+NOFD/HvxB3Tcz 6+jJF85D5BNAG3DBMKBjAoGBAOAxZvgsKN+JuENXsST7F89Tck2iTcQIT8g5rwWC P9Vt74yboe2kDT531w8+egz7nAmRBKNM751U/95P9t88EDacDI/Z2OwnuFQHCPDF llYOUI+SpLJ6/vURRbHSnnn8a/XG+nzedGH5JGqEJNQsz+xT2axM0/W/CRknmGaJ kda/AoGANWrLCz708y7VYgAtW2Uf1DPOIYMdvo6fxIB5i9ZfISgcJ/bbCUkFrhoH +vq/5CIWxCPp0f85R4qxxQ5ihxJ0YDQT9Jpx4TMss4PSavPaBH3RXow5Ohe+bYoQ NE5OgEXk2wVfZczCZpigBKbKZHNYcelXtTt/nP3rsCuGcM4h53s= -----END RSA PRIVATE KEY----- PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/id_rsa.pub000066400000000000000000000006311363154243400243210ustar00rootroot00000000000000ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/log_sink.bash000077500000000000000000000005661363154243400250260ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail # setup log sink cat <<'EOF' > /etc/rsyslog.d/log_sink.conf $ModLoad imtcp $InputTCPServerRun 514 $template RemoteLogsMerged,"/var/log/%HOSTNAME%/messages.log" *.* ?RemoteLogsMerged $template RemoteLogs,"/var/log/%HOSTNAME%/%PROGRAMNAME%.log" *.* ?RemoteLogs #& ~ EOF systemctl --quiet restart rsyslog PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/pacemaker.bash000077500000000000000000000066121363154243400251470ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" HAPASS="$2" MASTER_IP="$3" SSH_LOGIN="$4" VM_PREFIX="$5" HOST_IP="$6" PGDATA="$7" QD_NODE="$8" shift 8 NODES=( "$@" ) CUSTOMDIR="${PGDATA}/conf.d" PCMK_VER=$(yum info --quiet pacemaker|grep ^Version) PCMK_VER="${PCMK_VER#*: }" # extract x.y.z PCMK_VER="${PCMK_VER:0:1}" # extract x if [ "$PCMK_VER" -ge 2 ]; then # if pacemaker version is 2.x, we 
suppose pcs support it (pcs >= 0.10) # from pcs 0.10, pcs host auth must be exec'ed on each node pcs host auth -u hacluster -p "${HAPASS}" "${NODES[@]}" "$QD_NODE" else # this could be run on one node, but it doesn't hurt if it runs everywhere, # so we keep this piece of code with the one dedicated to pacemaker 2.x pcs cluster auth -u hacluster -p "${HAPASS}" "${NODES[@]}" "$QD_NODE" fi if [ "$(hostname -s)" != "${NODES[0]}" ]; then exit 0 fi # WARNING: # Starting from here, everything is executed on first node only! if [ "$PCMK_VER" -ge 2 ]; then pcs cluster setup cluster_pgsql --force "${NODES[@]}" else pcs cluster setup --name cluster_pgsql --wait --force "${NODES[@]}" fi pcs stonith sbd enable pcs quorum device add model net host=qd algorithm=ffsplit pcs cluster start --all --wait pcs cluster cib cluster1.xml pcs -f cluster1.xml resource defaults migration-threshold=5 pcs -f cluster1.xml resource defaults resource-stickiness=10 pcs -f cluster1.xml property set stonith-watchdog-timeout=10s # for VM in "${NODES[@]}"; do # FENCE_ID="fence_vm_${VM}" # VM_PORT="${VM_PREFIX}_${VM}" # pcs -f cluster1.xml stonith create "${FENCE_ID}" fence_virsh \ # pcmk_host_check=static-list "pcmk_host_list=${VM}" \ # "port=${VM_PORT}" "ipaddr=${HOST_IP}" "login=${SSH_LOGIN}" \ # "identity_file=/root/.ssh/id_rsa" # pcs -f cluster1.xml constraint location "fence_vm_${VM}" \ # avoids "${VM}=INFINITY" # done PGSQLD_RSC_OPTS=( "ocf:heartbeat:pgsqlms" "bindir=/usr/pgsql-${PGVER}/bin" "pgdata=${PGDATA}" "recovery_template=${CUSTOMDIR}/recovery.conf.pcmk" "op" "start" "timeout=60s" "op" "stop" "timeout=60s" "op" "promote" "timeout=30s" "op" "demote" "timeout=120s" "op" "monitor" "interval=15s" "timeout=10s" "role=Master" "op" "monitor" "interval=16s" "timeout=10s" "role=Slave" "op" "notify" "timeout=60s" ) # NB: pcs 0.10.2 doesn't support to set the id of the clone XML node # the id is built from the rsc id to clone using "-clone" # As a matter of cohesion and code simplicity, we use the same # convention to create the master resource with pcs 0.9.x for # Pacemaker 1.1 if [ "$PCMK_VER" -ge 2 ]; then PGSQLD_RSC_OPTS+=( "promotable" "notify=true" ) fi pcs -f cluster1.xml resource create pgsqld "${PGSQLD_RSC_OPTS[@]}" if [ "$PCMK_VER" -eq 1 ]; then pcs -f cluster1.xml resource master pgsqld-clone pgsqld notify=true fi pcs -f cluster1.xml resource create pgsql-master-ip \ "ocf:heartbeat:IPaddr2" "ip=${MASTER_IP}" cidr_netmask=24 \ op monitor interval=10s pcs -f cluster1.xml constraint colocation add pgsql-master-ip with master pgsqld-clone INFINITY pcs -f cluster1.xml constraint order promote pgsqld-clone "then" start pgsql-master-ip symmetrical=false pcs -f cluster1.xml constraint order demote pgsqld-clone "then" stop pgsql-master-ip symmetrical=false pcs cluster cib-push scope=configuration cluster1.xml --wait crm_mon -Dn1 PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/pgsql-primary.bash000077500000000000000000000040331363154243400260210ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" PGDATA="$2" MASTER_IP="$3" NODENAME="$4" CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # init instance "/usr/pgsql-${PGVER}/bin/postgresql-${PGVER}-setup" initdb # pg_hba setup cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust # forbid self-replication host replication postgres ${MASTER_IP}/32 reject host replication postgres ${NODENAME} reject # allow any standby connection host 
replication postgres 0.0.0.0/0 trust EOC # postgresql.conf setup mkdir -p "$CUSTOMDIR" echo "include_dir = 'conf.d'" >> "${PGDATA}/postgresql.conf" cat <<'EOC' > "${CUSTOMDIR}/custom.conf" listen_addresses = '*' wal_level = replica max_wal_senders = 10 hot_standby = on hot_standby_feedback = on wal_keep_segments = 256 log_destination = 'syslog,stderr' log_checkpoints = on log_min_duration_statement = 0 log_autovacuum_min_duration = 0 log_replication_commands = on EOC if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R postgres:postgres "$PGDATA" # create master ip ip -o addr show to "${MASTER_IP}" | if ! grep -q "${MASTER_IP}" then DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eo 'dev \w+') ip addr add "${MASTER_IP}/24" dev "${DEV/dev }" fi # restart master pgsql systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/pgsql-replicas.bash000077500000000000000000000031151363154243400261400ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" PGDATA="$2" MASTER_IP="$3" NODENAME="$4" CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # build standby "/usr/pgsql-${PGVER}/bin/pg_basebackup" -h "${MASTER_IP}" -U postgres -D "${PGDATA}" -X stream # set pg_hba cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust # forbid self-replication host replication postgres ${MASTER_IP}/32 reject host replication postgres ${NODENAME} reject # allow any standby connection host replication postgres 0.0.0.0/0 trust EOC cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC cp "${CUSTOMDIR}/recovery.conf.pcmk" "${PGDATA}/recovery.conf" else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 touch "${PGDATA}/standby.signal" fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R "postgres:postgres" "${PGDATA}/.." # start systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/qdevice.bash000077500000000000000000000007311363154243400246330ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" $YUM_INSTALL pcs corosync-qnetd echo "$1"|passwd --stdin hacluster > /dev/null 2>&1 systemctl --quiet --now enable pcsd.service if ! 
pcs qdevice status net cluster_pgsql|grep -q cluster_pgsql; then pcs qdevice setup model net --enable --start fi firewall-cmd --quiet --permanent --add-service=high-availability firewall-cmd --quiet --reload PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/provision/system.bash000077500000000000000000000030171363154243400245370ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail NODENAME="$1" RHEL_USER="$2" RHEL_PASS="$3" shift NODES=( "$@" ) hostnamectl set-hostname "${NODENAME}" for N in "${NODES[@]}"; do NG=$(sed -n "/${N%=*}\$/p" /etc/hosts|wc -l) if [ "$NG" -eq 0 ]; then echo "${N##*=} ${N%=*}" >> /etc/hosts fi done # shellcheck disable=SC1091 source "/etc/os-release" OS_ID="$ID" PACKAGES=( vim bash-completion yum-utils ) if [ "$OS_ID" = "rhel" ]; then subscription-manager register --force --username "${RHEL_USER:?}" --password "${RHEL_PASS:?}" --auto-attach PACKAGES+=("tmux") else PACKAGES+=("screen") fi yum install --nogpgcheck --quiet -y -e 0 "${PACKAGES[@]}" cat <<'EOF' > "/home/vagrant/.ssh/config" Host * CheckHostIP no StrictHostKeyChecking no EOF cp "/vagrant/extra/vagrant/2nodes-qdevice-vip/provision/id_rsa" "/home/vagrant/.ssh" cp "/vagrant/extra/vagrant/2nodes-qdevice-vip/provision/id_rsa.pub" "/home/vagrant/.ssh" chown -R "vagrant:" "/home/vagrant/.ssh" chmod 0700 "/home/vagrant/.ssh" chmod 0600 "/home/vagrant/.ssh/id_rsa" chmod 0644 "/home/vagrant/.ssh/id_rsa.pub" chmod 0600 "/home/vagrant/.ssh/config" chmod 0600 "/home/vagrant/.ssh/authorized_keys" cp -R "/home/vagrant/.ssh" "/root" # force proper permissions on .ssh files chown -R "root:" "/root/.ssh" chmod 0700 "/root/.ssh" chmod 0600 "/root/.ssh/id_rsa" chmod 0644 "/root/.ssh/id_rsa.pub" chmod 0600 "/root/.ssh/config" chmod 0600 "/root/.ssh/authorized_keys" # enable firewall systemctl --quiet --now enable firewalld PAF-2.3.0/extra/vagrant/2nodes-qdevice-vip/vagrant.yml-dist000066400000000000000000000013251363154243400234470ustar00rootroot00000000000000# boxname: "centos/7" # vagrant box to use # pgver: "10" # pg version to use # hapass: "hapass" # password for sys user hacluster # ssh_login: "user" # ssh login to connect to the host when fencing a VM. # # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys" # master_ip: "10.20.30.5" # vIP assigned to master # pg_nodes: # servers to create. 
# - "srv1" # First one will be master # - "srv2" # - "srv3" # qd_node: "qd" # QDevice node # log_node: "log-sink" # log collector node # vm_prefix: "paf_vm" # rhel_user: "" # RHEL user account # rhel_pass: "" # RHEL user account password PAF-2.3.0/extra/vagrant/3nodes-haproxy/000077500000000000000000000000001363154243400175775ustar00rootroot00000000000000PAF-2.3.0/extra/vagrant/3nodes-haproxy/Makefile000066400000000000000000000017441363154243400212450ustar00rootroot00000000000000export VAGRANT_BOX_UPDATE_CHECK_DISABLE=1 export VAGRANT_CHECKPOINT_DISABLE=1 .PHONY: all create_vm postgresql pgsql_replicas pacemaker prov clean check validate cts pcmk-stop all: create_vm postgresql pgsql_replicas pacemaker cluster create_vm: vagrant up postgresql: pcmk-stop vagrant up --provision-with=postgresql pgsql_replicas: pcmk-stop vagrant up --provision-with=pgsql-replicas pacemaker: vagrant up --provision-with=pacemaker cluster: vagrant up --provision-with=cluster-setup prov: vagrant up --provision clean: vagrant destroy -f check: validate validate: @vagrant validate @if which shellcheck >/dev/null ;\ then shellcheck provision/*bash ;\ else echo "WARNING: shellcheck is not in PATH, not checking bash syntax" ;\ fi cts: vagrant up --provision-with=cts pcmk-stop: vagrant ssh -c 'if [ -f "/etc/corosync/corosync.conf" ]; then sudo pcs cluster stop --all --wait; fi' PAF-2.3.0/extra/vagrant/3nodes-haproxy/Vagrantfile000066400000000000000000000131411363154243400217640ustar00rootroot00000000000000require 'ipaddr' require 'yaml' #ENV['VAGRANT_NO_PARALLEL'] = 'yes' # uncomment to forbid parallel execution ENV["LANG"] = "C" ENV["LC_ALL"] = "C" boxname = 'centos/7' # vagrant box to use pgver = '11' # pg version to use hapass = 'hapass' # password for sys user hacluster ssh_login = 'root' # ssh login to connect to the host when fencing a VM. # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys" base_ip = '10.20.30.5' # Base IP address to compute other ones pg_nodes = 'srv1', 'srv2', 'srv3' # first will be primary log_node = 'log-sink' # name of the node receiving logs vm_prefix = 'paf_vm' # VM prefix in libvrit rhel_user = '' # RHEL user account rhel_pass = '' # RHEL user account password if File.file?('vagrant.yml') and ( custom = YAML.load_file('vagrant.yml') ) boxname = custom['boxname'] if custom.has_key?('boxname') pgver = custom['pgver'] if custom.has_key?('pgver') hapass = custom['hapass'] if custom.has_key?('hapass') ssh_login = custom['ssh_login'] if custom.has_key?('ssh_login') base_ip = custom['base_ip'] if custom.has_key?('base_ip') pg_nodes = custom['pg_nodes'] if custom.has_key?('pg_nodes') log_node = custom['log_node'] if custom.has_key?('log_node') vm_prefix = custom['vm_prefix'] if custom.has_key?('vm_prefix') rhel_user = custom['rhel_user'] if custom.has_key?('rhel_user') rhel_pass = custom['rhel_pass'] if custom.has_key?('rhel_pass') end Vagrant.configure(2) do |config| ############################################################################ # computes variables pgdata = "/var/lib/pgsql/#{pgver}/data" next_ip = IPAddr.new(base_ip).succ host_ip = (IPAddr.new(base_ip) & "255.255.255.0").succ.to_s nodes_ips = {} ( pg_nodes + [ log_node ] ).each do |node| nodes_ips[node] = next_ip.to_s next_ip = next_ip.succ end ############################################################################ # general vagrant setup # don't mind about insecure ssh key config.ssh.insert_key = false # https://vagrantcloud.com/search. 
config.vm.box = boxname # hardware and host settings config.vm.provider 'libvirt' do |lv| lv.cpus = 1 lv.memory = 512 lv.watchdog model: 'i6300esb' lv.default_prefix = vm_prefix lv.qemu_use_session = false end # disable default share config.vm.synced_folder ".", "/vagrant", disabled: true config.vm.synced_folder "../../..", "/vagrant", type: "rsync", rsync__exclude: [ ".git/" ] ############################################################################ # system setup for all nodes config.vm.define pg_nodes.first, primary: true (pg_nodes + [log_node]).each do |node| config.vm.define node do |conf| conf.vm.network 'private_network', ip: nodes_ips[node] conf.vm.provision 'system-setup', type: 'shell', path: 'provision/system.bash', args: [ node, rhel_user, rhel_pass ] + nodes_ips.keys.map {|n| "#{n}=#{nodes_ips[n]}"}, preserve_order: true end end ############################################################################ # setup rsyslog to collect logs (pg_nodes + [log_node]).each do |node| config.vm.define node do |conf| conf.vm.provision 'rsyslog-setup', type: 'shell', path: 'provision/rsyslog.bash', args: [ log_node ], preserve_order: true end end ############################################################################ # setup haproxy pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'haproxy-setup', type: 'shell', path: 'provision/haproxy.bash', preserve_order: true end end ############################################################################ # postgresql installation and setup pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'postgresql', type: 'shell', path: 'provision/postgresql.bash', args: [ pgver, pg_nodes.first, pgdata ], preserve_order: true end end # replicas setup. Use "vagrant up --provision-with=pgsql-replicas" pg_nodes[1..-1].each do |node| config.vm.define node do |conf| conf.vm.provision 'pgsql-replicas', type: 'shell', path: 'provision/pgsql-replicas.bash', args: [ pgver, node, pgdata ], run: 'never' end end ############################################################################ # cluster setup. Use "vagrant up --provision-with=pacemaker" pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'pacemaker', type: 'shell', path: 'provision/pacemaker.bash', args: [ hapass ], run: 'never' end end # create the cluster. Use "vagrant up --provision-with=cluster-setup" pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'cluster-setup', type: 'shell', path: 'provision/cluster.bash', args: [ pgver, ssh_login, vm_prefix, host_ip, pgdata, hapass ] + pg_nodes, run: 'never' end end # cluster test suite setup. 
Use "vagrant up --provision-with=cts" config.vm.provision 'cts', type: 'shell', path: 'provision/cts.bash', run: 'never' end PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/000077500000000000000000000000001363154243400216275ustar00rootroot00000000000000PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/cluster.bash000077500000000000000000000056511363154243400241610ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" SSH_LOGIN="$2" VM_PREFIX="$3" HOST_IP="$4" PGDATA="$5" HAPASS="$6" shift 6 NODES=( "$@" ) CUSTOMDIR="${PGDATA}/conf.d" # psd authent PCMK_VER=$(yum info --quiet pacemaker|grep ^Version) PCMK_VER="${PCMK_VER#*: }" # extract x.y.z PCMK_VER="${PCMK_VER:0:1}" # extract x if [ "$PCMK_VER" -ge 2 ]; then # if pacemaker version is 2.x, we suppose pcs support it (pcs >= 0.10) # from pcs 0.10, pcs host auth must be exec'ed on each node pcs host auth -u hacluster -p "${HAPASS}" "${NODES[@]}" else # this could be run on one node, but it doesn't hurt if it runs everywhere, # so we keep this piece of code with the one dedicated to pacemaker 2.x pcs cluster auth -u hacluster -p "${HAPASS}" "${NODES[@]}" fi # Stop PostgreSQL everywhere systemctl --quiet stop "postgresql-${PGVER}" if [ "$(hostname -s)" != "${NODES[0]}" ]; then exit 0 fi # WARNING: # Starting from here, everything is executed on first node only! if [ "$PCMK_VER" -ge 2 ]; then pcs cluster setup cluster_pgsql --force "${NODES[@]}" else pcs cluster setup --name cluster_pgsql --wait --force "${NODES[@]}" fi # pcs stonith sbd enable pcs cluster start --all --wait pcs cluster cib cluster1.xml pcs -f cluster1.xml resource defaults migration-threshold=5 pcs -f cluster1.xml resource defaults resource-stickiness=10 #pcs -f cluster1.xml property set stonith-watchdog-timeout=10s for VM in "${NODES[@]}"; do FENCE_ID="fence_vm_${VM}" VM_PORT="${VM_PREFIX}_${VM}" pcs -f cluster1.xml stonith create "${FENCE_ID}" fence_virsh \ pcmk_host_check=static-list "pcmk_host_list=${VM}" \ "port=${VM_PORT}" "ipaddr=${HOST_IP}" "login=${SSH_LOGIN}" \ "identity_file=/root/.ssh/id_rsa" pcs -f cluster1.xml constraint location "fence_vm_${VM}" \ avoids "${VM}=INFINITY" done PGSQLD_RSC_OPTS=( "ocf:heartbeat:pgsqlms" "pgport=5434" "bindir=/usr/pgsql-${PGVER}/bin" "pgdata=${PGDATA}" "recovery_template=${CUSTOMDIR}/recovery.conf.pcmk" "op" "start" "timeout=60s" "op" "stop" "timeout=60s" "op" "promote" "timeout=30s" "op" "demote" "timeout=120s" "op" "monitor" "interval=15s" "timeout=10s" "role=Master" "op" "monitor" "interval=16s" "timeout=10s" "role=Slave" "op" "notify" "timeout=60s" ) # NB: pcs 0.10.2 doesn't support to set the id of the clone XML node # the id is built from the rsc id to clone using "-clone" # As a matter of cohesion and code simplicity, we use the same # convention to create the master resource with pcs 0.9.x for # Pacemaker 1.1 if [ "$PCMK_VER" -ge 2 ]; then PGSQLD_RSC_OPTS+=( "promotable" "notify=true" ) fi pcs -f cluster1.xml resource create pgsqld "${PGSQLD_RSC_OPTS[@]}" if [ "$PCMK_VER" -eq 1 ]; then pcs -f cluster1.xml resource master pgsqld-clone pgsqld notify=true fi pcs cluster cib-push scope=configuration cluster1.xml --wait crm_mon -Dn1 PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/cts.bash000077500000000000000000000060161363154243400232650ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail # install packages PACKAGES=( pacemaker-cts patch ) yum install --nogpgcheck --quiet -y -e 0 "${PACKAGES[@]}" # do not drop any log messages from 
rsyslog cat <<'EOF'>/etc/rsyslog.d/rateLimit.conf $imjournalRatelimitInterval 0 $imjournalRatelimitBurst 0 EOF systemctl --quiet restart rsyslog # make journald logs persistent mkdir -p /var/log/journal # do not drop any log messages from journald mkdir -p /etc/systemd/journald.conf.d cat <<'EOF'>/etc/systemd/journald.conf.d/rateLimit.conf RateLimitInterval=0 RateLimitBurst=0 EOF systemctl --quiet restart systemd-journald # shellcheck disable=SC1091 source "/etc/os-release" OS_VER="$VERSION_ID" if [ "${OS_VER:0:2}" != "7." ]; then exit; fi # fix bug in the log watcher for EL7 cat <<'EOF' | patch /usr/lib64/python2.7/site-packages/cts/watcher.py *** /tmp/watcher.py.orig 2019-02-07 16:25:32.836265277 +0100 --- /tmp/watcher.py 2019-02-07 16:27:03.296926885 +0100 *************** *** 124,130 **** self.offset = "EOF" if host == None: ! host = "localhost" def __str__(self): if self.host: --- 124,130 ---- self.offset = "EOF" if host == None: ! self.host = "localhost" def __str__(self): if self.host: *************** *** 155,179 **** class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! SearchObj.__init__(self, filename, host, name) ! ! if host is not None: ! if not host in has_log_watcher: ! global log_watcher ! global log_watcher_bin ! self.debug("Installing %s on %s" % (log_watcher_file, host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (host, log_watcher_bin)) ! has_log_watcher[host] = 1 ! os.system("rm -f %s" %(log_watcher_file)) ! self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: --- 155,176 ---- class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! global log_watcher ! global log_watcher_bin ! SearchObj.__init__(self, filename, host, name) ! self.debug("Installing %s on %s" % (log_watcher_file, self.host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (self.host, log_watcher_bin)) ! has_log_watcher[self.host] = 1 ! os.system("rm -f %s" %(log_watcher_file)) ! 
self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: EOF PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/haproxy.bash000066400000000000000000000041171363154243400241630ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" $YUM_INSTALL haproxy systemctl --quiet --now disable haproxy cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg-dist cat <<'EOF' > /etc/haproxy/haproxy.cfg global log 127.0.0.1:514 local2 chroot /var/lib/haproxy pidfile /var/run/haproxy.pid maxconn 4000 user haproxy group haproxy daemon stats socket /var/lib/haproxy/stats defaults mode tcp log global option tcplog retries 3 timeout connect 10s timeout client 10m timeout server 10m timeout check 1s maxconn 300 listen stats mode http bind *:7000 stats enable stats uri / timeout connect 15s timeout client 15s timeout server 15s listen prd bind *:5432 option tcp-check tcp-check connect port 5431 tcp-check expect string production default-server inter 2s fastinter 1s rise 2 fall 1 on-marked-down shutdown-sessions server srv1 srv1:5434 check server srv2 srv2:5434 check server srv3 srv3:5434 check listen stb bind *:5433 balance leastconn option tcp-check tcp-check connect port 5431 tcp-check expect string standby default-server inter 2s fastinter 1s rise 2 fall 1 on-marked-down shutdown-sessions server srv1 srv1:5434 check server srv2 srv2:5434 check server srv3 srv3:5434 check EOF setsebool -P haproxy_connect_any=1 systemctl --quiet --now enable haproxy if ! firewall-cmd --get-services|grep -q haproxy-stats; then firewall-cmd --quiet --permanent --new-service="haproxy-stats" firewall-cmd --quiet --permanent --service="haproxy-stats" --set-description="HAProxy statistics" firewall-cmd --quiet --permanent --service="haproxy-stats" --add-port="7000/tcp" fi firewall-cmd --quiet --permanent --add-service="haproxy-stats" firewall-cmd --quiet --reload PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/id_rsa000066400000000000000000000032131363154243400230120ustar00rootroot00000000000000-----BEGIN RSA PRIVATE KEY----- MIIEogIBAAKCAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzI w+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoP kcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2 hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NO Td0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcW yLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQIBIwKCAQEA4iqWPJXtzZA68mKd ELs4jJsdyky+ewdZeNds5tjcnHU5zUYE25K+ffJED9qUWICcLZDc81TGWjHyAqD1 Bw7XpgUwFgeUJwUlzQurAv+/ySnxiwuaGJfhFM1CaQHzfXphgVml+fZUvnJUTvzf TK2Lg6EdbUE9TarUlBf/xPfuEhMSlIE5keb/Zz3/LUlRg8yDqz5w+QWVJ4utnKnK iqwZN0mwpwU7YSyJhlT4YV1F3n4YjLswM5wJs2oqm0jssQu/BT0tyEXNDYBLEF4A sClaWuSJ2kjq7KhrrYXzagqhnSei9ODYFShJu8UWVec3Ihb5ZXlzO6vdNQ1J9Xsf 4m+2ywKBgQD6qFxx/Rv9CNN96l/4rb14HKirC2o/orApiHmHDsURs5rUKDx0f9iP cXN7S1uePXuJRK/5hsubaOCx3Owd2u9gD6Oq0CsMkE4CUSiJcYrMANtx54cGH7Rk EjFZxK8xAv1ldELEyxrFqkbE4BKd8QOt414qjvTGyAK+OLD3M2QdCQKBgQDtx8pN CAxR7yhHbIWT1AH66+XWN8bXq7l3RO/ukeaci98JfkbkxURZhtxV/HHuvUhnPLdX 3TwygPBYZFNo4pzVEhzWoTtnEtrFueKxyc3+LjZpuo+mBlQ6ORtfgkr9gBVphXZG YEzkCD3lVdl8L4cw9BVpKrJCs1c5taGjDgdInQKBgHm/fVvv96bJxc9x1tffXAcj 3OVdUN0UgXNCSaf/3A/phbeBQe9xS+3mpc4r6qvx+iy69mNBeNZ0xOitIjpjBo2+ dBEjSBwLk5q5tJqHmy/jKMJL4n9ROlx93XS+njxgibTvU6Fp9w+NOFD/HvxB3Tcz 6+jJF85D5BNAG3DBMKBjAoGBAOAxZvgsKN+JuENXsST7F89Tck2iTcQIT8g5rwWC P9Vt74yboe2kDT531w8+egz7nAmRBKNM751U/95P9t88EDacDI/Z2OwnuFQHCPDF 
llYOUI+SpLJ6/vURRbHSnnn8a/XG+nzedGH5JGqEJNQsz+xT2axM0/W/CRknmGaJ kda/AoGANWrLCz708y7VYgAtW2Uf1DPOIYMdvo6fxIB5i9ZfISgcJ/bbCUkFrhoH +vq/5CIWxCPp0f85R4qxxQ5ihxJ0YDQT9Jpx4TMss4PSavPaBH3RXow5Ohe+bYoQ NE5OgEXk2wVfZczCZpigBKbKZHNYcelXtTt/nP3rsCuGcM4h53s= -----END RSA PRIVATE KEY----- PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/id_rsa.pub000066400000000000000000000006311363154243400236000ustar00rootroot00000000000000ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/pacemaker.bash000077500000000000000000000020521363154243400244200ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail HAPASS="$1" # shellcheck disable=SC1091 source "/etc/os-release" OS_ID="$ID" YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" # install required packages if [ "$OS_ID" = "rhel" ]; then # use yum instead of dnf for compatibility between EL 7 and 8 yum-config-manager --enable "*highavailability-rpms" fi PACKAGES=( pacemaker pcs resource-agents fence-agents-virsh sbd perl-Module-Build ) $YUM_INSTALL "${PACKAGES[@]}" # install PAF cd /vagrant [ -f Build ] && perl Build distclean sudo -u vagrant perl Build.PL --quiet >/dev/null 2>&1 sudo -u vagrant perl Build --quiet perl Build --quiet install # firewall setup firewall-cmd --quiet --permanent --add-service=high-availability firewall-cmd --quiet --reload # pcsd setup systemctl --quiet --now enable pcsd echo "${HAPASS}"|passwd --stdin hacluster > /dev/null 2>&1 # Pacemaker setup cp /etc/sysconfig/pacemaker /etc/sysconfig/pacemaker.dist cat<<'EOF' > /etc/sysconfig/pacemaker PCMK_debug=yes PCMK_logpriority=debug EOF PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/pgsql-replicas.bash000077500000000000000000000032051363154243400254170ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" NODENAME="$2" PGDATA="$3" CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # build standby "/usr/pgsql-${PGVER}/bin/pg_basebackup" -h 127.0.0.1 -U postgres -D "${PGDATA}" -X stream # set pg_hba cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust local replication all reject host replication all $NODENAME reject host replication all 127.0.0.1/32 reject host replication all ::1/128 reject # allow any standby connection host replication all 0.0.0.0/0 trust EOC cat < "${CUSTOMDIR}/cluster_name.conf" cluster_name = 'pgsql-$NODENAME' EOC if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=127.0.0.1 application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC cp "${CUSTOMDIR}/recovery.conf.pcmk" "${PGDATA}/recovery.conf" else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=127.0.0.1 application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 touch "${PGDATA}/standby.signal" fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." 
cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R "postgres:postgres" "${PGDATA}/.." # start systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/postgresql.bash000077500000000000000000000100501363154243400246700ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" NODENAME="$2" PGDATA="$3" # shellcheck disable=SC1091 source "/etc/os-release" OS_VER="$VERSION_ID" YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" if ! rpm --quiet -q "pgdg-redhat-repo"; then if [ "${OS_VER:0:2}" = "8." ]; then $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm" else $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm" fi fi # disable postgresql upstream module conflicting with pgdg packages in RHEL8 if [ "${OS_VER:0:2}" = "8." ]; then yum -qy module disable postgresql fi PACKAGES=( "postgresql${PGVER}" "postgresql${PGVER}-server" "postgresql${PGVER}-contrib" ) $YUM_INSTALL "${PACKAGES[@]}" # PostgreSQL state cat<<'EOF' > /etc/systemd/system/pgsql-state@.service [Unit] Description=Local PostgreSQL state [Service] User=postgres Group=postgres ExecStart=/usr/pgsql-12/bin/psql -d postgres -U postgres -p 5434 -Atc "select CASE pg_is_in_recovery() WHEN true THEN 'standby' ELSE 'production' END" StandardOutput=socket EOF cat<<'EOF' > /etc/systemd/system/pgsql-state.socket [Unit] Description=Local PostgreSQL state [Socket] ListenStream=5431 Accept=yes [Install] WantedBy=sockets.target EOF systemctl --quiet --now enable pgsql-state.socket # firewall setup firewall-cmd --quiet --permanent --service=postgresql --add-port="5433/tcp" firewall-cmd --quiet --permanent --service=postgresql --add-port="5434/tcp" firewall-cmd --quiet --permanent --remove-service=postgresql firewall-cmd --quiet --permanent --add-service=postgresql if ! 
firewall-cmd --get-services|grep -q pgsql-state; then firewall-cmd --quiet --permanent --new-service="pgsql-state" firewall-cmd --quiet --permanent --service="pgsql-state" --set-description="Local PostgreSQL state" firewall-cmd --quiet --permanent --service="pgsql-state" --add-port="5431/tcp" fi firewall-cmd --quiet --permanent --add-service="pgsql-state" firewall-cmd --quiet --reload if [ "$(hostname -s)" != "$NODENAME" ]; then exit 0 fi # Build the primary CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # init instance "/usr/pgsql-${PGVER}/bin/postgresql-${PGVER}-setup" initdb # pg_hba setup cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust local replication all reject host replication all $NODENAME reject host replication all 127.0.0.1/32 reject host replication all ::1/128 reject # allow any standby connection host replication postgres 0.0.0.0/0 trust EOC # postgresql.conf setup mkdir -p "$CUSTOMDIR" echo "include_dir = 'conf.d'" >> "${PGDATA}/postgresql.conf" cat < "${CUSTOMDIR}/cluster_name.conf" cluster_name = 'pgsql-$NODENAME' EOC cat <<'EOC' > "${CUSTOMDIR}/custom.conf" listen_addresses = '*' port = 5434 wal_level = replica max_wal_senders = 10 hot_standby = on hot_standby_feedback = on wal_keep_segments = 256 log_destination = 'syslog,stderr' log_checkpoints = on log_min_duration_statement = 0 log_autovacuum_min_duration = 0 log_replication_commands = on log_line_prefix = '%m [%p] host=%h ' EOC if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=127.0.0.1 application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=127.0.0.1 application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R postgres:postgres "$PGDATA" # restart master pgsql systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/rsyslog.bash000077500000000000000000000025231363154243400241750ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail LOG_SINK="$1" if [ "$(hostname -s)" == "$LOG_SINK" ]; then # setup log sink cat <<-'EOF' > /etc/rsyslog.d/log_sink.conf $ModLoad imtcp $InputTCPServerRun 514 $template RemoteLogsMerged,"/var/log/%HOSTNAME%/messages.log" *.* ?RemoteLogsMerged $template RemoteLogs,"/var/log/%HOSTNAME%/%PROGRAMNAME%.log" *.* ?RemoteLogs #& ~ EOF if ! 
firewall-cmd --get-services|grep -q rsyslog-tcp; then firewall-cmd --quiet --permanent --new-service="rsyslog-tcp" firewall-cmd --quiet --permanent --service="rsyslog-tcp" --set-description="RSyslog TCP port" firewall-cmd --quiet --permanent --service="rsyslog-tcp" --add-port="514/tcp" fi firewall-cmd --quiet --permanent --add-service="rsyslog-tcp" firewall-cmd --quiet --reload semanage port -m -t syslogd_port_t -p tcp 514 else # send logs to log-sinks cat <<-'EOF' >/etc/rsyslog.d/20-fwd_log_sink.conf *.* action(type="omfwd" queue.type="LinkedList" queue.filename="log_sink_fwd" action.resumeRetryCount="-1" queue.saveonshutdown="on" target="log-sink" Port="514" Protocol="tcp") EOF # listen for haproxy logs locally cat <<-'EOF' >/etc/rsyslog.d/10-haproxy.conf $ModLoad imudp $UDPServerAddress 127.0.0.1 $UDPServerRun 514 EOF fi systemctl --quiet restart rsyslog PAF-2.3.0/extra/vagrant/3nodes-haproxy/provision/system.bash000077500000000000000000000031131363154243400240130ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail NODENAME="$1" RHEL_USER="$2" RHEL_PASS="$3" shift 3 NODES=( "$@" ) hostnamectl set-hostname "${NODENAME}" for N in "${NODES[@]}"; do NG=$(sed -n "/${N%=*}\$/p" /etc/hosts|wc -l) if [ "$NG" -eq 0 ]; then echo "${N##*=} ${N%=*}" >> /etc/hosts fi done # shellcheck disable=SC1091 source "/etc/os-release" OS_ID="$ID" YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" PACKAGES=( vim bash-completion yum-utils policycoreutils policycoreutils-python ) if [ "$OS_ID" = "rhel" ]; then subscription-manager register --force --username "${RHEL_USER:?}" --password "${RHEL_PASS:?}" --auto-attach PACKAGES+=("tmux") else PACKAGES+=("screen") fi $YUM_INSTALL "${PACKAGES[@]}" cat <<'EOF' > "/home/vagrant/.ssh/config" Host * CheckHostIP no StrictHostKeyChecking no EOF cp "/vagrant/extra/vagrant/3nodes-haproxy/provision/id_rsa" "/home/vagrant/.ssh" cp "/vagrant/extra/vagrant/3nodes-haproxy/provision/id_rsa.pub" "/home/vagrant/.ssh" chown -R "vagrant:" "/home/vagrant/.ssh" chmod 0700 "/home/vagrant/.ssh" chmod 0600 "/home/vagrant/.ssh/id_rsa" chmod 0644 "/home/vagrant/.ssh/id_rsa.pub" chmod 0600 "/home/vagrant/.ssh/config" chmod 0600 "/home/vagrant/.ssh/authorized_keys" cp -R "/home/vagrant/.ssh" "/root" # force proper permissions on .ssh files chown -R "root:" "/root/.ssh" chmod 0700 "/root/.ssh" chmod 0600 "/root/.ssh/id_rsa" chmod 0644 "/root/.ssh/id_rsa.pub" chmod 0600 "/root/.ssh/config" chmod 0600 "/root/.ssh/authorized_keys" # enable firewall systemctl --quiet --now enable firewalld PAF-2.3.0/extra/vagrant/3nodes-haproxy/vagrant.yml-dist000066400000000000000000000012431363154243400227250ustar00rootroot00000000000000# boxname: "centos/7" # vagrant box to use # pgver: "10" # pg version to use # hapass: "hapass" # password for sys user hacluster # ssh_login: "user" # ssh login to connect to the host when fencing a VM. # # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys" # base_ip: "10.20.30.5" # Base IP address to compute other ones # pg_nodes: # servers to create. 
# - "srv1" # First one will be master # - "srv2" # - "srv3" # log_node: "log-sink" # vm_prefix: "paf_vm" # rhel_user: "" # RHEL user account # rhel_pass: "" # RHEL user account password PAF-2.3.0/extra/vagrant/3nodes-vip/000077500000000000000000000000001363154243400167035ustar00rootroot00000000000000PAF-2.3.0/extra/vagrant/3nodes-vip/Makefile000066400000000000000000000016431363154243400203470ustar00rootroot00000000000000export VAGRANT_BOX_UPDATE_CHECK_DISABLE=1 export VAGRANT_CHECKPOINT_DISABLE=1 .PHONY: all create_vm pgsql-primary pgsql-replicas pacemaker cts prov clean validate all: create_vm pgsql-replicas pacemaker create_vm: vagrant up pgsql-replicas: pcmk-stop pgsql-primary vagrant up --provision-with=pgsql-replicas pacemaker: vagrant up --provision-with=pacemaker pgsql-primary: pcmk-stop vagrant up --provision-with=pgsql-primary prov: vagrant up --provision clean: vagrant destroy -f check: validate validate: @vagrant validate @if which shellcheck >/dev/null ;\ then shellcheck provision/*bash ;\ else echo "WARNING: shellcheck is not in PATH, not checking bash syntax" ;\ fi cts: vagrant up --provision-with=cts pcmk-stop: vagrant ssh -c 'if [ -f "/etc/corosync/corosync.conf" ]; then sudo pcs cluster stop --all --wait; fi' PAF-2.3.0/extra/vagrant/3nodes-vip/Vagrantfile000066400000000000000000000110321363154243400210650ustar00rootroot00000000000000require 'ipaddr' require 'yaml' #ENV['VAGRANT_NO_PARALLEL'] = 'yes' # uncomment to forbid parallel execution ENV["LANG"] = "C" ENV["LC_ALL"] = "C" boxname = 'centos/7' # vagrant box to use pgver = '11' # pg version to use hapass = 'hapass' # password for sys user hacluster ssh_login = 'root' # ssh login to connect to the host when fencing a VM. # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys" master_ip = '10.20.30.5' # vIP assigned to master pg_nodes = 'srv1', 'srv2', 'srv3' # first will be primary log_node = 'log-sink' # name of the node receiving logs vm_prefix = 'paf_vm' # VM prefix in libvrit rhel_user = '' # RHEL user account rhel_pass = '' # RHEL user account password if File.file?('vagrant.yml') and ( custom = YAML.load_file('vagrant.yml') ) boxname = custom['boxname'] if custom.has_key?('boxname') pgver = custom['pgver'] if custom.has_key?('pgver') hapass = custom['hapass'] if custom.has_key?('hapass') ssh_login = custom['ssh_login'] if custom.has_key?('ssh_login') master_ip = custom['master_ip'] if custom.has_key?('master_ip') pg_nodes = custom['pg_nodes'] if custom.has_key?('pg_nodes') log_node = custom['log_node'] if custom.has_key?('log_node') vm_prefix = custom['vm_prefix'] if custom.has_key?('vm_prefix') rhel_user = custom['rhel_user'] if custom.has_key?('rhel_user') rhel_pass = custom['rhel_pass'] if custom.has_key?('rhel_pass') end Vagrant.configure(2) do |config| pgdata = "/var/lib/pgsql/#{pgver}/data" next_ip = IPAddr.new(master_ip).succ host_ip = (IPAddr.new(master_ip) & "255.255.255.0").succ.to_s nodes_ips = {} ( pg_nodes + [ log_node ] ).each do |node| nodes_ips[node] = next_ip.to_s next_ip = next_ip.succ end # don't mind about insecure ssh key config.ssh.insert_key = false # https://vagrantcloud.com/search. 
config.vm.box = boxname # hardware and host settings config.vm.provider 'libvirt' do |lv| lv.cpus = 1 lv.memory = 512 lv.watchdog model: 'i6300esb' lv.default_prefix = vm_prefix lv.qemu_use_session = false end # disable default share config.vm.synced_folder ".", "/vagrant", disabled: true config.vm.synced_folder "../../..", "/vagrant", type: "rsync", rsync__exclude: [ ".git/" ] # system setup for all nodes (pg_nodes + [log_node]).each do |node| config.vm.define node do |conf| conf.vm.network 'private_network', ip: nodes_ips[node] conf.vm.provision 'system-setup', type: 'shell', path: 'provision/system.bash', args: [ node, rhel_user, rhel_pass ] + nodes_ips.keys.map {|n| "#{n}=#{nodes_ips[n]}"}, preserve_order: true end end # setup rsyslog to collect logs from other node on log-sink config.vm.define log_node do |conf| conf.vm.provision 'rsyslog-setup', type: 'shell', path: 'provision/log_sink.bash' end # common postgresql+pacemaker installation and setup pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'cluster-common', type: 'shell', path: 'provision/cluster-common.bash', args: [ pgver, hapass, master_ip ], preserve_order: true end end # build primary pgsql instance config.vm.define pg_nodes.first, primary:true do |conf| conf.vm.provision 'pgsql-primary', type: 'shell', path: 'provision/pgsql-primary.bash', args: [ pgver, pgdata, master_ip, pg_nodes.first ], run: 'never' end # replicas setup. Use "vagrant up --provision-with=pgsql-replicas" pg_nodes[1..-1].each do |node| config.vm.define node do |conf| conf.vm.provision 'pgsql-replicas', type: 'shell', path: 'provision/pgsql-replicas.bash', args: [ pgver, pgdata, master_ip, node ], run: 'never' end end # cluster setup. Use "vagrant up --provision-with=pacemaker" pg_nodes.each do |node| config.vm.define node do |conf| conf.vm.provision 'pacemaker', type: 'shell', path:'provision/pacemaker.bash', args: [ pgver, hapass, master_ip, ssh_login, vm_prefix, host_ip, pgdata ] + pg_nodes, run: 'never' end end # cluster test suite setup. Use "vagrant up --provision-with=cts" config.vm.provision 'cts', type: 'shell', path: 'provision/cts.bash', run: 'never' end PAF-2.3.0/extra/vagrant/3nodes-vip/provision/000077500000000000000000000000001363154243400207335ustar00rootroot00000000000000PAF-2.3.0/extra/vagrant/3nodes-vip/provision/cluster-common.bash000077500000000000000000000047741363154243400245600ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" HAPASS="$2" MASTER_IP="$3" # shellcheck disable=SC1091 source "/etc/os-release" OS_ID="$ID" OS_VER="$VERSION_ID" YUM_INSTALL="yum install --nogpgcheck --quiet -y -e 0" # install required packages if [ "$OS_ID" = "rhel" ]; then # use yum instead of dnf for compatibility between EL 7 and 8 yum-config-manager --enable "*highavailability-rpms" fi if ! rpm --quiet -q "pgdg-redhat-repo"; then if [ "${OS_VER:0:2}" = "8." ]; then $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm" else $YUM_INSTALL "https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm" fi fi # disable postgresql upstream module conflicting with pgdg packages in RHEL8 if [ "${OS_VER:0:2}" = "8." 
]; then yum -qy module disable postgresql fi PACKAGES=( pacemaker pcs resource-agents fence-agents-virsh sbd perl-Module-Build "postgresql${PGVER}" "postgresql${PGVER}-server" "postgresql${PGVER}-contrib" ) $YUM_INSTALL "${PACKAGES[@]}" # firewall setup systemctl --quiet --now enable firewalld firewall-cmd --quiet --permanent --add-service=high-availability firewall-cmd --quiet --permanent --add-service=postgresql firewall-cmd --quiet --reload # cluster stuffs systemctl --quiet --now enable pcsd echo "${HAPASS}"|passwd --stdin hacluster > /dev/null 2>&1 cp /etc/sysconfig/pacemaker /etc/sysconfig/pacemaker.dist cat<<'EOF' > /etc/sysconfig/pacemaker PCMK_debug=yes PCMK_logpriority=debug EOF # cleanup master ip everywhere HAS_MASTER_IP=$(ip -o addr show to "${MASTER_IP}"|wc -l) if [ "$HAS_MASTER_IP" -gt 0 ]; then DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eom1 'dev \w+') ip addr del "${MASTER_IP}/24" dev "${DEV/dev }" fi # send logs to log-sinks cat <<'EOF' >/etc/rsyslog.d/fwd_log_sink.conf *.* action(type="omfwd" queue.type="LinkedList" queue.filename="log_sink_fwd" action.resumeRetryCount="-1" queue.saveonshutdown="on" target="log-sink" Port="514" Protocol="tcp") EOF systemctl --quiet restart rsyslog # cleanup pre-existing IP address ip -o addr show to "${MASTER_IP}" | if grep -q "${MASTER_IP}" then DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eo 'dev \w+') ip addr del "${MASTER_IP}/24" dev "${DEV/dev }" fi # install PAF cd /vagrant [ -f Build ] && perl Build distclean sudo -u vagrant perl Build.PL --quiet >/dev/null 2>&1 sudo -u vagrant perl Build --quiet perl Build --quiet install PAF-2.3.0/extra/vagrant/3nodes-vip/provision/cts.bash000077500000000000000000000060161363154243400223710ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail # install packages PACKAGES=( pacemaker-cts patch ) yum install --nogpgcheck --quiet -y -e 0 "${PACKAGES[@]}" # do not drop any log messages from rsyslog cat <<'EOF'>/etc/rsyslog.d/rateLimit.conf $imjournalRatelimitInterval 0 $imjournalRatelimitBurst 0 EOF systemctl --quiet restart rsyslog # make journald logs persistent mkdir -p /var/log/journal # do not drop any log messages from journald mkdir -p /etc/systemd/journald.conf.d cat <<'EOF'>/etc/systemd/journald.conf.d/rateLimit.conf RateLimitInterval=0 RateLimitBurst=0 EOF systemctl --quiet restart systemd-journald # shellcheck disable=SC1091 source "/etc/os-release" OS_VER="$VERSION_ID" if [ "${OS_VER:0:2}" != "7." ]; then exit; fi # fix bug in the log watcher for EL7 cat <<'EOF' | patch /usr/lib64/python2.7/site-packages/cts/watcher.py *** /tmp/watcher.py.orig 2019-02-07 16:25:32.836265277 +0100 --- /tmp/watcher.py 2019-02-07 16:27:03.296926885 +0100 *************** *** 124,130 **** self.offset = "EOF" if host == None: ! host = "localhost" def __str__(self): if self.host: --- 124,130 ---- self.offset = "EOF" if host == None: ! self.host = "localhost" def __str__(self): if self.host: *************** *** 155,179 **** class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! SearchObj.__init__(self, filename, host, name) ! ! if host is not None: ! if not host in has_log_watcher: ! global log_watcher ! global log_watcher_bin ! self.debug("Installing %s on %s" % (log_watcher_file, host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (host, log_watcher_bin)) ! has_log_watcher[host] = 1 ! 
os.system("rm -f %s" %(log_watcher_file)) ! self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: --- 155,176 ---- class FileObj(SearchObj): def __init__(self, filename, host=None, name=None): global has_log_watcher ! global log_watcher ! global log_watcher_bin ! SearchObj.__init__(self, filename, host, name) ! self.debug("Installing %s on %s" % (log_watcher_file, self.host)) ! os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher)) ! os.system("chmod 755 %s" %(log_watcher_file)) ! self.rsh.cp(log_watcher_file, "root@%s:%s" % (self.host, log_watcher_bin)) ! has_log_watcher[self.host] = 1 ! os.system("rm -f %s" %(log_watcher_file)) ! self.harvest() def async_complete(self, pid, returncode, outLines, errLines): for line in outLines: EOF PAF-2.3.0/extra/vagrant/3nodes-vip/provision/id_rsa000066400000000000000000000032131363154243400221160ustar00rootroot00000000000000-----BEGIN RSA PRIVATE KEY----- MIIEogIBAAKCAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzI w+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoP kcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2 hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NO Td0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcW yLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQIBIwKCAQEA4iqWPJXtzZA68mKd ELs4jJsdyky+ewdZeNds5tjcnHU5zUYE25K+ffJED9qUWICcLZDc81TGWjHyAqD1 Bw7XpgUwFgeUJwUlzQurAv+/ySnxiwuaGJfhFM1CaQHzfXphgVml+fZUvnJUTvzf TK2Lg6EdbUE9TarUlBf/xPfuEhMSlIE5keb/Zz3/LUlRg8yDqz5w+QWVJ4utnKnK iqwZN0mwpwU7YSyJhlT4YV1F3n4YjLswM5wJs2oqm0jssQu/BT0tyEXNDYBLEF4A sClaWuSJ2kjq7KhrrYXzagqhnSei9ODYFShJu8UWVec3Ihb5ZXlzO6vdNQ1J9Xsf 4m+2ywKBgQD6qFxx/Rv9CNN96l/4rb14HKirC2o/orApiHmHDsURs5rUKDx0f9iP cXN7S1uePXuJRK/5hsubaOCx3Owd2u9gD6Oq0CsMkE4CUSiJcYrMANtx54cGH7Rk EjFZxK8xAv1ldELEyxrFqkbE4BKd8QOt414qjvTGyAK+OLD3M2QdCQKBgQDtx8pN CAxR7yhHbIWT1AH66+XWN8bXq7l3RO/ukeaci98JfkbkxURZhtxV/HHuvUhnPLdX 3TwygPBYZFNo4pzVEhzWoTtnEtrFueKxyc3+LjZpuo+mBlQ6ORtfgkr9gBVphXZG YEzkCD3lVdl8L4cw9BVpKrJCs1c5taGjDgdInQKBgHm/fVvv96bJxc9x1tffXAcj 3OVdUN0UgXNCSaf/3A/phbeBQe9xS+3mpc4r6qvx+iy69mNBeNZ0xOitIjpjBo2+ dBEjSBwLk5q5tJqHmy/jKMJL4n9ROlx93XS+njxgibTvU6Fp9w+NOFD/HvxB3Tcz 6+jJF85D5BNAG3DBMKBjAoGBAOAxZvgsKN+JuENXsST7F89Tck2iTcQIT8g5rwWC P9Vt74yboe2kDT531w8+egz7nAmRBKNM751U/95P9t88EDacDI/Z2OwnuFQHCPDF llYOUI+SpLJ6/vURRbHSnnn8a/XG+nzedGH5JGqEJNQsz+xT2axM0/W/CRknmGaJ kda/AoGANWrLCz708y7VYgAtW2Uf1DPOIYMdvo6fxIB5i9ZfISgcJ/bbCUkFrhoH +vq/5CIWxCPp0f85R4qxxQ5ihxJ0YDQT9Jpx4TMss4PSavPaBH3RXow5Ohe+bYoQ NE5OgEXk2wVfZczCZpigBKbKZHNYcelXtTt/nP3rsCuGcM4h53s= -----END RSA PRIVATE KEY----- PAF-2.3.0/extra/vagrant/3nodes-vip/provision/id_rsa.pub000066400000000000000000000006311363154243400227040ustar00rootroot00000000000000ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key PAF-2.3.0/extra/vagrant/3nodes-vip/provision/log_sink.bash000077500000000000000000000005661363154243400234110ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail # setup log sink cat <<'EOF' > /etc/rsyslog.d/log_sink.conf $ModLoad imtcp $InputTCPServerRun 514 $template RemoteLogsMerged,"/var/log/%HOSTNAME%/messages.log" *.* ?RemoteLogsMerged $template 
RemoteLogs,"/var/log/%HOSTNAME%/%PROGRAMNAME%.log" *.* ?RemoteLogs #& ~ EOF systemctl --quiet restart rsyslog PAF-2.3.0/extra/vagrant/3nodes-vip/provision/pacemaker.bash000077500000000000000000000064301363154243400235300ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" HAPASS="$2" MASTER_IP="$3" SSH_LOGIN="$4" VM_PREFIX="$5" HOST_IP="$6" PGDATA="$7" shift 7 NODES=( "$@" ) CUSTOMDIR="${PGDATA}/conf.d" PCMK_VER=$(yum info --quiet pacemaker|grep ^Version) PCMK_VER="${PCMK_VER#*: }" # extract x.y.z PCMK_VER="${PCMK_VER:0:1}" # extract x if [ "$PCMK_VER" -ge 2 ]; then # if pacemaker version is 2.x, we suppose pcs support it (pcs >= 0.10) # from pcs 0.10, pcs host auth must be exec'ed on each node pcs host auth -u hacluster -p "${HAPASS}" "${NODES[@]}" else # this could be run on one node, but it doesn't hurt if it runs everywhere, # so we keep this piece of code with the one dedicated to pacemaker 2.x pcs cluster auth -u hacluster -p "${HAPASS}" "${NODES[@]}" fi if [ "$(hostname -s)" != "${NODES[0]}" ]; then exit 0 fi # WARNING: # Starting from here, everything is executed on first node only! if [ "$PCMK_VER" -ge 2 ]; then pcs cluster setup cluster_pgsql --force "${NODES[@]}" else pcs cluster setup --name cluster_pgsql --wait --force "${NODES[@]}" fi pcs stonith sbd enable pcs cluster start --all --wait pcs cluster cib cluster1.xml pcs -f cluster1.xml resource defaults migration-threshold=5 pcs -f cluster1.xml resource defaults resource-stickiness=10 pcs -f cluster1.xml property set stonith-watchdog-timeout=10s for VM in "${NODES[@]}"; do FENCE_ID="fence_vm_${VM}" VM_PORT="${VM_PREFIX}_${VM}" pcs -f cluster1.xml stonith create "${FENCE_ID}" fence_virsh \ pcmk_host_check=static-list "pcmk_host_list=${VM}" \ "port=${VM_PORT}" "ipaddr=${HOST_IP}" "login=${SSH_LOGIN}" \ "identity_file=/root/.ssh/id_rsa" pcs -f cluster1.xml constraint location "fence_vm_${VM}" \ avoids "${VM}=INFINITY" done PGSQLD_RSC_OPTS=( "ocf:heartbeat:pgsqlms" "bindir=/usr/pgsql-${PGVER}/bin" "pgdata=${PGDATA}" "recovery_template=${CUSTOMDIR}/recovery.conf.pcmk" "op" "start" "timeout=60s" "op" "stop" "timeout=60s" "op" "promote" "timeout=30s" "op" "demote" "timeout=120s" "op" "monitor" "interval=15s" "timeout=10s" "role=Master" "op" "monitor" "interval=16s" "timeout=10s" "role=Slave" "op" "notify" "timeout=60s" ) # NB: pcs 0.10.2 doesn't support to set the id of the clone XML node # the id is built from the rsc id to clone using "-clone" # As a matter of cohesion and code simplicity, we use the same # convention to create the master resource with pcs 0.9.x for # Pacemaker 1.1 if [ "$PCMK_VER" -ge 2 ]; then PGSQLD_RSC_OPTS+=( "promotable" "notify=true" ) fi pcs -f cluster1.xml resource create pgsqld "${PGSQLD_RSC_OPTS[@]}" if [ "$PCMK_VER" -eq 1 ]; then pcs -f cluster1.xml resource master pgsqld-clone pgsqld notify=true fi pcs -f cluster1.xml resource create pgsql-master-ip \ "ocf:heartbeat:IPaddr2" "ip=${MASTER_IP}" cidr_netmask=24 \ op monitor interval=10s pcs -f cluster1.xml constraint colocation add pgsql-master-ip with master pgsqld-clone INFINITY pcs -f cluster1.xml constraint order promote pgsqld-clone "then" start pgsql-master-ip symmetrical=false pcs -f cluster1.xml constraint order demote pgsqld-clone "then" stop pgsql-master-ip symmetrical=false pcs cluster cib-push scope=configuration cluster1.xml --wait crm_mon -Dn1 
PAF-2.3.0/extra/vagrant/3nodes-vip/provision/pgsql-primary.bash000077500000000000000000000040331363154243400244040ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" PGDATA="$2" MASTER_IP="$3" NODENAME="$4" CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # init instance "/usr/pgsql-${PGVER}/bin/postgresql-${PGVER}-setup" initdb # pg_hba setup cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust # forbid self-replication host replication postgres ${MASTER_IP}/32 reject host replication postgres ${NODENAME} reject # allow any standby connection host replication postgres 0.0.0.0/0 trust EOC # postgresql.conf setup mkdir -p "$CUSTOMDIR" echo "include_dir = 'conf.d'" >> "${PGDATA}/postgresql.conf" cat <<'EOC' > "${CUSTOMDIR}/custom.conf" listen_addresses = '*' wal_level = replica max_wal_senders = 10 hot_standby = on hot_standby_feedback = on wal_keep_segments = 256 log_destination = 'syslog,stderr' log_checkpoints = on log_min_duration_statement = 0 log_autovacuum_min_duration = 0 log_replication_commands = on EOC if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R postgres:postgres "$PGDATA" # create master ip ip -o addr show to "${MASTER_IP}" | if ! grep -q "${MASTER_IP}" then DEV=$(ip route show to "${MASTER_IP}/24"|grep -Eo 'dev \w+') ip addr add "${MASTER_IP}/24" dev "${DEV/dev }" fi # restart master pgsql systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/3nodes-vip/provision/pgsql-replicas.bash000077500000000000000000000031151363154243400245230ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail PGVER="$1" PGDATA="$2" MASTER_IP="$3" NODENAME="$4" CUSTOMDIR="${PGDATA}/conf.d" # cleanup systemctl --quiet --now disable "postgresql-${PGVER}" rm -rf "${PGDATA}" # build standby "/usr/pgsql-${PGVER}/bin/pg_basebackup" -h "${MASTER_IP}" -U postgres -D "${PGDATA}" -X stream # set pg_hba cat< "${PGDATA}/pg_hba.conf" local all all trust host all all 0.0.0.0/0 trust # forbid self-replication host replication postgres ${MASTER_IP}/32 reject host replication postgres ${NODENAME} reject # allow any standby connection host replication postgres 0.0.0.0/0 trust EOC cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." if [ "${PGVER%%.*}" -lt 12 ]; then # recovery.conf setup cat<<-EOC > "${CUSTOMDIR}/recovery.conf.pcmk" standby_mode = on primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' recovery_target_timeline = 'latest' EOC cp "${CUSTOMDIR}/recovery.conf.pcmk" "${PGDATA}/recovery.conf" else cat <<-EOC > "${CUSTOMDIR}/repli.conf" primary_conninfo = 'host=${MASTER_IP} application_name=${NODENAME}' EOC # standby_mode disappear in v12 # no need to add recovery_target_timeline as its default is 'latest' since v12 touch "${PGDATA}/standby.signal" fi # backing up files cp "${PGDATA}/pg_hba.conf" "${PGDATA}/.." cp "${PGDATA}/postgresql.conf" "${PGDATA}/.." 
cp "${CUSTOMDIR}"/* "${PGDATA}/.." chown -R "postgres:postgres" "${PGDATA}/.." # start systemctl --quiet start "postgresql-${PGVER}" PAF-2.3.0/extra/vagrant/3nodes-vip/provision/system.bash000077500000000000000000000027031363154243400231230ustar00rootroot00000000000000#!/usr/bin/env bash set -o errexit set -o nounset set -o pipefail NODENAME="$1" RHEL_USER="$2" RHEL_PASS="$3" shift NODES=( "$@" ) hostnamectl set-hostname "${NODENAME}" for N in "${NODES[@]}"; do NG=$(sed -n "/${N%=*}\$/p" /etc/hosts|wc -l) if [ "$NG" -eq 0 ]; then echo "${N##*=} ${N%=*}" >> /etc/hosts fi done # shellcheck disable=SC1091 source "/etc/os-release" OS_ID="$ID" PACKAGES=( vim bash-completion yum-utils ) if [ "$OS_ID" = "rhel" ]; then subscription-manager register --force --username "${RHEL_USER:?}" --password "${RHEL_PASS:?}" --auto-attach PACKAGES+=("tmux") else PACKAGES+=("screen") fi yum install --nogpgcheck --quiet -y -e 0 "${PACKAGES[@]}" cat <<'EOF' > "/home/vagrant/.ssh/config" Host * CheckHostIP no StrictHostKeyChecking no EOF cp "/vagrant/extra/vagrant/3nodes-vip/provision/id_rsa" "/home/vagrant/.ssh" cp "/vagrant/extra/vagrant/3nodes-vip/provision/id_rsa.pub" "/home/vagrant/.ssh" chown -R "vagrant:" "/home/vagrant/.ssh" chmod 0700 "/home/vagrant/.ssh" chmod 0600 "/home/vagrant/.ssh/id_rsa" chmod 0644 "/home/vagrant/.ssh/id_rsa.pub" chmod 0600 "/home/vagrant/.ssh/config" chmod 0600 "/home/vagrant/.ssh/authorized_keys" cp -R "/home/vagrant/.ssh" "/root" # force proper permissions on .ssh files chown -R "root:" "/root/.ssh" chmod 0700 "/root/.ssh" chmod 0600 "/root/.ssh/id_rsa" chmod 0644 "/root/.ssh/id_rsa.pub" chmod 0600 "/root/.ssh/config" chmod 0600 "/root/.ssh/authorized_keys" PAF-2.3.0/extra/vagrant/3nodes-vip/vagrant.yml-dist000066400000000000000000000012241363154243400220300ustar00rootroot00000000000000# boxname: "centos/7" # vagrant box to use # pgver: "10" # pg version to use # hapass: "hapass" # password for sys user hacluster # ssh_login: "user" # ssh login to connect to the host when fencing a VM. # # put "./provision/id_rsa.pub" in your "~/.ssh/authorized_keys" # master_ip: "10.20.30.5" # vIP assigned to master # pg_nodes: # servers to create. # - "srv1" # First one will be master # - "srv2" # - "srv3" # log_node: "log-sink" # vm_prefix: "paf_vm" # rhel_user: "" # RHEL user account # rhel_pass: "" # RHEL user account password PAF-2.3.0/extra/vagrant/README.md000066400000000000000000000110671363154243400162000ustar00rootroot00000000000000# How to bootstrap a cluster using vagrant This `Vagrantfile` is bootstrapping a fresh cluster with: * servers `srv1`, `srv2` and `srv3` hosting a pgsql cluster with streaming replication * pgsql primary is on `srv1` and the two standby are on `srv2` and `srv3` * server `log-sink` where all logs from `srv1`, `srv2` and `srv3` are collected under `/var/log/` * pacemaker stack is setup on `srv1`, `srv2` and `srv3` * fencing using `fence_virsh` * watchdog enabled Note that NTP is enabled by default (using chrony) in the vagrant box used (`centos/7`). No need to set it up ourselves. This README takes `3nodes-vip` as example. Replace with the cluster name you want: `3nodes-vip`, `3nodes-haproxy` or `2nodes-qdevice-vip`. ## Prerequisites You need `vagrant` and `vagrant-libvirt`. Everything is tested with versions 2.0.2 and 0.0.40. Please, report your versions if it works with inferior ones. 
~~~ apt install make vagrant vagrant-libvirt libvirt-clients # for Debian-like yum install make vagrant vagrant-libvirt libvirt-client # for RH-like dnf install make vagrant vagrant-libvirt libvirt-client # for recent RH-like systemctl enable --now libvirtd ~~~ Alternatively, you might be able to install vagrant-libvirt only for your current user using (depending on the system, this might not work): ~~~ vagrant plugin install vagrant-libvirt ~~~ Pacemaker must be able to ssh to the libvirt host with no password using a user able to `virsh destroy $other_vm`. Here are the steps: * copy `/extra/vagrant/3nodes-vip/provision/id_rsa.pub` inside `user@host:~/.ssh/authorized_keys` * edit `ssh_login` in the `vagrant.yml` configuration file * user might need to be in group `libvirt` * user might need to add `uri_default='qemu:///system'` in its file `~/.config/libvirt/libvirt.conf` * make sure sshd is started on the host Here is a setup example: ~~~ #### Replace "myuser" with your usual user #### root$ systemctl start sshd root$ export MYUSER=myuser root$ usermod -a -G libvirt "$MYUSER" root$ su - $MYUSER myuser$ mkdir -p "${HOME}/.config/libvirt" myuser$ echo "uri_default='qemu:///system'" > "${HOME}/.config/libvirt/libvirt.conf" myuser$ git clone https://github.com/ClusterLabs/PAF.git myuser$ cd PAF/extra/vagrant/3nodes-vip myuser$ cat "provision/id_rsa.pub" >> "${HOME}/.ssh/authorized_keys" myuser$ echo "ssh_login: \"$USER\"" >> vagrant.yml ~~~ ## Creating the cluster To create the cluster, run: ~~~ cd PAF/extra/vagrant/3nodes-vip make all ~~~ After some minutes and tons of log messages, you can connect to your servers using eg.: ~~~ vagrant ssh srv1 vagrant ssh log-sink ~~~ ## Destroying the cluster To destroy your cluster, either run: ~~~ make clean ~~~ or ~~~ vagrant destroy -f ~~~ ## Customization You can edit file `vagrant.yml`: ~~~ cp vagrant.yml-dist vagrant.yml $EDITOR vagrant.yml make clean make all ~~~ ## OS This Vagrant environment currently supports CentOS 7 and RHEL 8. Use `boxname` in your `vagrant.yml` file (see chapter "Customization") to set the OS you want, eg.: `centos/7` or `generic/rhel8` In regard with RHEL 8, you must provide an active Redhat account with related subscriptions using `rhel_user` and `rhel_pass`. Set them in your `vagrant.yml` file (see chapter "Customization"). Do not forget this Vagrant environment is building four VM. All will consume one subscription if you pick a Redhat box. You will have to remove them by hands (eg. from the Redhat website) as soon as you finish with your tests. ## Cluster Test Suite Once your cluster is up and running, you can install the Cluster Test Suite from the Pacemaker project using: ~~~ make cts ~~~ Then, you'll be able to start the exerciser from the log-sink server using eg.: ~~~ vagrant ssh -c "sudo pcs cluster stop --all" vagrant ssh log-sink sudo -i cd /usr/share/pacemaker/tests/cts ./CTSlab.py --nodes "srv1 srv2 srv3" --outputfile ~/cts.log --once ~~~ You can select the test you want to run with: ~~~ ./CTSlab.py --nodes "srv1 srv2 srv3" --list-tests ./CTSlab.py --nodes "srv1 srv2 srv3" --outputfile ~/cts.log --choose <$NAME> 1 ~~~ Where `<$NAME>` is the name of the test you want to run. 
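For example, assuming the `Restart` scenario exists in your Pacemaker build
(check the `--list-tests` output first), a single run of it looks like:

~~~
./CTSlab.py --nodes "srv1 srv2 srv3" --outputfile ~/cts.log --choose Restart 1
~~~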
You can exercise the cluster randomly and repetitively with: ~~~ ./CTSlab.py --nodes "srv1 srv2 srv3" --outputfile ~/cts.log <$NTESTS> ~~~ ## Tips Find all existing VM created by vagrant on your system: ~~~ vagrant global-status ~~~ Shutdown all VM: ~~~ vagrant ssh -c "sudo pcs resource disable pgsqld-clone --wait" vagrant halt ~~~ Restart cluster: ~~~ vagrant up vagrant ssh -c "sudo pcs cluster start --all" vagrant ssh -c "sudo pcs resource enable pgsqld-clone --wait" ~~~ PAF-2.3.0/lib/000077500000000000000000000000001363154243400126755ustar00rootroot00000000000000PAF-2.3.0/lib/OCF_Directories.pm.PL000077500000000000000000000072461363154243400165240ustar00rootroot00000000000000#!/usr/bin/perl use strict; use warnings; use Config; use Module::Build; my $VERSION = 'v2.3.0'; my $build = Module::Build->current; my %ocf_dirs = %{ $build->notes('ocf_dirs') }; die "Could not find ocf_dirs generated by Build.PL!" unless exists $ocf_dirs{'HA_DIR'}; open OUT, '>', 'lib/OCF_Directories.pm'; print OUT qq{$Config{'startperl'} # This program is open source, licensed under the PostgreSQL License. # For license terms, see the LICENSE file. # # Copyright (C) 2016-2020: Jehan-Guillaume de Rorthais and Mael Rimbault =head1 NAME OCF_Directories - Binaries and binary options for use in Resource Agents =head1 SYNOPSIS use FindBin; use lib "\$FindBin::RealBin/../../lib/heartbeat/"; use OCF_Directories; =head1 DESCRIPTION This module has been ported from the ocf-directories shell script of the resource-agents project. See L. =head1 VARIABLES Here are the variables exported by this module: =over =item \$INITDIR =item \$HA_DIR =item \$HA_RCDIR =item \$HA_CONFDIR =item \$HA_CF =item \$HA_VARLIB =item \$HA_RSCTMP =item \$HA_RSCTMP_OLD =item \$HA_FIFO =item \$HA_BIN =item \$HA_SBIN_DIR =item \$HA_DATEFMT =item \$HA_DEBUGLOG =item \$HA_RESOURCEDIR =item \$HA_DOCDIR =item \$__SCRIPT_NAME =item \$HA_VARRUN =item \$HA_VARLOCK =item \$ocf_prefix =item \$ocf_exec_prefix =back =cut package OCF_Directories; use strict; use warnings; use 5.008; use File::Basename; BEGIN { use Exporter; our \$VERSION = '$VERSION'; our \@ISA = ('Exporter'); our \@EXPORT = qw( \$INITDIR \$HA_DIR \$HA_RCDIR \$HA_CONFDIR \$HA_CF \$HA_VARLIB \$HA_RSCTMP \$HA_RSCTMP_OLD \$HA_FIFO \$HA_BIN \$HA_SBIN_DIR \$HA_DATEFMT \$HA_DEBUGLOG \$HA_RESOURCEDIR \$HA_DOCDIR \$__SCRIPT_NAME \$HA_VARRUN \$HA_VARLOCK \$ocf_prefix \$ocf_exec_prefix ); our \@EXPORT_OK = ( \@EXPORT ); } our \$INITDIR = ( \$ENV{'INITDIR'} || '$ocf_dirs{'INITDIR'}' ); our \$HA_DIR = ( \$ENV{'HA_DIR'} || '$ocf_dirs{'HA_DIR'}' ); our \$HA_RCDIR = ( \$ENV{'HA_RCDIR'} || '$ocf_dirs{'HA_RCDIR'}' ); our \$HA_CONFDIR = ( \$ENV{'HA_CONFDIR'} || '$ocf_dirs{'HA_CONFDIR'}' ); our \$HA_CF = ( \$ENV{'HA_CF'} || '$ocf_dirs{'HA_CF'}' ); our \$HA_VARLIB = ( \$ENV{'HA_VARLIB'} || '$ocf_dirs{'HA_VARLIB'}' ); our \$HA_RSCTMP = ( \$ENV{'HA_RSCTMP'} || '$ocf_dirs{'HA_RSCTMP'}' ); our \$HA_RSCTMP_OLD = ( \$ENV{'HA_RSCTMP_OLD'} || '$ocf_dirs{'HA_RSCTMP_OLD'}' ); our \$HA_FIFO = ( \$ENV{'HA_FIFO'} || '$ocf_dirs{'HA_FIFO'}' ); our \$HA_BIN = ( \$ENV{'HA_BIN'} || '$ocf_dirs{'HA_BIN'}' ); our \$HA_SBIN_DIR = ( \$ENV{'HA_SBIN_DIR'} || '$ocf_dirs{'HA_SBIN_DIR'}' ); our \$HA_DATEFMT = ( \$ENV{'HA_DATEFMT'} || '$ocf_dirs{'HA_DATEFMT'}' ); our \$HA_DEBUGLOG = ( \$ENV{'HA_DEBUGLOG'} || '$ocf_dirs{'HA_DEBUGLOG'}' ); our \$HA_RESOURCEDIR = ( \$ENV{'HA_RESOURCEDIR'}|| '$ocf_dirs{'HA_RESOURCEDIR'}' ); our \$HA_DOCDIR = ( \$ENV{'HA_DOCDIR'} || '$ocf_dirs{'HA_DOCDIR'}' ); our \$__SCRIPT_NAME = ( \$ENV{'__SCRIPT_NAME'} || 
fileparse(\$0) ); our \$HA_VARRUN = ( \$ENV{'HA_VARRUN'} || '$ocf_dirs{'HA_VARRUN'}' ); our \$HA_VARLOCK = ( \$ENV{'HA_VARLOCK'} || '$ocf_dirs{'HA_VARLOCK'}' ); our \$ocf_prefix = '$ocf_dirs{'prefix'}'; our \$ocf_exec_prefix = '$ocf_dirs{'exec_prefix'}'; 1; =head1 COPYRIGHT AND LICENSE Copyright (C) 2016: Jehan-Guillaume de Rorthais and Mael Rimbault. Licensed under the PostgreSQL License. }; close OUT; PAF-2.3.0/lib/OCF_Functions.pm000077500000000000000000000405601363154243400157020ustar00rootroot00000000000000#!/usr/bin/perl # This program is open source, licensed under the PostgreSQL License. # For license terms, see the LICENSE file. # # Copyright (C) 2016-2020: Jehan-Guillaume de Rorthais and Mael Rimbault =head1 NAME OCF_Functions - helper subroutines for OCF agent =head1 SYNOPSIS use FindBin; use lib "$FindBin::RealBin/../../lib/heartbeat/"; use OCF_Functions; =head1 DESCRIPTION This module has been ported from the ocf-shellfuncs shell script of the resource-agents project. See L. =head1 VARIABLE The only variable exported by this module is C<__OCF_ACTION>. =head1 SUBROUTINES Here are the subroutines ported from ocf-shellfuncs and exported by this module: =over =item ha_debug =item ha_log =item hadate =item ocf_is_clone =item ocf_is_ms =item ocf_is_probe =item ocf_is_root =item ocf_is_true =item ocf_is_ver =item ocf_local_nodename =item ocf_log =item ocf_exit_reason =item ocf_maybe_random =item ocf_ver2num =item ocf_ver_complete_level =item ocf_ver_level =item ocf_version_cmp =item set_logtag =back Here are the subroutines only existing in the perl module but not in the ocf-shellfuncs script: =over =item ocf_notify_env =back =cut package OCF_Functions; use strict; use warnings; use 5.008; use POSIX qw( strftime setlocale LC_ALL ); use English; use FindBin; use lib "$FindBin::RealBin/../../lib/heartbeat/"; use OCF_ReturnCodes; use OCF_Directories; BEGIN { use Exporter; our $VERSION = 'v2.3.0'; our @ISA = ('Exporter'); our @EXPORT = qw( $__OCF_ACTION ocf_is_root ocf_maybe_random ocf_is_true hadate set_logtag ha_log ha_debug ocf_log ocf_exit_reason ocf_is_probe ocf_is_clone ocf_is_ms ocf_is_ver ocf_ver2num ocf_ver_level ocf_ver_complete_level ocf_version_cmp ocf_local_nodename ocf_notify_env ); our @EXPORT_OK = ( @EXPORT ); } our $__OCF_ACTION; sub ocf_is_root { return $EUID == 0; } sub ocf_maybe_random { return int( rand( 32767 ) ); } sub ocf_is_true { my $v = shift; return ( defined $v and $v =~ /^(?:yes|true|1|YES|TRUE|ja|on|ON)$/ ); } sub hadate { return strftime( $HA_DATEFMT, localtime ); } sub set_logtag { return if defined $ENV{'HA_LOGTAG'} and $ENV{'HA_LOGTAG'} ne ''; if ( defined $ENV{'OCF_RESOURCE_INSTANCE'} and $ENV{'OCF_RESOURCE_INSTANCE'} ne '' ) { $ENV{'HA_LOGTAG'} = "$__SCRIPT_NAME($ENV{'OCF_RESOURCE_INSTANCE'})[$PID]"; } else { $ENV{'HA_LOGTAG'}="${__SCRIPT_NAME}[$PID]"; } } sub __ha_log { my $ignore_stderr = 0; my $loglevel = ''; if ( $_[0] eq '--ignore-stderr' ) { $ignore_stderr = 1; shift; } $ENV{'HA_LOGFACILITY'} = '' if not defined $ENV{'HA_LOGFACILITY'} or $ENV{'HA_LOGFACILITY'} eq 'none'; # if we're connected to a tty, then output to stderr if ( -t STDERR ) { # FIXME # T.N.: this was ported with the bug on $loglevel being empty # and never set before the test here... 
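        # (Consequence: $loglevel is still empty at this point, so the
        # debug filter below never matches on a tty; ha_debug() performs
        # its own HA_debug check instead.)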
if ( defined $ENV{'HA_debug'} and $ENV{'HA_debug'} == 0 and $loglevel eq 'debug' ) { return 0; } elsif ( $ignore_stderr ) { # something already printed this error to stderr, so ignore return 0; } if ( defined $ENV{'HA_LOGTAG'} and $ENV{'HA_LOGTAG'} ne '' ) { printf STDERR "%s: %s\n", $ENV{'HA_LOGTAG'}, join ' ', @ARG; } else { printf STDERR "%s\n", join ' ', @ARG; } return 0; } set_logtag(); if ( defined $ENV{'HA_LOGD'} and $ENV{'HA_LOGD'} eq 'yes' ) { system 'ha_logger', '-t', $ENV{'HA_LOGTAG'}, @ARG; return 0 if ( $? >> 8 ) == 0; } unless ( $ENV{'HA_LOGFACILITY'} eq '' ) { # logging through syslog # loglevel is unknown, use 'notice' for now $loglevel = 'notice'; for ( "@ARG" ) { if ( /ERROR/ ) { $loglevel = 'err'; } elsif ( /WARN/ ) { $loglevel = 'warning'; } elsif (/INFO|info/ ) { $loglevel = 'info'; } } system 'logger', '-t', $ENV{'HA_LOGTAG'}, '-p', "$ENV{'HA_LOGFACILITY'}.$loglevel", @ARG; } if ( defined $ENV{'HA_LOGFILE'} and $ENV{'HA_LOGFILE'} ne '' ) { # appending to $HA_LOGFILE open my $logfile, '>>', $ENV{'HA_LOGFILE'}; printf $logfile "%s: %s %s\n", $ENV{'HA_LOGTAG'}, hadate(), join (' ', @ARG); close $logfile; } # appending to stderr printf STDERR "%s %s\n", hadate(), join ' ', @ARG if (not defined $ENV{'HA_LOGFACILITY'} or $ENV{'HA_LOGFACILITY'} eq '') and (not defined $ENV{'HA_LOGFILE'} or $ENV{'HA_LOGFILE'} eq '' ) and not $ignore_stderr; if ( defined $ENV{'HA_DEBUGLOG'} and $ENV{'HA_DEBUGLOG'} ne '' and $ENV{'HA_LOGFILE'} ne $ENV{'HA_DEBUGLOG'} ) { # appending to $HA_DEBUGLOG open my $logfile, '>>', $ENV{'HA_DEBUGLOG'}; printf $logfile "%s: %s %s\n", $ENV{'HA_LOGTAG'}, hadate(), join (' ', @ARG); close $logfile; } } sub ha_log { return __ha_log( @ARG ); } sub ha_debug { return 0 if defined $ENV{'HA_debug'} and $ENV{'HA_debug'} == 0; if ( -t STDERR ) { if ( defined $ENV{'HA_LOGTAG'} and $ENV{'HA_LOGTAG'} ne '' ) { printf STDERR "%s: %s\n", $ENV{'HA_LOGTAG'}, join ' ', @ARG; } else { printf STDERR "%s\n", join ' ', @ARG; } return 0; } set_logtag(); if ( defined $ENV{'HA_LOGD'} and $ENV{'HA_LOGD'} eq 'yes' ) { system 'ha_logger', '-t', $ENV{'HA_LOGTAG'}, '-D', 'ha-debug', @ARG; return 0 if ( $? >> 8 ) == 0; } $ENV{'HA_LOGFACILITY'} = '' if not defined $ENV{'HA_LOGFACILITY'} or $ENV{'HA_LOGFACILITY'} eq 'none'; unless ( $ENV{'HA_LOGFACILITY'} eq '' ) { # logging through syslog system 'logger', '-t', $ENV{'HA_LOGTAG'}, '-p', "$ENV{'HA_LOGFACILITY'}.debug", @ARG; } if ( defined $ENV{'HA_DEBUGLOG'} and -f $ENV{'HA_DEBUGLOG'} ) { my $logfile; # appending to $HA_DEBUGLOG open $logfile, '>>', $ENV{'HA_DEBUGLOG'}; printf $logfile "%s: %s %s\n", $ENV{'HA_LOGTAG'}, hadate(), join (' ', @ARG); close $logfile; } # appending to stderr printf STDERR "%s: %s %s\n", $ENV{'HA_LOGTAG'}, hadate(), join ' ', @ARG if (not defined $ENV{'HA_LOGFACILITY'} or $ENV{'HA_LOGFACILITY'} eq '') and (not defined $ENV{'HA_DEBUGLOG'} or $ENV{'HA_DEBUGLOG'} eq '' ); } # # ocf_log: log messages from the resource agent # This function is slightly different from its equivalent in ocf-shellfuncs.in # as it behaves like printf. # Arguments: # * __OCF_PRIO: log level # * __OCF_MSG: printf-like format string # * all other arguments are values for the printf-like format string # sub ocf_log { my $__OCF_PRIO; my $__OCF_MSG; # TODO: Revisit and implement internally. 
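    # Example usage (printf-style), e.g. as called from the pgsqlms agent:
    #   ocf_log( 'debug', '_query: psql return code: %d', $rc );
    #   ocf_log( 'err', 'Could not query last received LSN (%s)', $rc );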
if ( scalar @ARG < 2 ) { ocf_log ( 'err', "Not enough arguments [%d] to ocf_log", scalar @ARG ); } $__OCF_PRIO = shift; $__OCF_MSG = shift; $__OCF_MSG = sprintf $__OCF_MSG, @ARG; for ( $__OCF_PRIO ) { if ( /crit/ ) { $__OCF_PRIO = 'CRIT' } elsif ( /err/ ) { $__OCF_PRIO = 'ERROR' } elsif ( /warn/ ) { $__OCF_PRIO = 'WARNING' } elsif ( /info/ ) { $__OCF_PRIO = 'INFO' } elsif ( /debug/ ) { $__OCF_PRIO = 'DEBUG' } else { $__OCF_PRIO =~ tr/[a-z]/[A-Z]/ } } if ( $__OCF_PRIO eq 'DEBUG' ) { ha_debug( "$__OCF_PRIO: $__OCF_MSG"); } else { ha_log( "$__OCF_PRIO: $__OCF_MSG"); } } # # ocf_exit_reason: print exit error string to stderr and log # Usage: Allows the OCF script to provide a string # describing why the exit code was returned. # Arguments: reason - required, The string that represents # why the error occured. # sub ocf_exit_reason { my $cookie = $ENV{'OCF_EXIT_REASON_PREFIX'} || 'ocf-exit-reason:'; my $fmt; my $msg; # No argument is likely not intentional. # Just one argument implies a printf format string of just "%s". # "Least surprise" in case some interpolated string from variable # expansion or other contains a percent sign. # More than one argument: first argument is going to be the format string. ocf_log ( 'err', 'Not enough arguments [%d] to ocf_exit_reason', scalar @ARG ) if scalar @ARG < 1; $fmt = shift; $msg = sprintf $fmt, @ARG; print STDERR "$cookie$msg\n"; __ha_log( '--ignore-stderr', "ERROR: $msg" ); } # returns true if the CRM is currently running a probe. A probe is # defined as a monitor operation with a monitoring interval of zero. sub ocf_is_probe { return ( $__OCF_ACTION eq 'monitor' and $ENV{'OCF_RESKEY_CRM_meta_interval'} == 0 ); } # returns true if the resource is configured as a clone. This is # defined as a resource where the clone-max meta attribute is present, # and set to greater than zero. sub ocf_is_clone { return ( defined $ENV{'OCF_RESKEY_CRM_meta_clone_max'} and $ENV{'OCF_RESKEY_CRM_meta_clone_max'} > 0 ); } # returns true if the resource is configured as a multistate # (master/slave) resource. This is defined as a resource where the # master-max meta attribute is present, and set to greater than zero. sub ocf_is_ms { return ( defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} and $ENV{'OCF_RESKEY_CRM_meta_master_max'} > 0 ); } # version check functions # allow . 
and - to delimit version numbers # max version number is 999 # letters and such are effectively ignored # sub ocf_is_ver { return $ARG[0] =~ /^[0-9][0-9.-]*[0-9]$/; } sub ocf_ver2num { my $v = 0; $v = $v * 1000 + $1 while $ARG[0] =~ /(\d+)/g; return $v; } sub ocf_ver_level { my $v = () = $ARG[0] =~ /(\d+)/g; return $v; } sub ocf_ver_complete_level { my $ver = shift; my $level = shift; my $i = 0; for ( my $i = 0; $i < $level; $i++ ) { $ver .= "$ver.0"; } return $ver; } # usage: ocf_version_cmp VER1 VER2 # version strings can contain digits, dots, and dashes # must start and end with a digit # returns: # 0: VER1 smaller (older) than VER2 # 1: versions equal # 2: VER1 greater (newer) than VER2 # 3: bad format sub ocf_version_cmp { my $v1 = shift; my $v2 = shift; my $v1_level; my $v2_level; my $level_diff; return 3 unless ocf_is_ver( $v1 ); return 3 unless ocf_is_ver( $v2 ); $v1_level = ocf_ver_level( $v1 ); $v2_level = ocf_ver_level( $v2 ); if ( $v1_level < $v2_level ) { $level_diff = $v2_level - $v1_level; $v1 = ocf_ver_complete_level( $v1, $level_diff ); } elsif ( $v1_level > $v2_level ) { $level_diff = $v1_level - $v2_level; $v2 = ocf_ver_complete_level( $v2, $level_diff ); } $v1 = ocf_ver2num( $v1 ); $v2 = ocf_ver2num( $v2 ); if ( $v1 == $v2 ) { return 1; } elsif ( $v1 < $v2 ) { return 0; } return 2; # -1 would look funny in shell ;-) ( T.N. not in perl ;) ) } sub ocf_local_nodename { # use crm_node -n for pacemaker > 1.1.8 my $nodename; qx{ which pacemakerd > /dev/null 2>&1 }; if ( $? == 0 ) { my $version; my $ret = qx{ pacemakerd -\$ }; $ret =~ /Pacemaker ([\d.]+)/; $version = $1; if ( ocf_version_cmp( $version, '1.1.8' ) == 2 ) { qx{ which crm_node > /dev/null 2>&1 }; $nodename = qx{ crm_node -n } if $? == 0; } } else { # otherwise use uname -n $nodename = qx { uname -n }; } chomp $nodename; return $nodename; } # Parse and returns the notify environment variables in a convenient structure # Returns undef if the action is not a notify # Returns undef if the resource is neither a clone or a multistate one sub ocf_notify_env { my $i; my %notify_env; return undef unless $__OCF_ACTION eq 'notify'; return undef unless ocf_is_clone() or ocf_is_ms(); %notify_env = ( 'type' => $ENV{'OCF_RESKEY_CRM_meta_notify_type'} || '', 'operation' => $ENV{'OCF_RESKEY_CRM_meta_notify_operation'} || '', 'active' => [ ], 'inactive' => [ ], 'start' => [ ], 'stop' => [ ], ); for my $action ( qw{ active start stop } ) { next unless defined $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_resource"} and defined $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_uname"}; $i = 0; $notify_env{ $action }[$i++]{'rsc'} = $_ foreach split /\s+/ => $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_resource"}; $i = 0; $notify_env{ $action }[$i++]{'uname'} = $_ foreach split /\s+/ => $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_uname"}; } # notify_nactive_uname doesn't exists. 
See:
# http://lists.clusterlabs.org/pipermail/developers/2017-January/000406.html
    if ( defined $ENV{"OCF_RESKEY_CRM_meta_notify_inactive_resource"} ) {
        $i = 0;
        $notify_env{'inactive'}[$i++]{'rsc'} = $_
            foreach split /\s+/ => $ENV{"OCF_RESKEY_CRM_meta_notify_inactive_resource"};
    }

    # exit if the resource is not a multistate one
    return %notify_env unless ocf_is_ms();

    for my $action ( qw{ master slave promote demote } ) {
        $notify_env{ $action } = [ ];

        next unless
                defined $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_resource"}
            and defined $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_uname"};

        $i = 0;
        $notify_env{ $action }[$i++]{'rsc'} = $_
            foreach split /\s+/ => $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_resource"};

        $i = 0;
        $notify_env{ $action }[$i++]{'uname'} = $_
            foreach split /\s+/ => $ENV{"OCF_RESKEY_CRM_meta_notify_${action}_uname"};
    }

    # Fix active and inactive fields for Pacemaker version < 1.1.16
    # ie. crm_feature_set < 3.0.11
    # See http://lists.clusterlabs.org/pipermail/developers/2016-August/000265.html
    # and git commit a6713c5d40327eff8549e7f596501ab1785b8765
    if ( ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.0.11' ) == 0 ) {
        $notify_env{ 'active' } = [
            @{ $notify_env{ 'master' } },
            @{ $notify_env{ 'slave' } }
        ];
    }

    return %notify_env;
}

$__OCF_ACTION = $ARGV[0];

# Return to sanity for the agents...
undef $ENV{'LC_ALL'};
$ENV{'LC_ALL'} = 'C';
setlocale( LC_ALL, 'C' );
undef $ENV{'LANG'};
undef $ENV{'LANGUAGE'};

$ENV{'OCF_ROOT'} = '/usr/lib/ocf'
    unless defined $ENV{'OCF_ROOT'} and $ENV{'OCF_ROOT'} ne '';

# old
undef $ENV{'OCF_FUNCTIONS_DIR'}
    if defined $ENV{'OCF_FUNCTIONS_DIR'}
    and $ENV{'OCF_FUNCTIONS_DIR'} eq "$ENV{'OCF_ROOT'}/resource.d/heartbeat";

# Define OCF_RESKEY_CRM_meta_interval in case it isn't already set,
# to make sure that ocf_is_probe() always works
$ENV{'OCF_RESKEY_CRM_meta_interval'} = 0
    unless defined $ENV{'OCF_RESKEY_CRM_meta_interval'};

# Strip the OCF_RESKEY_ prefix from this particular parameter
if ( defined $ENV{'OCF_RESKEY_OCF_CHECK_LEVEL'}
    and $ENV{'OCF_RESKEY_OCF_CHECK_LEVEL'} ne ''
) {
    $ENV{'OCF_CHECK_LEVEL'} = $ENV{'OCF_RESKEY_OCF_CHECK_LEVEL'};
}
else {
    $ENV{'OCF_CHECK_LEVEL'} = 0;
}

unless ( -d $ENV{'OCF_ROOT'} ) {
    ha_log( "ERROR: OCF_ROOT points to non-directory $ENV{'OCF_ROOT'}." );
    $! = $OCF_ERR_GENERIC;
    die;
}

$ENV{'OCF_RESOURCE_TYPE'} = $__SCRIPT_NAME
    unless defined $ENV{'OCF_RESOURCE_TYPE'}
    and $ENV{'OCF_RESOURCE_TYPE'} ne '';

unless ( defined $ENV{'OCF_RA_VERSION_MAJOR'}
    and $ENV{'OCF_RA_VERSION_MAJOR'} ne ''
) {
    # We are being invoked as an init script.
    # Fill in some things with reasonable values.
    $ENV{'OCF_RESOURCE_INSTANCE'} = 'default';
    return 1;
}

$ENV{'OCF_RESOURCE_INSTANCE'} = "undef" if $__OCF_ACTION eq 'meta-data';

unless ( defined $ENV{'OCF_RESOURCE_INSTANCE'}
    and $ENV{'OCF_RESOURCE_INSTANCE'} ne ''
) {
    ha_log( "ERROR: Need to tell us our resource instance name." );
    $! = $OCF_ERR_ARGS;
    die;
}

1;

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2016: Jehan-Guillaume de Rorthais and Mael Rimbault.

Licensed under the PostgreSQL License.

PAF-2.3.0/lib/OCF_ReturnCodes.pm000077500000000000000000000035151363154243400161660ustar00rootroot00000000000000#!/usr/bin/perl
# This program is open source, licensed under the PostgreSQL License.
# For license terms, see the LICENSE file.
#
# Copyright (C) 2016-2020: Jehan-Guillaume de Rorthais and Mael Rimbault

=head1 NAME

OCF_ReturnCodes - Common variables for the OCF Resource Agents supplied by
heartbeat.
=head1 SYNOPSIS use FindBin; use lib "$FindBin::RealBin/../../lib/heartbeat/"; use OCF_ReturnCodes; =head1 DESCRIPTION This module has been ported from the ocf-retrurncodes shell script of the resource-agents project. See L. =head1 VARIABLES Here are the variables exported by this module: =over =item $OCF_SUCCESS =item $OCF_ERR_GENERIC =item $OCF_ERR_ARGS =item $OCF_ERR_UNIMPLEMENTED =item $OCF_ERR_PERM =item $OCF_ERR_INSTALLED =item $OCF_ERR_CONFIGURED =item $OCF_NOT_RUNNING =item $OCF_RUNNING_MASTER =item $OCF_FAILED_MASTER =back =cut package OCF_ReturnCodes; use strict; use warnings; use 5.008; BEGIN { use Exporter; our $VERSION = 'v2.3.0'; our @ISA = ('Exporter'); our @EXPORT = qw( $OCF_SUCCESS $OCF_ERR_GENERIC $OCF_ERR_ARGS $OCF_ERR_UNIMPLEMENTED $OCF_ERR_PERM $OCF_ERR_INSTALLED $OCF_ERR_CONFIGURED $OCF_NOT_RUNNING $OCF_RUNNING_MASTER $OCF_FAILED_MASTER ); our @EXPORT_OK = ( @EXPORT ); } our $OCF_SUCCESS = 0; our $OCF_ERR_GENERIC = 1; our $OCF_ERR_ARGS = 2; our $OCF_ERR_UNIMPLEMENTED = 3; our $OCF_ERR_PERM = 4; our $OCF_ERR_INSTALLED = 5; our $OCF_ERR_CONFIGURED = 6; our $OCF_NOT_RUNNING = 7; our $OCF_RUNNING_MASTER = 8; our $OCF_FAILED_MASTER = 9; 1; =head1 COPYRIGHT AND LICENSE Copyright (C) 2016: Jehan-Guillaume de Rorthais and Mael Rimbault. Licensed under the PostgreSQL License. PAF-2.3.0/resource-agents-paf.spec000066400000000000000000000063601363154243400166620ustar00rootroot00000000000000%global _tag 2.3.0 %global _ocfroot %{_exec_prefix}/lib/ocf Name: resource-agents-paf Version: 2.3.0 Release: 1 Summary: PostgreSQL resource agent for Pacemaker License: PostgreSQL Group: Applications/Databases Url: http://clusterlabs.github.io/PAF/ Source0: https://github.com/ClusterLabs/PAF/archive/v%{_tag}.tar.gz BuildArch: noarch BuildRequires: resource-agents perl perl-Module-Build Requires: perl, resource-agents, pacemaker >= 1.1.13, corosync >= 2.0.0 %description PostgreSQL resource agent for Pacemaker # do not build -debuginfo package %define debug_package %{nil} %prep %setup -q -n PAF-%{_tag} %build perl Build.PL --install_path bindoc=%{_mandir}/man1 --install_path libdoc=%{_mandir}/man3 perl Build %install ./Build install --destdir "%{buildroot}" find "%{buildroot}" -type f -name .packlist -delete %files %defattr(-,root,root,0755) %doc README.md %doc CHANGELOG.md %license LICENSE %{_mandir}/man3/*.3* %{_mandir}/man7/*.7* %attr(755, -, -) %{_ocfroot}/resource.d/heartbeat/pgsqlms %attr(644, -, -) %{_ocfroot}/lib/heartbeat/OCF_ReturnCodes.pm %attr(644, -, -) %{_ocfroot}/lib/heartbeat/OCF_Directories.pm %attr(644, -, -) %{_ocfroot}/lib/heartbeat/OCF_Functions.pm %{_datadir}/resource-agents/ocft/configs/pgsqlms %changelog * Mon Mar 09 2020 Jehan-Guillaume de Rorthais - 2.3.0-1 - 2.3.0 major release * Tue Feb 11 2020 Jehan-Guillaume de Rorthais - 2.3~rc2-1 - 2.3_rc2 release candidate * Thu Nov 28 2019 Jehan-Guillaume de Rorthais - 2.3~rc1-1 - 2.3_rc1 release candidate * Thu Jan 31 2019 Jehan-Guillaume de Rorthais - 2.2.1-1 - 2.2.1 minor release * Tue Jan 22 2019 Jehan-Guillaume de Rorthais - 2.2.1rc1-1 - 2.2.1_rc1 release candidate * Tue Sep 12 2017 Jehan-Guillaume de Rorthais - 2.2.0-1 - 2.2.0 major release * Tue Aug 29 2017 Jehan-Guillaume de Rorthais - 2.2rc1-1 - 2.2_rc1 release candidate * Mon Jun 26 2017 Jehan-Guillaume de Rorthais - 2.2beta1-2 - add dependencies on Pacemaker and Corosync versions * Mon Jun 26 2017 Jehan-Guillaume de Rorthais - 2.2beta1-1 - 2.2_beta1 beta release * Fri Dec 23 2016 Jehan-Guillaume de Rorthais - 2.1.0-1 - 2.1.0 major release * Sat Dec 17 2016 
Jehan-Guillaume de Rorthais - 2.1rc2-1 - 2.1_rc2 release candidate * Sun Dec 11 2016 Jehan-Guillaume de Rorthais - 2.1rc1-1 - 2.1_rc1 release candidate * Sun Dec 04 2016 Jehan-Guillaume de Rorthais - 2.1beta1-1 - 2.1_beta1 beta release * Fri Sep 16 2016 Jehan-Guillaume de Rorthais - 2.0.0-1 - 2.0.0 major release * Wed Aug 03 2016 Jehan-Guillaume de Rorthais - 2.0rc1-1 - 2.0_rc1 first release candidate * Fri Jul 1 2016 Jehan-Guillaume de Rorthais - 2.0beta2-1 - 2.0_beta2 beta release * Wed Jun 15 2016 Jehan-Guillaume de Rorthais - 2.0beta1-1 - 2.0_beta1 beta release * Wed Apr 27 2016 Jehan-Guillaume de Rorthais - 1.0.1-1 - 1.0.1 minor release * Wed Mar 02 2016 Jehan-Guillaume de Rorthais 1.0.0-1 - Official 1.0.0 release * Tue Mar 01 2016 Jehan-Guillaume de Rorthais 0.99.0-1 - Initial version PAF-2.3.0/script/000077500000000000000000000000001363154243400134335ustar00rootroot00000000000000PAF-2.3.0/script/pgsqlms000077500000000000000000002334641363154243400150630ustar00rootroot00000000000000#!/usr/bin/perl # This program is open source, licensed under the PostgreSQL License. # For license terms, see the LICENSE file. # # Copyright (C) 2016-2020: Jehan-Guillaume de Rorthais and Mael Rimbault =head1 NAME ocf_heartbeat_pgsqlms - A PostgreSQL multi-state resource agent for Pacemaker =head1 SYNOPSIS B [start | stop | monitor | promote | demote | notify | reload | methods | meta-data | validate-all] =head1 DESCRIPTION Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource. =cut use strict; use warnings; use 5.008; use POSIX qw(locale_h); use Scalar::Util qw(looks_like_number); use File::Spec; use File::Temp; use Data::Dumper; use FindBin; use lib "$FindBin::RealBin/../lib/"; use lib "$FindBin::RealBin/../../lib/heartbeat/"; use OCF_ReturnCodes; use OCF_Directories; use OCF_Functions; our $VERSION = 'v2.3.0'; our $PROGRAM = 'pgsqlms'; # OCF environment my $OCF_RESOURCE_INSTANCE = $ENV{'OCF_RESOURCE_INSTANCE'}; my $OCF_RUNNING_SLAVE = $OCF_SUCCESS; my %OCF_NOTIFY_ENV = ocf_notify_env() if $__OCF_ACTION eq 'notify'; # Default parameters values my $system_user_default = "postgres"; my $bindir_default = "/usr/bin"; my $pgdata_default = "/var/lib/pgsql/data"; my $pghost_default = "/tmp"; my $pgport_default = 5432; my $start_opts_default = ""; my $maxlag_default = "0"; # Set default values if not found in environment my $system_user = $ENV{'OCF_RESKEY_system_user'} || $system_user_default; my $bindir = $ENV{'OCF_RESKEY_bindir'} || $bindir_default; my $pgdata = $ENV{'OCF_RESKEY_pgdata'} || $pgdata_default; my $datadir = $ENV{'OCF_RESKEY_datadir'} || $pgdata; my $pghost = $ENV{'OCF_RESKEY_pghost'} || $pghost_default; my $pgport = $ENV{'OCF_RESKEY_pgport'} || $pgport_default; my $start_opts = $ENV{'OCF_RESKEY_start_opts'} || $start_opts_default; my $maxlag = $ENV{'OCF_RESKEY_maxlag'} || $maxlag_default; my $recovery_tpl = $ENV{'OCF_RESKEY_recovery_template'} || "$pgdata/recovery.conf.pcmk"; # PostgreSQL commands path my $POSTGRES = "$bindir/postgres"; my $PGCTL = "$bindir/pg_ctl"; my $PGPSQL = "$bindir/psql"; my $PGCTRLDATA = "$bindir/pg_controldata"; my $PGISREADY = "$bindir/pg_isready"; my $PGWALDUMP = "$bindir/pg_waldump"; # pacemaker commands path my $CRM_MASTER = "$HA_SBIN_DIR/crm_master --lifetime forever"; my $CRM_NODE = "$HA_SBIN_DIR/crm_node"; my $CRM_RESOURCE = "$HA_SBIN_DIR/crm_resource"; my $ATTRD_PRIV = "$HA_SBIN_DIR/attrd_updater --private --lifetime reboot"; # Global vars my $nodename; my $exit_code = 0; # numeric pgsql 
versions my $PGVERNUM; my $PGVER_93 = 90300; my $PGVER_10 = 100000; my $PGVER_12 = 120000; # Run a query using psql. # # This function returns an array with psql return code as first element and # the result as second one. # sub _query { my $query = shift; my $res = shift; my $connstr = "dbname=postgres"; my $RS = chr(30); # ASCII RS (record separator) my $FS = chr(3); # ASCII ETX (end of text) my $postgres_uid = getpwnam( $system_user ); my $oldeuid = $>; my $tmpfile; my @res; my $ans; my $pid; my $rc; unless ( defined $res and defined $query and $query ne '' ) { ocf_log( 'debug', '_query: wrong parameters!' ); return -1; } unless ( $tmpfile = File::Temp->new( TEMPLATE => 'pgsqlms-XXXXXXXX', DIR => $HA_RSCTMP ) ) { ocf_exit_reason( 'Could not create or write in a temp file' ); exit $OCF_ERR_INSTALLED; } print $tmpfile $query; chmod 0644, $tmpfile; ocf_log( 'debug', '_query: %s', $query ); # Change the effective user to the given system_user so after forking # the given uid to the process should allow psql to connect w/o password $> = $postgres_uid; # Forking + piping $pid = open(my $KID, "-|"); if ( $pid == 0 ) { # child exec $PGPSQL, '--set', 'ON_ERROR_STOP=1', '-qXAtf', $tmpfile, '-R', $RS, '-F', $FS, '--port', $pgport, '--host', $pghost, $connstr; } # parent $> = $oldeuid; { local $/; $ans = <$KID>; } close $KID; $rc = $? >> 8; ocf_log( 'debug', '_query: psql return code: %d', $rc ); if ( defined $ans ) { chop $ans; push @{ $res }, [ split(chr(3) => $_, -1) ] foreach split (chr(30) => $ans, -1); ocf_log( 'debug', '_query: @res: %s', Data::Dumper->new( [ $res ] )->Terse(1)->Dump ); } # Possible return codes: # -1: wrong parameters # 0: OK # 1: failed to get resources (memory, missing file, ...) # 2: unable to connect # 3: query failed return $rc; } # Get the last received location on a standby # if the first argument is true, returns the value as decimal # if the first argument is false, returns the value as LSN # Returns undef if query failed sub _get_last_received_lsn { my ( $dec ) = @_; my $pg_last_wal_receive_lsn = 'pg_last_wal_receive_lsn()'; my $pg_wal_lsn_diff = 'pg_wal_lsn_diff'; my $query; my $rc; my @rs; if ( $PGVERNUM < $PGVER_10 ) { $pg_last_wal_receive_lsn = 'pg_last_xlog_receive_location()'; $pg_wal_lsn_diff = 'pg_xlog_location_diff'; } if ( $dec ) { $query = "SELECT $pg_wal_lsn_diff( $pg_last_wal_receive_lsn, '0/0' )"; } else { $query = "SELECT $pg_last_wal_receive_lsn"; } $rc = _query( $query, \@rs ); return $rs[0][0] if $rc == 0 and $rs[0][0]; ocf_log( 'err', 'Could not query last received LSN (%s)', $rc ) if $rc != 0; ocf_log( 'err', 'No values for last received LSN' ) if $rc == 0 and not $rs[0][0]; return undef; } # Get the master score for each connected standby # Returns directly the result set of the query or exit with an error. # Exits with OCF_ERR_GENERIC if the query failed sub _get_lag_scores { my $pg_current_wal_lsn = 'pg_current_wal_lsn()'; my $pg_wal_lsn_diff = 'pg_wal_lsn_diff'; my $write_lsn = 'write_lsn'; my $query; my $rc; my @rs; if ( $PGVERNUM < $PGVER_10 ) { $pg_current_wal_lsn = 'pg_current_xlog_location()'; $pg_wal_lsn_diff = 'pg_xlog_location_diff'; $write_lsn = 'write_location'; } # We check locations of connected standbies by querying the # "pg_stat_replication" view. # The row_number applies on the result set ordered on write_location ASC so # the highest row_number should be given to the closest node from the # master, then the lowest node name (alphanumeric sort) in case of equality. 
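    # Each row handed back to the caller is:
    #   [ application_name, priority, location, state, current_lag ]
    # which _check_locations() reads by index, $row->[0] through $row->[4].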
# The result set itself is order by priority DESC to process best known # candidate first. $query = qq{ SELECT application_name, priority, location, state, current_lag FROM ( SELECT application_name, (1000 - ( row_number() OVER ( PARTITION BY state IN ('startup', 'backup') ORDER BY location ASC, application_name ASC ) - 1 ) * 10 ) * CASE WHEN ( $maxlag > 0 AND current_lag > $maxlag) THEN -1 ELSE 1 END AS priority, location, state, current_lag FROM ( SELECT application_name, $write_lsn AS location, state, $pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag FROM pg_stat_replication ) AS s2 ) AS s1 ORDER BY priority DESC }; $rc = _query( $query, \@rs ); if ( $rc != 0 ) { ocf_exit_reason( 'Query to get standby locations failed (%d)', $rc ); exit $OCF_ERR_GENERIC; } return \@rs; } # get the timeout for the current action given from environment var # Returns timeout as integer # undef if unknown sub _get_action_timeout { my $timeout = $ENV{'OCF_RESKEY_CRM_meta_timeout'} / 1000; ocf_log( 'debug', '_get_action_timeout: known timeout: %s', defined $timeout ? $timeout : 'undef' ); return $timeout if defined $timeout and $timeout =~ /^\d+$/; return undef; } # Get, parse and return the value of the given private attribute name # Returns an empty string if not found. sub _get_priv_attr { my ( $name, $node ) = @_; my $val = ''; my $node_arg = ''; my $ans; $node = '' unless defined $node; $name = "$name-$OCF_RESOURCE_INSTANCE"; $node_arg= "--node $node" if $node ne ''; $ans = qx{ $ATTRD_PRIV --name "$name" --query $node_arg }; $ans =~ m/^name=".*" host=".*" value="(.*)"$/; $val = $1 if defined $1; ocf_log( 'debug', '_get_priv_attr: value of "%s"%s is "%s"', $name, ( $node ? " on \"$node\"": ""), $val ); return $val; } # Set the given private attribute name to the given value # As setting an attribute is asynchronous, this will return as soon as the # attribute is really set by attrd and available. sub _set_priv_attr { my ( $name, $val ) = @_; my $name_instance = "$name-$OCF_RESOURCE_INSTANCE"; ocf_log( 'debug', '_set_priv_attr: set "%s=%s"...', $name_instance, $val ); qx{ $ATTRD_PRIV --name "$name_instance" --update "$val" }; # give attr name without the resource instance name as _get_priv_attr adds # it as well while ( _get_priv_attr( $name ) ne $val ) { ocf_log( 'debug', '_set_priv_attr: waiting attrd ack for "%s"...', $name_instance ); select( undef, undef, undef, 0.1 ); } return; } # Delete the given private attribute. # As setting an attribute is asynchronous, this will return as soon as the # attribute is really deleted by attrd. sub _delete_priv_attr { my ( $name ) = @_; my $name_instance = "$name-$OCF_RESOURCE_INSTANCE"; ocf_log( 'debug', '_delete_priv_attr: delete "%s"...', $name_instance ); qx{ $ATTRD_PRIV --name "$name_instance" --delete }; # give attr name without the resource instance name as _get_priv_attr adds # it as well while ( _get_priv_attr( $name ) ne '' ) { ocf_log( 'debug', '_delete_priv_attr: waiting attrd ack for "%s"...', $name_instance ); select( undef, undef, undef, 0.1 ); } return; } # Get, parse and return the resource master score on given node. # Returns an empty string if not found. # Returns undef on crm_master call on error sub _get_master_score { my ( $node ) = @_; my $node_arg = ''; my $score; $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne ''; $score = qx{ $CRM_MASTER --quiet --get-value $node_arg 2> /dev/null }; return '' unless $? 
== 0 and defined $score; chomp $score; return $score; } # Set the master score of the local node or the optionally given node. # As setting an attribute is asynchronous, this will return as soon as the # attribute is really set by attrd and available everywhere. sub _set_master_score { my ( $score, $node ) = @_; my $node_arg = ''; my $tmp; $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne ''; qx{ $CRM_MASTER $node_arg --quiet --update "$score" }; while ( ( $tmp = _get_master_score( $node ) ) ne $score ) { ocf_log( 'debug', '_set_master_score: waiting to set score to "%s" (currently "%s")...', $score, $tmp ); select(undef, undef, undef, 0.1); } return; } # _master_score_exists # This subroutine checks if a master score is set for one of the relative clones # in the cluster and the score is greater or equal of 0. # Returns 1 if at least one master score >= 0 is found. # Returns 0 otherwise sub _master_score_exists { my @partition_nodes = split /\s+/ => qx{ $CRM_NODE --partition }; foreach my $node ( @partition_nodes ) { my $score = _get_master_score( $node ); return 1 if defined $score and $score ne '' and $score > -1; } return 0; } # Check if the current transiation is a recover of a master clone on given node. sub _is_master_recover { my ( $n ) = @_; return ( scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'master'} } and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} } ); } # Check if the current transition is a recover of a slave clone on given node. sub _is_slave_recover { my ( $n ) = @_; return ( scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} } and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'start'} } ); } # check if th current transition is a switchover to the given node. sub _is_switchover { my ( $n ) = @_; my $old = $OCF_NOTIFY_ENV{'master'}[0]{'uname'}; return 0 if scalar @{ $OCF_NOTIFY_ENV{'master'} } != 1 or scalar @{ $OCF_NOTIFY_ENV{'demote'} } != 1 or scalar @{ $OCF_NOTIFY_ENV{'promote'} } != 1; return ( scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'demote'} } and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} } and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} } and not scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'stop'} } ); } # Run the given command as the "system_user" given as parameter. # It basically forks and seteuid/setuid away from root. # sub _runas { my $rc; my $pid; my @cmd = @_; my (undef, undef, $postgres_uid, $postgres_gid ) = getpwnam( $system_user ); $pid = fork; if ( $pid == 0 ) { # in child $) = "$postgres_gid $postgres_gid"; while ( my ( undef, undef, $gid, $members ) = getgrent ) { $) .= " $gid" if grep { $system_user eq $_ } split /\s+/, $members } $( = $postgres_gid; $< = $> = $postgres_uid; exec @cmd; } ocf_log( 'debug', '_runas: launching as "%s" command "%s"', $system_user, join(' ', @cmd) ); waitpid $pid, 0; $rc = $? >> 8; return $rc; } # Check if instance is listening on the given host/port. 
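# A return code of 0 means the instance accepts connections. This is roughly
# equivalent to running, as system_user:
#   pg_isready -h $pghost -p $pgport -d postgres -t $timeout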
# sub _pg_isready { # Add 60s to the timeout or use a 24h timeout fallback to make sure # Pacemaker will give up before us and take decisions my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; my $rc = _runas( $PGISREADY, '-h', $pghost, '-p', $pgport, '-d', 'postgres', '-t', $timeout ); # Possible error codes: # 1: ping rejected (usually when instance is in startup, in crash # recovery, in warm standby, or when a shutdown is in progress) # 2: no response, usually means the instance is down # 3: no attempt, probably a syntax error, should not happen return $rc; } # Check the postmaster.pid file and the postmaster process. # WARNING: we do not distinguish the scenario where postmaster.pid does not # exist from the scenario where the process is still alive. It should be ok # though, as this is considered a hard error from monitor. # sub _pg_ctl_status { my $rc = _runas( $PGCTL, '--pgdata', $pgdata, 'status' ); # Possible error codes: # 3: postmaster.pid file does not exist OR it does but the process # with the PID found in the file is not alive return $rc; } # Start the local instance using pg_ctl # sub _pg_ctl_start { # Add 60s to the timeout or use a 24h timeout fallback to make sure # Pacemaker will give up before us and take decisions my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; my @cmd = ( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout, 'start' ); push @cmd => ( '-o', $start_opts ) if $start_opts ne ''; return _runas( @cmd ); } # Enable the Standby mode. # # Up to v11, creates the recovery.conf file based on the given template. # Since v12, creates standby.signal. sub _enable_recovery { my $fh; my $content = ''; my $standby_file = "$datadir/standby.signal"; my (undef, undef, $uid, $gid) = getpwnam($system_user); if ( $PGVERNUM < $PGVER_12 ) { $standby_file = "$datadir/recovery.conf"; ocf_log( 'debug', '_enable_recovery: get replication configuration from the template file "%s"', $recovery_tpl ); # Create the recovery.conf file to start the instance as a secondary. # NOTE: the recovery.conf is supposed to be set up so the secondary can # connect to the primary instance, eg. using a virtual IP address. # As there is no primary instance available at startup, secondaries will # complain about failing to connect. # As we can not reload a recovery.conf file on a standby without restarting # it, we will leave with this. # FIXME how would the reload help us in this case ? unless ( defined open( $fh, '<', $recovery_tpl ) ) { ocf_exit_reason( 'Could not open file "%s": %s', $recovery_tpl, $! ); exit $OCF_ERR_CONFIGURED; } # Copy all parameters from the template file while (my $line = <$fh>) { chomp $line; $content .= "$line\n"; } close $fh; } ocf_log( 'debug', '_enable_recovery: write the standby file "%s"', $standby_file ); unless ( open( $fh, '>', $standby_file ) ) { ocf_exit_reason( 'Could not open file "%s": %s', $standby_file, $! ); exit $OCF_ERR_CONFIGURED; } # Write the recovery.conf file using configuration from the template file print $fh $content; close $fh; unless ( chown $uid, $gid, $standby_file ) { ocf_exit_reason( 'Could not set owner of "%s"', $standby_file ); exit $OCF_ERR_CONFIGURED; }; } # Parse and return various informations about the local PostgreSQL instance as # reported by its controldata file. # # WARNING: the status is NOT updated in case of crash. # # This sub exit the script with an error on failure sub _get_controldata { my %controldata; my $ans; $ans = qx{ $PGCTRLDATA "$datadir" 2>/dev/null }; # Parse the output of pg_controldata. 
# This output is quite stable between pg versions, but we might need to sort # it at some point if things are moving in there... $ans =~ m{ # get the current state ^\QDatabase cluster state\E:\s+(.*?)\s*$ .* # Get the latest known REDO location ^\QLatest checkpoint's REDO location\E:\s+([/0-9A-F]+)\s*$ .* # Get the latest known TL ^\QLatest checkpoint's TimeLineID\E:\s+(\d+)\s*$ .* # Get the wal level # NOTE: pg_controldata output changed with PostgreSQL 9.5, so we need to # account for both syntaxes ^(?:\QCurrent \E)?\Qwal_level setting\E:\s+(.*?)\s*$ }smx; $controldata{'state'} = $1 if defined $1; $controldata{'redo'} = $2 if defined $2; $controldata{'tl'} = $3 if defined $3; $controldata{'wal_level'} = $4 if defined $4; ocf_log( 'debug', "_get_controldata: found: %s", Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump ); return %controldata if defined $controldata{'state'} and defined $controldata{'tl'} and defined $controldata{'redo'} and defined $controldata{'wal_level'}; ocf_exit_reason( 'Could not read all datas from controldata file for "%s"', $datadir ); ocf_log( 'debug', "_get_controldata: controldata file: %s", Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump, $ans ); exit $OCF_ERR_ARGS; } # Pead major version from datadir/PG_VERSION and return it as numeric version sub _get_pg_version { my $fh; my $PGVERSION; my $PGVERNUM; # check PG_VERSION if ( ! -s "$datadir/PG_VERSION" ) { ocf_exit_reason( 'PG_VERSION does not exist in "%s"', $datadir ); exit $OCF_ERR_ARGS; } unless ( open( $fh, '<', "$datadir/PG_VERSION" ) ) { ocf_exit_reason( "Could not open file \"$datadir/PG_VERSION\": $!" ); exit $OCF_ERR_ARGS; } read( $fh, $PGVERSION, 32 ); close $fh; chomp $PGVERSION; $PGVERSION =~ /^(\d+)(?:\.(\d+))?$/; $PGVERNUM = $1 * 10000; $PGVERNUM += $2 * 100 if $1 < 10; # no 2nd num in the major version from v10 return $PGVERNUM; } # Use pg_controldata to check the state of the PostgreSQL server. This # function returns codes depending on this state, so we can find whether the # instance is a primary or a secondary, or use it to detect any inconsistency # that could indicate the instance has crashed. # sub _controldata_to_ocf { my %cdata = _get_controldata(); while ( 1 ) { ocf_log( 'debug', '_controldata: instance "%s" state is "%s"', $OCF_RESOURCE_INSTANCE, $cdata{'state'} ); # Instance should be running as a primary. return $OCF_RUNNING_MASTER if $cdata{'state'} eq "in production"; # Instance should be running as a secondary. # This state includes warm standby (rejects connections attempts, # including pg_isready) return $OCF_SUCCESS if $cdata{'state'} eq "in archive recovery"; # The instance should be stopped. # We don't care if it was a primary or secondary before, because we # always start instances as secondaries, and then promote if necessary. return $OCF_NOT_RUNNING if $cdata{'state'} eq "shut down" or $cdata{'state'} eq "shut down in recovery"; # The state is "in crash recovery", "starting up" or "shutting down". # This state should be transitional, so we wait and loop to check if # it changes. # If it does not, pacemaker will eventually abort with a timeout. ocf_log( 'debug', '_controldata: waiting for transitionnal state "%s" to finish', $cdata{'state'} ); sleep 1; %cdata = _get_controldata(); } # If we reach this point, something went really wrong with this code or # pg_controldata. 
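    # (For reference, the mapping implemented above is:
    #   "in production"                      => OCF_RUNNING_MASTER
    #   "in archive recovery"                => OCF_SUCCESS
    #   "shut down", "shut down in recovery" => OCF_NOT_RUNNING
    #   transitional states                  => wait in the loop and re-check)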
ocf_exit_reason( 'Unable get instance "%s" state using pg_controldata', $OCF_RESOURCE_INSTANCE ); return $OCF_ERR_INSTALLED ; } # Check the write_location of all secondaries, and adapt their master score so # that the instance closest to the master will be the selected candidate should # a promotion be triggered. # NOTE: This is only a hint to pacemaker! The selected candidate to promotion # actually re-check it is the best candidate and force a re-election by failing # if a better one exists. This avoid a race condition between the call of the # monitor action and the promotion where another slave might have catchup faster # with the master. # NOTE: we cannot directly use the write_location, neither a lsn_diff value as # promotion score as Pacemaker considers any value greater than 1,000,000 as # INFINITY. # # This sub is supposed to be executed from a master monitor action. # sub _check_locations { my $node_score; my $row_num; my $row; my @rs; # Call crm_node to exclude nodes that are not part of the cluster at this # point. my $partition_nodes = qx{ $CRM_NODE --partition }; @rs = @{ _get_lag_scores() }; $row_num = scalar @rs; # If there is no row left at this point, it means that there is no # secondary instance connected. ocf_log( 'warning', 'No secondary connected to the master' ) if $row_num == 0; # For each standby connected, set their master score based on the following # rule: the first known node/application, with the highest priority and # with an acceptable state. while ( $row = shift @rs ) { if ( $partition_nodes !~ /$row->[0]/ ) { ocf_log( 'info', 'Ignoring unknown application_name/node "%s"', $row->[0] ); next; } if ( $row->[0] eq $nodename ) { ocf_log( 'warning', 'Streaming replication with myself!' ); next; } $node_score = _get_master_score( $row->[0] ); if ( $row->[3] =~ /^\s*(?:startup|backup)\s*$/ ) { # We exclude any standby being in state backup (pg_basebackup) or # startup (new standby or failing standby) ocf_log( 'info', 'Forbidding promotion on "%s" in state "%s"', $row->[0], $row->[3] ); _set_master_score( '-1', $row->[0] ) unless $node_score eq '-1'; } else { ocf_log( 'debug', '_check_locations: checking "%s" promotion ability (current_score: %s, priority: %s, location: %s, lag: %s)', $row->[0], $node_score, $row->[1], $row->[2], $row->[4] ); if ( $node_score ne $row->[1] ) { if ( $row->[1] < -1 ) { ocf_log( 'info', 'Update score of "%s" from %s to %s because replication lag (%s) is higher than given maxlag (%s).', $row->[0], $node_score, $row->[1], $row->[4], $maxlag ); } else { ocf_log( 'info', 'Update score of "%s" from %s to %s because of a change in the replication lag (%s).', $row->[0], $node_score, $row->[1], $row->[4] ); } _set_master_score( $row->[1], $row->[0] ); } else { ocf_log( 'debug', '_check_locations: "%s" keeps its current score of %s', $row->[0], $row->[1] ); } } # Remove this node from the known nodes list. $partition_nodes =~ s/(?:^|\s)$row->[0](?:\s|$)/ /g; } $partition_nodes =~ s/(?:^\s+)|(?:\s+$)//g; # If there are still nodes in "partition_nodes", it means there is no # corresponding line in "pg_stat_replication". foreach my $node (split /\s+/ => $partition_nodes) { # Exclude the current node. next if $node eq $nodename; # do not warn if the master score is already set to -1000. 
# this avoid log flooding (gh #138) $node_score = _get_master_score( $node ); next if $node_score eq '-1000'; ocf_log( 'warning', '"%s" is not connected to the primary', $node ); _set_master_score( '-1000', $node ); } # Finally set the master score if not already done $node_score = _get_master_score(); _set_master_score( '1001' ) unless $node_score eq '1001'; return $OCF_SUCCESS; } # _check_switchover # check if the pgsql switchover to the localnode is safe. # This is supposed to be called **after** the master has been stopped or demoted. # This sub checks if the local standby received the shutdown checkpoint from the # old master to make sure it can take over the master role and the old master # will be able to catchup as a standby after. # # Returns 0 if switchover is safe # Returns 1 if swithcover is not safe # Returns 2 for internal error sub _check_switchover { my $has_sht_chk = 0; my $last_redo; my $last_lsn; my $ans; my $rc; my $tl; my %cdata; $PGWALDUMP = "$bindir/pg_xlogdump" if $PGVERNUM < $PGVER_10; ocf_log( 'info', 'Switchover in progress from "%s" to "%s".' .' Need to check the last record in WAL', $OCF_NOTIFY_ENV{'demote'}[0]{'uname'}, $nodename ); # check if we received the shutdown checkpoint of the master during its # demote process. # We need the last local checkpoint LSN and the last received LSN from # master to check in the WAL between these adresses if we have a # "checkpoint shutdown" using pg_xlogdump/pg_waldump. # # Force a checkpoint to make sure the controldata shows the very last TL # and the master's shutdown checkpoint _query( q{ CHECKPOINT }, {} ); %cdata = _get_controldata(); $tl = $cdata{'tl'}; $last_redo = $cdata{'redo'}; # Get the last received LSN from master $last_lsn = _get_last_received_lsn(); unless ( defined $last_lsn ) { ocf_exit_reason( 'Could not fetch last received LSN!' ); return 2; } $ans = qx{ $PGWALDUMP --path "$datadir" --timeline "$tl" \\ --start "$last_redo" --end "$last_lsn" 2>&1 }; $rc = $?; ocf_log( 'debug', '_check_switchover: %s rc: "%s", tl: "%s", last_chk: %s, last_lsn: %s, output: "%s"', $PGWALDUMP, $rc, $tl, $last_redo, $last_lsn, $ans ); if ( $rc == 0 and $ans =~ m{^rmgr: XLOG.*desc: (?i:checkpoint)(?::|_SHUTDOWN) redo [0-9A-F/]+; tli $tl;.*; shutdown$}m ) { ocf_log( 'info', 'Slave received the shutdown checkpoint' ); return 0; } ocf_exit_reason( 'Did not receive the shutdown checkpoint from the old master!' ); return 1; } # Check to confirm if the instance is really started as _pg_isready stated and # check if the instance is primary or secondary. # sub _confirm_role { my $is_in_recovery; my $rc; my @rs; $rc = _query( "SELECT pg_is_in_recovery()", \@rs ); $is_in_recovery = $rs[0][0]; if ( $rc == 0 ) { # The query was executed, check the result. if ( $is_in_recovery eq 't' ) { # The instance is a secondary. ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a secondary"); return $OCF_SUCCESS; } elsif ( $is_in_recovery eq 'f' ) { # The instance is a primary. ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary"); # Check lsn diff with current slaves if any _check_locations() if $__OCF_ACTION eq 'monitor'; return $OCF_RUNNING_MASTER; } # This should not happen, raise a hard configuration error. ocf_exit_reason( 'Unexpected result from query to check if "%s" is a primary or a secondary: "%s"', $OCF_RESOURCE_INSTANCE, $is_in_recovery ); return $OCF_ERR_CONFIGURED; } elsif ( $rc == 1 or $rc == 2 ) { # psql cound not connect to the instance. 
# Check to confirm if the instance is really started as _pg_isready stated and
# check if the instance is a primary or a secondary.
#
sub _confirm_role {
    my $is_in_recovery;
    my $rc;
    my @rs;

    $rc = _query( "SELECT pg_is_in_recovery()", \@rs );

    $is_in_recovery = $rs[0][0];

    if ( $rc == 0 ) {
        # The query was executed, check the result.
        if ( $is_in_recovery eq 't' ) {
            # The instance is a secondary.
            ocf_log( 'debug',
                "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a secondary" );
            return $OCF_SUCCESS;
        }
        elsif ( $is_in_recovery eq 'f' ) {
            # The instance is a primary.
            ocf_log( 'debug',
                "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary" );
            # Check the LSN diff with the current slaves if any
            _check_locations() if $__OCF_ACTION eq 'monitor';
            return $OCF_RUNNING_MASTER;
        }

        # This should not happen, raise a hard configuration error.
        ocf_exit_reason(
            'Unexpected result from query to check if "%s" is a primary or a secondary: "%s"',
            $OCF_RESOURCE_INSTANCE, $is_in_recovery );

        return $OCF_ERR_CONFIGURED;
    }
    elsif ( $rc == 1 or $rc == 2 ) {
        # psql could not connect to the instance.
        # As pg_isready reported the instance was listening, this error
        # could be a max_connection saturation. Just report a soft error.
        ocf_exit_reason( 'psql could not connect to instance "%s"',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_ERR_GENERIC;
    }

    # The query failed (rc: 3) or bad parameters (rc: -1).
    # This should not happen, raise a hard configuration error.
    ocf_exit_reason(
        'The query to check if instance "%s" is a primary or a secondary failed (rc: %d)',
        $OCF_RESOURCE_INSTANCE, $rc );

    return $OCF_ERR_CONFIGURED;
}

# Check to confirm if the instance is really stopped as _pg_isready stated
# and if it was properly shut down.
#
sub _confirm_stopped {
    my $pgctlstatus_rc;
    my $controldata_rc;

    # Check the postmaster process status.
    $pgctlstatus_rc = _pg_ctl_status();

    if ( $pgctlstatus_rc == 0 ) {
        # The PID file exists and the process is available.
        # That should not be the case, return an error.
        ocf_exit_reason(
            'Instance "%s" is not listening, but the process referenced in postmaster.pid exists',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_ERR_GENERIC;
    }

    # The PID file does not exist or the process is not available.
    ocf_log( 'debug',
        '_confirm_stopped: no postmaster process found for instance "%s"',
        $OCF_RESOURCE_INSTANCE );

    if ( -f "$datadir/backup_label" ) {
        # We are probably on a freshly built secondary that was not started
        # yet.
        ocf_log( 'debug',
            '_confirm_stopped: backup_label file exists: probably on a never started secondary' );
        return $OCF_NOT_RUNNING;
    }

    # Continue the check with pg_controldata.
    $controldata_rc = _controldata_to_ocf();
    if ( $controldata_rc == $OCF_RUNNING_MASTER ) {
        # The controldata has not been updated to "shutdown".
        # It should mean we had a crash on a primary instance.
        ocf_exit_reason(
            'Instance "%s" controldata indicates a running primary instance, the instance has probably crashed',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_FAILED_MASTER;
    }
    elsif ( $controldata_rc == $OCF_SUCCESS ) {
        # The controldata has not been updated to "shutdown in recovery".
        # It should mean we had a crash on a secondary instance.
        # There is no "FAILED_SLAVE" return code, so we return a generic
        # error.
        ocf_exit_reason(
            'Instance "%s" controldata indicates a running secondary instance, the instance has probably crashed',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_ERR_GENERIC;
    }
    elsif ( $controldata_rc == $OCF_NOT_RUNNING ) {
        # The controldata state is consistent, the instance was probably
        # properly shut down.
        ocf_log( 'debug',
            '_confirm_stopped: instance "%s" controldata indicates that the instance was properly shut down',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_NOT_RUNNING;
    }

    # Something went wrong with the controldata check.
    ocf_exit_reason(
        'Could not get instance "%s" status from controldata (returned: %d)',
        $OCF_RESOURCE_INSTANCE, $controldata_rc );

    return $OCF_ERR_GENERIC;
}
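# As a quick reference, assuming the usual pg_controldata wording, the
# "Database cluster state" values roughly map to the OCF codes used above:
#   "in production"                       -> OCF_RUNNING_MASTER
#   "in archive recovery"                 -> OCF_SUCCESS (hot standby)
#   "shut down" / "shut down in recovery" -> OCF_NOT_RUNNING
# Anything else is reported as an error by _controldata_to_ocf().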
############################################################
#### OCF FUNCS

=head1 SUPPORTED PARAMETERS

=over

=item B<pgdata>

Location of the PGDATA of your instance
(optional, string, default "/var/lib/pgsql/data")

=item B<pghost>

The socket directory or IP address to use to connect to the local instance
(optional, string, default "/tmp")

=item B<pgport>

The port to connect to the local instance
(optional, integer, default "5432")

=item B<bindir>

Location of the PostgreSQL binaries.
(optional, string, default "/usr/bin")

=item B<system_user>

The system owner of your instance's process
(optional, string, default "postgres")

=item B<recovery_template>

B<ONLY> for PostgreSQL 11 and below. The local template that will be copied as
the C<PGDATA/recovery.conf> file. This template file must exist on all nodes.

With PostgreSQL 12 and higher, the cluster will refuse to start if this
parameter is set or a template file is found.
(optional, string, default "$PGDATA/recovery.conf.pcmk")

=item B<maxlag>

Maximum lag allowed on a standby before we set a negative master score on it.
The calculation is based on the difference between the current xlog location
on the master and the write location on the standby.
(optional, integer, default "0", which disables this feature)

=item B<datadir>

Path to the directory set in C<data_directory> from your postgresql.conf file.
This parameter has the same default as PostgreSQL itself: the C<pgdata>
parameter value. Unless you have a special PostgreSQL setup and you understand
this parameter, B<ignore it>
(optional, string, defaults to the value of C<pgdata>)

=item B<start_opts>

Additional arguments given to the postgres process on startup. See
"postgres --help" for available options. Useful when the postgresql.conf file
is not in the data directory (PGDATA), eg.:
-c config_file=/etc/postgresql/9.3/main/postgresql.conf
(optional, string, default "")

=back

=cut

sub ocf_meta_data {
    print qq{<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pgsqlms">
  <version>1.0</version>

  <longdesc lang="en">
    Resource script for PostgreSQL in replication. It manages PostgreSQL
    servers using streaming replication as an HA resource.
  </longdesc>
  <shortdesc lang="en">Manages PostgreSQL servers in replication</shortdesc>
  <parameters>

    <parameter name="system_user" unique="0" required="0">
      <longdesc lang="en">
        System user account used to run the PostgreSQL server
      </longdesc>
      <shortdesc lang="en">PostgreSQL system User</shortdesc>
      <content type="string" default="postgres" />
    </parameter>

    <parameter name="bindir" unique="0" required="0">
      <longdesc lang="en">
        Path to the directory storing the PostgreSQL binaries. The agent
        uses psql, pg_isready, pg_controldata and pg_ctl.
      </longdesc>
      <shortdesc lang="en">Path to the PostgreSQL binaries</shortdesc>
      <content type="string" default="/usr/bin" />
    </parameter>

    <parameter name="pgdata" unique="1" required="0">
      <longdesc lang="en">
        Path to the data directory, e.g. PGDATA
      </longdesc>
      <shortdesc lang="en">Path to the data directory</shortdesc>
      <content type="string" default="/var/lib/pgsql/data" />
    </parameter>

    <parameter name="datadir" unique="1" required="0">
      <longdesc lang="en">
        Path to the directory set in data_directory from your postgresql.conf
        file. This parameter has the same default as PostgreSQL itself: the
        pgdata parameter value. Unless you have a special PostgreSQL setup
        and you understand this parameter, ignore it.
      </longdesc>
      <shortdesc lang="en">Path to the directory set in data_directory from your postgresql.conf file</shortdesc>
      <content type="string" default="PGDATA" />
    </parameter>

    <parameter name="pghost" unique="0" required="0">
      <longdesc lang="en">
        Host IP address or unix socket folder the instance is listening on.
      </longdesc>
      <shortdesc lang="en">Instance IP or unix socket folder</shortdesc>
      <content type="string" default="/tmp" />
    </parameter>

    <parameter name="pgport" unique="0" required="0">
      <longdesc lang="en">
        Port the instance is listening on.
      </longdesc>
      <shortdesc lang="en">Instance port</shortdesc>
      <content type="integer" default="5432" />
    </parameter>

    <parameter name="maxlag" unique="0" required="0">
      <longdesc lang="en">
        Maximum lag allowed on a standby before we set a negative master
        score on it. The calculation is based on the difference between the
        current LSN on the master and the LSN written on the standby.
        This parameter must be a valid positive number as described in
        PostgreSQL documentation. See:
        https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC
      </longdesc>
      <shortdesc lang="en">Maximum write lag before we mark a standby as inappropriate to promote</shortdesc>
      <content type="integer" default="0" />
    </parameter>

    <parameter name="recovery_template" unique="1" required="0">
      <longdesc lang="en">
        Path to the recovery.conf template. This file is simply copied to
        \$PGDATA before starting the instance as slave.
        ONLY for PostgreSQL 11 and below. This parameter is IGNORED for
        PostgreSQL 12 and higher. The cluster will refuse to start if a
        template file is found.
      </longdesc>
      <shortdesc lang="en">Path to the recovery.conf template for PostgreSQL 11 and older.</shortdesc>
      <content type="string" default="PGDATA/recovery.conf.pcmk" />
    </parameter>

    <parameter name="start_opts" unique="0" required="0">
      <longdesc lang="en">
        Additional arguments given to the postgres process on startup. See
        "postgres --help" for available options. Useful when the
        postgresql.conf file is not in the data directory (PGDATA), eg.:
        "-c config_file=/etc/postgresql/9.3/main/postgresql.conf".
      </longdesc>
      <shortdesc lang="en">Additional arguments given to the postgres process on startup.</shortdesc>
      <content type="string" default="" />
    </parameter>

  </parameters>
  <actions>
    <action name="start" timeout="60" />
    <action name="stop" timeout="60" />
    <action name="reload" timeout="20" />
    <action name="promote" timeout="30" />
    <action name="demote" timeout="120" />
    <action name="monitor" depth="0" timeout="10" interval="15" role="Master" />
    <action name="monitor" depth="0" timeout="10" interval="16" role="Slave" />
    <action name="notify" timeout="60" />
    <action name="meta-data" timeout="5" />
    <action name="validate-all" timeout="5" />
    <action name="methods" timeout="5" />
  </actions>
</resource-agent>
};

    return $OCF_SUCCESS;
}
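# For reference, on a host with Pacemaker installed this metadata can be
# inspected with a command along the lines of:
#   crm_resource --show-metadata ocf:heartbeat:pgsqlms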
=head1 SUPPORTED ACTIONS

This resource agent supports the following actions (operations):

=over

=item B<start>

Starts the resource. Suggested minimum timeout: 60.

=item B<stop>

Stops the resource. Suggested minimum timeout: 60.

=item B<reload>

Suggested minimum timeout: 20.

=item B<promote>

Promotes the resource to the Master role. Suggested minimum timeout: 30.

=item B<demote>

Demotes the resource to the Slave role. Suggested minimum timeout: 120.

=item B<monitor (Master role)>

Performs a detailed status check. Suggested minimum timeout: 10.
Suggested interval: 15.

=item B<monitor (Slave role)>

Performs a detailed status check. Suggested minimum timeout: 10.
Suggested interval: 16.

=item B<notify>

Suggested minimum timeout: 60.

=item B<meta-data>

Retrieves resource agent metadata (internal use only).
Suggested minimum timeout: 5.

=item B<methods>

Suggested minimum timeout: 5.

=item B<validate-all>

Performs a validation of the resource configuration.
Suggested minimum timeout: 5.

=back

=cut

sub ocf_methods {
    print q{start
stop
reload
promote
demote
monitor
notify
methods
meta-data
validate-all
};

    return $OCF_SUCCESS;
}

############################################################
#### RA FUNCS

sub pgsql_validate_all {
    my $fh;
    my $ans = '';
    my %cdata;

    unless (
        ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.0.9' ) == 2
    ) {
        ocf_exit_reason(
            'PAF %s is only compatible with Pacemaker 1.1.13 and greater',
            $VERSION );
        return $OCF_ERR_INSTALLED;
    }

    # check notify=true
    $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
        --meta --get-parameter notify 2>/dev/null };
    chomp $ans;
    unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
        ocf_exit_reason(
            'You must set meta parameter notify=true for your master resource'
        );
        return $OCF_ERR_INSTALLED;
    }

    # check master-max=1
    unless ( defined $ENV{'OCF_RESKEY_CRM_meta_master_max'}
        and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
    ) {
        ocf_exit_reason(
            'You must set meta parameter master-max=1 for your master resource'
        );
        return $OCF_ERR_INSTALLED;
    }
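    # The PostgreSQL 12+ checks below rely on "postgres -C <guc>", which
    # prints a setting value without starting the server. Illustration only
    # (the path is an example):
    #   $ postgres -C recovery_target_timeline -D /var/lib/pgsql/12/data
    #   latest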
    if ( $PGVERNUM >= $PGVER_12 ) {
        # check PostgreSQL setup: checks related to v12 and after
        my $guc;

        # the recovery.conf template must not exist
        if ( -f $recovery_tpl ) {
            ocf_exit_reason(
                'Recovery template file "%s" is forbidden for PostgreSQL 12 and above',
                $recovery_tpl );
            exit $OCF_ERR_ARGS;
        }

        # WARNING: you MUST put -C as the first argument to bypass the root
        # check
        $guc = qx{ $POSTGRES -C recovery_target_timeline -D "$pgdata" $start_opts};
        chomp $guc;
        unless ( $guc eq 'latest' ) {
            ocf_exit_reason(
                q{Parameter "recovery_target_timeline" MUST be set to 'latest'. }
                . q{It is currently set to '%s'}, $guc );
            return $OCF_ERR_ARGS;
        }

        $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts};
        unless ( $guc =~ /\bapplication_name=$nodename\b/ ) {
            ocf_exit_reason(
                q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }
                . q{It is currently set to '%s'}, $nodename, $guc );
            return $OCF_ERR_ARGS;
        }
    }
    else {
        my @content;

        # check the recovery template
        if ( ! -f $recovery_tpl ) {
            ocf_exit_reason( 'Recovery template file "%s" does not exist',
                $recovery_tpl );
            return $OCF_ERR_ARGS;
        }

        # check the content of the recovery template file
        unless ( open( $fh, '<', $recovery_tpl ) ) {
            ocf_exit_reason( 'Could not open file "%s": %s',
                $recovery_tpl, $! );
            return $OCF_ERR_ARGS;
        }
        @content = <$fh>;
        close $fh;

        unless ( grep /^\s*standby_mode\s*=\s*'?on'?\s*$/, @content ) {
            ocf_exit_reason(
                'Recovery template file must contain "standby_mode = on"' );
            return $OCF_ERR_ARGS;
        }

        unless ( grep /^\s*recovery_target_timeline\s*=\s*'?latest'?\s*$/,
            @content
        ) {
            ocf_exit_reason(
                "Recovery template file must contain \"recovery_target_timeline = 'latest'\"" );
            return $OCF_ERR_ARGS;
        }

        unless (
            grep /^\s*primary_conninfo\s*=.*['\s]application_name=$nodename['\s]/,
            @content
        ) {
            ocf_exit_reason(
                'Recovery template file must contain "application_name=%s" in the primary_conninfo parameter',
                $nodename );
            return $OCF_ERR_ARGS;
        }
    }

    unless ( looks_like_number($maxlag) ) {
        ocf_exit_reason( 'maxlag is not a number: "%s"', $maxlag );
        return $OCF_ERR_INSTALLED;
    }

    # check the system user
    unless ( defined getpwnam $system_user ) {
        ocf_exit_reason( 'System user "%s" does not exist', $system_user );
        return $OCF_ERR_ARGS;
    }

    # require 9.3 minimum
    if ( $PGVERNUM < $PGVER_93 ) {
        ocf_exit_reason( 'PostgreSQL 9.3 minimum is required' );
        return $OCF_ERR_INSTALLED;
    }

    # check the binaries
    unless ( -x $PGCTL and -x $PGPSQL and -x $PGCTRLDATA and -x $PGISREADY
        and ( -x $PGWALDUMP or -x "$bindir/pg_xlogdump" )
    ) {
        ocf_exit_reason(
            "Missing one or more binaries. Check the following paths: %s, %s, %s, %s, %s or %s",
            $PGCTL, $PGPSQL, $PGCTRLDATA, $PGISREADY, $PGWALDUMP,
            "$bindir/pg_xlogdump" );
        return $OCF_ERR_ARGS;
    }

    # require wal_level >= hot_standby
    %cdata = _get_controldata();
    unless ( $cdata{'wal_level'} =~ m{hot_standby|logical|replica} ) {
        ocf_exit_reason(
            'wal_level must be one of "hot_standby", "logical" or "replica"' );
        return $OCF_ERR_ARGS;
    }

    return $OCF_SUCCESS;
}

# Start the PostgreSQL instance as a *secondary*
#
sub pgsql_start {
    my $rc         = pgsql_monitor();
    my %cdata      = _get_controldata();
    my $prev_state = $cdata{'state'};

    # Instance must be running as secondary or being stopped.
    # Anything else is an error.
    if ( $rc == $OCF_SUCCESS ) {
        ocf_log( 'info', 'Instance "%s" already started',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_SUCCESS;
    }
    elsif ( $rc != $OCF_NOT_RUNNING ) {
        ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)',
            $OCF_RESOURCE_INSTANCE, $rc );
        return $OCF_ERR_GENERIC;
    }

    #
    # From here, the instance is NOT running for sure.
    #
    ocf_log( 'debug',
        'pgsql_start: instance "%s" is not running, starting it as a secondary',
        $OCF_RESOURCE_INSTANCE );

    # Must start as a standby, so enable recovery.
    _enable_recovery();

    # Start the instance as a secondary.
    $rc = _pg_ctl_start();

    if ( $rc == 0 ) {
        # Wait for the start to finish.
        sleep 1 while ( $rc = pgsql_monitor() ) == $OCF_NOT_RUNNING;

        if ( $rc == $OCF_SUCCESS ) {
            ocf_log( 'info', 'Instance "%s" started',
                $OCF_RESOURCE_INSTANCE );

            # Check if a master score exists in the cluster.
            # During the very first start of the cluster, no master score
            # will exist on any of the existing slaves, unless an admin
            # designated one of them using crm_master. If no master score
            # exists, the cluster will not promote a master among the slaves.
            # To solve this situation, we check if there is at least one
            # master score existing on one node in the cluster. Do nothing if
            # at least one master score is found among the clones of the
            # resource. If no master score exists, set a score of 1 only if
            # the resource was a master shut down properly before the start.
            if ( $prev_state eq "shut down" and not _master_score_exists() ) {
                ocf_log( 'info', 'No master score around. Set mine to 1' );
                _set_master_score( '1' );
            }

            return $OCF_SUCCESS;
        }

        ocf_exit_reason(
            'Instance "%s" is not running as a slave (returned %d)',
            $OCF_RESOURCE_INSTANCE, $rc );

        return $OCF_ERR_GENERIC;
    }

    ocf_exit_reason( 'Instance "%s" failed to start (rc: %d)',
        $OCF_RESOURCE_INSTANCE, $rc );

    return $OCF_ERR_GENERIC;
}
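# Side note: the master score bootstrap above only matters for the very first
# start of a fresh cluster. As the comment says, an administrator can also
# seed a positive score by hand with crm_master on the designated node before
# the first start (see crm_master(8) for the exact syntax, e.g. setting a
# value of 1).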
# Stop the PostgreSQL instance
#
sub pgsql_stop {
    my $rc;
    my $state;
    my $pidfile = "$datadir/postmaster.pid";
    # Add 60s to the timeout or use a 24h timeout fallback to make sure
    # Pacemaker will give up before us and take decisions
    my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60;

    # Instance must be running as secondary or primary or being stopped.
    # Anything else is an error.
    $rc = pgsql_monitor();
    if ( $rc == $OCF_NOT_RUNNING ) {
        ocf_log( 'info', 'Instance "%s" already stopped',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_SUCCESS;
    }
    elsif ( $rc != $OCF_SUCCESS and $rc != $OCF_RUNNING_MASTER ) {
        ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)',
            $OCF_RESOURCE_INSTANCE, $rc );
        return $OCF_ERR_GENERIC;
    }

    #
    # From here, the instance is running for sure.
    #
    ocf_log( 'debug', 'pgsql_stop: instance "%s" is running, stopping it',
        $OCF_RESOURCE_INSTANCE );

    # Try to quit with proper shutdown.
    $rc = _runas( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout,
        '-m', 'fast', 'stop' );

    if ( $rc == 0 ) {
        # Wait for the stop to finish.
        sleep 1 while ( $rc = pgsql_monitor() ) != $OCF_NOT_RUNNING;

        ocf_log( 'info', 'Instance "%s" stopped', $OCF_RESOURCE_INSTANCE );

        return $OCF_SUCCESS;
    }

    ocf_exit_reason( 'Instance "%s" failed to stop', $OCF_RESOURCE_INSTANCE );

    return $OCF_ERR_GENERIC;
}
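# Reminder of the pg_isready exit codes relied on below, as documented by
# PostgreSQL: 0 = accepting connections, 1 = rejecting connections (e.g.
# during startup), 2 = no response, 3 = no attempt was made (e.g. invalid
# parameters).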
# Monitor the PostgreSQL instance
#
sub pgsql_monitor {
    my $pgisready_rc;
    my $controldata_rc;

    ocf_log( 'debug', 'pgsql_monitor: monitor is a probe' ) if ocf_is_probe();

    # First check, verify if the instance is listening.
    $pgisready_rc = _pg_isready();

    if ( $pgisready_rc == 0 ) {
        # The instance is listening.
        # We confirm that the instance is up and return if it is a primary or
        # a secondary
        ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening',
            $OCF_RESOURCE_INSTANCE );
        return _confirm_role();
    }

    if ( $pgisready_rc == 1 ) {
        # The attempt was rejected.
        # This could happen in several cases:
        #   - at startup
        #   - during shutdown
        #   - during crash recovery
        #   - if the instance is a warm standby
        # Except for the warm standby case, this should be a transitional
        # state. We try to confirm using pg_controldata.
        ocf_log( 'debug',
            'pgsql_monitor: instance "%s" rejects connections - checking again...',
            $OCF_RESOURCE_INSTANCE );
        $controldata_rc = _controldata_to_ocf();

        if ( $controldata_rc == $OCF_RUNNING_MASTER
            or $controldata_rc == $OCF_SUCCESS
        ) {
            # This state indicates that the pg_isready check should succeed.
            # We check again.
            ocf_log( 'debug',
                'pgsql_monitor: instance "%s" controldata shows a running status',
                $OCF_RESOURCE_INSTANCE );

            $pgisready_rc = _pg_isready();
            if ( $pgisready_rc == 0 ) {
                # Consistent with the pg_controldata output.
                # We can check if the instance is a primary or a secondary
                ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening',
                    $OCF_RESOURCE_INSTANCE );
                return _confirm_role();
            }

            # Still not consistent, raise an error.
            # NOTE: if the instance is a warm standby, we end here.
            # TODO: raise a hard error here?
            ocf_exit_reason(
                'Instance "%s" controldata is not consistent with pg_isready (returned: %d)',
                $OCF_RESOURCE_INSTANCE, $pgisready_rc );
            ocf_log( 'info',
                'If this instance is in warm standby, be aware that this resource agent only supports hot standby' );

            return $OCF_ERR_GENERIC;
        }

        if ( $controldata_rc == $OCF_NOT_RUNNING ) {
            # This state indicates that the pg_isready check should fail with
            # rc 2. We check again.
            $pgisready_rc = _pg_isready();
            if ( $pgisready_rc == 2 ) {
                # Consistent with the pg_controldata output.
                # We check the process status using pg_ctl status and check
                # if it was properly shut down using pg_controldata.
                ocf_log( 'debug',
                    'pgsql_monitor: instance "%s" is not listening',
                    $OCF_RESOURCE_INSTANCE );
                return _confirm_stopped();
            }
            # Still not consistent, raise an error.
            # TODO: raise a hard error here?
            ocf_exit_reason(
                'Instance "%s" controldata is not consistent with pg_isready (returned: %d)',
                $OCF_RESOURCE_INSTANCE, $pgisready_rc );

            return $OCF_ERR_GENERIC;
        }

        # Something went wrong with the controldata check, hard fail.
        ocf_exit_reason(
            'Could not get instance "%s" status from controldata (returned: %d)',
            $OCF_RESOURCE_INSTANCE, $controldata_rc );

        return $OCF_ERR_INSTALLED;
    }
    elsif ( $pgisready_rc == 2 ) {
        # The instance is not listening.
        # We check the process status using pg_ctl status and check
        # if it was properly shut down using pg_controldata.
        ocf_log( 'debug', 'pgsql_monitor: instance "%s" is not listening',
            $OCF_RESOURCE_INSTANCE );
        return _confirm_stopped();
    }
    elsif ( $pgisready_rc == 3 ) {
        # No attempt was made, probably a syntax error.
        # Hard configuration error, we don't want to retry or failover here.
        ocf_exit_reason(
            'Unknown error while checking if instance "%s" is listening (returned %d)',
            $OCF_RESOURCE_INSTANCE, $pgisready_rc );

        return $OCF_ERR_CONFIGURED;
    }

    ocf_exit_reason( 'Unexpected result when checking instance "%s" status',
        $OCF_RESOURCE_INSTANCE );

    return $OCF_ERR_GENERIC;
}
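# In short, the monitor decision tree above is:
#   pg_isready == 0 -> _confirm_role() (primary vs secondary)
#   pg_isready == 1 -> cross-check with pg_controldata, then retry pg_isready
#   pg_isready == 2 -> _confirm_stopped() (clean shutdown vs crash)
#   pg_isready == 3 -> hard configuration error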
# Demote the PostgreSQL instance from primary to secondary
# To demote a PostgreSQL instance, we must:
#   * stop it gracefully
#   * create recovery.conf with standby_mode = on
#   * start it
#
sub pgsql_demote {
    my $rc;

    $rc = pgsql_monitor();

    # Running as primary. Normal, expected behavior.
    if ( $rc == $OCF_RUNNING_MASTER ) {
        ocf_log( 'debug', 'pgsql_demote: "%s" currently running as a primary',
            $OCF_RESOURCE_INSTANCE );
    }
    elsif ( $rc == $OCF_SUCCESS ) {
        # Already running as secondary. Nothing to do.
        ocf_log( 'debug',
            'pgsql_demote: "%s" currently running as a secondary',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_SUCCESS;
    }
    elsif ( $rc == $OCF_NOT_RUNNING ) {
        # Instance is stopped. Nothing to do.
        ocf_log( 'debug', 'pgsql_demote: "%s" currently shut down',
            $OCF_RESOURCE_INSTANCE );
    }
    elsif ( $rc == $OCF_ERR_CONFIGURED ) {
        # We actually prefer raising a hard or fatal error instead of leaving
        # the CRM aborting its transition for a new one because of a soft
        # error. The hard error will force the CRM to move the resource
        # immediately.
        return $OCF_ERR_CONFIGURED;
    }
    else {
        return $OCF_ERR_GENERIC;
    }

    # TODO we need to make sure at least one slave is connected!!

    # WARNING if the resource state is stopped instead of master, the OCF RA
    # developer's guide advises returning OCF_ERR_GENERIC, misleading the CRM
    # in a loop where it computes transitions of
    # demote(failing)->stop->start->promote actions until
    # failcount == migration-threshold.
    # This is a really ugly trick to keep going with the demote action if the
    # rsc is already stopped gracefully.
    # See discussion "CRM trying to demote a stopped resource" on
    # developers@clusterlabs.org
    unless ( $rc == $OCF_NOT_RUNNING ) {
        # Add 60s to the timeout or use a 24h timeout fallback to make sure
        # Pacemaker will give up before us and take decisions
        my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60;

        # WARNING the instance **MUST** be stopped gracefully.
        # Do **not** use pg_stop() or service or systemctl here as these
        # commands might force-stop the PostgreSQL instance using immediate
        # mode after some timeout and return success, which is misleading.
        $rc = _runas( $PGCTL, '--pgdata', $pgdata, '--mode', 'fast', '-w',
            '--timeout', $timeout, 'stop' );

        # No need to wait for stop to complete, this is handled in pg_ctl
        # using the -w option.
        unless ( $rc == 0 ) {
            ocf_exit_reason( 'Failed to stop "%s" using pg_ctl (returned %d)',
                $OCF_RESOURCE_INSTANCE, $rc );
            return $OCF_ERR_GENERIC;
        }

        # Double check that the instance is stopped correctly.
        $rc = pgsql_monitor();
        unless ( $rc == $OCF_NOT_RUNNING ) {
            ocf_exit_reason(
                'Unexpected "%s" state: monitor status (%d) disagrees with pg_ctl return code',
                $OCF_RESOURCE_INSTANCE, $rc );
            return $OCF_ERR_GENERIC;
        }
    }

    #
    # At this point, the instance **MUST** be stopped gracefully.
    #

    # Note: We do not need to handle the recovery.conf file here as
    # pgsql_start deals with that itself. Equally, no need to wait for the
    # start to complete here, this is handled in pgsql_start.
    $rc = pgsql_start();
    if ( $rc == $OCF_SUCCESS ) {
        ocf_log( 'info', 'pgsql_demote: "%s" started as a secondary',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_SUCCESS;
    }

    # NOTE: No need to double check the instance state as pgsql_start already
    # uses pgsql_monitor to check the state before returning.
    ocf_exit_reason( 'Starting "%s" as a standby failed (returned %d)',
        $OCF_RESOURCE_INSTANCE, $rc );
    return $OCF_ERR_GENERIC;
}

# Promote the secondary instance to primary
#
sub pgsql_promote {
    my $rc;
    my $cancel_switchover;

    $rc = pgsql_monitor();

    if ( $rc == $OCF_SUCCESS ) {
        # Running as slave. Normal, expected behavior.
        ocf_log( 'debug',
            'pgsql_promote: "%s" currently running as a standby',
            $OCF_RESOURCE_INSTANCE );
    }
    elsif ( $rc == $OCF_RUNNING_MASTER ) {
        # Already a master. Unexpected, but not a problem.
        ocf_log( 'info', '"%s" already running as a primary',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_SUCCESS;
    }
    elsif ( $rc == $OCF_NOT_RUNNING ) {
        # INFO: this is not supposed to happen.
        # Currently not running. Need to start before promoting.
        ocf_log( 'info', '"%s" currently not running, starting it',
            $OCF_RESOURCE_INSTANCE );

        $rc = pgsql_start();
        if ( $rc != $OCF_SUCCESS ) {
            ocf_exit_reason( 'Failed to start the instance "%s"',
                $OCF_RESOURCE_INSTANCE );
            return $OCF_ERR_GENERIC;
        }
    }
    else {
        ocf_exit_reason( 'Unexpected error, cannot promote "%s"',
            $OCF_RESOURCE_INSTANCE );
        return $OCF_ERR_GENERIC;
    }

    #
    # At this point, the instance **MUST** be started as a secondary.
    #

    # Cancel the switchover if it has been considered unsafe during the
    # pre-promote action
    $cancel_switchover = _get_priv_attr('cancel_switchover');
    if ( $cancel_switchover ) { # if not empty or not 0
        ocf_exit_reason(
            'Switchover has been canceled from pre-promote action' );

        _delete_priv_attr( 'cancel_switchover' );

        return $OCF_ERR_GENERIC if $cancel_switchover eq '1';
        return $OCF_ERR_ARGS; # ban the resource from the node if we have an
                              # internal error during _check_switchover
    }
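    # For illustration, with the decimal "TL#LSN" pairs compared in the
    # election below, a higher timeline always wins: "2#83886080" beats
    # "1#100663296", while on the same timeline the larger LSN wins.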
    # Do not check for a better candidate if we try to recover the master.
    # Recovery of a master is detected during the pre-promote action. It sets
    # the private attribute 'recover_master' to '1' if this is a master
    # recovery.
    if ( _get_priv_attr( 'recover_master' ) eq '1' ) {
        ocf_log( 'info', 'Recovering old master, no election needed' );
    }
    else {

        # The promotion is occurring on the best known candidate (highest
        # master score), as chosen by pacemaker during the last working
        # monitor on the previous master (see pgsql_monitor/_check_locations
        # subs).
        # To avoid any race condition between the last monitor action on the
        # previous master and the **real** most up-to-date standby, we set
        # each standby location during the "pre-promote" action, and store
        # them using the "lsn_location" resource attribute.
        #
        # The best standby to promote would have the highest known LSN. If
        # the current resource is not the best one, we need to modify the
        # master scores accordingly, and abort the current promotion.
        ocf_log( 'debug',
            'pgsql_promote: checking if current node is the best candidate for promotion' );

        # Exclude nodes that are known to be unavailable (not in the current
        # partition) using the "crm_node" command
        my @active_nodes    = split /\s+/ => _get_priv_attr( 'nodes' );
        my $node_to_promote = '';
        my $ans;
        my $max_tl;
        my $max_lsn;
        my $node_tl;
        my $node_lsn;
        my $wal_num;
        my $wal_off;

        # Get the "lsn_location" attribute value for the current node, as set
        # during the "pre-promote" action.
        # It should be the greatest among the secondary instances.
        $ans = _get_priv_attr( 'lsn_location' );

        if ( $ans eq '' ) {
            # This should not happen as the "lsn_location" attribute should
            # have been updated during the "pre-promote" action.
            ocf_exit_reason( 'Can not get current node LSN location' );
            return $OCF_ERR_GENERIC;
        }

        chomp $ans;
        ( $max_tl, $max_lsn ) = split /#/, $ans;

        ocf_log( 'debug',
            'pgsql_promote: current node TL#LSN location: %s#%s',
            $max_tl, $max_lsn );

        # Now we compare with the other available nodes.
        foreach my $node ( @active_nodes ) {
            # We exclude the current node from the check.
            next if $node eq $nodename;

            # Get the "lsn_location" attribute value for the node, as set
            # during the "pre-promote" action.
            # This is implemented as a loop as private attributes are
            # asynchronously available from other nodes.
            # See: https://github.com/ClusterLabs/PAF/issues/131
            # NOTE: if a node did not set its lsn_location for some reason,
            # this will end with a timeout and the whole promotion will start
            # again.
            WAIT_FOR_LSN: {
                $ans = _get_priv_attr( 'lsn_location', $node );
                if ( $ans eq '' ) {
                    ocf_log( 'info',
                        'pgsql_promote: waiting for LSN from %s', $node );
                    select( undef, undef, undef, 0.1 );
                    redo WAIT_FOR_LSN;
                }
            }

            chomp $ans;
            ( $node_tl, $node_lsn ) = split /#/, $ans;

            ocf_log( 'debug',
                'pgsql_promote: comparing with "%s": TL#LSN is %s#%s',
                $node, $node_tl, $node_lsn );

            # If the node has a higher LSN, select it as the best candidate
            # for promotion and keep looping to check the TL/LSN of other
            # nodes.
            if ( $node_tl > $max_tl
                or ( $node_tl == $max_tl and $node_lsn > $max_lsn )
            ) {
                ocf_log( 'debug',
                    'pgsql_promote: "%s" is a better candidate to promote (%s#%s > %s#%s)',
                    $node, $node_tl, $node_lsn, $max_tl, $max_lsn );
                $node_to_promote = $node;
                $max_tl          = $node_tl;
                $max_lsn         = $node_lsn;
            }
        }
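        # If a better node was found, the score changes below (1 for the
        # local node, 1000 for the best candidate) make Pacemaker retry the
        # promotion on that candidate; failing this action is what triggers
        # the new transition.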
        # If any node has been selected, we adapt the master scores
        # accordingly and break the current promotion.
        if ( $node_to_promote ne '' ) {
            ocf_exit_reason(
                '%s is the best candidate to promote, aborting current promotion',
                $node_to_promote );

            # Reset the current node master score.
            _set_master_score( '1' );

            # Set the promotion candidate master score.
            _set_master_score( '1000', $node_to_promote );

            # We fail the promotion to trigger another promotion transition
            # with the new scores.
            return $OCF_ERR_GENERIC;
        }

        # Else, we will keep on promoting the current node.
    }

    unless (
        # Promote the instance on the current node.
        _runas( $PGCTL, '--pgdata', $pgdata, '-w', 'promote' ) == 0
    ) {
        ocf_exit_reason( 'Error during promotion command' );
        return $OCF_ERR_GENERIC;
    }

    # The instance promotion is asynchronous, so we need to wait for this
    # process to complete.
    while ( pgsql_monitor() != $OCF_RUNNING_MASTER ) {
        ocf_log( 'info', 'Waiting for the promote to complete' );
        sleep 1;
    }

    ocf_log( 'info', 'Promote complete' );

    return $OCF_SUCCESS;
}
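# For reference, the %OCF_NOTIFY_ENV hash used by the notify subs below is a
# parsed view of the notification environment (a rough sketch, built by this
# module's helpers):
#   type      => 'pre' or 'post',
#   operation => 'promote', 'demote', 'start', 'stop', ...
#   promote / demote / start / stop / active => lists of { uname => ... }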
# This action is called **before** the actual promotion when a failing master
# is considered unreclaimable or recoverable, or when a new master must be
# promoted (switchover or first start).
# As every "notify" action, it is executed almost simultaneously on all
# available nodes.
sub pgsql_notify_pre_promote {
    my $rc;
    my $node_tl;
    my $node_lsn;
    my %cdata;
    my %active_nodes;
    my $attr_nodes;

    ocf_log( 'info', 'Promoting instance on node "%s"',
        $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} );

    # No need to do an election between slaves if this is a recovery of the
    # master
    if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) {
        ocf_log( 'warning', 'This is a master recovery!' );

        _set_priv_attr( 'recover_master', '1' )
            if $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename;

        return $OCF_SUCCESS;
    }

    # Environment cleanup!
    _delete_priv_attr( 'lsn_location' );
    _delete_priv_attr( 'recover_master' );
    _delete_priv_attr( 'nodes' );
    _delete_priv_attr( 'cancel_switchover' );

    # Check for the last received entry of WAL from the master if we are
    # the designated slave to promote
    if ( _is_switchover( $nodename ) and scalar
        grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'promote'} }
    ) {
        $rc = _check_switchover();

        unless ( $rc == 0 ) {
            # Shortcut the election process as the switchover will be
            # canceled
            _set_priv_attr( 'cancel_switchover', $rc );
            return $OCF_SUCCESS; # return code is ignored during notify
        }

        # If the sub keeps going, that means the switchover is safe.
        # Keep going with the election process in case the switchover was
        # instructed to the wrong node.
        # FIXME: should we allow a switchover to a lagging slave?
    }

    # We need to trigger an election between the existing slaves to promote
    # the best one based on its current LSN location. Each node sets a
    # private attribute "lsn_location" with its TL and LSN location.
    #
    # During the following promote action, the designated standby for
    # promotion uses these attributes to check if the instance to be promoted
    # is the best one, so we can avoid a race condition between the last
    # successful monitor on the previous master and the current promotion.

    # As we can not break the transition from a notification action, we check
    # during the promotion if each node TL and LSN are valid.

    # Force a checkpoint to make sure the controldata shows the very last TL
    _query( q{ CHECKPOINT }, {} );
    %cdata    = _get_controldata();
    $node_lsn = _get_last_received_lsn( 'in decimal' );

    unless ( defined $node_lsn ) {
        ocf_log( 'warning', 'Unknown current node LSN' );
        # Return codes are ignored during notifications...
        return $OCF_SUCCESS;
    }

    $node_lsn = "$cdata{'tl'}#$node_lsn";

    ocf_log( 'info', 'Current node TL#LSN: %s', $node_lsn );

    # Set the "lsn_location" attribute value for this node so we can use it
    # during the following "promote" action.
    _set_priv_attr( 'lsn_location', $node_lsn );

    ocf_log( 'warning', 'Could not set the current node LSN' )
        if $? != 0;

    # If this node is the future master, keep track of the slaves that
    # received the same notification to compare our LSN with them during the
    # promotion
    if ( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename ) {
        # Build the list of active nodes:
        #   master + slave + start - stop
        # FIXME: Deal with rsc started during the same transaction but
        # **after** the promotion ?
        $active_nodes{ $_->{'uname'} }++
            foreach @{ $OCF_NOTIFY_ENV{'active'} },
                    @{ $OCF_NOTIFY_ENV{'start'} };
        $active_nodes{ $_->{'uname'} }--
            foreach @{ $OCF_NOTIFY_ENV{'stop'} };

        $attr_nodes = join " "
            => grep { $active_nodes{$_} > 0 } keys %active_nodes;

        _set_priv_attr( 'nodes', $attr_nodes );
    }

    return $OCF_SUCCESS;
}

# This action is called after a promote action.
sub pgsql_notify_post_promote {

    # We have a new master (or the previous one recovered).
    # Environment cleanup!
    _delete_priv_attr( 'lsn_location' );
    _delete_priv_attr( 'recover_master' );
    _delete_priv_attr( 'nodes' );
    _delete_priv_attr( 'cancel_switchover' );

    return $OCF_SUCCESS;
}

# This is called before a demote occurs.
sub pgsql_notify_pre_demote {
    my $rc;
    my %cdata;

    # do nothing if the local node will not be demoted
    return $OCF_SUCCESS unless scalar
        grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'demote'} };

    $rc = pgsql_monitor();

    # do nothing if this is not a master recovery
    return $OCF_SUCCESS
        unless _is_master_recover( $nodename ) and $rc == $OCF_FAILED_MASTER;

    # In case of a master crash, we need to detect if the CRM tries to
    # recover the master clone. The usual transition is to do:
    #   demote->stop->start->promote
    #
    # There are multiple flaws with this transition:
    #   * the 1st and 2nd actions will fail because the instance is in the
    #     OCF_FAILED_MASTER state
    #   * the usual start action is dangerous as the instance will start with
    #     a recovery.conf instead of entering a normal recovery process
    #
    # To avoid this, we try to start the instance in recovery from here.
    # If it succeeds, at least it will be demoted correctly with a normal
    # status. If it fails, it will be caught in the next steps.

    ocf_log( 'info', 'Trying to start failing master "%s"...',
        $OCF_RESOURCE_INSTANCE );

    # Either the instance managed to start or it couldn't.
    # We rely on the pg_ctl '-w' switch to take care of this. If it couldn't
    # start, this error will be caught later during the various checks
    _pg_ctl_start();

    %cdata = _get_controldata();

    ocf_log( 'info', 'State is "%s" after recovery attempt',
        $cdata{'state'} );

    return $OCF_SUCCESS;
}

# This is called before a stop occurs.
sub pgsql_notify_pre_stop {
    my $rc;
    my %cdata;

    # do nothing if the local node will not be stopped
    return $OCF_SUCCESS unless scalar
        grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'stop'} };

    $rc = _controldata_to_ocf();

    # do nothing if this is not a slave recovery
    return $OCF_SUCCESS
        unless _is_slave_recover( $nodename ) and $rc == $OCF_RUNNING_SLAVE;

    # In case of a slave crash, we need to detect if the CRM tries to recover
    # the slave clone. The usual transition is to do: stop->start
    #
    # This transition cannot work because the instance is in the
    # OCF_ERR_GENERIC state. So the stop action will fail, most probably
    # leading to a fencing action.
    #
    # To avoid this, we try to start the instance in recovery from here.
    # If it succeeds, at least it will be stopped correctly with a normal
    # status. If it fails, it will be caught in the next steps.
    ocf_log( 'info', 'Trying to start failing slave "%s"...',
        $OCF_RESOURCE_INSTANCE );

    # Either the instance managed to start or it couldn't.
    # We rely on the pg_ctl '-w' switch to take care of this. If it couldn't
    # start, this error will be caught later during the various checks
    _pg_ctl_start();

    %cdata = _get_controldata();

    ocf_log( 'info', 'State is "%s" after recovery attempt',
        $cdata{'state'} );

    return $OCF_SUCCESS;
}

# Notify type actions, called on all available nodes before (pre) and after
# (post) other actions, like promote, start, ...
#
sub pgsql_notify {
    my $type_op;

    ocf_log( 'debug', "pgsql_notify: environment variables: %s",
        Data::Dumper->new( [ \%OCF_NOTIFY_ENV ] )->Sortkeys(1)->Terse(1)->Dump );

    return unless %OCF_NOTIFY_ENV;

    $type_op = "$OCF_NOTIFY_ENV{'type'}-$OCF_NOTIFY_ENV{'operation'}";

    for ( $type_op ) {
        if    ( /^pre-promote$/  ) { return pgsql_notify_pre_promote() }
        elsif ( /^post-promote$/ ) { return pgsql_notify_post_promote() }
        elsif ( /^pre-demote$/   ) { return pgsql_notify_pre_demote() }
        elsif ( /^pre-stop$/     ) { return pgsql_notify_pre_stop() }
    }

    return $OCF_SUCCESS;
}

# Action used to allow for online modification of resource parameter values.
#
sub pgsql_reload {

    # No action necessary, the action declaration is enough to inform
    # pacemaker that the modification of any non-unique parameter can be
    # applied without having to restart the resource.
    ocf_log( 'info', 'Instance "%s" reloaded', $OCF_RESOURCE_INSTANCE );
    return $OCF_SUCCESS;

}

############################################################
#### MAIN

exit ocf_meta_data() if $__OCF_ACTION eq 'meta-data';
exit ocf_methods()   if $__OCF_ACTION eq 'methods';

# Avoid "could not change directory" when executing commands as "system-user".
chdir File::Spec->tmpdir();

# Mandatory sanity checks

# check pgdata
if ( ! -d $pgdata ) {
    ocf_exit_reason( 'PGDATA "%s" does not exist', $pgdata );
    exit $OCF_ERR_ARGS;
}

# check datadir
if ( ! -d $datadir ) {
    ocf_exit_reason( 'data_directory "%s" does not exist', $datadir );
    exit $OCF_ERR_ARGS;
}

# Set the PostgreSQL version
$PGVERNUM = _get_pg_version();
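# Note: the ocft-based test suite shipped in t/ (see t/README below) exercises
# these actions (start, stop, promote, demote, monitor) against a throwaway
# instance, which is a convenient way to validate an installation by hand.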
# Set current node name.
$nodename = ocf_local_nodename();

$exit_code = pgsql_validate_all();

exit $exit_code
    if $exit_code != $OCF_SUCCESS or $__OCF_ACTION eq 'validate-all';

# Run action
for ( $__OCF_ACTION ) {
    if    ( /^start$/   ) { $exit_code = pgsql_start() }
    elsif ( /^stop$/    ) { $exit_code = pgsql_stop() }
    elsif ( /^monitor$/ ) { $exit_code = pgsql_monitor() }
    elsif ( /^promote$/ ) { $exit_code = pgsql_promote() }
    elsif ( /^demote$/  ) { $exit_code = pgsql_demote() }
    elsif ( /^notify$/  ) { $exit_code = pgsql_notify() }
    elsif ( /^reload$/  ) { $exit_code = pgsql_reload() }
    else                  { $exit_code = $OCF_ERR_UNIMPLEMENTED }
}

exit $exit_code;

=head1 EXAMPLE CRM SHELL

The following is an example configuration for a pgsqlms resource using the
crm(8) shell:

  primitive pgsqld pgsqlms \
    params pgdata="/var/lib/postgresql/9.6/main" \
    bindir="/usr/lib/postgresql/9.6/bin" \
    pghost="/var/run/postgresql" \
    recovery_template="/etc/postgresql/9.6/main/recovery.conf.pcmk" \
    start_opts="-c config_file=/etc/postgresql/9.6/main/postgresql.conf" \
    op start timeout=60s \
    op stop timeout=60s \
    op promote timeout=30s \
    op demote timeout=120s \
    op monitor interval=15s timeout=10s role="Master" \
    op monitor interval=16s timeout=10s role="Slave" \
    op notify timeout=60s

  ms pgsql-ha pgsqld meta notify=true

=head1 EXAMPLE PCS

The following is an example configuration for a pgsqlms resource using pcs(8):

  pcs resource create pgsqld ocf:heartbeat:pgsqlms \
    bindir=/usr/pgsql-9.6/bin pgdata=/var/lib/pgsql/9.6/data \
    op start timeout=60s \
    op stop timeout=60s \
    op promote timeout=30s \
    op demote timeout=120s \
    op monitor interval=15s timeout=10s role="Master" \
    op monitor interval=16s timeout=10s role="Slave" \
    op notify timeout=60s --master notify=true

=head1 SEE ALSO

http://clusterlabs.org/

=head1 AUTHOR

Jehan-Guillaume de Rorthais and Mael Rimbault.

=cut
PAF-2.3.0/t/000077500000000000000000000000001363154243400123725ustar00rootroot00000000000000PAF-2.3.0/t/README000066400000000000000000000021311363154243400132500ustar00rootroot00000000000000# Testing the resource agent

1/ you need to install the resource-agents package of your distribution.
   This package contains the "ocft" script needed to run these tests.

2/ if needed, export the following environment variables:

   PGDATA (default is /tmp/pgdata1)
     Where to create the PostgreSQL instance used for the tests

   PGBIN (default is /usr/pgsql-9.3/bin)
     Location of the PostgreSQL binaries

   RESOURCE_NAME (default is pgsqld)
     The resource name used by the RA

   PGHOST (default is /tmp)
     IP address or socket directory to connect to the server

   Warning: the instance in PGDATA is created for the tests, then __destroyed__.

3/ Make sure the HA_RSCTMP folder exists. On most Linux distributions it is
   located in "/var/run/resource-agents/".
4/ to build the test scripts, run the following command as root:

   ocft make pgsqlms

5/ to actually run the tests, run the following command as root:

   ocft test pgsqlms

6/ to get some debug output, run:

   ocft test -v pgsqlms

For more information, see: ocft -h
PAF-2.3.0/t/pgsqlms000066400000000000000000000054231363154243400140070ustar00rootroot00000000000000#: {PGBIN=/usr/lib/postgresql/9.3/bin}

CONFIG
	Agent pgsqlms
	AgentRoot /usr/lib/ocf/resource.d/heartbeat
	HangTimeout 120

VARIABLE
	PGDATA=${PGDATA=/tmp/pgdata1}
	PGBIN=${PGBIN=/usr/pgsql-9.3/bin}
	PGHOST=${PGHOST=/tmp}
	RESOURCE_NAME=${RESOURCE_NAME=pgsqld}
	NODENAME=$(uname -n)

CASE-BLOCK cleanup
	sudo -iu postgres "$PGBIN"/pg_ctl -D "$PGDATA" -w -m immediate stop &> /dev/null
	rm -rf ${PGDATA:?}

SETUP-AGENT
	Include cleanup
	echo "PGBIN: $PGBIN" >&2
	sudo -iu postgres mkdir -p "$PGDATA"
	sudo -iu postgres "$PGBIN"/initdb --nosync -D "$PGDATA" &> /dev/null
	cat <<-EOC>> "$PGDATA"/postgresql.conf
		listen_addresses = '*'
		wal_level = hot_standby
		max_wal_senders = 5
		hot_standby = on
		hot_standby_feedback = on
		wal_receiver_status_interval = 20s
	EOC
	cat <<-EOC > "$PGDATA"/recovery.conf.pcmk
		standby_mode = 'on'
		# Fake master conninfo!
		primary_conninfo = 'user=postgres host=127.0.0.1 port=15432 application_name=$NODENAME'
		recovery_target_timeline = 'latest'
	EOC
	cat <<-EOC>> "$PGDATA"/pg_hba.conf
		host replication postgres 0.0.0.0/0 trust
	EOC
	sudo -iu postgres "$PGBIN"/pg_ctl -D "$PGDATA" -w start &> /dev/null
	sudo -iu postgres "$PGBIN"/pg_ctl -D "$PGDATA" -w -m fast stop &> /dev/null

CLEANUP-AGENT
	Include cleanup

CASE-BLOCK required_args
	Env OCF_RESKEY_bindir=$PGBIN
	Env OCF_RESKEY_pgdata=$PGDATA
	Env OCF_RESKEY_pghost=$PGHOST
	Env OCF_RESOURCE_INSTANCE=$RESOURCE_NAME

CASE-BLOCK prepare
	Include required_args
	AgentRun stop

CASE "check validate-all"
	Include prepare
	AgentRun validate-all OCF_SUCCESS

CASE "check stopped monitor"
	Include prepare
	AgentRun monitor OCF_NOT_RUNNING

CASE "check start"
	Include prepare
	AgentRun start OCF_SUCCESS

CASE "check double start"
	Include prepare
	AgentRun start
	AgentRun start OCF_SUCCESS

CASE "check stop"
	Include prepare
	AgentRun start
	AgentRun stop OCF_SUCCESS

CASE "check double stop"
	Include prepare
	AgentRun start
	AgentRun stop
	AgentRun stop OCF_SUCCESS

CASE "check slave monitor"
	Include prepare
	AgentRun start
	AgentRun monitor OCF_SUCCESS

CASE "check promote"
	Include prepare
	AgentRun start
	AgentRun promote OCF_SUCCESS

CASE "check double promote"
	Include prepare
	AgentRun start
	AgentRun promote
	AgentRun promote OCF_SUCCESS

CASE "check master monitor"
	Include prepare
	AgentRun start
	AgentRun promote
	AgentRun monitor OCF_RUNNING_MASTER

CASE "check demote"
	Include prepare
	AgentRun start
	AgentRun promote
	AgentRun demote
	AgentRun monitor OCF_SUCCESS

CASE "check double demote"
	Include prepare
	AgentRun start
	AgentRun promote
	AgentRun demote
	AgentRun demote
	AgentRun monitor OCF_SUCCESS
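# Possible extension (illustrative only, not part of the shipped suite): since
# pgsql_demote starts a stopped instance back as a secondary and returns
# success, a case like the following could be added:
#
# CASE "check demote of a stopped instance"
#	Include prepare
#	AgentRun demote OCF_SUCCESS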