pax_global_header00006660000000000000000000000064120336322320014506gustar00rootroot0000000000000052 comment=dc69db5a6d203ebd230201167f012e43784f8d23 ClusterLabs-resource-agents-dc69db5/000077500000000000000000000000001203363223200175375ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/.gitattributes000066400000000000000000000001031203363223200224240ustar00rootroot00000000000000configure.ac export-subst heartbeat/ocf-shellfuncs.in export-subst ClusterLabs-resource-agents-dc69db5/.gitignore000066400000000000000000000014261203363223200215320ustar00rootroot00000000000000*.swp Makefile.in aclocal.m4 autoconf autoheader autom4te.cache automake autoscan.log compile configure configure.scan config.guess config.log config.sub config.status Makefile depcomp install-sh libtoolize ltmain.sh libtool make/stamp-h1 m4 make/clusterautoconfig.h* missing *.pc .deps .libs *.o *.la *.lo *.loT rgmanager/src/resources/fs.sh rgmanager/src/resources/oracledb.sh rgmanager/src/resources/utils/config-utils.sh resource-agents-* .version *.cache *.upgrade.xml py-compile ylwrap # BEAM Entries *.beam parser-messages MISC_ERRORS cscope.files cscope.out patches updates logs # OS and Editor Artifacts .DS_Store .bomb *.rej *.bz2 *.gz *.xz *.sed *.diff *.patch *.gres *~ # Misc HTML TAGS GPATH GRTAGS GSYMS GTAGS .gres.* *.orig .gdb_history *~ \#* .changes pacemaker.tar.gz ClusterLabs-resource-agents-dc69db5/AUTHORS000066400000000000000000000042501203363223200206100ustar00rootroot00000000000000Authors: -------- A. J. Lewis Abhijith Das Adam Manthei Akamatsu Hiroshi Alan Robertson Alasdair G. Kergon Andrew Beekhof Andrew Price Benjamin Marzinski Bob Peterson Chris Feist Christian Rishoj Christine Caulfield Daiki Matsuda Daniel Phillips David Lee David Teigland Dejan Muhamedagic Dominik Klein Fabio M. Di Nitto Florian Haas Hideo Yamauchi Horms Huang Zhen James Parsons Jean-Francois Larvoire Joel Becker Jonathan Brassow Keisuke MORI Ken Preslan Lars Marowsky-Bree Lon Hohberger Marc - A. 
Dahlhaus Marek 'marx' Grac Mark Hlawatschek Matthew Soffen Michael Conrad Tadpol Tilstra Michael Schwartzkopff Nakahira Kazutomo Patrick Caulfield Philipp Kolmann Raoul Bhatia Robert Peterson Ron Terry Ross Vandegrift Ryan McCabe Ryan O'Hara Sebastian Reitenbach Serge Dubrouski Simon Horman Stanko Kupcevic Stephan Berlet Steven Whitehouse Takenaka Kazuhiro Wendy Cheng Xinwei Hu jparsons ClusterLabs-resource-agents-dc69db5/COPYING000066400000000000000000000431031203363223200205730ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. 
These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. 
(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ClusterLabs-resource-agents-dc69db5/COPYING.GPLv3000066400000000000000000001045131203363223200214700ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. 
Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. 
To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. 
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. 
You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>. 
ClusterLabs-resource-agents-dc69db5/ChangeLog000066400000000000000000000465041203363223200213220ustar00rootroot00000000000000* Fri May 25 2012 Linux-HA contributors - stable release 3.9.3 - dhcpd: new RA to manage ISC DHCP servers - Filesystem: add nfs4 to the list of well known types - IPaddr2: fix regression introduced in d93b5fd, nic=lo always "stopped" - iSCSILogicalUnit: correctly match for target IQN and backing device name (iet and tgt) - jboss: implement rotating of console log - mysql: improve handling of reset slave - oracle, oralsnr: get rid of eval - slapd: pass bind_dn correctly to ldapsearch * Wed May 16 2012 Linux-HA contributors - release candidate 3.9.3 rc1 - asterisk: new resource agent - named: new RA to manage bind servers - pound: new RA for Pound HTTP/HTTPS reverse-proxy and load-balancer - rsyslog: new RA to manage rsyslog servers - slapd: new RA to manage OpenLDAP servers - varnish: new resource agent - apache: add support for IPv6 in monitor - apache: create /var/run/apache2 if it doesn't exist - apache: fix sysconfig includes & enable status for default SUSE conf - conntrackd: test for socket existence in monitor instead of process grep - conntrackd: rename parameter "conntrackd" to "binary" - CTDB: Add smb_fileid_algorithm parameter (bnc#696978) - CTDB: Improve monitor op (check output of ctdb status, bnc#712192) - CTDB: Set ctdb_start_as_disabled=no by default (bnc#712410, required by samba 3.6) - exportfs: allow expanding the fsid parameter to produce correct exportfs options - exportfs: don't grow /var/lib/nfs/rmtab indefinitely - exportfs: fix monitor action for special characters and common suffixes - Filesystem: add support for glusterfs (lf#2620) - Filesystem: add tmpfs to the list of supported filesystems - Filesystem: allow to force cloning for local mounts - Filesystem: don't use direct dd option in monitor depth 20 for non-blockdevice fs - Filesystem: fix determining if the device is a block device - Filesystem: improve 
read/write checks for CHECK_LEVEL 10, 20 - Filesystem: repair the fast_stop parameter use (its value was always false) - Filesystem: support ceph - Filesystem: remove a status file only when OCF_CHECK_LEVEL is set to 20 - IPaddr: add back the local_start/stop_script code - IPaddr: remove colon at the end of the interface name - IPv6addr: always use the provided nic and cidr_netmask when specified - IPv6addr: handle a link-local address properly in send_ua - iscsi: do not rely on iscsid.startup being set correctly (bnc#751783) - iscsi: proceed if iscsid is not running if iscsid.startup is present in iscsid.conf - iSCSILogicalUnit: fix default for scsi_sn - iSCSITarget: treat an empty "implementation" parameter specially - jboss: add the java_opts parameter for java options - ldirectord: precedence error with perl v5.8.8 in IPv6 code - LVM: drop vgck(8) from monitor - LVM: force dmevent monitoring for clones - LVM: use ls instead of vgdisplay in status - lxc: fix LXC_status to work with lxc-0.7.5 or later - mysql: improve replication support - mysql: check mysql status more thoroughly before stopping - mysql: fix validation return codes - mysql: support 5.5 slave status message format - nfsserver: Support of multiple IP addresses (bnc#684143) - nfsserver: don't run sm-notify in foreground (bnc#759616) - ocf-shellfuncs: fix loglevel variable scope in ha_log - ocft: new tests for named, IPv6addr, oracle, Xinetd - ocft: several improvements - oracle: improve managing IPC objects - oracle: improve matching instance specific files and processes - pgsql: support for replication - postfix: multiple fixes - Raid1: support for multiple MD arrays, as specified in raidconf - SAPDatabase: add support for Sybase ASE and SAP HANA database - SAPDatabase: correcting the unique values of RAs parameters - SAPDatabase: replace method for checking responsiveness of saphostexec - SAPDatabase: version 2.00 make use of saphostagent - SAPInstance : correcting the unique values of RAs 
parameters - slapd: always set the exit code correctly in monitor - tomcat: remove pidfile before start, it may prevent some tomcat releases from starting - VirtualDomain: add a functionality that modifies utilization of resource automatically - VirtualDomain: if the configuration file is missing on stop exit with success - VirtualDomain: honor virsh "in shutdown" state - Xen: add support for HVM ACPI graceful shutdown - Xen: wait in migrate_from for the migration to finish instead of bailing out immediately - Tools: findif: Use most specific matching route (bnc#740738) - Tools: send_arp.libnet: fix for big endian platforms (bnc#721334) - doc: add the RA developer's guide * Wed Jun 29 2011 Dejan Muhamedagic and others - stable release 3.9.2 - ethermon: new resource agent - iscsi: fix regression in 3.9.1 for open-iscsi version 2.0-872 (lf#2562) - pgsql: fix regression in 3.9.1 in directories on probes - VirtualDomain: if there's no config exit on stop with success - doc: add sfex_init(8) man page * Wed Jun 15 2011 Dejan Muhamedagic and others - stable release 3.9.1 - ocf-tester: tolerate OCF_ERR_INSTALLED on probes and missing binaries - pgsql: improve configuration check and probe handling * Wed Jun 01 2011 Dejan Muhamedagic and others - release candidate 3.9.1 rc1 - first release since establishing joined repository with RHCS agents - build: new spec file and autoconf to support both agents' sets - build: use ./configure --with-ras-set=linux-ha to configure for heartbeat RA set - build: create compatibility symlinks in autofoo not in spec - build: GNUmakefile removed - lxc: new RA to manage lxc linux containers - symlink: new RA to manage symbolic links - db2: new implementation with master/slave mode - oracle: improve oracle process list test (bnc#673027) - exportfs: backup and restore rmtab to ensure smooth client failover on node failures - CTDB: Allow stop to succeed when using pkill on ctdbd (bnc#695829) - mysql: --skip-slave-start option is default now - 
mysql: set connect timeout to 10 seconds rather than 1 second - mysql: keep replication state (prevents data loss on master reset) - mysql: don't rely on state information from pacemaker, but check if the instance is in the read-only mode - mysql: if test parameters are all set, assume OCF_CHECK_LEVEL=10 - mysql: support for master/slave for more than two nodes - mysql: don't wait for replication to finish, when not replicating - mysql: store replication state in separate attributes for each master - VirtualDomain: correctly create migration URI when target is an FQDN - VirtualDomain: properly wait until domain_name is non-empty - ldirectord: add a support of "netmask" directive for IPv6 - ldirectord: fix fwmark behavior for IPv6 - ldirectord: ignore children in Net::DNS - iscsi: add support for open-iscsi version 2.0-872 (lf#2562) - postfix: issue error if 'postfix abort' failed - postfix: improve exit codes on installation problems - postfix: use monitor to test if postfix works after the start action - ocft: fix make command for compatibility with mawk/Debian (lf#2600) - ocft: test case for pgsql - ocft: test case for postfix - ocft: test case for iscsi - doc: improve man pages output - doc: add examples for master/slave resource agents * Wed Feb 16 2011 Dejan Muhamedagic and others - stable release 1.0.4 - ocft: testcases for db2, LVM, and Filesystem * Fri Feb 11 2011 Dejan Muhamedagic and others - release candidate 1.0.4 - add GPLv3 license file (bnc#655700) - ocf-shellfuncs: allow ocf_run to return the actual exit code - ocf-shellfuncs: handle properly syslog facility set to none (bnc#621818) - ocf-shellfuncs: correctly identify root by id only (bnc#602312) - RA: add OCF_ROOT/lib/heartbeat directory (development) - RA: set the HA_RSCTMP directory to /var/run/resource-agents (lf#2378) - build: install jboss - conntrackd: new RA - exportfs: new RA - nginx: new RA - fio: new RA for IO load simulation - Filesystem: allow cloning of some filesystems as read-only 
(lf#2440) - Filesystem: add fast_stop parameter (lf#2402) - Filesystem: Clarify metadata and improve non-clone warning - Filesystem: new run_fsck parameter - LVM: add partial_activation parameter (lf#2490) - IPaddr2: fix reference to Infiniband arping binary (bnc#668447) - IPaddr2: optionally flush kernel routing table on interface stop - IPaddr2: exit with the right code when not properly configured - IPaddr2: exit early and with the right code if the ip parameter is not set - IPaddr2: unique_clone_address should work without CIP (lf#2442) - IPaddr: return the correct code if interface delete failed - IPv6addr: allow link-local addresses in case the interface name is provided - IPv6addr: interface index in /proc/net/if_inet6 may be longer than 2 chars (lf#2462) - IPsrcaddr: exit with the right code when not properly configured - IPsrcaddr: add the cidr_netmask parameter - Tools: findif: differentiate between error conditions - nfsserver: fix the default string for the notification parameter - nfsserver: don't use -v in the notify cmd with rpc.statd - iSCSITarget: fix race for target IDs when using IET (lf#2432) - iSCSITarget: follow changed IET access policy - Raid1: Support attempting to re-add mirrors on deep monitor action (bnc#619121) - Raid1: Fix graceful stop code path - Raid1: Handle stop for failed arrays properly (bnc#618775) - sfex: output log messages also to stderr in sfex_init - sfex: add the sfex_stat command - sfex: wait in the start and stop actions until sfex_daemon starts/exits - Xen: implement stop of a migrating domain (bnc#656227) - Xen: check the allow_mem_management boolean properly (bnc#637525) - Xen: Always run destroy in stop sequence. - Xen: use xen-list command for status check if available (bnc#628735) - Xen: use xen-destroy for stop, if available. 
- Xen: Allow node configurable attribute to specify which IP to use for live migration (bnc#628735) - VirtualDomain: fix spurious stop failures - VirtualDomain: don't timeout in stop before escalating to "forced stop" - ManageVE: add migration capability - MailTo: don't check if user exists for email address (might be an alias or remote) - CTDB: Remove hard-coded timeout on start op - CTDB: Don't manage Samba and Winbind by default - CTDB: Deprecate (and make optional) smb_private_dir param (bnc#623788) - tomcat: Ensure name of tomcat resource is only used on start operation and expose JAVA_OPTS variable for use - tomcat: Fix to ensure default OCF_RESKEY_xx values are observed - tomcat: Add CATALINA_BASE parameter, defaults to CATALINA_HOME, permits multiple tomcat instances - tomcat: Use Tomcat stop TIMEOUT -force to improve stop - Dummy: migrate_from/to: correct OCF_RESKEY_CRM_meta_migrate_xxx variable names - Dummy: make method reload work - anything: add the workdir parameter - mysql: clone and master-slave functionality - mysql: add replication monitoring - mysql: check for write permissions after creating pid and socket directory - mysql: make client binary path configurable - pgsql: cd to pgdata before running commands (fixes permission error) - pgsql: add optional username, password, and sqlcode parameters for monitor - pgsql: add new "config" parameter - pgsql: properly implement pghost parameter - pgsql: socketdir parameter to manage non-default UNIX socket directories - oracle: reduce output from sqlplus to the last line for queries (bnc#567815) - db2: Replace call to db2_local_ps with db2nps - db2: guard against a hanging db2stop by spawning this into the background. Use db2_kill after grace period. 
- db2: add multi partition support - db2: improve behaviour on probes - db2: support for v9.x instances (bnc#608952) - SAPDatabase,SAPInstance: improve LD_LIBRARY_PATH processing (bnc#640026) - SAPInstance: prevent premature expansion of [:upper:] [:lower:] when producing sidadm uid - SAPInstance: Moved testing of SAP profile directory and START profile to a later stage (only when needed), for more robustness - SAPInstance: fix return codes in probes - SAPInstance: New parameter: SHUTDOWN_METHOD - SAPInstance: ensure enqueue failover in monitor_clone on process failure - SAPInstance: don't rely on op target rc when monitoring clones (lf#2371) - SAPDatabase: prevent premature expansion of [:upper:] and [:lower:] when producing sidadm/orasid/db2sid uids - SAPdatabase: Changed Oracle recovery method from "recover automatic database" to "end backup" - SAPDatabase: Adapt process search pattern for DB/2 9.5 - SAPDatabase: start listener only if database processes are found - SAPDatabase: avoid continuous output to syslog in monitor with SAP 7.20 and J2EE_ONLY=1 - ldirectord: http: connect to server instead of protocol (Debian#594958) - ldirectord: add implicit support for submission RFC4409 - ldirectord: example configuration for a submission virtual service - ldirectord: Shutdown write-side of client connection after writing has finished - ldirectord: port number mismatch of imaps and pops - ldirectord: Oracle compatibility - ldirectord: don't exit on timeout in HTTP/HTTPS check - ldirectord: allow underscore in service name - ldirectord: use $1 instead of \1 in pattern replace (bnc#605086) - Tools: ocf-tester: Extend to cover initial probe (monitor_0) test. - Tools: ocf-tester: set and export some common meta variables (lf#2524) - Tools: ocf-tester: meta-data also should never be affected by missing binaries. 
- Tools: ocf-tester: show output from the agent in case of error * Tue Apr 13 2010 Dejan Muhamedagic and others - stable release 1.0.3 - meta-data: improve timeouts in most resource agents (reduce the number of warnings by the shell) - RA: log messages to stderr if attached to a terminal - ocf-shellfuncs: tests to check for clone/ms resources - ocf-shellfuncs: don't output to stderr if using syslog (prevents double logging from the RA and lrmd) - make sure that OCF_RESKEY_CRM_meta_interval is always defined (lf#2284) - ocft: new RA test suite - VirtualDomain: bail out early if config file can't be read during probe (bnc#593988) - VirtualDomain: spin on define until we definitely have a domain name - VirtualDomain: fix incorrect use of __OCF_ACTION (the stop operation may timeout otherwise) - Filesystem: prefer /proc/mounts to /etc/mtab for non-bind mounts (lf#2388) - IPaddr2: don't bring the interface down on stop (otherwise IPv6 addresses may be removed) - oracle/oralsnr: improve exit codes if the environment isn't valid - oracle/oralsnr: improve logging - Route: don't assume that OCF_RESKEY_CRM_meta_clone_node_max is set to a number (lf#2375) - Route: add route table parameter (lf#2335) - sfex: don't use pid file (lf#2363,bnc#585416) - SFEX daemon: fix logging - ldirectord: fix the configfile default (bnc#589457) - drbd: fix metadata (bnc#588684) - IPsrcaddr: modify the interface route (lf#2367) - ldirectord: Allow multiple email addresses (lf#2168) - vmware: fix set_environment() invocation (lf#2342) - vmware: updated to version 0.2 - apache: return the right exit code from monitor (bnc#578628) - iSCSILogicalUnit: fix monitor for STGT * Mon Feb 01 2010 Dejan Muhamedagic and others - stable release 1.0.2 - EvmsSCC, Evmsd, LinuxSCSI, drbd, pingd: marked as deprecated (lf#2244) - CTDB: new resource agent for clustered samba - postfix: new resource agent - proftpd: new resource agent - AoEtarget: new resource agent to export ATA-over-Ethernet (AoE) targets - Squid:
new resource agent - VirtualDomain: new resource agent (manage virtual domains using libvirt/virsh) - anything: new resource agent for arbitrary daemons - mysql-proxy: new resource agent - iSCSITarget/iSCSILogicalUnit: two new resource agents - portblock: fast reconnect/tickle ACK (new feature) - IPv6addr: new nic and cidr_netmask parameters - mysql-proxy: log_level and keepalive parameters - Filesystem: implement deep monitor operation - apache: monitor operation of depth 10 for web applications (lf#2234) - SAPDatabase + SAPInstance: New versions from SAP - CTDB: auto-generate cluster-specific part of smb.conf (lf#2308) - ClusterMon: don't fail in stop if the process is missing (bnc#569957) - Filesystem: allow configuring smbfs mounts as clones - IPaddr2: CLUSTERIP/iptables rule not always inserted on failed monitor (lf#2281) - IPaddr2: behave if the interface is down (lf#2147) - IPaddr2: check binaries when it makes sense - IPaddr2: fix invalid default value for OCF_RESKEY_clusterip_hash (bnc#553753) - IPaddr2: include netmask in search for the right interface - IPaddr2: remove all colons from the mac address before passing it to send_arp (lf#2165) - IPsrcaddr: replace 0/0 with proper ip prefix - IPv6addr: recognize network masks properly - IPv6addr: supply checksum for ICMPv6 packets - IPv6addr: ifdef out the ip offset hack for libnet v1.1.4 (lf#2034) - IPv6addr: supply checksum for ICMPv6 packets - LVM: Make monitor operation quiet in logs (bnc#546353) - MailTo: Provide a default for MAILCMD (bnc#534803, bnc#556366) - MailTo: allow multiple word subject line - Raid1: improve monitor function (bnc#546551) - Route: improve validate (lf#2232) - Squid: make the regexp match more precisely output of netstat - VIParip: Pathname needed to be configurable (lf#1331) - VirtualDomain: avoid needlessly invoking "virsh define" - VirtualDomain: destroy domain shortly before timeout expiry - VirtualDomain: fix forceful stop (lf#2283) - VirtualDomain: loop on status if 
libvirtd is unreachable - Xen: Remove instance_attribute "allow_migrate" (bnc#539968) - apache: make sure that proxies are not used for monitor - iSCSILogicalUnit: add support for SCSI ID, SCSI SN, Vendor ID, and Product ID - iSCSILogicalUnit: add support for per-LU parameters - iSCSILogicalUnit: set default for SCSI SN, truncate SCSI ID default to 24 bytes - iSCSILogicalUnit: use a 16-byte default SCSI ID - iSCSITarget, iSCSILogicalUnit: add support for tgt - iSCSITarget: reintroduce "tid" parameter - iSCSITarget, iSCSILogicalUnit: identify targets by IQN, not by tid - iSCSITarget, iSCSILogicalUnit: support LIO - iSCSITarget: add support for CHAP authentication - iSCSITarget: add support for restricting target access - iSCSITarget: be more persistent deleting targets on stop - include ldirectord (formerly known as heartbeat-ldirectord) - iscsi: replace wrong variable reference (bnc#499291) - jboss: Added JBoss support - ldirectord: fix setting defaults for configfile and ldirectord (lf#2328) - ldirectord: fix various bugs in OCF RA (lf#1949) - mysql: escalate stop to KILL if regular shutdown doesn't work - mysql: handle monitor and stop properly on invalid environment - nfsserver: use default values (lf#2321) - nfsserver: validate should not check if nfs_shared_infodir exists (lf#2219) - nfsserver: use check_binary properly in validate (lf#2211) - nfsserver: exit properly in nfsserver_validate (lf#2173) - oracle/oralsnr: export variables properly - oracle: drop spurious output from sqlplus - pgsql: remove the previous backup_label if it exists - portblock: add per-IP filtering capability - portblock: fix invalid exit codes on monitor - postfix: fix double stop - scsi2reservation: fix wrong logic in check for scsi_reserve - vmware: make meta-data work and several cleanups (lf#2212) - shellfuncs: make the mktemp wrappers work - ocf-shellfuncs: add mercurial repository version information - ocf-shellfuncs: add ocf_is_probe function - doc: add resource agents' man 
pages including examples * Thu Oct 23 2008 Lars Marowsky-Bree and MANY others - beta release 2.99.2 - LVM: stop correctly in case vol group does not exist * Tue Sep 23 2008 Lars Marowsky-Bree and MANY others - beta release 2.99.1 * Tue Aug 19 2008 Andrew Beekhof and MANY others - beta release 2.99.0 ClusterLabs-resource-agents-dc69db5/Makefile.am000066400000000000000000000114721203363223200216000ustar00rootroot00000000000000# # Copyright (C) 2008 Andrew Beekhof # Copyright (C) 2011 Fabio M. Di Nitto # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
#

MAINTAINERCLEANFILES	= Makefile.in aclocal.m4 configure DRF/config-h.in \
			  missing install-sh autoscan.log configure.scan \
			  DRF/stamp-h.in libtool.m4 ltdl.m4 libltdl.tar \
			  compile config.guess config.sub depcomp

SPEC			= $(PACKAGE_NAME).spec

TARFILES		= $(PACKAGE_NAME)-$(VERSION).tar.bz2 \
			  $(PACKAGE_NAME)-$(VERSION).tar.gz

SUBDIRS			=

# RGMANAGER and LINUX_HA are substituted for @rgmanager@ and @linux-ha@
# when $(SPEC) is generated from $(SPEC).in (see the $(SPEC) rule below).
if BUILD_RGMANAGER
SUBDIRS	+= rgmanager
RGMANAGER = without
else
RGMANAGER = with
endif

if BUILD_LINUX_HA
SUBDIRS	+= include heartbeat tools ldirectord doc
LINUX_HA = without
else
LINUX_HA = with
endif

EXTRA_DIST		= autogen.sh .version make/release.mk \
			  make/git-version-gen make/gitlog-to-changelog \
			  AUTHORS COPYING COPYING.GPLv3 ChangeLog \
			  $(SPEC).in

# Creates the resource agents state directory and the hidden .ocf-* links
# for the linux-ha set, and the "redhat" link plus the log directory for
# the rgmanager set.
install-exec-local:
if BUILD_LINUX_HA
	$(INSTALL) -d -m 1755 $(DESTDIR)$(HA_RSCTMPDIR)
	$(LN_S) ../../lib/heartbeat/ocf-binaries $(DESTDIR)${OCF_RA_DIR_PREFIX}/heartbeat/.ocf-binaries
	$(LN_S) ../../lib/heartbeat/ocf-directories $(DESTDIR)${OCF_RA_DIR_PREFIX}/heartbeat/.ocf-directories
	$(LN_S) ../../lib/heartbeat/ocf-returncodes $(DESTDIR)${OCF_RA_DIR_PREFIX}/heartbeat/.ocf-returncodes
	$(LN_S) ../../lib/heartbeat/ocf-shellfuncs $(DESTDIR)${OCF_RA_DIR_PREFIX}/heartbeat/.ocf-shellfuncs
endif
if BUILD_RGMANAGER
if BUILD_LINUX_HA
	$(LN_S) ${CLUSTERDATA} $(DESTDIR)${OCF_RA_DIR_PREFIX}/redhat
endif
	$(INSTALL) -d $(DESTDIR)/$(LOGDIR)
endif

# "make distclean" only runs the hook spelled "distclean-local"; under the
# former name "dist-clean-local" this cleanup was never executed.
distclean-local:
	rm -f autoconf automake autoheader $(TARFILES)

# Former target name, kept so an explicit "make dist-clean-local" still works.
.PHONY: dist-clean-local
dist-clean-local: distclean-local

# Best effort: a non-empty or missing log directory is not an error.
uninstall-local:
	rmdir $(DESTDIR)/$(LOGDIR) || :;

BUILT_SOURCES = .version
.version:
	echo $(VERSION) > $@-t && mv $@-t $@

dist-hook: gen-ChangeLog $(SPEC)
	echo $(VERSION) > $(distdir)/.tarball-version
	rm -f $(distdir)/$(SPEC) && \
	cp $(top_srcdir)/$(SPEC) $(distdir)/$(SPEC)

gen_start_date = 2000-01-01
.PHONY: gen-ChangeLog
# Only does something inside a git checkout: writes ChangeLog.devel into
# $(distdir) from the git history since $(gen_start_date).
gen-ChangeLog:
	if test -d .git; then 						\
		LC_ALL=C $(top_srcdir)/make/gitlog-to-changelog 	\
			--since=$(gen_start_date) > $(distdir)/cl-t;	\
		rm -f $(distdir)/ChangeLog.devel; 			\
		mv $(distdir)/cl-t $(distdir)/ChangeLog.devel;		\
	fi

## make rpm/srpm section.

# Generates the spec file from $(SPEC).in.  $(VERSION) is taken apart into:
#   dirty    - "dirty" when the version ends in "-dirty"
#   alphatag - the text after the last "-" of the remaining version
#   numcomm  - the last dot-separated field, only when an alphatag was found
#   rcver    - the "rcN" part when the version matches ".<digit>rc<digit>";
#              specver is set to "0" in that case
#   rpmver   - whatever is left of the version
# Each value replaces its @name@ placeholder; "%glo...<name>" lines are
# dropped from the result for the values that ended up empty.
$(SPEC): $(SPEC).in
	rm -f $@-t $@
	date="$(shell LC_ALL=C date "+%a %b %d %Y")" && \
	specver="1" && \
	rcver="" && \
	dirty="" && \
	alphatag="" && \
	numcomm="" && \
	ver="$(VERSION)" && \
	if echo $$ver | grep -q -- "-dirty$$"; then \
		dirty="dirty" && \
		echo VERSION IS DIRTY && \
		ver=`echo $$ver | sed -e "s/-dirty$$//"`; \
	fi && \
	echo $$ver && \
	if echo $$ver | grep -q -- "-"; then \
		alphatag=`echo $$ver | sed -e "s/.*-//"` && \
		echo VERSION HAS ALPHATAG && \
		ver=`echo $$ver | sed -e "s/-.*//"`; \
	fi && \
	echo $$ver && \
	if [ -n "$$alphatag" ]; then \
		echo VERSION HAS NUMCOMMITS && \
		numcomm=`echo $$ver | sed -e 's/.*\.//'` && \
		ver=`echo $$ver | sed -e 's/.'$$numcomm'$$//'`; \
	fi && \
	if echo $$ver | grep -q "\.[[:digit:]]rc[[:digit:]]"; then \
		rpmver=`echo $$ver | sed -e "s/rc[[:digit:]].*//g"` && \
		rcver=`echo $$ver | sed -e "s/.*\(rc[[:digit:]]\).*/\1/g"` && \
		specver="0"; \
	else \
		rpmver=$$ver; \
	fi && \
	echo $$rpmver $$rcver && \
	sed \
		-e "s#@version@#$$rpmver#g" \
		-e "s#@alphatag@#$$alphatag#g" \
		-e "s#@numcomm@#$$numcomm#g" \
		-e "s#@dirty@#$$dirty#g" \
		-e "s#@date@#$$date#g" \
		-e "s#@specver@#$$specver#g" \
		-e "s#@rcver@#$$rcver#g" \
		-e "s#@rgmanager@#$(RGMANAGER)#g" \
		-e "s#@linux-ha@#$(LINUX_HA)#g" \
	$< > $@-t; \
	if [ -z "$$dirty" ]; then sed -i -e "s#%glo.*dirty.*##g" $@-t; fi; \
	if [ -z "$$alphatag" ]; then sed -i -e "s#%glo.*alphatag.*##g" $@-t; fi; \
	if [ -z "$$numcomm" ]; then sed -i -e "s#%glo.*numcomm.*##g" $@-t; fi; \
	if [ -z "$$rcver" ]; then sed -i -e "s#%glo.*rcver.*##g" $@-t; fi
	chmod a-w $@-t
	mv $@-t $@

$(TARFILES):
	$(MAKE) dist

# Keep every rpmbuild directory inside the build tree.
RPMBUILDOPTS	= --define "_sourcedir $(abs_builddir)" \
		  --define "_specdir $(abs_builddir)" \
		  --define "_builddir $(abs_builddir)" \
		  --define "_srcrpmdir $(abs_builddir)" \
		  --define "_rpmdir $(abs_builddir)"

# srpm and rpm name actions, not files.
.PHONY: srpm rpm

srpm: clean
	$(MAKE) $(SPEC) $(TARFILES)
	rpmbuild $(RPMBUILDOPTS) --nodeps -bs $(SPEC)

rpm: clean
	$(MAKE) $(SPEC) $(TARFILES)
	rpmbuild $(RPMBUILDOPTS) -ba $(SPEC)

# NOTE(review): this defines clean-generic directly; clean-local is the
# documented automake hook for extra cleanup - confirm before changing.
clean-generic:
	rm -rf $(SPEC) $(TARFILES)
ClusterLabs-resource-agents-dc69db5/NEWS000066400000000000000000000000001203363223200202240ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/README000066400000000000000000000005451203363223200204230ustar00rootroot00000000000000This repository contains resource agents (RAs) compliant with the Open Cluster Framework (OCF) specification. These resource agents are used by two cluster resource management implementations: - Pacemaker - rgmanager More information about OCF resource agents is available in the OCF Resource Agent Developer's guide, in the doc/dev-guides subdirectory. ClusterLabs-resource-agents-dc69db5/autogen.sh000077500000000000000000000001671203363223200215440ustar00rootroot00000000000000#!/bin/sh # Run this to generate all the initial makefiles, etc. autoreconf -i -v && echo Now run ./configure and make ClusterLabs-resource-agents-dc69db5/configure.ac000066400000000000000000000614451203363223200220370ustar00rootroot00000000000000dnl dnl autoconf for Agents dnl dnl License: GNU General Public License (GPL) dnl =============================================== dnl Bootstrap dnl =============================================== AC_PREREQ(2.63) dnl Suggested structure: dnl information on the package dnl checks for programs dnl checks for libraries dnl checks for header files dnl checks for types dnl checks for structures dnl checks for compiler characteristics dnl checks for library functions dnl checks for system services AC_INIT([resource-agents], m4_esyscmd([make/git-version-gen .tarball-version]), [to_be_defined@foobar.org]) AC_USE_SYSTEM_EXTENSIONS CRM_DTD_VERSION="1.0" PKG_FEATURES="" AC_CONFIG_AUX_DIR(.) AC_CANONICAL_HOST dnl Where #defines go (e.g. 
`AC_CHECK_HEADERS' below) dnl dnl Internal header: include/config.h dnl - Contains ALL defines dnl - include/config.h.in is generated automatically by autoheader dnl - NOT to be included in any header files except lha_internal.h dnl (which is also not to be included in any other header files) dnl dnl External header: include/agent_config.h dnl - Contains a subset of defines checked here dnl - Manually edit include/agent_config.h.in to have configure include new defines dnl - Should not include HAVE_* defines dnl - Safe to include anywhere AM_CONFIG_HEADER(include/config.h include/agent_config.h) ALL_LINGUAS="en fr" AC_ARG_WITH(version, [ --with-version=version Override package version (if you're a packager needing to pretend) ], [ PACKAGE_VERSION="$withval" ]) AC_ARG_WITH(pkg-name, [ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ], [ PACKAGE_NAME="$withval" ]) dnl dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz]) dnl AM_INIT_AUTOMAKE([1.10.1 foreign dist-bzip2]) AC_DEFINE_UNQUOTED(AGENTS_VERSION, "$PACKAGE_VERSION", Current agents version) CC_IN_CONFIGURE=yes export CC_IN_CONFIGURE LDD=ldd dnl ======================================================================== dnl Compiler characteristics dnl ======================================================================== # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi AC_PROG_CC dnl Can force other with environment variable "CC". 
AM_PROG_CC_C_O
AC_PROG_CC_STDC

AC_PROG_AWK
AC_PROG_LN_S
AC_PROG_INSTALL
AC_PROG_MAKE_SET
AC_C_STRINGIZE
AC_C_INLINE
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_TYPE_UID_T
AC_TYPE_UINT16_T
AC_TYPE_UINT8_T
AC_TYPE_UINT32_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE

dnl ===============================================
dnl Helpers
dnl ===============================================

dnl cc_supports_flag FLAG...
dnl   Compiles a trivial program with CFLAGS set to exactly the given
dnl   arguments.  Returns 0 when the compile succeeds and 1 otherwise, and
dnl   reports the outcome as a yes/no check result.
cc_supports_flag() {
    local CFLAGS="$@"
    AC_MSG_CHECKING(whether $CC supports "$@")
    AC_COMPILE_IFELSE([int main(){return 0;}] ,[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
    return $RC
}

dnl extract_header_define HEADER NAME
dnl   Builds and runs a small program that includes HEADER and prints NAME
dnl   with a %s conversion, then writes that text to stdout without a
dnl   trailing newline.  Nothing is written when the program cannot be
dnl   built.  The temporary source and binary are created next to $srcdir
dnl   and removed again before returning.
extract_header_define() {
    AC_MSG_CHECKING(for $2 in $1)
    Cfile=$srcdir/extract_define.$2.${$}
    value=""
    # The probe calls printf, so it needs stdio.h ahead of the header under test.
    printf "#include <stdio.h>\n" > ${Cfile}.c
    printf "#include <%s>\n" "$1" >> ${Cfile}.c
    printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" "$2" >> ${Cfile}.c
    # Only run the probe when it was actually built.
    if $CC $CFLAGS ${Cfile}.c -o ${Cfile}; then
        value=`${Cfile}`
    fi
    AC_MSG_RESULT($value)
    # The value is data, not a format string.
    printf '%s' "$value"
    rm -f ${Cfile}.c ${Cfile}
}

dnl ===============================================
dnl Configure Options
dnl ===============================================

dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
    [ --with-pkgname=name name for pkg (typically for Solaris) ],
    [ PKGNAME="$withval" ],
    [ PKGNAME="LXHAhb" ],
  )
AC_SUBST(PKGNAME)

AC_ARG_ENABLE([ansi],
[ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers.
[default=yes]]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings very pedantic and fatal warnings for gcc [default=yes]]) INITDIR="" AC_ARG_WITH(initdir, [ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]], [ INITDIR="$withval" ]) OCF_ROOT_DIR="/usr/lib/ocf" AC_ARG_WITH(ocf-root, [ --with-ocf-root=DIR directory for OCF scripts [${OCF_ROOT_DIR}]], [ if test x"$withval" = xprefix; then OCF_ROOT_DIR=${prefix}; else OCF_ROOT_DIR="$withval"; fi ]) HA_RSCTMPDIR=${localstatedir}/run/resource-agents AC_ARG_WITH(rsctmpdir, [ --with-rsctmpdir=DIR directory for resource agents state files [${HA_RSCTMPDIR}]], [ if test x"$withval" = xprefix; then HA_RSCTMPDIR=${prefix}; else HA_RSCTMPDIR="$withval"; fi ]) AC_ARG_ENABLE([libnet], [ --enable-libnet Use libnet for ARP based funcationality, [default=try]], [enable_libnet="$enableval"], [enable_libnet=try]) BUILD_RGMANAGER=0 BUILD_LINUX_HA=0 RASSET=all AC_ARG_WITH(ras-set, [ --with-ras-set=SET build/install only linux-ha or rgmanager resource-agents [default: all]], [ RASSET="$withval" ]) if test x$RASSET = xyes || test x$RASSET = xall ; then BUILD_RGMANAGER=1 BUILD_LINUX_HA=1 fi if test x$RASSET = xlinux-ha; then BUILD_LINUX_HA=1 fi if test x$RASSET = xrgmanager; then BUILD_RGMANAGER=1 fi if test $BUILD_LINUX_HA -eq 0 && test $BUILD_RGMANAGER -eq 0; then AC_MSG_ERROR([Are you really sure you want this package?]) exit 1 fi AM_CONDITIONAL(BUILD_LINUX_HA, test $BUILD_LINUX_HA -eq 1) AM_CONDITIONAL(BUILD_RGMANAGER, test $BUILD_RGMANAGER -eq 1) dnl =============================================== dnl General Processing dnl =============================================== INIT_EXT="" echo Our Host OS: $host_os/$host AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Heartbeat, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac AC_MSG_NOTICE(Sanitizing 
INITDIR: ${INITDIR}) case $INITDIR in prefix) INITDIR=$prefix;; "") AC_MSG_CHECKING(which init (rc) directory to use) for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \ /usr/local/etc/rc.d /etc/rc.d do if test -d $initdir then INITDIR=$initdir break fi done if test -z $INITDIR then INITDIR=${sysconfdir}/init.d fi AC_MSG_RESULT($INITDIR);; esac AC_SUBST(INITDIR) if test "${prefix}" = "/usr"; then INITDIRPREFIX="$INITDIR" else INITDIRPREFIX="${prefix}/$INITDIR" fi AC_SUBST(INITDIRPREFIX) AC_MSG_NOTICE(Sanitizing libdir: ${libdir}) case $libdir in dnl For consistency with Heartbeat, map NONE->$prefix *prefix*|NONE) AC_MSG_CHECKING(which lib directory to use) for aDir in lib64 lib do trydir="${exec_prefix}/${aDir}" if test -d ${trydir} then libdir=${trydir} break fi done AC_MSG_RESULT($libdir); ;; esac dnl Expand autoconf variables so that we dont end up with '${prefix}' dnl in #defines and python scripts dnl NOTE: Autoconf deliberately leaves them unexpanded to allow dnl make exec_prefix=/foo install dnl No longer being able to do this seems like no great loss to me... 
eval prefix="`eval echo ${prefix}`" eval exec_prefix="`eval echo ${exec_prefix}`" eval bindir="`eval echo ${bindir}`" eval sbindir="`eval echo ${sbindir}`" eval libexecdir="`eval echo ${libexecdir}`" eval datadir="`eval echo ${datadir}`" eval sysconfdir="`eval echo ${sysconfdir}`" eval sharedstatedir="`eval echo ${sharedstatedir}`" eval localstatedir="`eval echo ${localstatedir}`" eval libdir="`eval echo ${libdir}`" eval includedir="`eval echo ${includedir}`" eval oldincludedir="`eval echo ${oldincludedir}`" eval infodir="`eval echo ${infodir}`" eval mandir="`eval echo ${mandir}`" dnl docdir is a recent addition to autotools eval docdir="`eval echo ${docdir}`" if test "x$docdir" = "x"; then docdir="`eval echo ${datadir}/doc`" fi AC_SUBST(docdir) dnl Home-grown variables eval INITDIR="${INITDIR}" for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ sharedstatedir localstatedir libdir includedir oldincludedir infodir \ mandir INITDIR docdir do dirname=`eval echo '${'${j}'}'` if test ! -d "$dirname" then AC_MSG_WARN([$j directory ($dirname) does not exist!]) fi done dnl This OS-based decision-making is poor autotools practice; dnl feature-based mechanisms are strongly preferred. dnl dnl So keep this section to a bare minimum; regard as a "necessary evil". 
REBOOT_OPTIONS="-f" POWEROFF_OPTIONS="-f" case "$host_os" in *bsd*) LIBS="-L/usr/local/lib" CPPFLAGS="$CPPFLAGS -I/usr/local/include" INIT_EXT=".sh" ;; *solaris*) REBOOT_OPTIONS="-n" POWEROFF_OPTIONS="-n" ;; *linux*) AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform) POWEROFF_OPTIONS="-nf" REBOOT_OPTIONS="-nf" ;; darwin*) AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform) LIBS="$LIBS -L${prefix}/lib" CFLAGS="$CFLAGS -I${prefix}/include" ;; esac AC_SUBST(INIT_EXT) AC_DEFINE_UNQUOTED(HA_LOG_FACILITY, LOG_DAEMON, Default logging facility) AC_MSG_NOTICE(Host CPU: $host_cpu) case "$host_cpu" in ppc64|powerpc64) case $CFLAGS in *powerpc64*) ;; *) if test "$GCC" = yes; then CFLAGS="$CFLAGS -m64" fi ;; esac esac AC_MSG_CHECKING(which format is needed to print uint64_t) case "$host_cpu" in s390x)U64T="%lu";; *64*) U64T="%lu";; *) U64T="%llu";; esac AC_MSG_RESULT($U64T) AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t) dnl Variables needed for substitution AC_CHECK_HEADERS(heartbeat/glue_config.h) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then OCF_ROOT_DIR=`extract_header_define heartbeat/glue_config.h OCF_ROOT_DIR` else enable_libnet=no fi AC_DEFINE_UNQUOTED(OCF_ROOT_DIR,"$OCF_ROOT_DIR", OCF root directory - specified by the OCF standard) AC_SUBST(OCF_ROOT_DIR) GLUE_STATE_DIR=${localstatedir}/run AC_DEFINE_UNQUOTED(GLUE_STATE_DIR,"$GLUE_STATE_DIR", Where to keep state files and sockets) AC_SUBST(GLUE_STATE_DIR) AC_DEFINE_UNQUOTED(HA_VARRUNDIR,"$GLUE_STATE_DIR", Where Heartbeat keeps state files and sockets - old name) HA_VARRUNDIR="$GLUE_STATE_DIR" AC_SUBST(HA_VARRUNDIR) # Expand $prefix eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`" AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resouce agents keep state files) AC_SUBST(HA_RSCTMPDIR) dnl Eventually move out of the heartbeat dir tree and create symlinks when needed HA_VARLIBHBDIR=${localstatedir}/lib/heartbeat 
AC_DEFINE_UNQUOTED(HA_VARLIBHBDIR,"$HA_VARLIBHBDIR", Whatever this used to mean) AC_SUBST(HA_VARLIBHBDIR) OCF_RA_DIR="${OCF_ROOT_DIR}/resource.d/" AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs) AC_SUBST(OCF_RA_DIR) if test "${prefix}" = "/usr"; then OCF_RA_DIR_PREFIX="$OCF_RA_DIR" else OCF_RA_DIR_PREFIX="${prefix}/$OCF_RA_DIR" fi AC_SUBST(OCF_RA_DIR_PREFIX) OCF_LIB_DIR="${OCF_ROOT_DIR}/lib/" AC_DEFINE_UNQUOTED(OCF_LIB_DIR,"$OCF_LIB_DIR", Location for shared code for OCF RAs) AC_SUBST(OCF_LIB_DIR) if test "${prefix}" = "/usr"; then OCF_LIB_DIR_PREFIX="$OCF_LIB_DIR" else OCF_LIB_DIR_PREFIX="${prefix}/$OCF_LIB_DIR" fi AC_SUBST(OCF_LIB_DIR_PREFIX) dnl =============================================== dnl rgmanager ras bits dnl =============================================== LOGDIR=${localstatedir}/log/cluster CLUSTERDATA=${datadir}/cluster AC_SUBST([LOGDIR]) AC_SUBST([CLUSTERDATA]) dnl =============================================== dnl Program Paths dnl =============================================== PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin" export PATH AM_PATH_PYTHON AC_CHECK_PROGS(MAKE, gmake make) AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh) AC_PATH_PROGS(SCP, scp, /usr/bin/scp) AC_PATH_PROGS(TAR, tar) AC_PATH_PROGS(MD5, md5) AC_PATH_PROGS(TEST, test) AC_PATH_PROGS(PING, ping, /bin/ping) AC_PATH_PROGS(IFCONFIG, ifconfig, /sbin/ifconfig) AC_PATH_PROGS(MAILCMD, mailx mail, mail) AC_PATH_PROGS(EGREP, egrep) AC_PATH_PROGS(PKGCONFIG, pkg-config) AC_SUBST(MAILCMD) AC_SUBST(EGREP) AC_SUBST(SHELL) AC_SUBST(PING) AC_SUBST(TEST) AC_PATH_PROGS(ROUTE, route) AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) AC_MSG_CHECKING(ifconfig option to list interfaces) for IFCONFIG_A_OPT in "-A" "-a" "" do $IFCONFIG $IFCONFIG_A_OPT > /dev/null 2>&1 if test "$?" 
= 0 then AC_DEFINE_UNQUOTED(IFCONFIG_A_OPT, "$IFCONFIG_A_OPT", option for ifconfig command) AC_MSG_RESULT($IFCONFIG_A_OPT) break fi done AC_SUBST(IFCONFIG_A_OPT) if test x"${MAKE}" = x""; then AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE}) fi dnl =============================================== dnl Libraries dnl =============================================== AC_CHECK_LIB(socket, socket) AC_CHECK_LIB(gnugetopt, getopt_long) dnl if available if test x"${PKGCONFIG}" = x""; then AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE}) fi if test "x${enable_thread_safe}" = "xyes"; then GPKGNAME="gthread-2.0" else GPKGNAME="glib-2.0" fi if $PKGCONFIG --exists $GPKGNAME then GLIBCONFIG="$PKGCONFIG $GPKGNAME" else set -x echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH $PKGCONFIG --exists $GPKGNAME; echo $? $PKGCONFIG --cflags $GPKGNAME; echo $? $PKGCONFIG $GPKGNAME; echo $? set +x AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE}) fi AC_MSG_RESULT(using $GLIBCONFIG) if test "X$GLIBCONFIG" != X; then AC_MSG_CHECKING(for special glib includes: ) GLIBHEAD=`$GLIBCONFIG --cflags` AC_MSG_RESULT($GLIBHEAD) CPPFLAGS="$CPPFLAGS $GLIBHEAD" AC_MSG_CHECKING(for glib library flags) GLIBLIB=`$GLIBCONFIG --libs` AC_MSG_RESULT($GLIBLIB) LIBS="$LIBS $GLIBLIB" fi dnl ======================================================================== dnl Headers dnl ======================================================================== AC_HEADER_STDC AC_CHECK_HEADERS(sys/socket.h) AC_CHECK_HEADERS(sys/sockio.h) AC_CHECK_HEADERS([arpa/inet.h]) AC_CHECK_HEADERS([fcntl.h]) AC_CHECK_HEADERS([limits.h]) AC_CHECK_HEADERS([malloc.h]) AC_CHECK_HEADERS([netdb.h]) AC_CHECK_HEADERS([netinet/in.h]) AC_CHECK_HEADERS([sys/file.h]) AC_CHECK_HEADERS([sys/ioctl.h]) AC_CHECK_HEADERS([sys/param.h]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([syslog.h]) dnl ======================================================================== dnl Functions dnl 
======================================================================== AC_FUNC_FORK AC_FUNC_STRNLEN AC_CHECK_FUNCS([alarm gettimeofday inet_ntoa memset mkdir socket uname]) AC_CHECK_FUNCS([strcasecmp strchr strdup strerror strrchr strspn strstr strtol strtoul]) AC_PATH_PROGS(REBOOT, reboot, /sbin/reboot) AC_SUBST(REBOOT) AC_SUBST(REBOOT_OPTIONS) AC_DEFINE_UNQUOTED(REBOOT, "$REBOOT", path to the reboot command) AC_DEFINE_UNQUOTED(REBOOT_OPTIONS, "$REBOOT_OPTIONS", reboot options) AC_PATH_PROGS(POWEROFF_CMD, poweroff, /sbin/poweroff) AC_SUBST(POWEROFF_CMD) AC_SUBST(POWEROFF_OPTIONS) AC_DEFINE_UNQUOTED(POWEROFF_CMD, "$POWEROFF_CMD", path to the poweroff command) AC_DEFINE_UNQUOTED(POWEROFF_OPTIONS, "$POWEROFF_OPTIONS", poweroff options) AC_PATH_PROGS(XSLTPROC, xsltproc) AM_CONDITIONAL(BUILD_DOC, test "x$XSLTPROC" != "x" ) if test "x$XSLTPROC" = "x"; then AC_MSG_WARN([xsltproc not installed, unable to (re-)build manual pages]) fi AC_SUBST(XSLTPROC) AC_PATH_PROGS(POD2MAN, pod2man) AM_CONDITIONAL(BUILD_POD_DOC, test "x$POD2MAN" != "x" ) if test "x$POD2MAN" = "x"; then AC_MSG_WARN([pod2man not installed, unable to (re-)build ldirector manual page]) fi AC_SUBST(POD2MAN) dnl ======================================================================== dnl Functions dnl ======================================================================== AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function])) dnl ======================================================================== dnl sfex dnl ======================================================================== build_sfex=no case $host_os in *Linux*|*linux*) if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then build_sfex=yes fi ;; esac AM_CONDITIONAL(BUILD_SFEX, test "$build_sfex" = "yes" ) dnl ======================================================================== dnl tickle (needs port to BSD platforms) dnl ======================================================================== 
dnl The fourth argument names the header that is included before the
dnl struct iphdr member test is compiled.
AC_CHECK_MEMBERS([struct iphdr.saddr],,,[[#include <netinet/ip.h>]])
AM_CONDITIONAL(BUILD_TICKLE, test "$ac_cv_member_struct_iphdr_saddr" = "yes" )

dnl ========================================================================
dnl libnet
dnl ========================================================================

libnet=""
libnet_version="none"
LIBNETLIBS=""
LIBNETDEFINES=""

dnl libnet_fatal decides whether a missing libnet aborts configure.  With
dnl the "try" setting it is fatal everywhere except on Linux; an unknown
dnl setting is treated as "try" but is always fatal.
AC_MSG_CHECKING(if libnet is required)
libnet_fatal=$enable_libnet
case $enable_libnet in
  no) ;;
  yes|libnet10|libnet11|10|11) libnet_fatal=yes;;
  try)
    case $host_os in
      *Linux*|*linux*) libnet_fatal=no;;
      *) libnet_fatal=yes;;	dnl legacy behavior
    esac
    ;;
  *) libnet_fatal=yes; enable_libnet=try;;
esac
AC_MSG_RESULT($libnet_fatal)

if test "x$enable_libnet" != "xno"; then
  AC_PATH_PROGS(LIBNETCONFIG, libnet-config)

  AC_CHECK_LIB(nsl, t_open)	dnl -lnsl
  AC_CHECK_LIB(socket, socket)	dnl -lsocket
  AC_CHECK_LIB(net, libnet_get_hwaddr, LIBNETLIBS=" -lnet", [])
fi

AC_MSG_CHECKING(for libnet)
if test "x$LIBNETLIBS" != "x" -o "x$enable_libnet" = "xlibnet11"; then
  LIBNETDEFINES=""
  if test "$ac_cv_lib_nsl_t_open" = yes; then
    LIBNETLIBS="-lnsl $LIBNETLIBS"
  fi
  if test "$ac_cv_lib_socket_socket" = yes; then
    LIBNETLIBS="-lsocket $LIBNETLIBS"
  fi
  libnet=net
  libnet_version="libnet1.1"
fi

dnl Fall back to libnet-config only when -lnet was not found above.
if test "x$enable_libnet" = "xtry" -o "x$enable_libnet" = "xlibnet10"; then
  if test "x$LIBNETLIBS" = x -a "x${LIBNETCONFIG}" != "x" ; then
    LIBNETDEFINES="`$LIBNETCONFIG --defines` `$LIBNETCONFIG --cflags`";
    LIBNETLIBS="`$LIBNETCONFIG --libs`";
    libnet_version="libnet1.0 (old)"
    case $LIBNETLIBS in
      *-l*) libnet=`echo $LIBNETLIBS | sed 's%.*-l%%'`;;
      *) libnet_version=none;;
    esac

    CPPFLAGS="$CPPFLAGS $LIBNETDEFINES"

    AC_CHECK_HEADERS(libnet.h)
    if test "$ac_cv_header_libnet_h" = no; then
      libnet_version=none
    fi
  fi
fi
AC_MSG_RESULT(found $libnet_version)

if test "$libnet_version" = none; then
  LIBNETLIBS=""
  LIBNETDEFINES=""
  if test $libnet_fatal = yes; then
    AC_MSG_ERROR(libnet not found)
  fi
else
  AC_CHECK_LIB($libnet,libnet_init,
    [new_libnet=yes; AC_DEFINE(HAVE_LIBNET_1_1_API, 1, Libnet 1.1 API)],
    [new_libnet=no; AC_DEFINE(HAVE_LIBNET_1_0_API, 1, Libnet 1.0 API)],$LIBNETLIBS)
  AC_SUBST(LIBNETLIBS)
fi

dnl The probe below is inverted on purpose: a successful compile reports
dnl "no", a failed compile defines HAVE_LIBNET_1_1_4_API.  The include
dnl directive must sit on its own line so the rest of the probe is parsed
dnl as C.
if test "$new_libnet" = yes; then
  AC_MSG_CHECKING(for libnet API 1.1.4: )
  save_CFLAGS="$CFLAGS"
  CFLAGS="$CFLAGS -fgnu89-inline -Wall -Werror"
  AC_COMPILE_IFELSE([#include <libnet.h>
    int main(){libnet_t *l=NULL; libnet_pblock_record_ip_offset(l, l->total_size); return(0); }],
    [AC_MSG_RESULT(no)],
    [AC_DEFINE(HAVE_LIBNET_1_1_4_API, 1, Libnet 1.1.4 API) AC_MSG_RESULT(yes)])
  CFLAGS="$save_CFLAGS"
fi

sendarp_linux=0
case $host_os in
  *Linux*|*linux*) sendarp_linux=1;;
esac

AC_SUBST(LIBNETLIBS)
AC_SUBST(LIBNETDEFINES)

AM_CONDITIONAL(SENDARP_LINUX, test $sendarp_linux = 1 )
AM_CONDITIONAL(USE_LIBNET, test "x$libnet_version" != "xnone" )

dnl ************************************************************************
dnl * Check for netinet/icmp6.h to enable the IPv6addr resource agent
dnl * (the last argument is included ahead of the header under test)
AC_CHECK_HEADERS(netinet/icmp6.h,[],[],[#include <sys/types.h>])
AM_CONDITIONAL(USE_IPV6ADDR, test "$ac_cv_header_netinet_icmp6_h" = yes )

dnl ========================================================================
dnl Compiler flags
dnl ========================================================================

dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
CC_ERRORS="" CC_EXTRAS="" if export | fgrep " CFLAGS=" > /dev/null; then export -n CFLAGS || true # We don't want to bomb out if this fails SAVED_CFLAGS="$CFLAGS" unset CFLAGS CFLAGS="$SAVED_CFLAGS" unset SAVED_CFLAGS fi if test "$GCC" != yes; then CFLAGS="$CFLAGS -g" enable_fatal_warnings=no else CFLAGS="$CFLAGS -ggdb3" # We had to eliminate -Wnested-externs because of libtool changes # Also remove -Waggregate-return because we use one libnet # call which returns a struct EXTRA_FLAGS="-fgnu89-inline -fstack-protector-all -Wall -Wbad-function-cast -Wcast-qual -Wcast-align -Wdeclaration-after-statement -Wendif-labels -Wfloat-equal -Wformat=2 -Wformat-security -Wformat-nonliteral -Winline -Wmissing-prototypes -Wmissing-declarations -Wmissing-format-attribute -Wnested-externs -Wno-long-long -Wno-strict-aliasing -Wpointer-arith -Wstrict-prototypes -Wunsigned-char -Wwrite-strings" # Additional warnings it might be nice to enable one day # -Wshadow # -Wunreachable-code for j in $EXTRA_FLAGS do if cc_supports_flag $j then CC_EXTRAS="$CC_EXTRAS $j" fi done dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'` AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4) dnl System specific options case "$host_os" in *linux*|*bsd*) if test "${enable_fatal_warnings}" = "unknown"; then enable_fatal_warnings=yes fi ;; esac if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then enable_fatal_warnings=yes else enable_fatal_warnings=no fi if test "x${enable_ansi}" != xno && cc_supports_flag -std=iso9899:199409 ; then AC_MSG_NOTICE(Enabling ANSI Compatibility) CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY" fi AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS}) fi CFLAGS="$CFLAGS $CC_EXTRAS" NON_FATAL_CFLAGS="$CFLAGS" AC_SUBST(NON_FATAL_CFLAGS) dnl dnl We reset CFLAGS to include our warnings *after* all function dnl checking goes on, so that our warning flags don't keep the dnl 
AC_*FUNCS() calls above from working. In particular, -Werror will dnl *always* cause us troubles if we set it before here. dnl dnl if test "x${enable_fatal_warnings}" = xyes ; then AC_MSG_NOTICE(Enabling Fatal Warnings) CFLAGS="$CFLAGS -Werror" fi AC_SUBST(CFLAGS) dnl This is useful for use in Makefiles that need to remove one specific flag CFLAGS_COPY="$CFLAGS" AC_SUBST(CFLAGS_COPY) AC_SUBST(LOCALE) AC_SUBST(CC) AC_SUBST(MAKE) dnl The Makefiles and shell scripts we output AC_CONFIG_FILES(Makefile \ include/Makefile \ heartbeat/Makefile \ heartbeat/ocf-binaries \ heartbeat/ocf-directories \ heartbeat/ocf-shellfuncs \ heartbeat/shellfuncs \ tools/Makefile \ tools/ocf-tester \ tools/ocft/Makefile \ tools/ocft/ocft \ tools/ocft/caselib \ tools/ocft/README \ tools/ocft/README.zh_CN \ ldirectord/Makefile \ ldirectord/ldirectord \ ldirectord/init.d/Makefile \ ldirectord/init.d/ldirectord \ ldirectord/init.d/ldirectord.debian \ ldirectord/init.d/ldirectord.debian.default \ ldirectord/logrotate.d/Makefile \ ldirectord/OCF/Makefile \ ldirectord/OCF/ldirectord \ doc/Makefile \ doc/man/Makefile \ rgmanager/Makefile \ rgmanager/src/Makefile \ rgmanager/src/resources/Makefile \ rgmanager/src/resources/utils/Makefile \ ) dnl Now process the entire list of files added by previous dnl calls to AC_CONFIG_FILES() AC_OUTPUT() dnl ***************** dnl Configure summary dnl ***************** AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION}]) AC_MSG_RESULT([ Build Version = dc69db5a6d203ebd230201167f012e43784f8d23]) AC_MSG_RESULT([ Features =${PKG_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ Documentation = ${docdir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) 
AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ RA state files = ${HA_RSCTMPDIR}]) AC_MSG_RESULT([ AIS Plugins = ${LCRSODIR}]) AC_MSG_RESULT([]) AC_MSG_RESULT([ CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Libraries = ${LIBS}]) AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) ClusterLabs-resource-agents-dc69db5/doc/000077500000000000000000000000001203363223200203045ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/doc/Makefile.am000066400000000000000000000015721203363223200223450ustar00rootroot00000000000000# # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # SUBDIRS = man MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(doc_DATA) doc_DATA = README.webapps ClusterLabs-resource-agents-dc69db5/doc/README.webapps000066400000000000000000000117161203363223200226320ustar00rootroot00000000000000Monitoring web applications with the Apache RA One of typical uses of apache is as an interface to the one or the other kind of web application. It could be expressed thus in terms of a resource group: IP address apache web_app where web_app is a JSP application (tomcat,jeronimo) or similar. Rumour has it that the web applications suffer from occasional instability which may make them an administration nightmare. 
But the typical remedy is simply an application restart. How do we increase availability in this situation? The web applications are most commonly represented as one or more processes in a UNIX environment. The aforementioned instability is most commonly not reflected in the process state. Hence, checking the process status makes us no wiser. What could help, though, is probing the application just as our unhappy user does---through the web interface. We can ask the application developers to provide a URL which should exercise the application and then provide predictable output. Now, given our generic resource group and the failed web application, which we established using an HTTP client, we have the following situation: IP address apache FAILED web_app Some might argue that it's not apache that is the culprit or has failed, but this nevertheless should serve our purpose well. The cluster will stop web_app and apache and then start them, either on the same node or elsewhere. There's an extra apache restart which was not needed, but then again it cannot really hurt. What to monitor? Choose carefully the URL to monitor. It should probe exactly what is further up in the resource group, no more and no less. In other words, if you have a database backend running elsewhere, it would be of no use to specify a URL which depends on the database. You should monitor only what is within reach. Configuration and usage It is possible to configure the monitoring either through the CIB or using an extra configuration file. If your monitoring spec consists only of a URL and a regular expression to be matched in the output, then something like this should suffice: primitive apache_a1 ocf:heartbeat:apache \ params configfile="/apps/a1.conf" \ op monitor interval=120s timeout=60s start-delay=120s \ OCF_CHECK_LEVEL=10 testurl="/webapp1_mon" testregex="This application is alive" The testurl parameter is where we connect and the testregex is what we should look for.
The OCF_CHECK_LEVEL must be set to "10". Note that testurl specifies a URL which is relative to where apache listens for connections. Obviously, this should be preferred to specifying the full URL. It is important to set start-delay to a value larger than the time needed to start the web application (the next resource). If we don't, then the first monitor operation is likely to fail. In case you need more complex configuration, it can be set in an extra configuration file: primitive apache_a1 ocf:heartbeat:apache \ params configfile="/apps/a1.conf" testconffile="/apps/webmon.cf" \ op monitor ... OCF_CHECK_LEVEL=10 /etc/apache2/webmon.cf: test webapp1 url /webapp1_mon match This application is alive client curl end This test configuration is equivalent to the first one, it's just that in the latter we want to use curl(1) as an http client instead of wget(1). Another example: test webapp1 url /webapp1_mon match This application is alive client curl client_opts --header 'Host: www.webapp1.megacorp.com' end Here we use curl's --header option to specify the virtual host we want to talk to. It is also possible to set the credentials using the "user" and "password" keywords. The configuration file may contain more than one test definition which is handy in case one should monitor more than one web application. In that case you should specify the test name in the CIB: primitive apache_common ocf:heartbeat:apache \ params configfile="/apps/httpd.conf" testconffile="/apps/webmon.cf" \ op monitor ... OCF_CHECK_LEVEL=10 testname="a1" \ op monitor ... OCF_CHECK_LEVEL=10 testname="b1" The apache OCF RA supports wget(1) (the default) and curl(1) http clients. If neither will do, then you can specify your own using the client and client_opts keywords. Your client must accept the URL as the last parameter and it must dump output from the web server to stdout. All configuration file keywords: test The name of the test. url The url to test.
If it doesn't start with http, it's considered to be relative to the apache Listen directive. match The regular expression to match. user Username to authenticate with. password Password to authenticate with. client The http client. client_opts Options for the http client. end Marks the end of the test definition. # Comment. May be used only at the start of line. Notes We could support more depth levels, but it is not clear if anybody really needs that. Different check levels could be defined as different monitor operations. In case you are using the external configuration file, don't forget to replicate it to all cluster members and to keep it synchronized. ClusterLabs-resource-agents-dc69db5/doc/dev-guides/000077500000000000000000000000001203363223200223405ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/doc/dev-guides/ra-dev-guide-docinfo.xml000066400000000000000000000045531203363223200267610ustar00rootroot00000000000000 Florian Haas hastexo florian.haas@hastexo.com John Shi SUSE Original ocft README jshi@suse.com Dejan Muhamedagic SUSE ocft documentation rewrite dmuhamedagic@suse.com 2010 2011 LINBIT HA-Solutions GmbH 2011 Novell, Inc. 2011 SUSE Linux GmbH 2011 hastexo Professional Services GmbH License information The text of and illustrations in this document are licensed under a Creative Commons Attribution–Share Alike 3.0 Unported license ("CC-BY-SA"). A summary of CC-BY-SA is available at . The full license text is available at . In accordance with CC-BY-SA, if you distribute this document or an adaptation of it, you must provide the URL for the original version. 
1.0.2 November 18, 2011 FGH 1.0.1 January 3, 2011 FGH 1.0.0 December 13, 2010 FGH ClusterLabs-resource-agents-dc69db5/doc/dev-guides/ra-dev-guide.txt000066400000000000000000002261161203363223200253620ustar00rootroot00000000000000= The OCF Resource Agent Developer's Guide == Introduction This document is to serve as a guide and reference for all developers, maintainers, and contributors working on OCF (Open Cluster Framework) compliant cluster resource agents. It explains the anatomy and general functionality of a resource agent, illustrates the resource agent API, and provides valuable hints and tips to resource agent authors. === What is a resource agent? A resource agent is an executable that manages a cluster resource. No formal definition of a cluster resource exists, other than "anything a cluster manages is a resource." Cluster resources can be as diverse as IP addresses, file systems, database services, and entire virtual machines -- to name just a few examples. === Who or what uses a resource agent? Any Open Cluster Framework (OCF) compliant cluster management application is capable of managing resources using the resource agents described in this document. At the time of writing, two OCF compliant cluster management applications exist for the Linux platform: * _Pacemaker_, a cluster manager supporting both the Corosync and Heartbeat cluster messaging frameworks. Pacemaker evolved out of the Linux-HA project. * _RGmanager_, the cluster manager bundled in Red Hat Cluster Suite. It supports the Corosync cluster messaging framework exclusively. === Which language is a resource agent written in? An OCF compliant resource agent can be implemented in _any_ programming language. The API is not language specific. However, most resource agents are implemented as shell scripts, which is why this guide primarily uses example code written in shell language. 
== API definitions === Environment variables A resource agent receives all configuration information about the resource it manages via environment variables. The names of these environment variables are always the name of the resource parameter, prefixed with +OCF_RESKEY_+. For example, if the resource has an +ip+ parameter set to +192.168.1.1+, then the resource agent will have access to an environment variable +OCF_RESKEY_ip+ holding that value. For any resource parameter that is not required to be set by the user -- that is, its parameter definition in the resource agent metadata does not specify +required="true"+ -- the resource agent must either: * Provide a reasonable default. This should be advertised in the metadata. By convention, the resource agent uses a variable named +OCF_RESKEY_<parametername>_default+ that holds this default. * Alternatively, cater correctly for the value being empty. In addition, the cluster manager may also support _meta_ resource parameters. These do not apply directly to the resource configuration, but rather specify _how_ the cluster resource manager is expected to manage the resource. For example, the Pacemaker cluster manager uses the +target-role+ meta parameter to specify whether the resource should be started or stopped. Meta parameters are passed into the resource agent in the +OCF_RESKEY_CRM_meta_+ namespace, with any hyphens converted to underscores. Thus, the +target-role+ attribute maps to an environment variable named +OCF_RESKEY_CRM_meta_target_role+. === Actions Any resource agent must support one command-line argument which specifies the action the resource agent is about to execute. The following actions must be supported by any resource agent: * +start+ -- starts the resource. * +stop+ -- shuts down the resource. * +monitor+ -- queries the resource for its state. * +meta-data+ -- dumps the resource agent metadata.
In addition, resource agents may optionally support the following actions: * +promote+ -- turns a resource into the +Master+ role (Master/Slave resources only). * +demote+ -- turns a resource into the +Slave+ role (Master/Slave resources only). * +migrate_to+ and +migrate_from+ -- implement live migration of resources. * +validate-all+ -- validates a resource's configuration. * +usage+ or +help+ -- displays a usage message when the resource agent is invoked from the command line, rather than by the cluster manager. * +status+ -- historical (deprecated) synonym for +monitor+. === Timeouts Action timeouts are enforced outside the resource agent proper. It is the cluster manager's responsibility to monitor how long a resource agent action has been running, and terminate it if it does not meet its completion deadline. Thus, resource agents need not themselves check for any timeout expiry. Resource agents can, however, _advise_ the user of sensible timeout values (which, when correctly set, will be duly enforced by the cluster manager). See <<_metadata,the following section>> for details on how a resource agent advertises its suggested timeouts. === Metadata Every resource agent must describe its own purpose and supported parameters in a set of XML metadata. This metadata is used by cluster management applications for on-line help, and resource agent man pages are generated from it as well. The following is a fictitious set of metadata from an imaginary resource agent: [source,xml] -------------------------------------------------------------------------- 0.1 This is a fictitious example resource agent written for the OCF Resource Agent Developers Guide. 
Example resource agent for budding OCF RA developers Number of eggs, an example numeric parameter Number of eggs Enable superfrobnication, an example boolean parameter Enable superfrobnication Data directory, an example string parameter Data directory -------------------------------------------------------------------------- The +resource-agent+ element, of which there must only be one per resource agent, defines the resource agent +name+ and +version+. The +longdesc+ and +shortdesc+ elements in +resource-agent+ provide a long and short description of the resource agent's functionality. While +shortdesc+ is a one-line description of what the resource agent does and is usually used in terse listings, +longdesc+ should give a full-blown description of the resource agent in as much detail as possible. The +parameters+ element describes the resource agent parameters, and should hold any number of +parameter+ children -- one for each parameter that the resource agent supports. Every +parameter+ should, like the +resource-agent+ as a whole, come with a +shortdesc+ and a +longdesc+, and also a +content+ child that describes the parameter's expected content. On the +content+ element, there may be four different attributes: * +type+ describes the parameter type (+string+, +integer+, or +boolean+). If unset, +type+ defaults to +string+. * +required+ indicates whether setting the parameter is mandatory (+required="true"+) or optional (+required="false"+). * For optional parameters, it is customary to provide a sensible default via the +default+ attribute. * Finally, the +unique+ attribute (allowed values: +true+ or +false+) indicates that a specific value must be unique across the cluster, for this parameter of this particular resource type. For example, a highly available floating IP address is declared +unique+ -- as that one IP address should run only once throughout the cluster, avoiding duplicates. 
The +actions+ list defines the actions that the resource agent advertises as supported. Every +action+ should list its own +timeout+ value. This is a hint to the user as to what _minimal_ timeout should be configured for the action. This is meant to cater for the fact that some resources are quick to start and stop (IP addresses or filesystems, for example), while others may take several minutes to do so (such as databases). In addition, recurring actions (such as +monitor+) should also specify a recommended minimum +interval+, which is the time between two consecutive invocations of the same action. Like +timeout+, this value does not constitute a default -- it is merely a hint for the user which action interval to configure, at minimum. == Return codes For any invocation, resource agents must exit with a defined return code that informs the caller of the outcome of the invoked action. The return codes are explained in detail in the following subsections. === +OCF_SUCCESS+ (0) The action completed successfully. This is the expected return code for any successful +start+, +stop+, +promote+, +demote+, +migrate_from+, +migrate_to+, +meta-data+, +help+, and +usage+ action. For +monitor+ (and its deprecated alias, +status+), however, a modified convention applies: * For primitive (stateless) resources, +OCF_SUCCESS+ from +monitor+ means that the resource is running. Non-running and gracefully shut-down resources must instead return +OCF_NOT_RUNNING+. * For master/slave (stateful) resources, +OCF_SUCCESS+ from +monitor+ means that the resource is running _in Slave mode_. Resources running in Master mode must instead return +OCF_RUNNING_MASTER+, and gracefully shut-down resources must instead return +OCF_NOT_RUNNING+. === +OCF_ERR_GENERIC+ (1) The action returned a generic error. A resource agent should use this exit code only when none of the more specific error codes, defined below, accurately describes the problem.
The cluster resource manager interprets this exit code as a _soft_ error. This means that unless specifically configured otherwise, the resource manager will attempt to recover a resource which failed with +OCF_ERR_GENERIC+ in-place -- usually by restarting the resource on the same node. === +OCF_ERR_ARGS+ (2) The resource agent was invoked with incorrect arguments. This is a safety net "can't happen" error which the resource agent should only return when invoked with, for example, an incorrect number of command line arguments. NOTE: The resource agent should not return this error when instructed to perform an action that it does not support. Instead, under those circumstances, it should return +OCF_ERR_UNIMPLEMENTED+. === +OCF_ERR_UNIMPLEMENTED+ (3) The resource agent was instructed to execute an action that the agent does not implement. Not all resource agent actions are mandatory. +promote+, +demote+, +migrate_to+, +migrate_from+, and +notify+, are all optional actions which the resource agent may or may not implement. When a non-stateful resource agent is misconfigured as a master/slave resource, for example, then the resource agent should alert the user about this misconfiguration by returning +OCF_ERR_UNIMPLEMENTED+ on the +promote+ and +demote+ actions. === +OCF_ERR_PERM+ (4) The action failed due to insufficient permissions. This may be due to the agent not being able to open a certain file, to listen on a specific socket, to write to a directory, or similar. The cluster resource manager interprets this exit code as a _hard_ error. This means that unless specifically configured otherwise, the resource manager will attempt to recover a resource which failed with this error by restarting the resource on a different node (where the permission problem may not exist). === +OCF_ERR_INSTALLED+ (5) The action failed because a required component is missing on the node where the action was executed. 
This may be due to a required binary not being executable, or a vital configuration file being unreadable. The cluster resource manager interprets this exit code as a _hard_ error. This means that unless specifically configured otherwise, the resource manager will attempt to recover a resource which failed with this error by restarting the resource on a different node (where the required files or binaries may be present). === +OCF_ERR_CONFIGURED+ (6) The action failed because the user misconfigured the resource. For example, the user may have configured an alphanumeric string for a parameter that really should be an integer. The cluster resource manager interprets this exit code as a _fatal_ error. Since this is a configuration error that is present cluster-wide, it would make no sense to recover such a resource on a different node, let alone in-place. When a resource fails with this error, the cluster manager will attempt to shut down the resource, and wait for administrator intervention. === +OCF_NOT_RUNNING+ (7) The resource was found not to be running. This is an exit code that may be returned by the +monitor+ action exclusively. Note that this implies that the resource has either _gracefully_ shut down, or has never been started. If the resource is not running due to an error condition, the +monitor+ action should instead return one of the +OCF_ERR_+ exit codes or +OCF_FAILED_MASTER+. === +OCF_RUNNING_MASTER+ (8) The resource was found to be running in the +Master+ role. This applies only to stateful (Master/Slave) resources, and only to their +monitor+ action. Note that there is no specific exit code for "running in slave mode". This is because there is no functional distinction between a primitive resource running normally, and a stateful resource running as a slave. The +monitor+ action of a stateful resource running normally in the +Slave+ role should simply return +OCF_SUCCESS+.
=== +OCF_FAILED_MASTER+ (9) The resource was found to have failed in the +Master+ role. This applies only to stateful (Master/Slave) resources, and only to their +monitor+ action. The cluster resource manager interprets this exit code as a _soft_ error. This means that unless specifically configured otherwise, the resource manager will attempt to recover a resource which failed with +$OCF_FAILED_MASTER+ in-place -- usually by demoting, stopping, starting and then promoting the resource on the same node. == Resource agent structure A typical (shell-based) resource agent contains standard structural items, in the order listed in this section. This section describes the expected behavior of a resource agent with respect to the various actions it supports, using a fictitious resource agent named +foobar+ as an example. === Resource agent interpreter Any resource agent implemented as a script must specify its interpreter using standard "shebang" (+#!+) header syntax. [source,bash] -------------------------------------------------------------------------- #!/bin/sh -------------------------------------------------------------------------- If a resource agent is written in shell, specifying the generic shell interpreter (+#!/bin/sh+) is generally preferred, though not required. Resource agents declared as +/bin/sh+ compatible must not use constructs native to a specific shell (such as, for example, +${!variable}+ syntax native to +bash+). It is advisable to occasionally run such resource agents through a sanitization utility such as +checkbashisms+. It is considered a regression to introduce a patch that will make a previously +sh+ compatible resource agent suitable only for +bash+, +ksh+, or any other non-generic shell. It is, however, perfectly acceptable for a new resource agent to explicitly define a specific shell, such as +/bin/bash+, as its interpreter.
=== Author and license information The resource agent should contain a comment listing the resource agent author(s) and/or copyright holder(s), and stating the license that applies to the resource agent: [source,bash] -------------------------------------------------------------------------- # # Resource Agent for managing foobar resources. # # License: GNU General Public License (GPL) # (c) 2008-2010 John Doe, Jane Roe, # and Linux-HA contributors -------------------------------------------------------------------------- When a resource agent refers to a license for which multiple versions exist, it is assumed that the current version applies. === Initialization Any shell resource agent should source the +ocf-shellfuncs+ function library. With the syntax below, this is done in terms of +$OCF_FUNCTIONS_DIR+, which -- for testing purposes, and also for generating documentation -- may be overridden from the command line. [source,bash] -------------------------------------------------------------------------- # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -------------------------------------------------------------------------- === Functions implementing resource agent actions What follows next are the functions implementing the resource agent's advertised actions. The individual actions are described in detail in <<_resource_agent_actions>>. === Execution block This is the part of the resource agent that actually executes when the resource agent is invoked. It typically follows a fairly standard structure: [source,bash] -------------------------------------------------------------------------- # Make sure meta-data and usage always succeed case $__OCF_ACTION in meta-data) foobar_meta_data exit $OCF_SUCCESS ;; usage|help) foobar_usage exit $OCF_SUCCESS ;; esac # Anything other than meta-data and usage must pass validation foobar_validate_all || exit $? 
# Translate each action into the appropriate function call case $__OCF_ACTION in start) foobar_start;; stop) foobar_stop;; status|monitor) foobar_monitor;; promote) foobar_promote;; demote) foobar_demote;; reload) ocf_log info "Reloading..." foobar_start ;; validate-all) ;; *) foobar_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? # The resource agent may optionally log a debug message ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" exit $rc -------------------------------------------------------------------------- == Resource agent actions Each action is typically implemented in a separate function or method in the resource agent. By convention, these are usually named +<agent>_<action>+, so the function implementing the +start+ action in +foobar+ would be named +foobar_start()+. As a general rule, whenever the resource agent encounters an error that it is not able to recover from, it is permitted to immediately exit, throw an exception, or otherwise cease execution. Examples of this include configuration issues, missing binaries, permission problems, etc. It is not necessary to pass these errors up the call stack. It is the cluster manager's responsibility to initiate the appropriate recovery action based on the user's configuration. The resource agent should not guess at said configuration. === +start+ action When invoked with the +start+ action, the resource agent must start the resource if it is not yet running. This means that the agent must verify the resource's configuration, query its state, and then start it only if it is not running. A common way of doing this would be to invoke the +validate_all+ and +monitor+ functions first, as in the following example: [source,bash] -------------------------------------------------------------------------- foobar_start() { # exit immediately if configuration is not valid foobar_validate_all || exit $?
# if resource is already running, bail out early if foobar_monitor; then ocf_log info "Resource is already running" return $OCF_SUCCESS fi # actually start up the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ... # After the resource has been started, check whether it started up # correctly. If the resource starts asynchronously, the agent may # spin on the monitor function here -- if the resource does not # start up within the defined timeout, the cluster manager will # consider the start action failed while ! foobar_monitor; do ocf_log debug "Resource has not started yet, waiting" sleep 1 done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- === +stop+ action When invoked with the +stop+ action, the resource agent must stop the resource, if it is running. This means that the agent must verify the resource configuration, query its state, and then stop it only if it is currently running. A common way of doing this would be to invoke the +validate_all+ and +monitor+ function first. It is important to understand that +stop+ is a force operation -- the resource agent must do everything in its power to shut down, the resource, short of rebooting the node or shutting it off. Consider the following example: [source,bash] -------------------------------------------------------------------------- foobar_stop() { local rc # exit immediately if configuration is not valid foobar_validate_all || exit $? foobar_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Currently running. Normal, expected behavior. ocf_log debug "Resource is currently running" ;; "$OCF_RUNNING_MASTER") # Running as a Master. Need to demote before stopping. ocf_log info "Resource is currently running as Master" foobar_demote || \ ocf_log warn "Demote failed, trying to stop anyway" ;; "$OCF_NOT_RUNNING") # Currently not running. 
Nothing to do. ocf_log info "Resource is already stopped" return $OCF_SUCCESS ;; esac # actually shut down the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ... # After the resource has been stopped, check whether it shut down # correctly. If the resource stops asynchronously, the agent may # spin on the monitor function here -- if the resource does not # shut down within the defined timeout, the cluster manager will # consider the stop action failed while foobar_monitor; do ocf_log debug "Resource has not stopped yet, waiting" sleep 1 done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- NOTE: The expected exit code for a successful stop operation is +$OCF_SUCCESS+, _not_ +$OCF_NOT_RUNNING+. IMPORTANT: A failed stop operation is a potentially dangerous situation which the cluster manager will almost invariably try to resolve by means of node fencing. In other words, the cluster manager will forcibly evict from the cluster a node on which a stop operation has failed. While this measure serves ultimately to protect data, it does cause disruption to applications and their users. Thus, a resource agent should make sure that it exits with an error only if all avenues for proper resource shutdown have been exhausted. === +monitor+ action The +monitor+ action queries the current status of a resource. It must discern between three different states: * resource is currently running (return +$OCF_SUCCESS+); * resource has stopped gracefully (return +$OCF_NOT_RUNNING+); * resource has run into a problem and must be considered failed (return the appropriate +$OCF_ERR_+ code to indicate the nature of the problem). 
[source,bash] -------------------------------------------------------------------------- foobar_monitor() { local rc # exit immediately if configuration is not valid foobar_validate_all || exit $? ocf_run frobnicate --test # This example assumes the following exit code convention # for frobnicate: # 0: running, and fully caught up with master # 1: gracefully stopped # any other: error case "$?" in 0) rc=$OCF_SUCCESS ocf_log debug "Resource is running" ;; 1) rc=$OCF_NOT_RUNNING ocf_log debug "Resource is not running" ;; *) ocf_log err "Resource has failed" exit $OCF_ERR_GENERIC esac return $rc } -------------------------------------------------------------------------- Stateful (master/slave) resource agents may use a more elaborate monitoring scheme where they can provide "hints" to the cluster manager identifying which instance is best suited to assume the +Master+ role. <<_specifying_a_master_preference>> explains the details. NOTE: The cluster manager may invoke the +monitor+ action for a _probe_, which is a test whether the resource is currently running. Normally, the monitor operation would behave exactly the same during a probe and a "real" monitor action. If a specific resource does require special treatment for probes, however, the +ocf_is_probe+ convenience function is available in the OCF shell functions library for that purpose. === +validate-all+ action The +validate-all+ action tests for correct resource agent configuration and a working environment. +validate-all+ should exit with one of the following return codes: * +$OCF_SUCCESS+ -- all is well, the configuration is valid and usable. * +$OCF_ERR_CONFIGURED+ -- the user has misconfigured the resource. * +$OCF_ERR_INSTALLED+ -- the resource has possibly been configured correctly, but a vital component is missing on the node where +validate-all+ is being executed. 
* +$OCF_ERR_PERM+ -- the resource is configured correctly and is not missing any required components, but is suffering from a permission issue (such as not being able to create a necessary file). +validate-all+ is usually wrapped in a function that is not only called when explicitly invoking the corresponding action, but also -- as a sanity check -- from just about any other function. Therefore, the resource agent author must keep in mind that the function may be invoked during the +start+, +stop+, and +monitor+ operations, and also during probes. Probes pose a separate challenge for validation. During a probe (when the cluster manager may expect the resource _not_ to be running on the node where the probe is executed), some required components may be _expected_ to not be available on the affected node. For example, this includes any shared data on storage devices not available for reading during the probe. The +validate-all+ function may thus need to treat probes specially, using the +ocf_is_probe+ convenience function: [source,bash] -------------------------------------------------------------------------- foobar_validate_all() { # Test for configuration errors first if ! ocf_is_decimal $OCF_RESKEY_eggs; then ocf_log err "eggs is not numeric!" exit $OCF_ERR_CONFIGURED fi # Test for required binaries check_binary frobnicate # Check for data directory (this may be on shared storage, so # disable this test during probes) if ! ocf_is_probe; then if ! [ -d $OCF_RESKEY_datadir ]; then ocf_log err "$OCF_RESKEY_datadir does not exist or is not a directory!" exit $OCF_ERR_INSTALLED fi fi return $OCF_SUCCESS } -------------------------------------------------------------------------- === +meta-data+ action The +meta-data+ action dumps the resource agent metadata to standard output. The output must follow the metadata format as specified in <<_metadata>>. 
[source,bash] -------------------------------------------------------------------------- foobar_meta_data() { cat <<EOF <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="foobar" version="0.1"> <version>0.1</version> ... EOF } -------------------------------------------------------------------------- === +promote+ action The +promote+ action is optional. It must only be supported by _stateful_ resource agents, which means agents that discern between two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally identical to the +Started+ state in a stateless resource agent. Thus, while a regular (stateless) resource agent only needs to implement +start+ and +stop+, a stateful resource agent must also support the +promote+ action to be able to make a transition between the +Started+ (+Slave+) and +Master+ roles. [source,bash] -------------------------------------------------------------------------- foobar_promote() { local rc # exit immediately if configuration is not valid foobar_validate_all || exit $? # test the resource's current state foobar_monitor rc=$? case "$rc" in "$OCF_SUCCESS") # Running as slave. Normal, expected behavior. ocf_log debug "Resource is currently running as Slave" ;; "$OCF_RUNNING_MASTER") # Already a master. Unexpected, but not a problem. ocf_log info "Resource is already running as Master" return $OCF_SUCCESS ;; "$OCF_NOT_RUNNING") # Currently not running. Need to start before promoting. ocf_log info "Resource is currently not running" foobar_start ;; *) # Failed resource. Let the cluster manager recover. ocf_log err "Unexpected error, cannot promote" exit $rc ;; esac # actually promote the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ocf_run frobnicate --master-mode || exit $OCF_ERR_GENERIC # After the resource has been promoted, check whether the # promotion worked. 
If the resource promotion is asynchronous, the # agent may spin on the monitor function here -- if the resource # does not assume the Master role within the defined timeout, the # cluster manager will consider the promote action failed. while true; do foobar_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then ocf_log debug "Resource promoted" break else ocf_log debug "Resource still awaiting promotion" sleep 1 fi done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- === +demote+ action The +demote+ action is optional. It must only be supported by _stateful_ resource agents, which means agents that discern between two distinct _roles_: +Master+ and +Slave+. +Slave+ is functionally identical to the +Started+ state in a stateless resource agent. Thus, while a regular (stateless) resource agent only needs to implement +start+ and +stop+, a stateful resource agent must also support the +demote+ action to be able to make a transition between the +Master+ and +Started+ (+Slave+) roles. [source,bash] -------------------------------------------------------------------------- foobar_demote() { local rc # exit immediately if configuration is not valid foobar_validate_all || exit $? # test the resource's current state foobar_monitor rc=$? case "$rc" in "$OCF_RUNNING_MASTER") # Running as master. Normal, expected behavior. ocf_log debug "Resource is currently running as Master" ;; "$OCF_SUCCESS") # Already running as slave. Nothing to do. ocf_log debug "Resource is currently running as Slave" return $OCF_SUCCESS ;; "$OCF_NOT_RUNNING") # Currently not running. Getting a demote action # in this state is unexpected. Exit with an error # and let the cluster manager recover. ocf_log err "Resource is currently not running" exit $OCF_ERR_GENERIC ;; *) # Failed resource. Let the cluster manager recover. 
ocf_log err "Unexpected error, cannot demote" exit $rc ;; esac # actually demote the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ocf_run frobnicate --unset-master-mode || exit $OCF_ERR_GENERIC # After the resource has been demoted, check whether the # demotion worked. If the resource demotion is asynchronous, the # agent may spin on the monitor function here -- if the resource # does not assume the Slave role within the defined timeout, the # cluster manager will consider the demote action failed. while true; do foobar_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then ocf_log debug "Resource still awaiting demotion" sleep 1 else ocf_log debug "Resource demoted" break fi done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- === +migrate_to+ action The +migrate_to+ action can serve one of two purposes: * Initiate a native _push_ type migration for the resource. In other words, instruct the resource to move _to_ a specific node from the node it is currently running on. The resource agent knows about its destination node via the +$OCF_RESKEY_CRM_meta_migrate_target+ environment variable. * Freeze the resource in a _freeze/thaw_ (also known as _suspend/resume_) type migration. In this mode, the resource does not need any information about its destination node at this point. The example below illustrates a push type migration: [source,bash] -------------------------------------------------------------------------- foobar_migrate_to() { # exit immediately if configuration is not valid foobar_validate_all || exit $? # if resource is not running, bail out early if ! 
foobar_monitor; then ocf_log err "Resource is not running" exit $OCF_ERR_GENERIC fi # actually migrate the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ocf_run frobnicate --migrate \ --dest=$OCF_RESKEY_CRM_meta_migrate_target \ || exit $OCF_ERR_GENERIC ... # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- In contrast, a freeze/thaw type migration may implement its freeze operation like this: [source,bash] -------------------------------------------------------------------------- foobar_migrate_to() { # exit immediately if configuration is not valid foobar_validate_all || exit $? # if resource is not running, bail out early if ! foobar_monitor; then ocf_log err "Resource is not running" exit $OCF_ERR_GENERIC fi # actually freeze the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ocf_run frobnicate --freeze || exit $OCF_ERR_GENERIC ... # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- === +migrate_from+ action The +migrate_from+ action can serve one of two purposes: * Complete a native _push_ type migration for the resource. In other words, check whether the migration has succeeded properly, and the resource is running on the local node. The resource agent knows about its migration source via the +$OCF_RESKEY_CRM_meta_migrate_source+ environment variable. * Thaw the resource in a _freeze/thaw_ (also known as _suspend/resume_) type migration. In this mode, the resource usually does not need any information about its source node at this point. 
The example below illustrates a push type migration: [source,bash] -------------------------------------------------------------------------- foobar_migrate_from() { # exit immediately if configuration is not valid foobar_validate_all || exit $? # After the resource has been migrated, check whether it resumed # correctly. If the resource starts asynchronously, the agent may # spin on the monitor function here -- if the resource does not # run within the defined timeout, the cluster manager will # consider the migrate_from action failed while ! foobar_monitor; do ocf_log debug "Resource has not yet migrated, waiting" sleep 1 done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- In contrast, a freeze/thaw type migration may implement its thaw operation like this: [source,bash] -------------------------------------------------------------------------- foobar_migrate_from() { # exit immediately if configuration is not valid foobar_validate_all || exit $? # actually thaw the resource here (make sure to immediately # exit with an $OCF_ERR_ error code if anything goes seriously # wrong) ocf_run frobnicate --thaw || exit $OCF_ERR_GENERIC # After the resource has been migrated, check whether it resumed # correctly. If the resource starts asynchronously, the agent may # spin on the monitor function here -- if the resource does not # run within the defined timeout, the cluster manager will # consider the migrate_from action failed while ! foobar_monitor; do ocf_log debug "Resource has not yet migrated, waiting" sleep 1 done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- === +notify+ action With notifications, instances of clones (and of master/slave resources, which are an extended kind of clones) can inform each other about their state. 
When notifications are enabled, any action on any instance of a clone carries a +pre+ and +post+ notification. Then, the cluster manager invokes the +notify+ operation on _all_ clone instances. For +notify+ operations, additional environment variables are passed into the resource agent during execution: * +$OCF_RESKEY_CRM_meta_notify_type+ -- the notification type (+pre+ or +post+) * +$OCF_RESKEY_CRM_meta_notify_operation+ -- the operation (action) that the notification is about (+start+, +stop+, +promote+, +demote+ etc.) * +$OCF_RESKEY_CRM_meta_notify_start_uname+ -- node name of the node where the resource is being started (+start+ notifications only) * +$OCF_RESKEY_CRM_meta_notify_stop_uname+ -- node name of the node where the resource is being stopped (+stop+ notifications only) * +$OCF_RESKEY_CRM_meta_notify_master_uname+ -- node name of the node where the resource currently _is in_ the Master role * +$OCF_RESKEY_CRM_meta_notify_promote_uname+ -- node name of the node where the resource currently _is being promoted to_ the Master role (+promote+ notifications only) * +$OCF_RESKEY_CRM_meta_notify_demote_uname+ -- node name of the node where the resource currently _is being demoted to_ the Slave role (+demote+ notifications only) Notifications come in particularly handy for master/slave resources using a "pull" scheme, where the master is a publisher and the slave a subscriber. Since the master is obviously only available as such when a promotion has occurred, the slaves can use a "pre-promote" notification to configure themselves to subscribe to the right publisher. Likewise, the subscribers may want to unsubscribe from the publisher after it has relinquished its master status, and a "post-demote" notification can be used for that purpose. Consider the example below to illustrate the concept. 
[source,bash] -------------------------------------------------------------------------- foobar_notify() { local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') ocf_run frobnicate --slave-mode \ --master=$OCF_RESKEY_CRM_meta_notify_promote_uname \ || exit $OCF_ERR_GENERIC ;; 'post-demote') ocf_run frobnicate --unset-slave-mode || exit $OCF_ERR_GENERIC ;; esac return $OCF_SUCCESS } -------------------------------------------------------------------------- NOTE: A master/slave resource agent may support a _multi-master_ configuration, where there is possibly more than one master at any given time. If that is the case, then the +$OCF_RESKEY_CRM_meta_notify_*_uname+ variables may each contain a space-separated list of hostnames, rather than a single host name as shown in the example. Under those circumstances the resource agent would have to properly iterate over this list. == Script variables This section outlines variables typically available to resource agents, primarily for convenience purposes. For additional variables available while the agent is being executed, refer to <<_environment_variables>> and <<_return_codes>>. === +$OCF_ROOT+ The root of the OCF resource agent hierarchy. This should never be changed by a resource agent. This is usually +/usr/lib/ocf+. === +$OCF_FUNCTIONS_DIR+ The directory where the resource agents' shell function library, +ocf-shellfuncs+, resides. This is usually defined in terms of +$OCF_ROOT+ and should never be changed by a resource agent. This variable may, however, be overridden from the command line while testing a new or modified resource agent. === +$OCF_RESOURCE_INSTANCE+ The resource instance name. For primitive (non-clone, non-stateful) resources, this is simply the resource name. 
For clones and stateful resources, this is the primitive name, followed by a colon and the clone instance number (such as +p_foobar:0+). === +$__OCF_ACTION+ The currently invoked action. This is exactly the first command-line argument that the cluster manager specifies when it invokes the resource agent. === +$__SCRIPT_NAME+ The name of the resource agent. This is exactly the base name of the resource agent script, with leading directory names removed. === +$HA_RSCTMP+ A temporary directory for use by resource agents. The system startup sequence (on any LSB compliant Linux distribution) guarantees that this directory is emptied on system startup, so this directory will not contain any stale data after a node reboot. == Convenience functions === Logging: +ocf_log+ Resource agents should use the +ocf_log+ function for logging purposes. This convenient logging wrapper is invoked as follows: [source,bash] -------------------------------------------------------------------------- ocf_log <severity> "Log message" -------------------------------------------------------------------------- It supports the following severity levels: * +debug+ -- for debugging messages. Most logging configurations suppress this level by default. * +info+ -- for informational messages about the agent's behavior or status. * +warn+ -- for warnings. This is for any messages which reflect unexpected behavior that does _not_ constitute an unrecoverable error. * +err+ -- for errors. As a general rule, this logging level should only be used immediately prior to an +exit+ with the appropriate error code. * +crit+ -- for critical errors. As with +err+, this logging level should not be used unless the resource agent also exits with an error code. Very rarely used. === Testing for binaries: +have_binary+ and +check_binary+ A resource agent may need to test for the availability of a specific executable. 
The +have_binary+ convenience function comes in handy here: [source,bash] -------------------------------------------------------------------------- if ! have_binary frobnicate; then ocf_log warn "Missing frobnicate binary, frobnication disabled!" fi -------------------------------------------------------------------------- If a missing binary is a fatal problem for the resource, then the +check_binary+ function should be used: [source,bash] -------------------------------------------------------------------------- check_binary frobnicate -------------------------------------------------------------------------- Using +check_binary+ is a shorthand method for testing for the existence (and executability) of the specified binary, and exiting with +$OCF_ERR_INSTALLED+ if it cannot be found or executed. NOTE: Both +have_binary+ and +check_binary+ honor +$PATH+ when the binary to test for is not specified as a full path. It is usually wise to _not_ test for a full path, as binary installation paths may vary by distribution or user policy. === Executing commands and capturing their output: +ocf_run+ Whenever a resource agent needs to execute a command and capture its output, it should use the +ocf_run+ convenience function, invoked as in this example: [source,bash] -------------------------------------------------------------------------- ocf_run "frobnicate --spam=eggs" || exit $OCF_ERR_GENERIC -------------------------------------------------------------------------- With the command specified above, the resource agent will invoke +frobnicate --spam=eggs+ and capture its output and exit code. If the exit code is nonzero (indicating an error), +ocf_run+ logs the command output with the +err+ logging severity, and the resource agent subsequently exits. If the exit code is zero (indicating success), any command output will be logged with the +info+ logging severity. 
If the resource agent wishes to ignore the output of a successful command execution, it can use the +-q+ flag with +ocf_run+. In the example below, +ocf_run+ will only log output if the command exit code is nonzero. [source,bash] -------------------------------------------------------------------------- ocf_run -q "frobnicate --spam=eggs" || exit $OCF_ERR_GENERIC -------------------------------------------------------------------------- Finally, if the resource agent wants to log the output of a command with a nonzero exit code with a severity _other_ than error, it may do so by adding the +-info+ or +-warn+ option to +ocf_run+: [source,bash] -------------------------------------------------------------------------- ocf_run -warn "frobnicate --spam=eggs" -------------------------------------------------------------------------- === Locks: +ocf_take_lock+ and +ocf_release_lock_on_exit+ Occasionally, there may be different resources of the same type in a cluster configuration that should not execute actions in parallel. When a resource agent needs to guard against parallel execution on the same machine, it can use the +ocf_take_lock+ and +ocf_release_lock_on_exit+ convenience functions: [source,bash] -------------------------------------------------------------------------- LOCKFILE=${HA_RSCTMP}/foobar ocf_release_lock_on_exit $LOCKFILE foobar_start() { ... ocf_take_lock $LOCKFILE ... } -------------------------------------------------------------------------- +ocf_take_lock+ attempts to acquire the designated +$LOCKFILE+. When it is unavailable, it sleeps a random amount of time between 0 and 1 seconds, and retries. +ocf_release_lock_on_exit+ releases the lock file when the agent exits (for any reason). === Testing for numerical values: +ocf_is_decimal+ Specifically for parameter validation, it can be helpful to test whether a given value is numeric. 
The +ocf_is_decimal+ function exists for that purpose: [source,bash] -------------------------------------------------------------------------- foobar_validate_all() { if ! ocf_is_decimal $OCF_RESKEY_eggs; then ocf_log err "eggs is not numeric!" exit $OCF_ERR_CONFIGURED fi ... } -------------------------------------------------------------------------- === Testing for boolean values: +ocf_is_true+ When a resource agent defines a boolean parameter, the value for this parameter may be specified by the user as +0+/+1+, +true+/+false+, or +on+/+off+. Since it is tedious to test for all these values from within the resource agent, the agent should instead use the +ocf_is_true+ convenience function: [source,bash] -------------------------------------------------------------------------- if ocf_is_true $OCF_RESKEY_superfrobnicate; then ocf_run "frobnicate --super" fi -------------------------------------------------------------------------- NOTE: If +ocf_is_true+ is used against an empty or non-existent variable, it always returns an exit code of +1+, which is equivalent to +false+. === Version comparison: +ocf_version_cmp+ A resource agent may want to check the version of software installed. +ocf_version_cmp+ takes care of all the necessary details. The return codes are: * +0+ -- the first version is smaller (earlier) than the second * +1+ -- the two versions are equal * +2+ -- the first version is greater (later) than the second * +3+ -- one of the arguments is not recognized as a version string The versions are allowed to contain digits, dots, and dashes. [source,bash] -------------------------------------------------------------------------- local v=`gooey --version` ocf_version_cmp "$v" 12.0.8-1 case $? 
in 0) ocf_log err "we do not support version $v, it is too old" exit $OCF_ERR_INSTALLED ;; [12]) ;; # we can work with versions >= 12.0.8-1 3) ocf_log err "gooey produced version <$v>, too funky for me" exit $OCF_ERR_INSTALLED ;; esac -------------------------------------------------------------------------- === Pseudo resources: +ha_pseudo_resource+ "Pseudo resources" are those where the resource agent in fact does not actually start or stop something akin to a runnable process, but merely executes a single action and then needs some form of tracing whether that action has been executed or not. The +portblock+ resource agent is an example of this. Resource agents for pseudo resources can use a convenience function, +ha_pseudo_resource+, which makes use of _tracking files_ to keep tabs on the status of a resource. If +foobar+ was designed to manage a pseudo resource, then its +start+ action could look like this: [source,bash] -------------------------------------------------------------------------- foobar_start() { # exit immediately if configuration is not valid foobar_validate_all || exit $? # if resource is already running, bail out early if foobar_monitor; then ocf_log info "Resource is already running" return $OCF_SUCCESS fi # start the pseudo resource ha_pseudo_resource ${OCF_RESOURCE_INSTANCE} start # After the resource has been started, check whether it started up # correctly. If the resource starts asynchronously, the agent may # spin on the monitor function here -- if the resource does not # start up within the defined timeout, the cluster manager will # consider the start action failed while ! 
foobar_monitor; do ocf_log debug "Resource has not started yet, waiting" sleep 1 done # only return $OCF_SUCCESS if _everything_ succeeded as expected return $OCF_SUCCESS } -------------------------------------------------------------------------- == Conventions This section contains a collection of conventions that have emerged in the resource agent repositories over the years. Following these conventions is by no means mandatory for resource agent authors, but it is a good idea based on the http://en.wikipedia.org/wiki/Principle_of_least_surprise[Principle of Least Surprise] -- resource agents following these conventions will be easier to understand, review, and use than those that do not. === Well-known parameter names Several parameter names are supported by a number of resource agents. For new resource agents, following these examples is generally a good idea: * +binary+ -- the name of a binary that principally manages the resource, such as a server daemon * +config+ -- the full path to a configuration file * +pid+ -- the full path to a file holding a process ID (PID) * +log+ -- the full path to a log file * +socket+ -- the full path to a UNIX socket that the resource manages * +ip+ -- an IP address that a daemon binds to * +port+ -- a TCP or UDP port that a daemon binds to Needless to say, resource agents should only implement any of these parameters if they are sensible to use in the agent's context. === Parameter defaults Defaults for resource agent parameters should be set by initializing variables with the suffix +_default+: [source,bash] -------------------------------------------------------------------------- # Defaults OCF_RESKEY_superfrobnicate_default=0 : ${OCF_RESKEY_superfrobnicate=${OCF_RESKEY_superfrobnicate_default}} -------------------------------------------------------------------------- NOTE: The resource agent should make sure that it sets a default for any parameter not marked as +required+ in the metadata. 
=== Honoring +PATH+ for binaries When a resource agent supports a parameter designed to hold the name of a binary (such as a daemon, or a client utility for querying status), then that parameter should honor the +PATH+ environment variable. Do not supply full paths. Thus, the following approach: [source,bash] -------------------------------------------------------------------------- # Good example -- do it this way OCF_RESKEY_frobnicate_default="frobnicate" : ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} -------------------------------------------------------------------------- is much preferred over specifying a full path, as shown here: [source,bash] -------------------------------------------------------------------------- # Bad example -- avoid if you can OCF_RESKEY_frobnicate_default="/usr/local/sbin/frobnicate" : ${OCF_RESKEY_frobnicate="${OCF_RESKEY_frobnicate_default}"} -------------------------------------------------------------------------- This rule holds for defaults, as well. == Special considerations === Licensing Whenever possible, resource agent contributors are _encouraged_ to use the GNU General Public License (GPL), version 2 and later, for any new resource agents. The shell functions library does not strictly mandate this, however, as it is licensed under the GNU Lesser General Public License (LGPL), version 2.1 and later (so it can be used by non-GPL agents). The resource agent _must_ explicitly state its own license in the agent source code. === Locale settings When sourcing +ocf-shellfuncs+ as explained in <<_initialization>>, any resource agent automatically sets +LANG+ and +LC_ALL+ to the +C+ locale. Resource agents can thus expect to always operate in the +C+ locale, and need not reset +LANG+ or any of the +LC_+ environment variables themselves. 
=== Testing for running processes For testing whether a particular process (with a known process ID) is currently running, a frequently found method is to send it a +0+ signal and catch errors, similar to this example: [source,bash] -------------------------------------------------------------------------- if kill -s 0 `cat $daemon_pid_file`; then ocf_log debug "Process is currently running" else ocf_log warn "Process is dead, removing pid file" rm -f $daemon_pid_file fi -------------------------------------------------------------------------- IMPORTANT: An approach far superior to this example is to instead test the _functionality_ of the daemon by connecting to it with a client process, as shown in the example in <<_literal_monitor_literal_action>>. === Specifying a master preference Stateful (master/slave) resources must set their own _master preference_ -- they can thus provide hints to the cluster manager which is the best instance to promote to the +Master+ role. IMPORTANT: It is acceptable for multiple instances to have identical positive master preferences. In that case, the cluster resource manager will automatically select a resource agent to promote. However, if _all_ instances have the (default) master score of zero, the cluster manager will not promote any instance at all. Thus, it is crucial that at least one instance has a positive master score. For this purpose, +crm_master+ comes in handy. This convenience wrapper around +crm_attribute+ sets a node attribute named +master-<<_literal_ocf_resource_instance_literal,$OCF_RESOURCE_INSTANCE>>+ for the node it is being executed on, and fills this attribute with the specified value. The cluster manager is then expected to translate this into a promotion score for the corresponding instance, and base its promotion preference on that score. 
Stateful resource agents typically execute +crm_master+ during the <<_literal_monitor_literal_action,+monitor+>> and/or <<_literal_notify_literal_action,+notify+>> action. The following example assumes that the +foobar+ resource agent can test the application's status by executing a binary that returns certain exit codes based on whether * the resource is either in the master role, or is a slave that is fully caught up with the master (at any rate, it has current data), or * the resource is in the slave role, but through some form of asynchronous replication has "fallen behind" the master, or * the resource has gracefully stopped, or * the resource has unexpectedly failed. [source,bash] -------------------------------------------------------------------------- foobar_monitor() { local rc # exit immediately if configuration is not valid foobar_validate_all || exit $? ocf_run frobnicate --test # This example assumes the following exit code convention # for frobnicate: # 0: running, and fully caught up with master # 1: gracefully stopped # 2: running, but lagging behind master # any other: error case "$?" in 0) rc=$OCF_SUCCESS ocf_log debug "Resource is running" # Set a high master preference. The current master # will always get this, plus 1. Any current slaves # will get a high preference so that if the master # fails, they are next in line to take over. 
crm_master -l reboot -v 100 ;; 1) rc=$OCF_NOT_RUNNING ocf_log debug "Resource is not running" # Remove the master preference for this node crm_master -l reboot -D ;; 2) rc=$OCF_SUCCESS ocf_log debug "Resource is lagging behind master" # Set a low master preference: if the master fails # right now, and there is another slave that does # not lag behind the master, its higher master # preference will win and that slave will become # the new master crm_master -l reboot -v 5 ;; *) ocf_log err "Resource has failed" exit $OCF_ERR_GENERIC esac return $rc } -------------------------------------------------------------------------- == Testing resource agents This section discusses automated testing for resource agents. Testing is a vital aspect of development; it is crucial both for creating new resource agents, and for modifying existing ones. === Testing with +ocf-tester+ The resource agents repository (and hence, any installed resource agents package) contains a utility named +ocf-tester+. This shell script allows you to conveniently and easily test the functionality of your resource agent. +ocf-tester+ is commonly invoked, as +root+, like this: -------------------------------------------------------------------------- ocf-tester -n <name> [-o <param>=<value> ... ] <resource agent> -------------------------------------------------------------------------- * +<name>+ is an arbitrary resource name. * You may set any number of +<param>=<value>+ with the +-o+ option, corresponding to any resource parameters you wish to set for testing. * +<resource agent>+ is the full path to your resource agent. When invoked, +ocf-tester+ executes all mandatory actions and enforces action behavior as explained in <<_resource_agent_actions>>. It also tests for optional actions. Optional actions must behave as expected when advertised, but do not cause +ocf-tester+ to flag an error if not implemented. IMPORTANT: +ocf-tester+ does not initiate "dry runs" of actions, nor does it create resource dummies of any kind. 
Instead, it exercises the actual resource agent as-is, whether that may include opening and closing databases, mounting file systems, starting or stopping virtual machines, etc. Use with care. For example, you could run +ocf-tester+ on the +foobar+ resource agent as follows: -------------------------------------------------------------------------- # ocf-tester -n foobartest \ -o superfrobnicate=true \ -o datadir=/tmp \ /home/johndoe/ra-dev/foobar Beginning tests for /home/johndoe/ra-dev/foobar... * Your agent does not support the notify action (optional) * Your agent does not support the reload action (optional) /home/johndoe/ra-dev/foobar passed all tests -------------------------------------------------------------------------- === Testing with +ocft+ +ocft+ is a testing tool for resource agents. The main difference to +ocf-tester+ is that +ocft+ can automate creating complex testing environments. That includes package installation and arbitrary shell scripting. ==== +ocft+ components +ocft+ consists of the following components: * A test case generator (+/usr/sbin/ocft+) -- generates shell scripts from test case configuration files * Configuration files (+/usr/share/resource-agents/ocft/configs/+) -- a configuration file contains environment setup and test cases for one resource agent * The testing scripts are stored in +/var/lib/resource-agents/ocft/cases/+, but normally there is no need to inspect them ==== Customizing the testing environment +ocft+ modifies the runtime environment of the resource agent either by changing environment variables (through the interface defined by OCF) or by running ad-hoc shell scripts which can for instance change permissions of a file or unmount a file system. ==== How to test You need to know the software (resource) you want to test. Draw a sketch of all interesting scenarios, with all expected and unexpected conditions and how the resource agent should react to them. 
Then you need to encode these conditions and the expected outcomes as +ocft+ test cases. Running +ocft+ is then simple: --------------------------------------- # ocft make # ocft test --------------------------------------- The first subcommand generates the scripts for your test cases whereas the second runs them and checks the outcome. ==== +ocft+ configuration file syntax There are four top level options each of which can contain one or more sub-options. ===== +CONFIG+ (top level option) This option is global and influences every test case. ** +AgentRoot+ (sub-option) --------------------------------------- AgentRoot /usr/lib/ocf/resource.d/xxx --------------------------------------- Normally, we assume that the resource agent lives under the +heartbeat+ provider. Use +AgentRoot+ to test an agent that is distributed by another vendor. ** +InstallPackage+ (sub-option) --------------------------------------- InstallPackage package [package2 [...]] --------------------------------------- Install packages necessary for testing. The installation is skipped if the packages have already been installed. ** +HangTimeout+ (sub-option) --------------------------------------- HangTimeout secs --------------------------------------- The maximum time allowed for a single RA action. If this timer expires, the action is considered to have failed. ===== +SETUP-AGENT+ (top level option) --------------------------------------- SETUP-AGENT bash commands --------------------------------------- If the RA needs to be initialized before testing, you can put bash code here for that purpose. The initialization is done only once. If you need to reinitialize, delete the +/tmp/.[AGENT_NAME]_set+ stamp file. ===== +CASE+ (top level option) --------------------------------------- CASE "description" --------------------------------------- This is the main building block of the test suite. Each test case is to be described in one +CASE+ top level option. 
One case consists of several suboptions typically followed by the +RunAgent+ suboption. ** +Var+ (sub-option) --------------------------------------- Var VARIABLE=value --------------------------------------- Sets an environment variable for the resource agent. These are usually of the form +OCF_RESKEY_xxx+. Note that there must be no blanks on either side of the "=". ** +Unvar+ (sub-option) --------------------------------------- Unvar VARIABLE [VARIABLE2 [...]] --------------------------------------- Removes the environment variable. ** +Include+ (sub-option) --------------------------------------- Include macro_name --------------------------------------- Includes the statements defined in the macro 'macro_name'. See below for a description of +CASE-BLOCK+. ** +Bash+ (sub-option) --------------------------------------- Bash bash_codes --------------------------------------- This option sets up the OS environment: you can insert arbitrary bash code here to customize the system. Take care not to cause unrecoverable damage to the system. ** +BashAtExit+ (sub-option) --------------------------------------- BashAtExit bash_codes --------------------------------------- This option restores the OS environment so that the next test case can run correctly. Of course, you could also use the +Bash+ option for that. However, if an error occurs along the way, the script quits immediately without running your recovery code. To cover that case, use +BashAtExit+, which restores the system environment before the script exits. ** +RunAgent+ (sub-option) --------------------------------------- RunAgent cmd [ret_value] --------------------------------------- This option runs the resource agent. "cmd" is the action passed to the resource agent, such as "start", "status", or "stop". The second parameter is optional: if it is given, the value actually returned by the resource agent is compared with this expected value. If they differ, a bug has been found. 
It is also possible to execute a suboption on a remote host instead of locally. The protocol used is ssh and the command is run in the background. Just add the +@<ipaddr>+ suffix to the suboption name. For instance: --------------------------------------- Bash@192.168.1.100 date --------------------------------------- would run the +date+ program on the remote host. Remote commands are run in the background. NB: It is not clear how ssh can be automated, as we do not know the environment in advance. Perhaps use "well-known" host names such as "node2"? Also, if the command runs in the background, it is not clear how the exit code is checked. Finally, does Var@node make sense? Or is the current environment somehow copied over? We probably need an example here. Need examples in general. ===== +CASE-BLOCK+ (top level option) --------------------------------------- CASE-BLOCK macro_name --------------------------------------- The +CASE-BLOCK+ option defines a macro which can be +Include+d in any +CASE+. All +CASE+ suboptions are valid in +CASE-BLOCK+. == Installing and packaging resource agents This section discusses what to do with your resource agent once it is done and tested -- where to install it, and how to include it in either your own application package or in the Linux-HA resource agents repository. === Installing resource agents If you choose to include your resource agent in your own project, make sure it installs into the correct location. Resource agents should install into the +/usr/lib/ocf/resource.d/<provider>+ directory, where +<provider>+ is the name of your project or any other name you wish to identify the resource agent with. For example, if your +foobar+ resource agent is being packaged as part of a project named +fortytwo+, then the correct full path to your resource agent would be +/usr/lib/ocf/resource.d/fortytwo/foobar+. Make sure your resource agent installs with +0755+ (+-rwxr-xr-x+) permission bits. 
When installed this way, OCF-compliant cluster resource managers will be able to properly identify, parse, and execute your resource agent. The Pacemaker cluster manager, for example, would map the above-mentioned installation path to the +ocf:fortytwo:foobar+ resource type identifier. === Packaging resource agents When you package resource agents as part of your own project, you should apply the considerations outlined in this section. NOTE: If you instead prefer to submit your resource agent to the Linux-HA resource agents repository, see <<_submitting_resource_agents>> for information on doing so. ==== RPM packaging It is recommended to put your OCF resource agent(s) in an RPM sub-package, with the name +<toppackage>-resource-agents+. Ensure that the package owns its provider directory, and depends on the upstream +resource-agents+ package which lays out the directory hierarchy and provides convenience shell functions. An example RPM spec snippet is given below: -------------------------------------------------------------------------- %package resource-agents Summary: OCF resource agent for Foobar Group: System Environment/Base Requires: %{name} = %{version}-%{release}, resource-agents %description resource-agents This package contains the OCF-compliant resource agents for Foobar. %files resource-agents %defattr(755,root,root,-) %dir %{_prefix}/lib/ocf/resource.d/fortytwo %{_prefix}/lib/ocf/resource.d/fortytwo/foobar -------------------------------------------------------------------------- NOTE: If an RPM spec file contains a +%package+ declaration, then RPM considers this a sub-package which inherits top-level fields such as +Name+, +Version+, +License+, etc. Sub-packages have the top-level package name automatically prepended to their own name. Thus the snippet above would create a sub-package named +foobar-resource-agents+ (presuming the package +Name+ is +foobar+). 
==== Debian packaging For Debian packages, like for <<_rpm_packaging,RPMs>>, it is recommended to create a separate package holding your resource agents, which then should depend on the +cluster-agents+ package. NOTE: This section assumes that you are packaging with +debhelper+. An example +debian/control+ snippet is given below: -------------------------------------------------------------------------- Package: foobar-cluster-agents Priority: extra Architecture: all Depends: cluster-agents Description: OCF-compliant resource agents for Foobar -------------------------------------------------------------------------- You will also create a separate +.install+ file. Sticking with the example of installing the +foobar+ resource agent as a sub-package of +fortytwo+, the +debian/fortytwo-cluster-agents.install+ file could consist of the following content: -------------------------------------------------------------------------- usr/lib/ocf/resource.d/fortytwo/foobar -------------------------------------------------------------------------- === Submitting resource agents If you choose not to bundle your resource agent with your own package, but instead wish to submit it to the upstream resource agent repository hosted on https://github.com/ClusterLabs/resource-agents[the ClusterLabs repository on GitHub], please follow the steps outlined in this section. Create a working copy (a Git _clone_) of the upstream repository with the following command: -------------------------------------------------------------------------- git clone git://github.com/ClusterLabs/resource-agents -------------------------------------------------------------------------- Then, copy your resource agent into the +heartbeat+ subdirectory: -------------------------------------------------------------------------- cd resource-agents/heartbeat cp /path/to/your/local/copy/of/foobar . chmod 0755 foobar cd .. 
-------------------------------------------------------------------------- Next, modify the +Makefile.am+ file in +resource-agents/heartbeat+ and add your new resource agent to the +ocf_SCRIPTS+ list. This will make sure the agent is properly installed. Lastly, open +Makefile.am+ in +resource-agents/doc/man+ and add +ocf_heartbeat_<name>.7+ to the +man_MANS+ variable. This will automatically generate a resource agent manual page from its metadata, and then install that man page into the correct location. Now, add your new resource agent, and the two modifications to the Makefiles, to your changeset: -------------------------------------------------------------------------- git add heartbeat/foobar git add heartbeat/Makefile.am git add doc/man/Makefile.am git commit -------------------------------------------------------------------------- In your commit message, be sure to include a meaningful description, for example: -------------------------------------------------------------------------- High: foobar: new resource agent This new resource agent adds functionality to manage a foobar service. It supports being configured as a primitive or as a master/slave set, and also optionally supports superfrobnication. -------------------------------------------------------------------------- Now the patch set is good for review on the mailing list: -------------------------------------------------------------------------- git send-email --to=linux-ha-dev@lists.linux-ha.org -------------------------------------------------------------------------- +git send-email+ will now roll all local commits not in the upstream repository into a nicely formatted email, and submit that to the mailing list. Please consult +man git-send-email+ for details on configuring and using +git send-email+. Once your new resource agent has been accepted for merging, one of the upstream developers will push your patch into the upstream repository. 
At that point, you can update your checkout from upstream, and remove your own patch set. -------------------------------------------------------------------------- git reset --hard origin/master git pull -------------------------------------------------------------------------- === Maintaining resource agents If you maintain a specific resource agent, or you are making repeated contributions to the codebase, it's usually a good idea to maintain your own _fork_ of the +ClusterLabs/resource-agents+ repository on GitHub. To do so, * https://github.com/signup[Create a GitHub account] if you do not have one already. * http://help.github.com/fork-a-repo/[Fork] the https://github.com/ClusterLabs/resource-agents[+resource-agents+ repository]. * Clone your personal fork into a local working copy. As you work on resource agents, *please* commit early, and commit often. You can always fold commits later with +git rebase -i+. Once you have made a number of changes that you would like others to review, push them to your GitHub fork and send a post to the +linux-ha-dev+ mailing list pointing people to it. After the review is done, fix up your tree with any requested changes, and then issue a pull request. There are two ways of doing so: * You can use the +git request-pull+ utility to get a pre-populated email skeleton summarizing your changesets. Add any information you see fit, and send it to the list. It is a good idea to prefix your email subject with +[GIT PULL]+ so upstream maintainers can pick the message out easily. * You can also issue a pull request directly on GitHub. GitHub automatically notifies upstream maintainers about new pull requests by email. Please refer to http://help.github.com/send-pull-requests/[github:help] for details on initiating pull requests. 
ClusterLabs-resource-agents-dc69db5/doc/man/000077500000000000000000000000001203363223200210575ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/doc/man/Makefile.am000066400000000000000000000152521203363223200231200ustar00rootroot00000000000000# # doc: Linux-HA resource agents # # Copyright (C) 2009 Florian Haas # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(doc_DATA) $(REFENTRY_STYLESHEET) \ mkappendix.sh ralist.sh CLEANFILES = $(man_MANS) $(xmlfiles) metadata-*.xml STYLESHEET_PREFIX ?= http://docbook.sourceforge.net/release/xsl/current MANPAGES_STYLESHEET ?= $(STYLESHEET_PREFIX)/manpages/docbook.xsl HTML_STYLESHEET ?= $(STYLESHEET_PREFIX)/xhtml/docbook.xsl FO_STYLESHEET ?= $(STYLESHEET_PREFIX)/fo/docbook.xsl REFENTRY_STYLESHEET ?= ra2refentry.xsl XSLTPROC_OPTIONS ?= --xinclude XSLTPROC_MANPAGES_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) radir = $(top_srcdir)/heartbeat # OCF_ROOT=. is necessary due to a sanity check in ocf-shellfuncs # (which tests whether $OCF_ROOT points to a directory metadata-%.xml: $(radir)/% OCF_ROOT=. OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ metadata-IPv6addr.xml: ../../heartbeat/IPv6addr OCF_ROOT=. 
OCF_FUNCTIONS_DIR=$(radir) $< meta-data > $@ # Please note: we can't name the man pages # ocf:heartbeat:<name>. Believe me, I've tried. It looks like it # works, but then it doesn't. While make can deal correctly with # colons in target names (when properly escaped), it royally messes up # when it deals with _dependencies_ that contain colons. See Bug # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was # first reported in 1995 and added to Savannah in 2005... if BUILD_DOC man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_AudibleAlarm.7 \ ocf_heartbeat_ClusterMon.7 \ ocf_heartbeat_CTDB.7 \ ocf_heartbeat_Delay.7 \ ocf_heartbeat_Dummy.7 \ ocf_heartbeat_EvmsSCC.7 \ ocf_heartbeat_Evmsd.7 \ ocf_heartbeat_Filesystem.7 \ ocf_heartbeat_ICP.7 \ ocf_heartbeat_IPaddr.7 \ ocf_heartbeat_IPaddr2.7 \ ocf_heartbeat_IPsrcaddr.7 \ ocf_heartbeat_LVM.7 \ ocf_heartbeat_LinuxSCSI.7 \ ocf_heartbeat_MailTo.7 \ ocf_heartbeat_ManageRAID.7 \ ocf_heartbeat_ManageVE.7 \ ocf_heartbeat_Pure-FTPd.7 \ ocf_heartbeat_Raid1.7 \ ocf_heartbeat_Route.7 \ ocf_heartbeat_SAPDatabase.7 \ ocf_heartbeat_SAPInstance.7 \ ocf_heartbeat_SendArp.7 \ ocf_heartbeat_ServeRAID.7 \ ocf_heartbeat_SphinxSearchDaemon.7 \ ocf_heartbeat_Squid.7 \ ocf_heartbeat_Stateful.7 \ ocf_heartbeat_SysInfo.7 \ ocf_heartbeat_VIPArip.7 \ ocf_heartbeat_VirtualDomain.7 \ ocf_heartbeat_WAS.7 \ ocf_heartbeat_WAS6.7 \ ocf_heartbeat_WinPopup.7 \ ocf_heartbeat_Xen.7 \ ocf_heartbeat_Xinetd.7 \ ocf_heartbeat_anything.7 \ ocf_heartbeat_apache.7 \ ocf_heartbeat_asterisk.7 \ ocf_heartbeat_conntrackd.7 \ ocf_heartbeat_db2.7 \ ocf_heartbeat_dhcpd.7 \ ocf_heartbeat_drbd.7 \ ocf_heartbeat_eDir88.7 \ ocf_heartbeat_ethmonitor.7 \ ocf_heartbeat_exportfs.7 \ ocf_heartbeat_fio.7 \ ocf_heartbeat_iSCSILogicalUnit.7 \ ocf_heartbeat_iSCSITarget.7 \ ocf_heartbeat_ids.7 \ ocf_heartbeat_iscsi.7 \ ocf_heartbeat_jboss.7 \ ocf_heartbeat_lxc.7 \ ocf_heartbeat_mysql.7 \ ocf_heartbeat_mysql-proxy.7 \ ocf_heartbeat_named.7 \ ocf_heartbeat_nfsserver.7 \ 
ocf_heartbeat_nginx.7 \ ocf_heartbeat_oracle.7 \ ocf_heartbeat_oralsnr.7 \ ocf_heartbeat_pgsql.7 \ ocf_heartbeat_pingd.7 \ ocf_heartbeat_portblock.7 \ ocf_heartbeat_postfix.7 \ ocf_heartbeat_pound.7 \ ocf_heartbeat_proftpd.7 \ ocf_heartbeat_rsyncd.7 \ ocf_heartbeat_rsyslog.7 \ ocf_heartbeat_scsi2reservation.7 \ ocf_heartbeat_sfex.7 \ ocf_heartbeat_slapd.7 \ ocf_heartbeat_symlink.7 \ ocf_heartbeat_syslog-ng.7 \ ocf_heartbeat_tomcat.7 \ ocf_heartbeat_varnish.7 \ ocf_heartbeat_vmware.7 if USE_IPV6ADDR man_MANS += ocf_heartbeat_IPv6addr.7 endif xmlfiles = $(man_MANS:.7=.xml) %.1 %.5 %.7 %.8: %.xml $(XSLTPROC) \ $(XSLTPROC_MANPAGES_OPTIONS) \ $(MANPAGES_STYLESHEET) $< ocf_heartbeat_%.xml: metadata-%.xml $(srcdir)/$(REFENTRY_STYLESHEET) $(XSLTPROC) --novalid \ --stringparam package $(PACKAGE_NAME) \ --stringparam version $(VERSION) \ --output $@ \ $(srcdir)/$(REFENTRY_STYLESHEET) $< ocf_resource_agents.xml: $(xmlfiles) mkappendix.sh ./mkappendix.sh $(xmlfiles) > $@ %.html: %.xml $(XSLTPROC) \ $(XSLTPROC_HTML_OPTIONS) \ --output $@ \ $(HTML_STYLESHEET) $< xml: ocf_resource_agents.xml endif ClusterLabs-resource-agents-dc69db5/doc/man/mkappendix.sh000077500000000000000000000006671203363223200235670ustar00rootroot00000000000000#!/bin/sh cat < Resource agent manual pages EOF for manpage in `printf "%s\n" $@ | sort -f`; do cat < EOF done cat < EOF ClusterLabs-resource-agents-dc69db5/doc/man/ra2refentry.xsl000066400000000000000000000422601203363223200240560ustar00rootroot00000000000000 resource-agents ocf heartbeat 7 | __ re-ra- Linux-HA contributors (see the resource agent source for information about individual authors) OCF resource agents Description This resource agent may be configured for native migration if available in the cluster manager. For Pacemaker, the allow-migrate="true" meta attribute enables native migration. Supported Parameters This resource agent does not support any parameters. 
( unique, required optional , , , default " " default false no default ) Supported Actions This resource agent does not advertise any supported actions. This resource agent supports the following actions (operations): Starts the resource. Stops the resource. Performs a status check. Performs a detailed status check. Promotes the resource to the Master role. Demotes the resource to the Slave role. Executes steps necessary for migrating the resource away from the node. Executes steps necessary for migrating the resource to the node. Performs a validation of the resource configuration. Retrieves resource agent metadata (internal use only). Suggested minimum timeout: . Suggested interval: . Example The following is an example configuration for a resource using the crm8 shell: primitive p_ : : \ params \ \ meta allow-migrate="true" \ ms ms_ p_ \ meta notify="true" interleave="true" = \ " " op \ =" " See also http://www.linux-ha.org/wiki/ _(resource_agent) ClusterLabs-resource-agents-dc69db5/doc/man/ralist.sh000077500000000000000000000002061203363223200227120ustar00rootroot00000000000000#!/bin/sh RADIR=$1 PREFIX=$2 SUFFIX=$3 for f in `find $RADIR -type f -executable`; do echo ${PREFIX}`basename $f`${SUFFIX} done ClusterLabs-resource-agents-dc69db5/heartbeat/000077500000000000000000000000001203363223200214765ustar00rootroot00000000000000ClusterLabs-resource-agents-dc69db5/heartbeat/AoEtarget000077500000000000000000000160541203363223200233050ustar00rootroot00000000000000#!/bin/bash # # # AoEtarget OCF RA. # Manages an ATA-over-Ethernet (AoE) target utilizing the vblade utility. # # (c) 2009-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
# # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ###################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_nic_default="eth0" OCF_RESKEY_pid_default="${HA_RSCTMP}/AoEtarget-${OCF_RESOURCE_INSTANCE}.pid" OCF_RESKEY_binary_default="/usr/sbin/vblade" : ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} ####################################################################### meta_data() { cat < 1.0 This resource agent manages an ATA-over-Ethernet (AoE) target using vblade. It exports any block device, or file, as an AoE target using the specified Ethernet device, shelf, and slot number. Manages ATA-over-Ethernet (AoE) target exports The local block device (or file) to export as an AoE target. Device to export The local Ethernet interface to use for exporting this AoE target. Ethernet interface The AoE shelf number to use when exporting this target. AoE shelf number The AoE slot number to use when exporting this target. AoE slot number The file to record the daemon pid to. 
Daemon pid file Location of the vblade binary. vblade binary EOF } ####################################################################### AoEtarget_usage() { cat <&1 & rc=$? pid=$! if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC fi echo $pid > ${OCF_RESKEY_pid} && return $OCF_SUCCESS return $OCF_ERR_GENERIC } AoEtarget_stop() { AoEtarget_monitor if [ $? -eq $OCF_SUCCESS ]; then ocf_log info "Unxporting device ${OCF_RESKEY_device} on ${OCF_RESKEY_nic} as shelf ${OCF_RESKEY_shelf}, slot ${OCF_RESKEY_slot}" pid=$(cat ${OCF_RESKEY_pid}) kill -TERM $pid # loop until we're really stopped, wait for the LRM to time us # out if not while AoEtarget_monitor; do sleep 1 done fi # Clean up pid file rm -f ${OCF_RESKEY_pid} return $OCF_SUCCESS } AoEtarget_monitor() { ocf_pidfile_status ${OCF_RESKEY_pid} >/dev/null 2>&1 rc=$? if [ $rc -eq 2 ]; then # no pid file, must assume we're not running return $OCF_NOT_RUNNING elif [ $rc -eq 1 ]; then # stale pid file, assume something went wrong return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } AoEtarget_validate() { # Is our binary executable? if [ ! -x ${OCF_RESKEY_binary} ]; then ocf_log error "${OCF_RESKEY_binary} not found or not executable" return $OCF_ERR_INSTALLED fi # Do we have all required variables? for var in device nic shelf slot pid; do param="OCF_RESKEY_${var}" if [ -z "${!param}" ]; then ocf_log error "Missing resource parameter \"$var\"!" return $OCF_ERR_CONFIGURED fi done # Is the pid file directory writable? pid_dir=`dirname "$OCF_RESKEY_pid"` touch "$pid_dir/$$" if [ $? != 0 ]; then ocf_log error "Cannot create pid file in $pid_dir -- check directory permissions" return $OCF_ERR_INSTALLED fi rm "$pid_dir/$$" # Does the device we are trying to export exist? if [ ! 
-e ${OCF_RESKEY_device} ]; then ocf_log error "${OCF_RESKEY_device} does not exist" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) AoEtarget_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test AoEtarget_validate || exit $? case $__OCF_ACTION in start) AoEtarget_start ;; stop) AoEtarget_stop ;; status|monitor) AoEtarget_monitor ;; reload) ocf_log err "Reloading..." AoEtarget_start ;; validate-all) AoEtarget_validate ;; *) AoEtarget_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc ClusterLabs-resource-agents-dc69db5/heartbeat/AudibleAlarm000077500000000000000000000100651203363223200237500ustar00rootroot00000000000000#!/bin/sh # # Startup script for the Audible Alarm # # author: Kirk Lawson # Horms # # description: sets an audible alarm running by beeping at a set interval # processname: alarm # config: /etc/AudibleAlarm/AudibleAlarm.conf - not yet implemented # # OCF parameters are as below: # OCF_RESKEY_nodelist # # License: GNU General Public License (GPL) ####################################################################### # Source function library. : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### PIDFILE=${HA_VARRUN}/heartbeat-bell #For testing #PIDFILE=/tmp/heartbeat-bell # What host are we running on? us=`uname -n` usage() { echo "Usage: $0 {start|stop|restart|status|monitor|meta-data|validate-all}" echo " The node list is an optional space delimited" echo " list of hosts that should never sound the alarm." } meta_data() { cat < 1.0 Resource script for AudibleAlarm. It sets an audible alarm running by beeping at a set interval. Emits audible beeps at a configurable interval The node list that should never sound the alarm. 
Node list END } audiblealarm_start () { ocf_log info "$0: Starting" if [ -f $PIDFILE ]; then PID=`head -n 1 $PIDFILE` if [ -n "$PID" ]; then ocf_log info "$0: Appears to already be running, killing [$PID]" kill $PID > /dev/null fi fi # Use () to create a subshell to make the redirection be synchronized. ( while [ 1 ]; do sleep 1 #Sleep first, incase we bail out printf "\a" > /dev/console # Uncomment this line to cause floppy drive light # to flash (requires fdutils package). # /usr/bin/floppycontrol --pollstate > /dev/null # # To avoid issues when called by lrmd, redirect stdout->stderr. done & if echo $! > $PIDFILE; then : else ocf_log info "$0: Could not write to pid file \"$PIDFILE\", bailing" kill $! return $OCF_ERR_GENERIC fi) >&2 return $? } audiblealarm_stop () { ocf_log info "$0: Shutting down" if [ -f $PIDFILE ]; then PID=`head -n 1 $PIDFILE` # ocf_log info "$0: Appears to already be running, killing [$PID]" # commented by Phost, since the confusion in the log. if [ -n "$PID" ]; then # Donnot remove PIDFILE in case the `kill` fails. kill $PID > /dev/null && rm -f $PIDFILE fi fi return $? } audiblealarm_restart () { audiblealarm_stop audiblealarm_start return $? } audiblealarm_status () { if [ -f $PIDFILE ]; then PID=`head -n 1 $PIDFILE` if [ -n "$PID" ]; then echo running return $OCF_SUCCESS fi fi echo stopped return $OCF_NOT_RUNNING } audiblealarm_validate_all () { check_binary printf echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; start) for arg in $OCF_RESKEY_nodelist do if [ "$us" = "$arg" ]; then # We should not start because we are on a host # listed in our argument list. exit $OCF_SUCCESS fi done audiblealarm_start ;; stop) audiblealarm_stop ;; restart) audiblealarm_restart ;; status|monitor) audiblealarm_status ;; validate-all) audiblealarm_validate_all ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_ARGS ;; esac exit $? 
ClusterLabs-resource-agents-dc69db5/heartbeat/CTDB000077500000000000000000000604121203363223200221430ustar00rootroot00000000000000#!/bin/sh # # OCF Resource Agent for managing CTDB # # Copyright (c) 2009-2010 Novell Inc., Tim Serong # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OVERVIEW # # When run by itself, CTDB can handle IP failover and includes scripts # to manage various services (Samba, Winbind, HTTP, etc.). When run as # a resource in a Pacemaker cluster, this additional functionality # should not be used; instead one should define separate resources for # CTDB, Samba, Winbind, IP addresses, etc. # # As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so # it is still possible to configure CTDB so that it manages these # resources itself. In future, once Samba and Winbind RAs are # available, this ability will be deprecated and ultimately removed. # # This RA intentionally provides no ability to configure CTDB such that # it manages IP failover, HTTP, NFS, etc. 
# # # TODO: # - ctdb_stop doesn't really support multiple independent CTDB instances, # unless they're running from distinct ctdbd binaries (it uses pkill # $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might # not under heavy load - this will kill all ctdbd instances on the # system). OTOH, running multiple CTDB instances per node is, well, # AFAIK, completely crazy. Can't run more than one in a vanilla CTDB # cluster, with the CTDB init script. So it might be nice to address # this for complete semantic correctness of the RA, but shouldn't # actually cause any trouble in real life. # - As much as possible, get rid of auto config generation # - Especially smb.conf # - Verify timeouts are sane # - Monitor differentiate between error and not running? # - Do we need to verify globally unique setting? # - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on # current nodes) # - Look at enabling set_ctdb_variables() if necessary. # - Probably possible for sysconfig file to not be restored if # CTDB dies unexpectedly. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Default parameter values: : ${OCF_RESKEY_ctdb_manages_samba:=no} : ${OCF_RESKEY_ctdb_manages_winbind:=no} : ${OCF_RESKEY_ctdb_service_smb:=""} : ${OCF_RESKEY_ctdb_service_nmb:=""} : ${OCF_RESKEY_ctdb_service_winbind:=""} : ${OCF_RESKEY_ctdb_samba_skip_share_check:=yes} : ${OCF_RESKEY_ctdb_monitor_free_memory:=100} : ${OCF_RESKEY_ctdb_start_as_disabled:=no} : ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb} : ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb} : ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd} : ${OCF_RESKEY_ctdb_socket:=/var/lib/ctdb/ctdb.socket} : ${OCF_RESKEY_ctdb_dbdir:=/var/lib/ctdb} : ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb} : ${OCF_RESKEY_ctdb_debuglevel:=2} : ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf} : ${OCF_RESKEY_smb_passdb_backend:=tdbsam} : ${OCF_RESKEY_smb_idmap_backend:=tdb2} ####################################################################### meta_data() { cat < 1.0 This resource agent manages CTDB, allowing one to use Clustered Samba in a Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2) on which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list of private IP addresses of each node in the cluster, then configure this RA as a clone. To have CTDB manage Samba, set ctdb_manages_samba="yes". Note that this option will be deprecated in future, in favour of configuring a separate Samba resource. For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) CTDB Resource Agent The location of a shared lock file, common across all nodes. This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock CTDB shared lock file Should CTDB manage starting/stopping the Samba service for you? This will be deprecated in future, in favor of configuring a separate Samba resource. Should CTDB manage Samba? Should CTDB manage starting/stopping the Winbind service for you? 
This will be deprecated in future, in favor of configuring a separate Winbind resource. Should CTDB manage Winbind? Name of smb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of smb init script Name of nmb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of nmb init script Name of winbind init script. Only necessary if CTDB is managing Winbind directly. Will usually be auto-detected. Name of winbind init script If there are very many shares it may not be feasible to check that all of them are available during each monitoring interval. In that case this check can be disabled. Skip share check during monitor? If the amount of free memory drops below this value the node will become unhealthy and ctdb and all managed services will be shutdown. Once this occurs, the administrator needs to find the reason for the OOM situation, rectify it and restart ctdb with "service ctdb start". Minimum amount of free memory (MB) When set to yes, the CTDB node will start in DISABLED mode and not host any public ip addresses. Start CTDB disabled? The directory containing various CTDB configuration files. The "nodes" and "notify.sh" scripts are expected to be in this directory, as is the "events.d" subdirectory. CTDB config file directory Full path to the CTDB binary. CTDB binary path Full path to the CTDB cluster daemon binary. CTDB Daemon binary path Full path to the domain socket that ctdbd will create, used for local clients to attach and communicate with the ctdb daemon. CTDB socket location The directory to put the local CTDB database files in. Persistent database files will be put in ctdb_dbdir/persistent. CTDB database directory Full path to log file. To log to syslog instead, use the value "syslog". CTDB log file location What debug level to run at (0-10). Higher means more verbose. CTDB debug level Path to default samba config file. Only necessary if CTDB is managing Samba. 
Path to smb.conf The directory for smbd to use for storing such files as smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50) required this to be on shared storage. This parameter should not be set for current versions of CTDB, and only remains in the RA for backwards compatibility. Samba private dir (deprecated) Which backend to use for storing user and possibly group information. Only necessary if CTDB is managing Samba. Samba passdb backend Which backend to use for SID/uid/gid mapping. Only necessary if CTDB is managing Samba. Samba idmap backend Which fileid:algorithm to use with vfs_fileid. The correct value depends on which clustered filesystem is in use, e.g.: for OCFS2, this should be set to "fsid". Only necessary if CTDB is managing Samba. Samba VFS fileid algorithm END } ####################################################################### # Figure out path to /etc/sysconfig/ctdb (same logic as # loadconfig() from /etc/ctdb/functions if [ -f /etc/sysconfig/ctdb ]; then CTDB_SYSCONFIG=/etc/sysconfig/ctdb elif [ -f /etc/default/ctdb ]; then CTDB_SYSCONFIG=/etc/default/ctdb elif [ -f $OCF_RESKEY_ctdb_config_dir/ctdb ]; then CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb fi # Backup paths CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig invoke_ctdb() { # CTDB's defaults are: local timeout=3 local timelimit=120 # ...but we override with the timeout for the current op: if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((OCF_RESKEY_CRM_meta_timeout/1000)) timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket \ -t $timeout -T $timelimit \ "$@" } # Enable any event scripts that are explicitly required. # Any others will ultimately be invoked or not based on how they ship # with CTDB, but will generally have no effect, beacuase the relevant # CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb. 
enable_event_scripts() { local event_dir=$OCF_RESKEY_ctdb_config_dir/events.d if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then chmod u+x $event_dir/10.interface else chmod a-x $event_dir/10.interface fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then chmod u+x $event_dir/11.routing else chmod a-x $event_dir/11.routing fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || \ ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then chmod u+x $event_dir/50.samba else chmod a-x $event_dir/50.samba fi } # This function has no effect (currently no way to set CTDB_SET_*) # but remains here in case we need it in future. set_ctdb_variables() { rv=$OCF_SUCCESS set | grep ^CTDB_SET_ | cut -d_ -f3- | while read v; do varname=`echo $v | cut -d= -f1` value=`echo $v | cut -d= -f2` invoke_ctdb setvar $varname $value || rv=$OCF_ERR_GENERIC done || rv=$OCF_ERR_GENERIC return $rv } # Add necessary settings to /etc/samba/smb.conf. In a perfect world, # we'd be able to generate a new, temporary, smb.conf file somewhere, # something like: # include = /etc/samba/smb.conf # [global] # clustering = yes # # ...etc... # Unfortunately, we can't do this, because there's no way to tell the # smb init script where the temporary config is, so we just edit # the default config file. init_smb_conf() { # Don't screw around with the config if CTDB isn't managing Samba! 
ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 # replace these things in smb.conf local repl='# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket' local private_dir [ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n" local vfs_fileid local do_vfs=0 if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then repl="${repl}|fileid:algorithm|fileid:mapping" vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n" if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' $OCF_RESKEY_smb_conf | \ grep -Eq '^[[:space:]]*vfs objects'; then # vfs objects already specified, will append fileid to existing line do_vfs=1 else vfs_fileid="$vfs_fileid\tvfs objects = fileid\n" fi fi awk ' /^[[:space:]]*\[/ { global = 0 } /^[[:space:]]*\[global\]/ { global = 1 } { if(global) { if ('$do_vfs' && $0 ~ /^[[:space:]]vfs objects/ && $0 !~ /fileid/) { print $0" fileid" } else if ($0 !~ /^[[:space:]]*('"$repl"')/) { print } } else { print } }' $OCF_RESKEY_smb_conf | sed "/^[[:space:]]*\[global\]/ a\\ \t# CTDB-RA: Begin auto-generated section (do not change below)\n\ \tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\ \tclustering = yes\n\ \tidmap backend = $OCF_RESKEY_smb_idmap_backend\n\ \tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\ \t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } # Get rid of that section we added cleanup_smb_conf() { ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } append_ctdb_sysconfig() { [ -n "$2" ] && echo "$1=$2" >> $CTDB_SYSCONFIG } # Generate a new, minimal CTDB config file that's just enough # to get CTDB running as configured by the RA parameters. 
generate_ctdb_sysconfig() { # Backup existing sysconfig if we're not already using an auto-generated one grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG || cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP if [ $? -ne 0 ]; then ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP" fi ocf_log info "Generating new $CTDB_SYSCONFIG" # Note to maintainers and other random hackers: # Parameters may need to be set here, for CTDB event # scripts to pick up, or may need to be passed to ctdbd # when starting, or both. Be careful. The CTDB source # tree and manpages are your friends. As a concrete # example, setting CTDB_START_AS_DISABLED here is # completely useless, as this is actually a command line # argument for ctdbd; it's not used anywhere else. cat >$CTDB_SYSCONFIG </dev/null for pdbase in $(ls $persistent_db_dir/*.tdb.[0-9] 2>/dev/null$) ; do /usr/bin/tdbdump $pdbase >/dev/null 2>/dev/null || { ocf_log err "Persistent database $pdbase is corrupted! CTDB will not start." return $OCF_ERR_GENERIC } done # Add necessary configuration to smb.conf init_smb_conf if [ $? -ne 0 ]; then ocf_log err "Failed to update $OCF_RESKEY_smb_conf." 
return $OCF_ERR_GENERIC fi # Generate new CTDB sysconfig generate_ctdb_sysconfig enable_event_scripts # Use logfile by default, or syslog if asked for local log_option="--logfile=$OCF_RESKEY_ctdb_logfile" [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog" # public addresses file (should not be present, but need to set for correctness if it is) local pub_addr_option="" [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses" # start as disabled local start_as_disabled="--start-as-disabled" ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled="" # Start her up $OCF_RESKEY_ctdbd_binary \ --reclock=$OCF_RESKEY_ctdb_recovery_lock \ --nlist=$OCF_RESKEY_ctdb_config_dir/nodes \ --socket=$OCF_RESKEY_ctdb_socket \ --dbdir=$OCF_RESKEY_ctdb_dbdir \ --dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \ --event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \ --notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \ --transport=tcp \ $start_as_disabled $log_option $pub_addr_option \ -d $OCF_RESKEY_ctdb_debuglevel if [ $? -ne 0 ]; then # cleanup smb.conf cleanup_smb_conf ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize # (until start times out if necessary) while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$(invoke_ctdb status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go. Do a bit of final # setup and (hopefully) return success. set_ctdb_variables return $? 
fi done fi # ctdbd will (or can) actually still be running at this point, so kill it ctdb_stop ocf_log err "Timeout waiting for CTDB to stabilize" return $OCF_ERR_GENERIC } ctdb_stop() { # Do nothing if already stopped pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Tell it to die nicely invoke_ctdb shutdown >/dev/null 2>&1 rv=$? # No more Mr. Nice Guy count=0 while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do sleep 1 count=$(($count + 1)) [ $count -gt 10 ] && { ocf_log info "killing ctdbd " pkill -9 -f $OCF_RESKEY_ctdbd_binary pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/ } done # Cleanup smb.conf cleanup_smb_conf # It was a clean shutdown, return success [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS # Unclean shutdown, return success if there's no ctdbds left (we # killed them forcibly, but at least they're good and dead). pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Problem: ctdb shutdown didn't work and neither did some vigorous # kill -9ing. Only thing to do is report failure. return $OCF_ERR_GENERIC } ctdb_monitor() { local status # "ctdb status" exits non-zero if CTDB isn't running. # It can also exit non-zero if there's a timeout (ctdbd blocked, # stalled, massive load, or otherwise wedged). If it's actually # not running, STDERR will say "Errno:Connection refused(111)", # whereas if it's wedged, it'll say various other unpleasant things. status=$(invoke_ctdb status 2>&1) if [ $? 
-ne 0 ]; then if echo $status | grep -qs 'Connection refused'; then return $OCF_NOT_RUNNING else ocf_log err "CTDB status call failed: $status" return $OCF_ERR_GENERIC fi fi if echo $status | grep -Eqs '(OK|DISABLED) \(THIS'; then return $OCF_SUCCESS fi ocf_log err "CTDB status is bad: $status" return $OCF_ERR_GENERIC } ctdb_validate() { # Required binaries (full path to tdbdump is intentional, as that's # what's used in ctdb_start, which was lifted from the init script) for binary in pkill /usr/bin/tdbdump; do check_binary $binary done if [ -z "$CTDB_SYSCONFIG" ]; then ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" return $OCF_ERR_INSTALLED fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist." return $OCF_ERR_INSTALLED fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" fi if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then ocf_log err "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then ocf_log err "ctdb_recovery_lock not specified." return $OCF_ERR_CONFIGURED fi lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") touch "$lock_dir/$$" 2>/dev/null if [ $? != 0 ]; then ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." return $OCF_ERR_ARGS fi rm "$lock_dir/$$" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ctdb_start;; stop) ctdb_stop;; monitor) ctdb_monitor;; validate-all) ctdb_validate;; usage|help) ctdb_usage exit $OCF_SUCCESS ;; *) ctdb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? 
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc ClusterLabs-resource-agents-dc69db5/heartbeat/ClusterMon000077500000000000000000000155511203363223200235260ustar00rootroot00000000000000#!/bin/sh # # # ClusterMon OCF RA. # Starts crm_mon in background which logs cluster status as # html to the specified file. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # OCF instance parameters: # OCF_RESKEY_user # OCF_RESKEY_pidfile # OCF_RESKEY_update # OCF_RESKEY_extra_options # OCF_RESKEY_htmlfile ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a ClusterMon Resource Agent. It outputs current cluster status to the html. 
Runs crm_mon in the background, recording the cluster status to an HTML file The user we want to run crm_mon as The user we want to run crm_mon as How frequently should we update the cluster status Update interval Additional options to pass to crm_mon. Eg. -n -r Extra options PID file location to ensure only one instance is running PID file Location to write HTML output to. HTML output END } ####################################################################### ClusterMon_usage() { cat </dev/null if [ $? -eq 0 ]; then : Yes, user exists. We can further check his permission on crm_mon if necessary else ocf_log err "The user $OCF_RESKEY_user does not exist!" exit $OCF_ERR_ARGS fi fi # Pidfile better be an absolute path case $OCF_RESKEY_pidfile in /*) ;; *) ocf_log warn "You should have pidfile($OCF_RESKEY_pidfile) of absolute path!" ;; esac # Check the update interval if ocf_is_decimal "$OCF_RESKEY_update" && [ $OCF_RESKEY_update -gt 0 ]; then : else ocf_log err "Invalid update interval $OCF_RESKEY_update. It should be positive integer!" exit $OCF_ERR_ARGS fi if CheckOptions $OCF_RESKEY_extra_options; then : else ocf_log err "Invalid options $OCF_RESKEY_extra_options!" exit $OCF_ERR_ARGS fi # Htmlfile better be an absolute path case $OCF_RESKEY_htmlfile in /*) ;; *) ocf_log warn "You should have htmlfile($OCF_RESKEY_htmlfile) of absolute path!" 
;; esac echo "Validate OK" return $OCF_SUCCESS } if [ $# -ne 1 ]; then ClusterMon_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_update:="15000"} : ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} : ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ClusterMon_start ;; stop) ClusterMon_stop ;; monitor) ClusterMon_monitor ;; validate-all) ClusterMon_validate ;; usage|help) ClusterMon_usage exit $OCF_SUCCESS ;; *) ClusterMon_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/Delay000077500000000000000000000112311203363223200224600ustar00rootroot00000000000000#!/bin/sh # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # This script is a test resource for introducing delay. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_startdelay # OCF_RESKEY_stopdelay # OCF_RESKEY_mondelay # # # OCF_RESKEY_startdelay defaults to 30 (seconds) # OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay # OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay # # # This is really a test resource script. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } meta_data() { cat < 1.0 This script is a test resource for introducing delay. Waits for a defined timespan How long in seconds to delay on start operation. Start delay How long in seconds to delay on stop operation. Defaults to "startdelay" if unspecified. Stop delay How long in seconds to delay on monitor operation. Defaults to "startdelay" if unspecified. 
Monitor delay END } Delay_stat() { ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor } Delay_Status() { if Delay_stat then ocf_log info "Delay is running OK" return $OCF_SUCCESS else ocf_log info "Delay is stopped" return $OCF_NOT_RUNNING fi } Delay_Monitor() { Delay_Validate_All -q sleep $OCF_RESKEY_mondelay Delay_Status } Delay_Start() { if Delay_stat then ocf_log info "Delay already running." return $OCF_SUCCESS else Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start rc=$? sleep $OCF_RESKEY_startdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS fi } Delay_Stop() { if Delay_stat then Delay_Validate_All -q ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop rc=$? sleep $OCF_RESKEY_stopdelay if [ $rc -ne 0 ] then return $OCF_ERR_PERM fi return $OCF_SUCCESS else ocf_log info "Delay already stopped." return $OCF_SUCCESS fi } # Check if all the arguments are valid numbers, a string is considered valid if: # 1. It does not contain any character but digits and period "."; # 2. The period "." does not occur more than once Are_Valid_Numbers() { for i in "$@"; do echo $i |grep -v [^0-9.] |grep -q -v [.].*[.] if test $? 
-ne 0; then return $OCF_ERR_ARGS fi done return $OCF_SUCCESS } Delay_Validate_All() { # Be quiet when specified -q option _and_ validation succeded getopts "q" option if test $option = "q"; then quiet=yes else quiet=no fi shift $(($OPTIND -1)) if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \ $OCF_RESKEY_mondelay; then if test $quiet = "no"; then echo "Validate OK" fi # _Return_ on validation success return $OCF_SUCCESS else echo "Some of the instance parameters are invalid" # _Exit_ on validation failure exit $OCF_ERR_ARGS fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_startdelay=30} : ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay} : ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay} case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; start) Delay_Start ;; stop) Delay_Stop ;; monitor) Delay_Monitor ;; status) Delay_Status ;; validate-all) Delay_Validate_All ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_ARGS ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/Dummy000077500000000000000000000117731203363223200225300ustar00rootroot00000000000000#!/bin/sh # # # Dummy OCF RA. Does nothing but wait a few seconds, can be # configured to fail occassionally. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. 
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. State file Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload END } ####################################################################### dummy_usage() { cat < 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster. Manages EVMS Shared Cluster Containers (SCCs) (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning END } EvmsSCC_status() { # At the moment we don't support monitoring EVMS activations. 
We just return "not running" to cope with the pre-start monitor call. return $OCF_NOT_RUNNING } EvmsSCC_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" case "$n_type" in pre) case "$n_op" in start) ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start." EvmsSCC_start_notify_common ;; esac ;; esac return $OCF_SUCCESS } EvmsSCC_start() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log debug "EvmsSCC: Start: starting node(s): $n_start." EvmsSCC_start_notify_common return $OCF_SUCCESS } EvmsSCC_stop() { return $OCF_SUCCESS } EvmsSCC_start_notify_common() { local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)} ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself." n_active="$n_active $n_start" case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "EvmsSCC: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac #pick the first node from the starting list #when the cluster boots this will be one of the many booting nodes #when a node later joins the cluster, this will be the joining node local n_first=$(echo $n_start | cut -d ' ' -f 1) ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first." if [ "$n_myself" = "$n_first" ] ; then ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}." while true ; do if ! ${EVMSACTIVATE} -q 2> /dev/null ; then SLEEP_TIME=$(($(ocf_maybe_random) % 40)) ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again." 
sleep $SLEEP_TIME else break fi done fi return $OCF_SUCCESS } # Check the arguments passed to this script if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi OP=$1 case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated check_binary $CUT check_binary $EVMSACTIVATE case $OP in start) EvmsSCC_start ;; notify) EvmsSCC_notify ;; stop) EvmsSCC_stop ;; status|monitor) EvmsSCC_status ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/Evmsd000077500000000000000000000076011203363223200225060ustar00rootroot00000000000000#!/bin/sh # # Evmsd OCF RA. # # Copyright (c) 2004 SUSE LINUX AG, Jo De Baer # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. -- This is a Evmsd Resource Agent. Controls clustered EVMS volume management (deprecated) If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning END } ####################################################################### evmsd_usage() { cat < 1.1 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. 
If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. fast stop The usage of a clone setup for local filesystems is forbidden by default. For special setups like glusterfs, cloning a mount of a local device with a filesystem like ext4 or xfs, independently on several nodes is a valid use-case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi if [ "$inpf" ]; then cut -d' ' -f1,2,3 < $inpf else $MOUNT | cut -d' ' -f1,3,5 fi } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) 
case "$FSTYPE" in nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|none) ;; *) DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1` if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep " $1/" | cut -d' ' -f2 | sort -r } ocfs2_del_cache() { if [ -e "$_OCFS2_uuid_cache" ]; then rm -f $_OCFS2_uuid_cache fi } ocfs2_cleanup() { # We'll never see the post-stop notification. We're gone now, # have unmounted, and thus should remove the membership. # # (Do so regardless of whether we were unmounted already, # because the admin might have manually unmounted but not # cleared up the membership directory. Bad admin, no cookie.) # if [ ! -d "$OCFS2_FS_ROOT" ]; then ocf_log info "$OCFS2_FS_ROOT: Filesystem membership already gone." else ocf_log info "$OCFS2_FS_ROOT: Removing membership directory." rm -rf $OCFS2_FS_ROOT/ fi ocfs2_del_cache } ocfs2_fetch_uuid() { mounted.ocfs2 -d $DEVICE|tail -1|awk '{print $3}'|tr -d -- -|tr '[a-z]' '[A-Z]' } ocfs2_set_uuid() { _OCFS2_uuid_cache="$HA_RSCTMP/Filesystem.ocfs2_uuid.$(echo $DEVICE|tr / .)" if [ "$OP" != "start" -a -e "$_OCFS2_uuid_cache" ]; then # Trust the cache. OCFS2_UUID=$(cat $_OCFS2_uuid_cache 2>/dev/null) return 0 fi OCFS2_UUID=$(ocfs2_fetch_uuid) if [ -n "$OCFS2_UUID" -a "$OCFS2_UUID" != "UUID" ]; then # UUID valid: echo $OCFS2_UUID > $_OCFS2_uuid_cache return 0 fi # Ok, no UUID still, but that's alright for stop, because it # very likely means we never got started - if [ "$OP" = "stop" ]; then ocf_log warn "$DEVICE: No UUID; assuming never started!" OCFS2_UUID="UUID_NOT_SET" return 0 fi # Everything else - wrong: ocf_log err "$DEVICE: Could not determine ocfs2 UUID for device." exit $OCF_ERR_GENERIC } ocfs2_init() { # Check & initialize the OCFS2 specific variables. 
# This check detects whether the special/legacy hooks to # integrate OCFS2 with user-space clustering on SLES10 need to # be activated. # Newer kernels >= 2.6.28, with OCFS2+openAIS+Pacemaker, do # not need this: OCFS2_SLES10="" if [ "X$HA_cluster_type" = "Xcman" ]; then return elif [ "X$HA_cluster_type" != "Xopenais" ]; then if grep -q "SUSE Linux Enterprise Server 10" /etc/SuSE-release >/dev/null 2>&1 ; then OCFS2_SLES10="yes" ocf_log info "$DEVICE: Enabling SLES10 compatibility mode for OCFS2." else ocf_log err "$DEVICE: ocfs2 is not compatible with your environment." exit $OCF_ERR_CONFIGURED fi else return fi if [ $OP != "stop" ]; then if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then ocf_log err "ocfs2 must be run as a clone." exit $OCF_ERR_GENERIC fi fi if [ $blockdevice = "no" ]; then ocf_log err "$DEVICE: ocfs2 needs a block device instead." exit $OCF_ERR_GENERIC fi for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster /configfs/cluster ; do if [ -n "$f" -a -d "$f" ]; then OCFS2_CONFIGFS="$f" break fi done if [ ! -d "$OCFS2_CONFIGFS" ]; then ocf_log err "ocfs2 needs configfs mounted." exit $OCF_ERR_GENERIC fi ocfs2_set_uuid if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster) else OCFS2_CLUSTER=$(find "$OCFS2_CONFIGFS" -maxdepth 1 -mindepth 1 -type d -printf %f 2>/dev/null) set -- $OCFS2_CLUSTER local n; n="$#" if [ $n -gt 1 ]; then ocf_log err "$OCFS2_CLUSTER: several clusters found." exit $OCF_ERR_GENERIC fi if [ $n -eq 0 ]; then ocf_log err "$OCFS2_CONFIGFS: no clusters found." exit $OCF_ERR_GENERIC fi fi OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER" if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb hasn't been run?" exit $OCF_ERR_GENERIC fi OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. 
' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel `uname -r` cannot handle read only bind mounts" } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=`echo $options | sed 's/bind/remount/'` $MOUNT $bind_opts $MOUNTPOINT else true # make sure to return OK fi } is_option() { echo $OCF_RESKEY_options | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case $FSTYPE in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } # # START: Start up the filesystem # Filesystem_start() { if [ -n "$OCFS2_SLES10" ]; then # "start" now has the notification data available; that # we're being started means we didn't get the # pre-notification, because we weren't running, so # process the information now first. ocf_log info "$OCFS2_UUID: Faking pre-notification on start." OCF_RESKEY_CRM_meta_notify_type="pre" OCF_RESKEY_CRM_meta_notify_operation="start" Filesystem_notify fi # See if the device is already mounted. if Filesystem_status >/dev/null 2>&1 ; then ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS fi if [ "X${HOSTOS}" != "XOpenBSD" ];then if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already else local support="$FSTYPE" # support fuse-filesystems (e.g. GlusterFS) case $FSTYPE in glusterfs) support="fuse";; esac grep -w "$support"'$' /proc/filesystems >/dev/null || $MODPROBE $support >/dev/null grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? 
-ne 0 ] ; then ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems" return $OCF_ERR_INSTALLED fi fi fi # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_log err "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p $DEVICE else $FSCK -t $FSTYPE -p $DEVICE fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_log err "Couldn't sucessfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi if [ ! -d "$MOUNTPOINT" ] ; then ocf_log err "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs $DEVICE # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount ;; "") $MOUNT $options $DEVICE $MOUNTPOINT ;; *) $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT ;; esac if [ $? -ne 0 ]; then ocf_log err "Couldn't mount filesystem $DEVICE on $MOUNTPOINT" if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start Filesystem_notify() { # Process notifications; this is the essential glue level for # giving user-space membership events to a cluster-aware # filesystem. Right now, only OCFS2 is supported. # # When we get a pre-start notification, we set up all the nodes # which will be active in our membership for the filesystem. # (For the resource to be started, this happens at the time of # the actual 'start' operation.) # # At a post-start, actually there's nothing to do for us really, # but no harm done in re-syncing either. # # pre-stop is meaningless; we can't remove any node yet, it # first needs to unmount. 
# # post-stop: the node is removed from the membership of the # other nodes. # # Note that this expects that the base cluster is already # active; ie o2cb has been started and populated # $OCFS2_CLUSTER_ROOT/node/ already. This can be achieved by # simply having o2cb run on all nodes by the CRM too. This # probably ought to be mentioned somewhere in the to be written # documentation. ;-) # if [ -z "$OCFS2_SLES10" ]; then # One of the cases which shouldn't occur; it should have # been caught much earlier. Still, you know ... ocf_log err "$DEVICE: Please only enable notifications for SLES10 OCFS2 mounts." # Yes, in theory this is a configuration error, but # simply discarding them allows users to switch from the # SLES10 stack to the new one w/o downtime. # Ignoring the notifications is harmless, afterall, and # they can simply disable them in their own time. return $OCF_SUCCESS fi local n_type; n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op; n_op="$OCF_RESKEY_CRM_meta_notify_operation" local n_active; n_active="$OCF_RESKEY_CRM_meta_notify_active_uname" local n_stop; n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname" local n_start; n_start="$OCF_RESKEY_CRM_meta_notify_start_uname" ocf_log info "$OCFS2_UUID: notify: $n_type for $n_op" ocf_log info "$OCFS2_UUID: notify active: $n_active" ocf_log info "$OCFS2_UUID: notify stop: $n_stop" ocf_log info "$OCFS2_UUID: notify start: $n_start" case "$n_type" in pre) case "$n_op" in stop) ocf_log info "$OCFS2_UUID: ignoring pre-notify for stop." return $OCF_SUCCESS ;; start) # These are about to become active; prepare to # communicate with them. # Duplicate removal - start can contain nodes # already on the active list, confusing the # script later on: for UNAME in $n_active; do n_start=`echo ${n_start} | sed s/$UNAME//` done # Merge pruned lists again: n_active="$n_active $n_start" ;; esac ;; post) case "$n_op" in stop) # remove unames from notify_stop_uname; these have been # stopped and can no longer be considered active. 
for UNAME in $n_stop; do n_active=`echo ${n_active} | sed s/$UNAME//` done ;; start) if [ "$n_op" = "start" ]; then ocf_log info "$OCFS2_UUID: ignoring post-notify for start." return $OCF_SUCCESS fi ;; esac ;; esac ocf_log info "$OCFS2_UUID: post-processed active: $n_active" local n_myself; n_myself=${HA_CURHOST:-$(uname -n | tr '[A-Z]' '[a-z]')} ocf_log info "$OCFS2_UUID: I am node $n_myself." case " $n_active " in *" $n_myself "*) ;; *) ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!" return $OCF_ERR_GENERIC ;; esac if [ -d "$OCFS2_FS_ROOT" ]; then entry_prefix=$OCFS2_FS_ROOT/ for entry in $OCFS2_FS_ROOT/* ; do n_fs="${entry##$entry_prefix}" # ocf_log info "$OCFS2_UUID: Found current node $n_fs" case " $n_active " in *" $n_fs "*) # Construct a list of nodes which are present # already in the membership. n_exists="$n_exists $n_fs" ocf_log info "$OCFS2_UUID: Keeping node: $n_fs" ;; *) # Node is in the membership currently, but not on our # active list. Must be removed. if [ "$n_op" = "start" ]; then ocf_log warn "$OCFS2_UUID: Removing nodes on start" fi ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs" if ! rm -f $entry ; then ocf_log err "$OCFS2_UUID: Removal of $n_fs failed!" fi ;; esac done else ocf_log info "$OCFS2_UUID: heartbeat directory doesn't exist yet, creating." mkdir -p $OCFS2_FS_ROOT fi ocf_log info "$OCFS2_UUID: Existing node list: $n_exists" # (2) for entry in $n_active ; do # ocf_log info "$OCFS2_UUID: Expected active node: $entry" case " $n_exists " in *" $entry "*) ocf_log info "$OCFS2_UUID: Already active: $entry" ;; *) if [ "$n_op" = "stop" ]; then ocf_log warn "$OCFS2_UUID: Adding nodes on stop" fi ocf_log info "$OCFS2_UUID: Activating node: $entry" if ! 
ln -s $OCFS2_CLUSTER_ROOT/node/$entry $OCFS2_FS_ROOT/$entry ; then ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry: failed to link" fi ;; esac done } signal_processes() { local dir=$1 local sig=$2 # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. if [ "X${HOSTOS}" = "XOpenBSD" ];then PIDS=`fstat | grep $dir | awk '{print $3}'` for PID in ${PIDS};do kill -s $sig ${PID} ocf_log info "Sent signal $sig to ${PID}" done else if $FUSER -$sig -m -k $dir ; then ocf_log info "Some processes on $dir were signalled" else ocf_log info "No processes on $dir were signalled" fi fi } try_umount() { local SUB=$1 $UMOUNT $umount_force $SUB list_mounts | grep -q " $SUB " >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } fs_stop() { local SUB=$1 timeout=$2 sig cnt for sig in TERM KILL; do cnt=$((timeout/2)) # try half time with TERM while [ $cnt -gt 0 ]; do try_umount $SUB && return $OCF_SUCCESS ocf_log err "Couldn't unmount $SUB; trying cleanup with $sig" signal_processes $SUB $sig cnt=$((cnt-1)) sleep 1 done done return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. 
local timeout for SUB in `list_submounts $MOUNTPOINT` $MOUNTPOINT; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop $SUB $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_log err "Couldn't unmount $SUB, giving up!" fi done fi flushbufs $DEVICE # Yes I know the next blob is ugly, sorry. if [ $rc -eq $OCF_SUCCESS ]; then if [ "$FSTYPE" = "ocfs2" ]; then ocfs2_init if [ -n "$OCFS2_SLES10" ]; then ocfs2_cleanup fi fi fi return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # TODO: For ocfs2, or other cluster filesystems, should we be # checking connectivity to other nodes here, or the IO path to # the storage? # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=`dd if=$DEVICE $dd_opts 2>&1 >/dev/null` if [ $? 
-ne 0 ]; then ocf_log err "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=`dirname $STATUSFILE` [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=` echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1` if [ $? -ne 0 ]; then ocf_log err "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f ${STATUSFILE} if [ $? -ne 0 ]; then ocf_log err "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat ${STATUSFILE} > /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_log err "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # Filesystem_validate_all() { if [ -n $MOUNTPOINT -a ! -d $MOUNTPOINT ]; then ocf_log warn "Mountpoint $MOUNTPOINT does not exist" fi # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems |grep -q ^$FSTYPE$ if [ $? 
-ne 0 ]; then modpath=/lib/modules/`uname -r` moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep |grep -q "^$modpath.*$FSTYPE\.k\?o:$" if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case $FSTYPE in nfs4|nfs|smbfs|cifs|none|glusterfs|ceph) return;; esac if `is_option "loop"`; then return fi case $DEVICE in -*) # Oh... An option to mount instead... Typically -U or -L ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... DEVICE=$OCF_RESKEY_device FSTYPE=$OCF_RESKEY_fstype if [ ! 
-z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then ocf_log err "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_log err "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//') : ${MOUNTPOINT:=/} # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/" # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll # kill the whole system. Is that a good idea? fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi # These operations do not require the clone checking + OCFS2 # initialization. case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? 
;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case $FSTYPE in ocfs2) ocfs2_init CLUSTERSAFE=1 ;; nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph) CLUSTERSAFE=1 # this is kind of safe too ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 else CLUSTERSAFE=0 # these are not allowed fi ;; esac if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then case $CLUSTERSAFE in 0) ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; notify) Filesystem_notify ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/ICP000077500000000000000000000141371203363223200220450ustar00rootroot00000000000000#!/bin/sh # # # ICP # # Description: Manages an ICP Vortex clustered host drive as an HA resource # # # Author: Lars Marowsky-Bree # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 SuSE Linux AG # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname # # Notice that you will need to get the utility "icpclucon" from the ICP # support to use this. # # See usage() function below for more details... 
#
# OCF parameters are as below:
#	OCF_RESKEY_driveid
#	OCF_RESKEY_device

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

#
# Location of the ICP cluster control utility used for every
# reserve/release/status request below.
#
ICPCLUCON=/usr/sbin/icpclucon

#
# usage: print the command synopsis and a one-line summary of each
# operation on stdout. The operation list is derived from ICP_methods
# (minus "methods" itself) and joined with '|'.
#
usage() {
  local methods

  methods=`ICP_methods | grep -v methods`
  methods=`echo $methods | tr ' ' '|'`
  cat <<EOF
usage: $0 ($methods)

$0 manages an ICP Vortex clustered host drive.

The 'start' operation reserves the given host drive.
The 'stop' operation releses the given host drive.
The 'status' operation reports whether the host drive is reserved.
The 'monitor' operation reports whether the host drive is reserved.
The 'validate-all' operation reports whether OCF instance parameters are valid.
The 'methods' operation reports on the methods $0 supports
EOF
}

#
# meta_data: print the OCF resource agent meta-data XML on stdout.
#
# NOTE(review): the XML markup of this heredoc was missing from the
# text this block was rebuilt from; only the descriptions, the version
# and the two parameter names were present. The element structure
# follows the OCF ra-api-1 DTD, both parameters are marked required
# because the main program refuses to run without them, and the
# <actions> timeouts/intervals are assumed values -- TODO confirm them
# against the upstream agent before shipping.
#
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ICP">
<version>1.0</version>

<longdesc lang="en">
Resource script for ICP. It Manages an ICP Vortex clustered host drive as an
HA resource.
</longdesc>
<shortdesc lang="en">Manages an ICP Vortex clustered host drive</shortdesc>

<parameters>
<parameter name="driveid" unique="0" required="1">
<longdesc lang="en">
The ICP cluster drive ID.
</longdesc>
<shortdesc lang="en">ICP cluster drive ID</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="device" unique="0" required="1">
<longdesc lang="en">
The device name.
</longdesc>
<shortdesc lang="en">device</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="10" />
<action name="status" depth="0" timeout="10" interval="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#
# methods: What methods/operations do we support?
# Prints one operation name per line.
#
ICP_methods() {
  cat <<EOF
start
stop
status
monitor
methods
validate-all
meta-data
usage
EOF
}

#
# ICP_status: ask icpclucon for the reservation state of a host drive.
#   $1  ICP cluster drive ID
# Returns:
#   $OCF_SUCCESS      icpclucon reports the drive is reserved by this host
#   $OCF_NOT_RUNNING  the drive is reserved by nobody, or the output was
#                     not recognised (logged, treated as "no reservation")
#   $OCF_ERR_GENERIC  icpclucon itself exited non-zero
#
ICP_status() {
  local icp_out

  # Keep 'local' and the assignment separate so that $? is the exit
  # status of icpclucon rather than that of 'local'.
  icp_out=$($ICPCLUCON -v -status "$1")
  if [ $? -ne 0 ]; then
	ocf_log "err" "Hostdrive not reserved by us."
	return $OCF_ERR_GENERIC
  fi

  # Substring match via 'case': portable to any POSIX sh, unlike the
  # GNU-only 'expr match', and indifferent to multi-line output.
  case "$icp_out" in
	*"Drive is reserved by this host"*)
		ocf_log "info" "Volume $1 is reserved by us."
		return $OCF_SUCCESS
		;;
	*"Drive is not reserved by any host"*)
		ocf_log "err" "Volume $1 not reserved by any host."
		return $OCF_NOT_RUNNING
		;;
	*)
		ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:"
		ocf_log "err" "$icp_out"
		return $OCF_NOT_RUNNING
		;;
  esac
}

#
# ICP_report_status: human-readable wrapper around ICP_status.
#   $1  ICP cluster drive ID
# Prints "<id>: running" or "<id>: not running"; every ICP_status
# failure is reported as $OCF_NOT_RUNNING.
#
ICP_report_status() {
  if ICP_status "$1"; then
	echo "$1: running"
	return $OCF_SUCCESS
  else
	echo "$1: not running"
	return $OCF_NOT_RUNNING
  fi
}

#
# Monitor the host drive - does it really seem to be working?
#   $1  ICP cluster drive ID
# Any ICP_status failure is logged and mapped to $OCF_NOT_RUNNING.
#
ICP_monitor() {
  if ICP_status "$1"; then
	# $? here is the (zero) exit status of ICP_status.
	return $?
  else
	ocf_log "err" "ICP host drive $1 is offline"
	return $OCF_NOT_RUNNING
  fi
}

#
# Clear_bufs: flush the kernel buffers of a block device.
#   $1  device name
# Returns the exit status of blockdev.
#
Clear_bufs() {
  $BLOCKDEV --flushbufs "$1"
}

#
# Enable ICP host drive
#   $1  ICP cluster drive ID
# Tries a plain reservation first and falls back to a forced one.
# Returns $OCF_SUCCESS once ICP_status confirms the reservation,
# $OCF_ERR_GENERIC otherwise.
#
ICP_start() {
  ocf_log "info" "Activating host drive $1"
  ocf_run $ICPCLUCON -v -reserve "$1"
  if [ $? -ne 0 ]; then
	ocf_log "info" "Forcing reservation of $1"
	ocf_run $ICPCLUCON -v -force "$1" || return $OCF_ERR_GENERIC
  fi

  if ICP_status "$1"; then
	# A reservation isn't as prompt as it should be
	sleep 3
	return $OCF_SUCCESS
  else
	ocf_log "err" "ICP: $1 was not reserved correctly"
	return $OCF_ERR_GENERIC
  fi
}

#
# Release the ICP host drive
#   $1  ICP cluster drive ID
# Returns $OCF_ERR_GENERIC if the release command fails or if the
# drive still shows as reserved by us afterwards.
#
ICP_stop() {
  ocf_log "info" "Releasing ICP host drive $1"
  ocf_run $ICPCLUCON -v -release "$1" || return $OCF_ERR_GENERIC

  ocf_log "info" "Verifying reservation"
  if ICP_status "$1"; then
	ocf_log "err" "ICP: $1 was not released correctly"
	return $OCF_ERR_GENERIC
  fi
  return $OCF_SUCCESS
}

#
# ICP_validate_all: check the required binaries and the instance
# parameters held in the global $driveid and $device variables.
# Exits with $OCF_ERR_ARGS when icpclucon rejects the drive ID or
# when $device is not a block device; returns $OCF_SUCCESS otherwise.
#
ICP_validate_all() {
  check_binary $BLOCKDEV
  check_binary $ICPCLUCON
  $ICPCLUCON -v -status "$driveid" >/dev/null 2>&1
  if [ $? -ne 0 ]; then
	ocf_log err "Invalid driveid $driveid"
	exit $OCF_ERR_ARGS
  fi

  # Quoted on purpose: unquoted, an empty value turns the test into
  # '[ ! -b ]', which is false and would let the check pass.
  if [ ! -b "$device" ]; then
	ocf_log err "Device $device is not a block device"
	exit $OCF_ERR_ARGS
  fi

  # Do not know how to check the association of $device with $driveid.
  return $OCF_SUCCESS
}

#
# 'main' starts here...
# if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; methods) ICP_methods exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_driveid" ] then ocf_log err "Please specify OCF_RESKEY_driveid" exit $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_device" ]; then ocf_log err "Please specify OCF_RESKEY_device" exit $OCF_ERR_ARGS fi driveid=$OCF_RESKEY_driveid device=$OCF_RESKEY_device # What kind of method was invoked? case "$1" in start) ICP_validate_all ICP_start $driveid Clear_bufs $device exit $?;; stop) ICP_stop $driveid Clear_bufs $device exit $?;; status) ICP_report_status $driveid exit $?;; monitor) ICP_monitor $driveid exit $?;; validate-all) ICP_validate_all exit $?;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/IPaddr000077500000000000000000000535141203363223200225770ustar00rootroot00000000000000#!/bin/sh # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script manages IP alias IP addresses # # It can add an IP alias, or remove one. # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_lvs_support ( e.g. true, on, 1 ) # OCF_RESKEY_ARP_INTERVAL_MS # OCF_RESKEY_ARP_REPEAT # OCF_RESKEY_ARP_BACKGROUND (e.g. yes ) # OCF_RESKEY_ARP_NETMASK # OCF_RESKEY_local_start_script # OCF_RESKEY_local_stop_script # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### SYSTYPE="`uname -s`" case "$SYSTYPE" in SunOS) # `uname -r` = 5.9 -> SYSVERSION = 9 SYSVERSION="`uname -r | cut -d. -f 2`" ;; Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac meta_data() { cat < 1.0 This script manages IP alias IP addresses It can add an IP alias, or remove one. Manages virtual IPv4 addresses (portable version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 (or per device) Network interface The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). If unspecified, the script will also try to determine this from the routing table. Netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. Interface label Enable support for LVS Direct Routing configurations. 
In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Script called when the IP is released Script called when the IP is released Script called when the IP is added Script called when the IP is added milliseconds between ARPs milliseconds between gratuitous ARPs How many gratuitous ARPs to send out when bringing up a new address repeat count run in background (no longer any reason to do this) run in background netmask for ARP - in nonstandard hexadecimal format. netmask for ARP END exit $OCF_SUCCESS } # The 'ping' command takes highly OS-dependent arguments, so this # function creates a suitable argument list for the host OS's 'ping'. # We use a subset of its functionality: # 1. single packet # 2. reasonable timeout (say 1 second) # # arguments: # $1: IP address to ping # result string: # arguments for ping command # # If more flexibility is needed, they could be specified in the environment # to this function, to adjust the resulting 'ping' arguments. # David Lee May 2007 pingargs() { _baseip=$1 _timeout=1 # seconds _pktcount=1 _systype="`uname -s`" case $_systype in Linux) # Default is perpetual ping: need "-c $_pktcount". # -c count -t timetolive -q(uiet) -n(umeric) -W timeout _pingargs="-c $_pktcount -q -n $_baseip" ;; SunOS) # Default is immediate (or timeout) return. _pingargs="$_baseip $_timeout" ;; *) _pingargs="-c $_pktcount $_baseip" ;; esac echo "$_pingargs" } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # lvs_remove_conflicting_loopback() { ipaddr="$1" ifname="$2" ocf_log info "Removing conflicting loopback $ifname." 
if echo $ifname > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi if [ ! -z "${OCF_RESKEY_local_stop_script}" ]; then if [ -x "${OCF_RESKEY_local_stop_script}" ]; then ${OCF_RESKEY_local_stop_script} $* fi fi delete_interface "$ifname" "$ipaddr" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by lvs_remove_conflicting_loopback() # lvs_restore_loopback() { ipaddr="$1" if [ ! -s "$VLDIR/$ipaddr" ]; then return fi ifname=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address $ipaddr on $ifname." CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF" if NICINFO=`eval $CMD` NICINFO=`echo $NICINFO | tr " " " " | tr -s " "` then netmask_text=`echo "$NICINFO" | cut -f3 -d " "` broadcast=`echo "$NICINFO" | cut -f5 -d " "` else echo "ERROR: $CMD failed (rc=$rc)" exit $OCF_ERR_GENERIC fi add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast rm -f "$VLDIR/$ipaddr" } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_solaris() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in *:*) ;; *) continue;; esac # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... 
ifname=`echo "$ifname" | sed s'%:$%%'` case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } find_interface_bsd() { $IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" ' /UP,/ && $0 ~ /^[a-z]+[0-9]:/ { if_name=$1; sub(":$","",if_name); } $1 == "inet" && $2 == ip_addr { print if_name exit(0) }' } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface_generic() { ipaddr="$1" $IFCONFIG $IFCONFIG_A_OPT | while read ifname linkstuff do : Read gave us ifname = $ifname read inet addr junk : Read gave us inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done case $ifname in *:*) ifname=`echo $ifname | sed 's/:$//'`;; *) continue;; esac : "comparing $ipaddr to $addr (from ifconfig)" case $addr in addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;; $ipaddr) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which alias serves the given IP address # The argument is an IP address, and its output # is an aliased interface name (e.g., "eth0:0"). # find_interface() { ipaddr="$1" case "$SYSTYPE" in SunOS) NIC=`find_interface_solaris $ipaddr`;; *BSD) NIC=`find_interface_bsd $ipaddr`;; *) NIC=`find_interface_generic $ipaddr`;; esac echo $NIC return $OCF_SUCCESS; } # # Find an unused interface/alias name for us to use for new IP alias # The argument is an IP address, and the output # is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0"). 
#
# find_free_interface NIC
#
#	Reserve and print an interface name on which a new IP alias can be
#	configured.  On *BSD the base interface itself is printed.  Elsewhere
#	the first alias "NIC:<n>" (n < 512) is chosen that is neither listed by
#	ifconfig nor already reserved.  A reservation is a hard link named
#	"$VLDIR/IPaddr-NIC:<n>" pointing at "$VLDIR/IPaddr-NIC"; ln fails if
#	the name already exists, which is what makes the reservation exclusive.
#
#	Returns $OCF_SUCCESS with the name on stdout, else $OCF_ERR_GENERIC.
#
find_free_interface() {
	NIC="$1"
	if [ "X$NIC" = "X" ]; then
		ocf_log err "No free interface found for $OCF_RESKEY_ip"
		return $OCF_ERR_GENERIC
	fi

	NICBASE="$VLDIR/IPaddr-$NIC"
	touch "$NICBASE"

	case "$SYSTYPE" in
	*BSD)
		# BSD aliases live on the base interface; no alias index needed.
		echo $NIC
		return $OCF_SUCCESS
		;;
	SunOS)
		# Alias numbering starts at 1 here.
		j=1
		IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
			grep "^$NIC:[0-9]" | sed 's%: .*%%'`
		;;
	*)
		j=0
		IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
			grep "^$NIC:[0-9]" | sed 's% .*%%'`
		# Start searching at the number of existing reservations,
		# unless that very index is already reserved.
		TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '`
		if [ -f "${NICBASE}:${TRYADRCNT}" ]; then
			: OK
		else
			j="${TRYADRCNT}"
		fi
		;;
	esac

	# Normalise to " a b c " so that whole-word matching works below.
	IFLIST=" `echo $IFLIST` "
	while [ $j -lt 512 ]
	do
		case $IFLIST in
		*" "$NIC:$j" "*)
			# Alias already configured; try the next index.
			;;
		*)
			NICLINK="$NICBASE:$j"
			if ln "$NICBASE" "$NICLINK" 2>/dev/null
			then
				echo "$NIC:$j"
				return $OCF_SUCCESS
			fi
			;;
		esac
		j=`expr $j + 1`
	done
	return $OCF_ERR_GENERIC
}

#
# delete_route IPADDR
#
#	Remove the host route to IPADDR.  Nothing is done on SunOS.
#	Returns the exit status of the route command.
#
delete_route () {
	ipaddr="$1"

	case "$SYSTYPE" in
	SunOS)	return 0;;
	*BSD)	CMD="$ROUTE -n delete -host $ipaddr";;
	*)	CMD="$ROUTE -n del -host $ipaddr";;
	esac

	# $CMD is intentionally unquoted: it must be split into words.
	$CMD
	return $?
}

#
# delete_interface IFNAME IPADDR
#
#	Take the alias IFNAME (carrying IPADDR) down using the ifconfig
#	syntax appropriate for $SYSTYPE.  Returns ifconfig's exit status.
#
delete_interface () {
	ifname="$1"
	ipaddr="$2"

	case "$SYSTYPE" in
	SunOS)
		if [ "$SYSVERSION" -ge 8 ] ; then
			CMD="$IFCONFIG $ifname unplumb"
		else
			CMD="$IFCONFIG $ifname 0 down"
		fi
		;;
	Darwin*)
		CMD="$IFCONFIG $ifname $ipaddr delete";;
	*BSD)
		CMD="$IFCONFIG $ifname inet $ipaddr delete";;
	*)
		CMD="$IFCONFIG $ifname down";;
	esac

	ocf_log info "$CMD"
	$CMD
	return $?
}

#
# add_interface IPADDR IFACE_BASE IFACE NETMASK BROADCAST
#
#	Configure IPADDR on the alias IFACE.  Exactly five arguments are
#	required (an empty string is an acceptable value); otherwise the
#	script exits with $OCF_ERR_ARGS.  Returns the exit status of the
#	ifconfig command (chain) that was run.
#
add_interface () {
	if [ $# != 5 ]; then
		ocf_log err "Insufficient arguments to add_interface: $*"
		exit $OCF_ERR_ARGS
	fi

	ipaddr="$1"
	iface_base="$2"
	iface="$3"
	netmask="$4"
	broadcast="$5"

	case "$SYSTYPE" in
	SunOS)
		if [ "$SYSVERSION" -ge 8 ] ; then
			$IFCONFIG $iface plumb
			rc=$?
			if [ $rc -ne 0 ] ; then
				ocf_log err "'$IFCONFIG $iface plumb' failed."
				return $rc
			fi
		fi
		# At Solaris 10, this single-command version sometimes broke.
		# Almost certainly an S10 bug.
		#	CMD="$IFCONFIG $iface inet $ipaddr $text up"
		# So hack the following workaround:
		CMD="$IFCONFIG $iface inet $ipaddr"
		CMD="$CMD && $IFCONFIG $iface netmask $netmask"
		CMD="$CMD && $IFCONFIG $iface up"
		;;
	*BSD)
		# netmask is always set to 255.255.255.255 for an alias
		CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias"
		;;
	*)
		CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast"
		;;
	esac

	# Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands.
	ocf_log info "eval $CMD"
	eval $CMD
	rc=$?
	if [ $rc != 0 ]; then
		ocf_log err "eval $CMD failed (rc=$rc)"
	fi

	return $rc
}

#
# ip_stop
#
#	Remove the IP alias for the requested IP address: kill any send_arp
#	still running for it, delete the route and the alias interface,
#	restore a previously removed loopback alias (LVS support only) and
#	drop the alias reservation file.
#
#	Returns $OCF_SUCCESS if the address is not (or no longer) configured,
#	$OCF_ERR_GENERIC if the interface could not be taken down.
#
ip_stop() {
	SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
	NIC=`find_interface $OCF_RESKEY_ip`

	if [ -f "$SENDARPPIDFILE" ]; then
		cat "$SENDARPPIDFILE" | xargs kill
		rm -f "$SENDARPPIDFILE"
	fi

	if [ -z "$NIC" ]; then
		: Requested interface not in use
		return $OCF_SUCCESS
	fi

	if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
		case $NIC in
		lo*)
			: Requested interface is on loopback
			return $OCF_SUCCESS;;
		esac
	fi

	delete_route "$OCF_RESKEY_ip"
	delete_interface "$NIC" "$OCF_RESKEY_ip"
	rc=$?

	if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
		lvs_restore_loopback "$OCF_RESKEY_ip"
	fi

	# remove lock file...
	rm -f "$VLDIR/IPaddr-$NIC"

	if [ $rc != 0 ]; then
		ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}

#
# ip_start
#
#	Add an IP alias for the requested IP address...
#
#	It could be that we already have taken it, in which case it should
#	do nothing.  After the alias is up, gratuitous ARPs are sent with
#	$SENDARP if that binary is executable.
#
#	Returns the result of ip_status_internal after configuration, or
#	$OCF_ERR_GENERIC if no alias could be reserved or configured.
#
ip_start() {
	#
	#	Do we already service this IP address?
	#
	ip_status_internal
	if [ $? = $OCF_SUCCESS ]; then
		# Nothing to do, the IP is already active
		return $OCF_SUCCESS
	fi

	NIC_unique=`find_free_interface $OCF_RESKEY_nic`
	if [ -z "$NIC_unique" ]; then
		return $OCF_ERR_GENERIC
	fi
	: OK got interface [$NIC_unique] for $OCF_RESKEY_ip

	# This logic is mostly to support LVS (If I understand it correctly)
	if [ "${OCF_RESKEY_lvs_support}" = 1 ]; then
		NIC_current=`find_interface $OCF_RESKEY_ip`
		case $NIC_unique in
		lo*)
			if [ x"$NIC_unique" = x"$NIC_current" ]; then
				# Its already "running" and not moving, nothing to do.
				ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to"
				return $OCF_ERR_GENERIC
			fi
			;;
		*)
			lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current"
			;;
		esac
	fi

	if [ -n "${OCF_RESKEY_local_start_script}" ]; then
		if [ -x "${OCF_RESKEY_local_start_script}" ]; then
			${OCF_RESKEY_local_start_script} $*
		fi
	fi

	add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \
		"$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast"
	rc=$?
	if [ $rc != 0 ]; then
		ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc"
		# rc is an ifconfig exit status, not an OCF code: do not let
		# it leak out as e.g. OCF_ERR_ARGS.
		return $OCF_ERR_GENERIC
	fi

	# The address is active, now notify others about it using sendarp
	if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then
		# Darwin can't send ARPs on loopback devices
		SENDARP="x$SENDARP"	# Prevent the binary from being found
	fi

	if [ -x "$SENDARP" ]; then
		TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'`
		SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"

		ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT"
		ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip"
		ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK"
		ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]"
		case $OCF_RESKEY_ARP_BACKGROUND in
		yes)
			($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2
			;;
		*)
			$SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?"
			;;
		esac
	fi

	ip_status_internal
	return $?
}

#
# ip_status_internal
#
#	Silent status check.  Returns
#	  $OCF_NOT_RUNNING  if no interface carries $OCF_RESKEY_ip (or, with
#	                    lvs_support=1, if it only lives on a lo* device),
#	  $OCF_ERR_GENERIC  if it is on a different base interface than the
#	                    configured $OCF_RESKEY_nic,
#	  $OCF_SUCCESS      otherwise.
#
ip_status_internal() {
	NIC=`find_interface "$OCF_RESKEY_ip"`

	if [ "x$NIC" = x ]; then
		return $OCF_NOT_RUNNING
	fi

	if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
		case $NIC in
		lo*)	return $OCF_NOT_RUNNING;;
		*)	return $OCF_SUCCESS;;
		esac
	fi

	if [ "x$OCF_RESKEY_nic" != x ]; then
		# Compare base interface names only (strip any ":alias" part).
		simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'`
		simple_NIC=`echo $NIC | awk -F: '{print $1}'`
		if [ "x$simple_OCF_NIC" != "x$simple_NIC" ]; then
			ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)"
			return $OCF_ERR_GENERIC
		fi
	fi
	return $OCF_SUCCESS
}

#
# ip_status
#
#	Like ip_status_internal, but also prints "running", "stopped" or
#	"unknown" on stdout.  Returns the ip_status_internal result.
#
ip_status() {
	ip_status_internal
	rc=$?
	if [ $rc = $OCF_SUCCESS ]; then
		echo "running"
	elif [ $rc = $OCF_NOT_RUNNING ]; then
		echo "stopped"
	else
		echo "unknown"
	fi
	return $rc
}

#
# ip_monitor
#
#	Determine if this IP address is really being served, or not.
#	Note that we must distinguish if *we're* serving it locally...
#
#	At check level 0 (also assumed when OCF_CHECK_LEVEL is unset), or when
#	the status check already failed, the status result is returned as is.
#	Otherwise the address is pinged, up to ten times; the first successful
#	ping yields $OCF_SUCCESS, ten failures yield $OCF_ERR_GENERIC.
#
ip_monitor() {
	ip_status_internal
	rc=$?
	# Quoted with a default so that an unset OCF_CHECK_LEVEL does not turn
	# into a "[" syntax error.
	if [ "${OCF_CHECK_LEVEL:-0}" = 0 -o "$rc" != 0 ]; then
		return $rc
	fi

	ocf_log info "Checking IP stack"

	PINGARGS="`pingargs $OCF_RESKEY_ip`"
	for j in 1 2 3 4 5 6 7 8 9 10; do
		MSG=`$PING $PINGARGS 2>&1`
		if [ $? = 0 ]; then
			return $OCF_SUCCESS
		fi
	done

	# Log the output of the last failed ping.
	ocf_log err "$MSG"
	return $OCF_ERR_GENERIC
}

#
# is_positive_integer VALUE
#
#	Returns 0 (true) if VALUE is a decimal number >= 1, 1 (false)
#	otherwise, including when VALUE is empty.
#
is_positive_integer() {
	ocf_is_decimal "$1" && [ "$1" -ge 1 ]
	if [ $? = 0 ]; then
		return 0
	fi
	return 1
}

#
# ip_validate_all
#
#	Apply defaults for the ARP parameters, check that the required
#	binaries exist, validate the parameters and fill in nic, netmask and
#	broadcast from the output of $FINDIF where they were not given.
#
#	Returns $OCF_SUCCESS, or $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED /
#	$OCF_ERR_GENERIC describing the first problem found.
#
ip_validate_all() {
	: ${OCF_RESKEY_ARP_BACKGROUND=yes}
	: ${OCF_RESKEY_ARP_NETMASK=ffffffffffff}
	: ${OCF_RESKEY_ARP_INTERVAL_MS=500}
	: ${OCF_RESKEY_ARP_REPEAT=10}

	check_binary $AWK
	check_binary $IFCONFIG
	check_binary $ROUTE
	check_binary $PING

	# "if ...; then :; else" rather than "if ! ...": plain Bourne shells
	# have no "!" pipeline negation.
	if is_positive_integer "$OCF_RESKEY_ARP_INTERVAL_MS"
	then
		: OK
	else
		ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]"
		return $OCF_ERR_ARGS
	fi

	if is_positive_integer "$OCF_RESKEY_ARP_REPEAT"
	then
		: OK
	else
		ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]"
		return $OCF_ERR_ARGS
	fi

	: ${OCF_RESKEY_lvs_support=0}

	# LVS support is only accepted on Linux and SunOS.
	case "$SYSTYPE" in
	Linux|SunOS)
		;;
	*)
		if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
			ocf_log err "$SYSTYPE does not support LVS"
			return $OCF_ERR_GENERIC
		fi
		;;
	esac

	case $OCF_RESKEY_ip in
	"")
		ocf_log err "Required parameter OCF_RESKEY_ip is missing"
		return $OCF_ERR_CONFIGURED;;
	[0-9]*.[0-9]*.[0-9]*.*[0-9])
		: OK;;
	*)
		ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address"
		return $OCF_ERR_CONFIGURED;;
	esac

	# Unconditionally do this?
	# Strip an alias suffix: only the base interface is used from here on.
	case $OCF_RESKEY_nic in
	*:*)
		OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'`
		;;
	esac

	# $FINDIF output is cut into tab-separated fields below:
	# field 1 -> nic, field 2 -> netmask, field 3 -> broadcast.
	NICINFO=`$FINDIF`
	rc=$?
	if [ $rc != 0 ]; then
		ocf_log err "$FINDIF failed [rc=$rc]."
		return $OCF_ERR_GENERIC
	fi

	tmp=`echo "$NICINFO" | cut -f1`
	if [ "x$OCF_RESKEY_nic" = "x" ]
	then
		ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp"
		OCF_RESKEY_nic=$tmp
	elif [ "x$tmp" != "x${OCF_RESKEY_nic}" ]
	then
		ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]"
		return $OCF_ERR_ARGS
	fi

	tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2`
	if [ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ]
	then
		ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp"
	fi

	# Always use the calculated version because it might have been specified
	# using CIDR notation which not every system accepts
	OCF_RESKEY_netmask=$tmp
	OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask

	tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2`
	if [ "x$OCF_RESKEY_broadcast" = "x" ]
	then
		ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp"
		OCF_RESKEY_broadcast=$tmp
	elif [ "x$tmp" != "x${OCF_RESKEY_broadcast}" ]; then
		ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]"
		return $OCF_ERR_ARGS
	fi

	return $OCF_SUCCESS
}

#
# usage RC
#
#	Print the usage line on stderr and return RC.
#
usage() {
	echo "$USAGE" >&2
	return $1
}

# Exactly one argument (the action) is expected.  Exit here: merely
# returning from usage() would fall through to the dispatch below and
# report the wrong error code.
if [ $# -ne 1 ]; then
	usage $OCF_ERR_ARGS
	exit $?
fi

: ${OCF_RESKEY_lvs_support=0}
# Normalize the value of lvs_support
case "${OCF_RESKEY_lvs_support}" in
true|on|yes|1)	OCF_RESKEY_lvs_support=1;;
*)		OCF_RESKEY_lvs_support=0;;
esac

# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. So, don't remove this aliasing code!
if
	[ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]
then
	OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
	export OCF_RESKEY_cidr_netmask
fi

case "$1" in
meta-data)	meta_data;;
start)		ip_validate_all && ip_start;;
stop)		ip_stop;;
status)		ip_status;;
monitor)	ip_monitor;;
validate-all)	ip_validate_all;;
usage)		usage $OCF_SUCCESS;;
*)		usage $OCF_ERR_UNIMPLEMENTED;;
esac

exit $?
ClusterLabs-resource-agents-dc69db5/heartbeat/IPaddr2000077500000000000000000000571721203363223200226650ustar00rootroot00000000000000#!/bin/sh # # $Id: IPaddr2.in,v 1.24 2006/08/09 13:01:54 lars Exp $ # # OCF Resource Agent compliant IPaddr2 script. # # Based on work by Tuomo Soini, ported to the OCF RA API by Lars # Marowsky-Brée. Implements Cluster Alias IP functionality too. # # Cluster Alias IP cleanup, fixes and testing by Michael Schwartzkopff # # # Copyright (c) 2003 Tuomo Soini # Copyright (c) 2004-2006 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # TODO: # - There ought to be an ocf_run_cmd function which does all logging, # timeout handling etc for us # - Make this the standard IP address agent on Linux; the other # platforms simply should ignore the additional parameters OR can use # the legacy heartbeat resource script... # - Check LVS <-> clusterip incompatibilities. 
# # OCF parameters are as below # OCF_RESKEY_ip # OCF_RESKEY_broadcast # OCF_RESKEY_nic # OCF_RESKEY_cidr_netmask # OCF_RESKEY_iflabel # OCF_RESKEY_mac # OCF_RESKEY_clusterip_hash # OCF_RESKEY_arp_interval # OCF_RESKEY_arp_count # OCF_RESKEY_arp_bg # OCF_RESKEY_arp_mac # # OCF_RESKEY_CRM_meta_clone # OCF_RESKEY_CRM_meta_clone_max ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_lvs_support_default=false OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 OCF_RESKEY_arp_bg_default=true OCF_RESKEY_arp_mac_default="ffffffffffff" : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} : ${OCF_RESKEY_clusterip_hash=${OCF_RESKEY_clusterip_hash_default}} : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} ####################################################################### SENDARP=$HA_BIN/send_arp FINDIF=$HA_BIN/findif VLDIR=$HA_RSCTMP SENDARPPIDDIR=$HA_RSCTMP CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} ####################################################################### meta_data() { cat < 1.0 This Linux-specific resource manages IP alias IP addresses. It can add an IP alias, or remove one. In addition, it can implement Cluster Alias IP functionality if invoked as a clone resource. If used as a clone, you should explicitly set clone-node-max >= 2, and/or clone-max < number of nodes. In case of node failure, clone instances need to be re-allocated on surviving nodes. 
Which would not be possible, if there is already an instance on those nodes, and clone-node-max=1 (which is the default). Manages virtual IPv4 addresses (Linux specific version) The IPv4 address to be configured in dotted quad notation, for example "192.168.1.1". IPv4 address The base network interface on which the IP address will be brought online. If left empty, the script will try and determine this from the routing table. Do NOT specify an alias interface in the form eth0:1 or anything here; rather, specify the base interface only. If you want a label, see the iflabel parameter. Prerequisite: There must be at least one static IP address, which is not managed by the cluster, assigned to the network interface. If you can not assign any static IP address on the interface, modify this kernel parameter: sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) Network interface The netmask for the interface in CIDR format (e.g., 24 and not 255.255.255.0) If unspecified, the script will also try to determine this from the routing table. CIDR netmask Broadcast address associated with the IP. If left empty, the script will determine this from the netmask. Broadcast address You can specify an additional label for your IP address here. This label is appended to your interface name. If a label is specified in nic name, this parameter has no effect. Interface label Enable support for LVS Direct Routing configurations. In case a IP address is stopped, only move it to the loopback device to allow the local node to continue to service requests, but no longer advertise it on the network. Enable support for LVS DR Set the interface MAC address explicitly. Currently only used in case of the Cluster IP Alias. Leave empty to chose automatically. Cluster IP MAC address Specify the hashing algorithm used for the Cluster IP functionality. 
Cluster IP hashing function If true, add the clone ID to the supplied value of ip to create a unique address to manage Create a unique address for cloned instances Specify the interval between unsolicited ARP packets in milliseconds. ARP packet interval in ms Number of unsolicited ARP packets to send. ARP packet count Whether or not to send the arp packets in the background. ARP from background MAC address to send the ARP packets to. You really shouldn't be touching this. ARP MAC Flush the routing table on stop. This is for applications which use the cluster IP address and which run on the same physical host that the IP address lives on. The Linux kernel may force that application to take a shortcut to the local loopback interface, instead of the interface the address is really bound to. Under those circumstances, an application may, somewhat unexpectedly, continue to use connections for some time even after the IP address is deconfigured. Set this parameter in order to immediately disable said shortcut when the IP address goes away. Flush kernel routing table on stop END exit $OCF_SUCCESS } ip_init() { local rc if [ X`uname -s` != "XLinux" ]; then ocf_log err "IPaddr2 only supported Linux." exit $OCF_ERR_INSTALLED fi if [ X"$OCF_RESKEY_ip" = "X" ]; then ocf_log err "IP address (the ip parameter) is mandatory" exit $OCF_ERR_CONFIGURED fi if case $__OCF_ACTION in start|stop) ocf_is_root;; *) true;; esac then : YAY! else ocf_log err "You must be root for $__OCF_ACTION operation." exit $OCF_ERR_PERM fi BASEIP="$OCF_RESKEY_ip" BRDCAST="$OCF_RESKEY_broadcast" NIC="$OCF_RESKEY_nic" # Note: We had a version out there for a while which used # netmask instead of cidr_netmask. Don't remove this aliasing code! if [ ! 
-z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ] then OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask export OCF_RESKEY_cidr_netmask fi NETMASK="$OCF_RESKEY_cidr_netmask" IFLABEL="$OCF_RESKEY_iflabel" IF_MAC="$OCF_RESKEY_mac" IP_INC_GLOBAL=${OCF_RESKEY_CRM_meta_clone_max:-1} IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1` if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then ocf_log err "LVS and load sharing do not go together well" exit $OCF_ERR_CONFIGURED fi if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then : else ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer" exit $OCF_ERR_CONFIGURED fi # $FINDIF takes its parameters from the environment # NICINFO=`$FINDIF -C` rc=$? if [ $rc -eq 0 ] then NICINFO=`echo $NICINFO | sed -e 's/netmask\ //;s/broadcast\ //'` NIC=`echo "$NICINFO" | cut -d" " -f1` NETMASK=`echo "$NICINFO" | cut -d" " -f2` BRDCAST=`echo "$NICINFO" | cut -d" " -f3` else # findif couldn't find the interface if ocf_is_probe; then ocf_log info "[$FINDIF -C] failed" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = stop ]; then ocf_log warn "[$FINDIF -C] failed" exit $OCF_SUCCESS else ocf_log err "[$FINDIF -C] failed" exit $rc fi fi SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" case $NIC in *:*) IFLABEL=$NIC NIC=`echo $NIC | sed 's/:.*//'` ;; *) if [ -n "$IFLABEL" ]; then IFLABEL=${NIC}:${IFLABEL} fi ;; esac if [ "$IP_INC_GLOBAL" -gt 1 ] && ! ocf_is_true "$OCF_RESKEY_unique_clone_address"; then IP_CIP="yes" IP_CIP_HASH="${OCF_RESKEY_clusterip_hash}" if [ -z "$IF_MAC" ]; then # Choose a MAC # 1. Concatenate some input together # 2. This doesn't need to be a cryptographically # secure hash. # 3. Drop everything after the first 6 octets (12 chars) # 4. Delimit the octets with ':' # 5. 
Make sure the first octet is odd, # so the result is a multicast MAC IF_MAC=`echo $OCF_RESKEY_ip $NETMASK $BRDCAST | \ md5sum | \ sed -e 's#\(............\).*#\1#' \ -e 's#..#&:#g; s#:$##' \ -e 's#^\(.\)[02468aAcCeE]#\11#'` fi IP_CIP_FILE="/proc/net/ipt_CLUSTERIP/$OCF_RESKEY_ip" fi } # # Find out which interfaces serve the given IP address and netmask. # The arguments are an IP address and a netmask. # Its output are interface names devided by spaces (e.g., "eth0 eth1"). # find_interface() { local ipaddr="$1" local netmask="$2" # # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces # local iface="`$IP2UTIL -o -f inet addr show \ | grep "\ $ipaddr/$netmask" \ | cut -d ' ' -f2 \ | grep -v '^ipsec[0-9][0-9]*$'`" echo "$iface" return 0 } # # Delete an interface # delete_interface () { ipaddr="$1" iface="$2" netmask="$3" CMD="$IP2UTIL -f inet addr delete $ipaddr/$netmask dev $iface" ocf_run $CMD || return $OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_flush_routes; then ocf_run $IP2UTIL route flush cache fi return $OCF_SUCCESS } # # Add an interface # add_interface () { local cmd msg ipaddr netmask broadcast iface label ipaddr="$1" netmask="$2" broadcast="$3" iface="$4" label="$5" cmd="$IP2UTIL -f inet addr add $ipaddr/$netmask brd $broadcast dev $iface" msg="Adding IPv4 address $ipaddr/$netmask with broadcast address $broadcast to device $iface" if [ ! -z "$label" ]; then cmd="$cmd label $label" msg="${msg} (with label $label)" fi ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC msg="Bringing device $iface up" cmd="$IP2UTIL link set $iface up" ocf_log info "$msg" ocf_run $cmd || return $OCF_ERR_GENERIC return $OCF_SUCCESS } # # Delete a route # delete_route () { prefix="$1" iface="$2" CMD="$IP2UTIL route delete $prefix dev $iface" ocf_log info "$CMD" $CMD return $? } # On Linux systems the (hidden) loopback interface may # conflict with the requested IP address. 
If so, this # unoriginal code will remove the offending loopback address # and save it in VLDIR so it can be added back in later # when the IPaddr is released. # # TODO: This is very ugly and should be controlled by an additional # instance parameter. Or even: multi-state, with the IP only being # "active" on the master!? # remove_conflicting_loopback() { ipaddr="$1" netmask="$2" broadcast="$3" ifname="$4" ocf_log info "Removing conflicting loopback $ifname." if echo "$ipaddr $netmask $broadcast $ifname" > "$VLDIR/$ipaddr" then : Saved loopback information in $VLDIR/$ipaddr else ocf_log err "Could not save conflicting loopback $ifname." \ "it will not be restored." fi delete_interface "$ipaddr" "$ifname" "$netmask" # Forcibly remove the route (if it exists) to the loopback. delete_route "$ipaddr" "$ifname" } # # On Linux systems the (hidden) loopback interface may # need to be restored if it has been taken down previously # by remove_conflicting_loopback() # restore_loopback() { ipaddr="$1" if [ -s "$VLDIR/$ipaddr" ]; then ifinfo=`cat "$VLDIR/$ipaddr"` ocf_log info "Restoring loopback IP Address " \ "$ifinfo." 
add_interface $ifinfo rm -f "$VLDIR/$ipaddr" fi } is_infiniband() { $IP2UTIL link show $NIC | grep link/infiniband >/dev/null } # # Run send_arp to note peers about new mac address # run_send_arp() { ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip auto not_used not_used" if [ "x$IP_CIP" = "xyes" ] ; then if [ x = "x$IF_MAC" ] ; then MY_MAC=auto else MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` fi ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" fi ocf_log info "$SENDARP $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" &) >&2 else $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" fi } # # Run ipoibarping to note peers about new Infiniband address # run_send_ib_arp() { ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip" ocf_log info "ipoibarping $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" &) >&2 else ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" fi } # Do we already serve this IP address on the given $NIC? # # returns: # ok = served (for CIP: + hash bucket) # partial = served and no hash bucket (CIP only) # partial2 = served and no CIP iptables rule # no = nothing # ip_served() { if [ -z "$NIC" ]; then # no nic found or specified echo "no" return 0 fi cur_nic="`find_interface $OCF_RESKEY_ip $NETMASK`" if [ -z "$cur_nic" ]; then echo "no" return 0 fi if [ -z "$IP_CIP" ]; then for i in $cur_nic; do # only mark as served when on the same interfaces as $NIC [ "$i" = "$NIC" ] || continue echo "ok" return 0 done # There used to be logic here to pretend "not served", # if ${OCF_RESKEY_lvs_support} was enabled, and the IP was # found active on "lo*" only. With lvs_support on, you should # have NIC != lo, so thats already filtered # by the continue above. 
echo "no" return 0 fi # Special handling for the CIP: if [ ! -e $IP_CIP_FILE ]; then echo "partial2" return 0 fi if egrep -q "(^|,)${IP_INC_NO}(,|$)" $IP_CIP_FILE ; then echo "ok" return 0 else echo "partial" return 0 fi exit $OCF_ERR_GENERIC } ####################################################################### ip_usage() { cat <$IP_CIP_FILE fi if [ "$ip_status" = "no" ]; then if ocf_is_true ${OCF_RESKEY_lvs_support}; then for i in `find_interface $OCF_RESKEY_ip $NETMASK`; do case $i in lo*) remove_conflicting_loopback $OCF_RESKEY_ip 32 255.255.255.255 lo ;; esac done fi add_interface $OCF_RESKEY_ip $NETMASK $BRDCAST $NIC $IFLABEL if [ $? -ne 0 ]; then ocf_log err "$CMD failed." exit $OCF_ERR_GENERIC fi fi case $NIC in lo*) : no need to run send_arp on loopback ;; *) if is_infiniband; then run_send_ib_arp elif [ -x $SENDARP ]; then run_send_arp fi ;; esac exit $OCF_SUCCESS } ip_stop() { local ip_del_if="yes" if [ -n "$IP_CIP" ]; then # Cluster IPs need special processing when the last bucket # is removed from the node... take a lock to make sure only one # process executes that code ocf_take_lock $CIP_lockfile ocf_release_lock_on_exit $CIP_lockfile fi if [ -f "$SENDARPPIDFILE" ] ; then kill `cat "$SENDARPPIDFILE"` if [ $? 
-ne 0 ]; then ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" else ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" rm -f "$SENDARPPIDFILE" fi fi local ip_status=`ip_served` ocf_log info "IP status = $ip_status, IP_CIP=$IP_CIP" if [ $ip_status = "no" ]; then : Requested interface not in use exit $OCF_SUCCESS fi if [ -n "$IP_CIP" ] && [ $ip_status != "partial2" ]; then if [ $ip_status = "partial" ]; then exit $OCF_SUCCESS fi echo "-$IP_INC_NO" >$IP_CIP_FILE if [ "x$(cat $IP_CIP_FILE)" = "x" ]; then ocf_log info $OCF_RESKEY_ip, $IP_CIP_HASH i=1 while [ $i -le $IP_INC_GLOBAL ]; do ocf_log info $i $IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ --new \ --clustermac $IF_MAC \ --total-nodes $IP_INC_GLOBAL \ --local-node $i \ --hashmode $IP_CIP_HASH i=`expr $i + 1` done else ip_del_if="no" fi fi if [ "$ip_del_if" = "yes" ]; then delete_interface $OCF_RESKEY_ip $NIC $NETMASK if [ $? -ne 0 ]; then exit $OCF_ERR_GENERIC fi if ocf_is_true ${OCF_RESKEY_lvs_support}; then restore_loopback "$OCF_RESKEY_ip" fi fi exit $OCF_SUCCESS } ip_monitor() { # TODO: Implement more elaborate monitoring like checking for # interface health maybe via a daemon like FailSafe etc... local ip_status=`ip_served` case $ip_status in ok) return $OCF_SUCCESS ;; partial|no|partial2) exit $OCF_NOT_RUNNING ;; *) # Errors on this interface? return $OCF_ERR_GENERIC ;; esac } ip_validate() { check_binary $IP2UTIL IP_CIP= ip_init is_infiniband && check_binary ipoibarping if [ -n "$IP_CIP" ]; then check_binary $IPTABLES check_binary $MODPROBE fi # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, # do not bother here. if ocf_is_true "$OCF_RESKEY_unique_clone_address" && ! 
ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
	ocf_log err "unique_clone_address makes sense only with meta globally_unique set"
	exit $OCF_ERR_CONFIGURED
    fi

    # arp_interval must be a decimal number greater than zero.
    if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then
	:
    else
	ocf_log err "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
	exit $OCF_ERR_CONFIGURED
    fi

    # arp_count must be a decimal number greater than zero.
    if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then
	:
    else
	ocf_log err "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
	exit $OCF_ERR_CONFIGURED
    fi

    # The remaining checks apply only when $IP_CIP is non-empty.
    if [ -n "$IP_CIP" ]; then

	local valid=1

	# Only these three hash modes are accepted.
	case $IP_CIP_HASH in
	sourceip|sourceip-sourceport|sourceip-sourceport-destport)
		;;
	*)
		ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
		exit $OCF_ERR_CONFIGURED
		;;
	esac

	# lvs_support together with a cluster IP is rejected as a
	# configuration error.  (The logger call used to be misspelled
	# "ecf_log", so the message was never written.)
	if ocf_is_true ${OCF_RESKEY_lvs_support}; then
		ocf_log err "LVS and load sharing not advised to try"
		exit $OCF_ERR_CONFIGURED
	fi

	# Shape check for IF_MAC: six two-character groups joined by a
	# non-alphanumeric separator; the second character of the first
	# group must be an odd hex digit.
	# NOTE(review): presumably this enforces a multicast MAC - confirm.
	case $IF_MAC in
	[0-9a-zA-Z][13579bBdDfF][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z][!0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z])
		;;
	*)
		valid=0
		;;
	esac

	if [ $valid -eq 0 ]; then
		ocf_log err "Invalid IF_MAC [$IF_MAC]"
		exit $OCF_ERR_CONFIGURED
	fi

    fi
}

# With unique_clone_address set, the clone number (default 0) is added to
# the last octet of the configured address.
if ocf_is_true "$OCF_RESKEY_unique_clone_address"; then
    prefix=`echo $OCF_RESKEY_ip | awk -F. '{print $1"."$2"."$3}'`
    suffix=`echo $OCF_RESKEY_ip | awk -F.
'{print $4}'` suffix=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + $suffix` OCF_RESKEY_ip="$prefix.$suffix" fi case $__OCF_ACTION in meta-data) meta_data ;; usage|help) ip_usage exit $OCF_SUCCESS ;; esac ip_validate case $__OCF_ACTION in start) ip_start ;; stop) ip_stop ;; status) ip_status=`ip_served` if [ $ip_status = "ok" ]; then echo "running" exit $OCF_SUCCESS else echo "stopped" exit $OCF_NOT_RUNNING fi ;; monitor) ip_monitor ;; validate-all) ;; *) ip_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/IPsrcaddr000077500000000000000000000276141203363223200233110ustar00rootroot00000000000000#!/bin/sh # # Description: IPsrcaddr - Preferred source address modification # # Author: John Sutton # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: SCL Internet # # Based on the IPaddr script. # # This script manages the preferred source address associated with # packets which originate on the localhost and are routed through the # default route. By default, i.e. without the use of this script or # similar, these packets will carry the IP of the primary i.e. the # non-aliased interface. This can be a nuisance if you need to ensure # that such packets carry the same IP irrespective of which host in # a redundant cluster they actually originate from. # # It can add a preferred source address, or remove one. # # usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds a preferred source address. # # Surprisingly, the "stop" arg removes it. :-) # # NOTES: # # 1) There must be one and not more than 1 default route! Mainly because # I can't see why you should have more than one. And if there is more # than one, we would have to box clever to find out which one is to be # modified, or we would have to pass its identity as an argument. # # 2) The script depends on Alexey Kuznetsov's ip utility from the # iproute aka iproute2 package. 
# # 3) No checking is done to see if the passed in IP address can # reasonably be associated with the interface on which the default # route exists. So unless you want to deliberately spoof your source IP, # check it! Normally, I would expect that your haresources looks # something like: # # nodename ip1 ip2 ... ipN IPsrcaddr::ipX # # where ipX is one of the ip1 to ipN. # # OCF parameters are as below: # OCF_RESKEY_ipaddress ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0" CMDCHANGE="$IP2UTIL route change to " SYSTYPE="`uname -s`" usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 Resource script for IPsrcaddr. It manages the preferred source address modification. Manages the preferred source address for outgoing IP packets The IP address. IP address The netmask for the interface in CIDR format. (ie, 24), or in dotted quad notation 255.255.255.0). Netmask END } errorexit() { ocf_log err "$*" exit $OCF_ERR_GENERIC } # # We can distinguish 3 cases: no preferred source address, a # preferred source address exists which matches that specified, and one # exists but doesn't match that specified. srca_read() returns 1,0,2 # respectively. # # The output of route show is something along the lines of: # # default via X.X.X.X dev eth1 src Y.Y.Y.Y # # where the src clause "src Y.Y.Y.Y" may or may not be present WS="[`echo -en ' \t'`]" OCTET="[0-9]\{1,3\}" IPADDR="\($OCTET\.\)\{3\}$OCTET" SRCCLAUSE="src$WS$WS*\($IPADDR\)" MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)" FINDIF=$HA_BIN/findif # findif needs that to be set export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress srca_read() { # Capture the default route - doublequotes prevent word splitting... 
DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed" # ... so we can make sure there is only 1 default route [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \ errorexit "more than 1 default route exists" # But there might still be no default route [ -z "$DEFROUTE" ] && errorexit "no default route exists" # Sed out the source ip address if it exists SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"` # and what remains after stripping out the source ip address clause ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"` [ -z "$SRCIP" ] && return 1 [ $SRCIP = $1 ] && return 0 return 2 } # # Add (or change if it already exists) the preferred source address # The exit code should conform to LSB exit codes. # srca_start() { srca_read $1 rc=$? if [ $rc = 0 ]; then rc=$OCF_SUCCESS ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)" else ip route replace $NETWORK dev $INTERFACE src $1 || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed" $CMDCHANGE $ROUTE_WO_SRC src $1 || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed" rc=$? fi return $rc } # # Remove (if it exists) the preferred source address. # If one exists but it's not the same as the one specified, that's # an error. Maybe that's the wrong behaviour because if this fails # then when IPaddr releases the associated interface (if there is one) # your default route will also get dropped ;-( # The exit code should conform to LSB exit codes. # srca_stop() { srca_read $1 rc=$? 
if [ $rc = 1 ]; then # We do not have a preferred source address for now ocf_log info "No preferred source address defined, nothing to stop" exit $OCF_SUCCESS fi [ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address" ip route replace $NETWORK dev $INTERFACE || \ errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed" $CMDCHANGE $ROUTE_WO_SRC || \ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed" return $? } srca_status() { srca_read $1 case $? in 0) echo "OK" return $OCF_SUCCESS;; 1) echo "No preferred source address defined" return $OCF_NOT_RUNNING;; 2) echo "Preferred source address has incorrect value" return $OCF_ERR_GENERIC;; esac } # A not reliable IP address checking function, which only picks up those _obvious_ violations... # # It accepts IPv4 address in dotted quad notation, for example "192.168.1.1" # # 100% confidence whenever it reports "negative", # but may get false "positive" answer. # CheckIP() { ip="$1" case $ip in *[!0-9.]*) #got invalid char false;; .*|*.) #begin or end by ".", which is invalid false;; *..*) #consecutive ".", which is invalid false;; *.*.*.*.*) #four decimal dots, which is too many false;; *.*.*.*) #exactly three decimal dots, candidate, evaluate each field local IFS=. set -- $ip if ( [ $1 -le 254 ] && [ $2 -le 254 ] && [ $3 -le 254 ] && [ $4 -le 254 ] ) then if [ $1 -eq 127 ]; then ocf_log err "IP address [$ip] is a loopback address, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED fi else true fi ;; *) #less than three decimal dots false;; esac return $? # This return is unnecessary, this comment too :) } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). 
# find_interface_solaris() { $IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' | while read ifname linkstuff do : ifname = $ifname read inet addr junk : inet = $inet addr = $addr while read line && [ "X$line" != "X" ] do : Nothing done # This doesn't look right for a box with multiple NICs. # It looks like it always selects the first interface on # a machine. Yet, we appear to use the results for this case too... ifname=`echo "$ifname" | sed s'%:*$%%'` case $addr in addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;; $BASEIP) echo $ifname; return $OCF_SUCCESS;; esac done return $OCF_ERR_GENERIC } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface_generic() { local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \ | cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'` if [ -z "$iface" ]; then return $OCF_ERR_GENERIC else echo $iface return $OCF_SUCCESS fi } # # Find out which interface or alias serves the given IP address # The argument is an IP address, and its output # is an (aliased) interface name (e.g., "eth0" and "eth0:0"). # find_interface() { case "$SYSTYPE" in SunOS) IF=`find_interface_solaris $BASEIP` ;; *) IF=`find_interface_generic $BASEIP` ;; esac echo $IF return $OCF_SUCCESS; } ip_status() { BASEIP="$1" case "$SYSTYPE" in Darwin) # Treat Darwin the same as the other BSD variants (matched as *BSD) SYSTYPE="${SYSTYPE}BSD" ;; *) ;; esac case "$SYSTYPE" in *BSD) $IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1 if [ $? 
= 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi;; Linux|SunOS) IF=`find_interface "$BASEIP"` if [ -z "$IF" ]; then return $OCF_NOT_RUNNING fi case $IF in lo*) ocf_log err "IP address [$BASEIP] is served by loopback, thus can not be preferred source address" exit $OCF_ERR_CONFIGURED ;; *)return $OCF_SUCCESS;; esac ;; *) if [ -z "$IF" ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi;; esac } srca_validate_all() { check_binary $AWK check_binary $IFCONFIG # The IP address should be in good shape if CheckIP "$ipaddress"; then : else ocf_log err "Invalid IP address [$ipaddress]" exit $OCF_ERR_CONFIGURED fi if ocf_is_probe; then return $OCF_SUCCESS fi # We should serve this IP address of course if ip_status "$ipaddress"; then : else ocf_log err "We are not serving [$ipaddress], hence can not make it a preferred source address" exit $OCF_ERR_INSTALLED fi } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations do not require the OCF instance parameters to be set case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac if [ -z "$OCF_RESKEY_ipaddress" ] then # usage ocf_log err "Please set OCF_RESKEY_ipaddress to the preferred source IP address!" exit $OCF_ERR_CONFIGURED fi ipaddress="$OCF_RESKEY_ipaddress" if [ "x$SYSTYPE" = "xLinux" ]; then srca_validate_all fi findif_out=`$FINDIF -C` rc=$? [ $rc -ne 0 ] && { ocf_log err "[$FINDIF -C] failed" exit $rc } INTERFACE=`echo $findif_out | awk '{print $1}'` NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'` case $1 in start) srca_start $ipaddress ;; stop) srca_stop $ipaddress ;; status) srca_status $ipaddress ;; monitor) srca_status $ipaddress ;; validate-all) srca_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # # Version 0.3 2002/11/04 17:00:00 John Sutton # Name changed from IPsrcroute to IPsrcaddr and now reports errors # using ha_log rather than on stderr. 
# # Version 0.2 2002/11/02 17:00:00 John Sutton # Changed status output to "OK" to satisfy ResourceManager's # we_own_resource() function. # # Version 0.1 2002/11/01 17:00:00 John Sutton # First effort but does the job? # ClusterLabs-resource-agents-dc69db5/heartbeat/IPv6addr.c000066400000000000000000000630331203363223200232660ustar00rootroot00000000000000 /* * This program manages IPv6 address with OCF Resource Agent standard. * * Author: Huang Zhen * Copyright (c) 2004 International Business Machines * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * It can add an IPv6 address, or remove one. * * Usage: IPv6addr {start|stop|status|monitor|meta-data} * * The "start" arg adds an IPv6 address. * The "stop" arg removes one. * The "status" arg shows whether the IPv6 address exists * The "monitor" arg shows whether the IPv6 address can be pinged (ICMPv6 ECHO) * The "meta_data" arg shows the meta data(XML) */ /* * ipv6-address: * * currently the following forms are legal: * address * address/prefix * * E.g. * 3ffe:ffff:0:f101::3 * 3ffe:ffff:0:f101::3/64 * * It should be passed by environment variant: * OCF_RESKEY_ipv6addr=3ffe:ffff:0:f101::3 * OCF_RESKEY_cidr_netmask=64 * OCF_RESKEY_nic=eth0 * */ /* * start: * 1.IPv6addr will choice a proper interface for the new address. * 2.Then assign the new address to the interface. 
* 3.Wait until the new address is available (reply ICMPv6 ECHO packet) * 4.Send out the unsolicited advertisements. * * return 0(OCF_SUCCESS) for success * return 1(OCF_ERR_GENERIC) for failure * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * * stop: * remove the address from the inferface. * * return 0(OCF_SUCCESS) for success * return 1(OCF_ERR_GENERIC) for failure * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * status: * return the status of the address. only check whether it exists. * * return 0(OCF_SUCCESS) for existing * return 1(OCF_NOT_RUNNING) for not existing * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) * * * monitor: * ping the address by ICMPv6 ECHO request. * * return 0(OCF_SUCCESS) for response correctly. * return 1(OCF_NOT_RUNNING) for no response. * return 2(OCF_ERR_ARGS) for invalid or excess argument(s) */ #include #include #include #include #include #include #include #include #include /* for inet_pton */ #include /* for if_nametoindex */ #include #include #include #include #include #include #include #include #define PIDFILE_BASE HA_RSCTMPDIR "/IPv6addr-" /* 0 No error, action succeeded completely 1 generic or unspecified error (current practice) The "monitor" operation shall return this for a crashed, hung or otherwise non-functional resource. 2 invalid or excess argument(s) Likely error code for validate-all, if the instance parameters do not validate. Any other action is free to also return this exit status code for this case. 
3 unimplemented feature (for example, "reload") 4 user had insufficient privilege 5 program is not installed 6 program is not configured 7 program is not running 8 resource is running in "master" mode and fully operational 9 resource is in "master" mode but in a failed state */ #define OCF_SUCCESS 0 #define OCF_ERR_GENERIC 1 #define OCF_ERR_ARGS 2 #define OCF_ERR_UNIMPLEMENTED 3 #define OCF_ERR_PERM 4 #define OCF_ERR_INSTALLED 5 #define OCF_ERR_CONFIGURED 6 #define OCF_NOT_RUNNING 7 const char* IF_INET6 = "/proc/net/if_inet6"; const char* APP_NAME = "IPv6addr"; const char* START_CMD = "start"; const char* STOP_CMD = "stop"; const char* STATUS_CMD = "status"; const char* MONITOR_CMD = "monitor"; const char* ADVT_CMD = "advt"; const char* RECOVER_CMD = "recover"; const char* RELOAD_CMD = "reload"; const char* META_DATA_CMD = "meta-data"; const char* VALIDATE_CMD = "validate-all"; char BCAST_ADDR[] = "ff02::1"; const int UA_REPEAT_COUNT = 5; const int QUERY_COUNT = 5; #define HWADDR_LEN 6 /* mac address length */ struct in6_ifreq { struct in6_addr ifr6_addr; uint32_t ifr6_prefixlen; unsigned int ifr6_ifindex; }; static int start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int monitor_addr6(struct in6_addr* addr6, int prefix_len); static int advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname); static int meta_data_addr6(void); static void usage(const char* self); int write_pid_file(const char *pid_file); int create_pid_directory(const char *pid_file); static void byebye(int nsig); static char* scan_if(struct in6_addr* addr_target, int* plen_target, int use_mask, char* prov_ifname); static char* find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); static char* get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname); 
static int assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); static int unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name); int is_addr6_available(struct in6_addr* addr6); static int send_ua(struct in6_addr* src_ip, char* if_name); int main(int argc, char* argv[]) { char pid_file[256]; char* ipv6addr; char* cidr_netmask; int ret; char* cp; char* prov_ifname = NULL; int prefix_len = -1; struct in6_addr addr6; /* Check the count of parameters first */ if (argc < 2) { usage(argv[0]); return OCF_ERR_ARGS; } /* set termination signal */ siginterrupt(SIGTERM, 1); signal(SIGTERM, byebye); /* open system log */ cl_log_set_entity(APP_NAME); cl_log_set_facility(LOG_DAEMON); /* the meta-data dont need any parameter */ if (0 == strncmp(META_DATA_CMD, argv[1], strlen(META_DATA_CMD))) { ret = meta_data_addr6(); return OCF_SUCCESS; } /* check the OCF_RESKEY_ipv6addr parameter, should be an IPv6 address */ ipv6addr = getenv("OCF_RESKEY_ipv6addr"); if (ipv6addr == NULL) { cl_log(LOG_ERR, "Please set OCF_RESKEY_ipv6addr to the IPv6 address you want to manage."); usage(argv[0]); return OCF_ERR_ARGS; } /* legacy option */ if ((cp = strchr(ipv6addr, '/'))) { prefix_len = atol(cp + 1); if ((prefix_len < 0) || (prefix_len > 128)) { cl_log(LOG_ERR, "Invalid prefix_len [%s], should be an integer in [0, 128]", cp+1); usage(argv[0]); return OCF_ERR_ARGS; } *cp=0; } /* get provided netmask (optional) */ cidr_netmask = getenv("OCF_RESKEY_cidr_netmask"); if (cidr_netmask != NULL) { if ((atol(cidr_netmask) < 0) || (atol(cidr_netmask) > 128)) { cl_log(LOG_ERR, "Invalid prefix_len [%s], " "should be an integer in [0, 128]", cidr_netmask); usage(argv[0]); return OCF_ERR_ARGS; } if (prefix_len != -1 && prefix_len != atol(cidr_netmask)) { cl_log(LOG_DEBUG, "prefix_len(%d) is overwritted by cidr_netmask(%s)", prefix_len, cidr_netmask); } prefix_len = atol(cidr_netmask); } else if (prefix_len == -1) { prefix_len = 0; } /* get provided interface name (optional) */ 
prov_ifname = getenv("OCF_RESKEY_nic"); if (inet_pton(AF_INET6, ipv6addr, &addr6) <= 0) { cl_log(LOG_ERR, "Invalid IPv6 address [%s]", ipv6addr); usage(argv[0]); return OCF_ERR_ARGS; } /* Check whether this system supports IPv6 */ if (access(IF_INET6, R_OK)) { cl_log(LOG_ERR, "No support for INET6 on this system."); return OCF_ERR_GENERIC; } /* create the pid file so we can make sure that only one IPv6addr * for this address is running */ if (snprintf(pid_file, sizeof(pid_file), "%s%s", PIDFILE_BASE, ipv6addr) >= (int)sizeof(pid_file)) { cl_log(LOG_ERR, "Pid file truncated"); return OCF_ERR_GENERIC; } if (write_pid_file(pid_file) < 0) { return OCF_ERR_GENERIC; } /* switch the command */ if (0 == strncmp(START_CMD,argv[1], strlen(START_CMD))) { ret = start_addr6(&addr6, prefix_len, prov_ifname); }else if (0 == strncmp(STOP_CMD,argv[1], strlen(STOP_CMD))) { ret = stop_addr6(&addr6, prefix_len, prov_ifname); }else if (0 == strncmp(STATUS_CMD,argv[1], strlen(STATUS_CMD))) { ret = status_addr6(&addr6, prefix_len, prov_ifname); }else if (0 ==strncmp(MONITOR_CMD,argv[1], strlen(MONITOR_CMD))) { ret = monitor_addr6(&addr6, prefix_len); }else if (0 ==strncmp(RELOAD_CMD,argv[1], strlen(RELOAD_CMD))) { ret = OCF_ERR_UNIMPLEMENTED; }else if (0 ==strncmp(RECOVER_CMD,argv[1], strlen(RECOVER_CMD))) { ret = OCF_ERR_UNIMPLEMENTED; }else if (0 ==strncmp(VALIDATE_CMD,argv[1], strlen(VALIDATE_CMD))) { /* ipv6addr has been validated by inet_pton, hence a valid IPv6 address */ ret = OCF_SUCCESS; }else if (0 ==strncmp(ADVT_CMD,argv[1], strlen(MONITOR_CMD))) { ret = advt_addr6(&addr6, prefix_len, prov_ifname); }else{ usage(argv[0]); ret = OCF_ERR_ARGS; } /* release the pid file */ unlink(pid_file); return ret; } int start_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { int i; char* if_name; if(OCF_SUCCESS == status_addr6(addr6,prefix_len,prov_ifname)) { return OCF_SUCCESS; } /* we need to find a proper device to assign the address */ if_name = find_if(addr6, 
&prefix_len, prov_ifname); if (NULL == if_name) { cl_log(LOG_ERR, "no valid mechanisms"); return OCF_ERR_GENERIC; } /* Assign the address */ if (0 != assign_addr6(addr6, prefix_len, if_name)) { cl_log(LOG_ERR, "failed to assign the address to %s", if_name); return OCF_ERR_GENERIC; } /* Check whether the address available */ for (i = 0; i < QUERY_COUNT; i++) { if (0 == is_addr6_available(addr6)) { break; } sleep(1); } if (i == QUERY_COUNT) { cl_log(LOG_ERR, "failed to ping the address"); return OCF_ERR_GENERIC; } /* Send unsolicited advertisement packet to neighbor */ for (i = 0; i < UA_REPEAT_COUNT; i++) { send_ua(addr6, if_name); sleep(1); } return OCF_SUCCESS; } int advt_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { /* First, we need to find a proper device to assign the address */ char* if_name = get_if(addr6, &prefix_len, prov_ifname); int i; if (NULL == if_name) { cl_log(LOG_ERR, "no valid mechanisms"); return OCF_ERR_GENERIC; } /* Send unsolicited advertisement packet to neighbor */ for (i = 0; i < UA_REPEAT_COUNT; i++) { send_ua(addr6, if_name); sleep(1); } return OCF_SUCCESS; } int stop_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { char* if_name; if(OCF_NOT_RUNNING == status_addr6(addr6,prefix_len,prov_ifname)) { return OCF_SUCCESS; } if_name = get_if(addr6, &prefix_len, prov_ifname); if (NULL == if_name) { cl_log(LOG_ERR, "no valid mechanisms."); /* I think this should be a success exit according to LSB. 
*/ return OCF_ERR_GENERIC; } /* Unassign the address */ if (0 != unassign_addr6(addr6, prefix_len, if_name)) { cl_log(LOG_ERR, "failed to assign the address to %s", if_name); return OCF_ERR_GENERIC; } return OCF_SUCCESS; } int status_addr6(struct in6_addr* addr6, int prefix_len, char* prov_ifname) { char* if_name = get_if(addr6, &prefix_len, prov_ifname); if (NULL == if_name) { return OCF_NOT_RUNNING; } return OCF_SUCCESS; } int monitor_addr6(struct in6_addr* addr6, int prefix_len) { if(0 == is_addr6_available(addr6)) { return OCF_SUCCESS; } return OCF_NOT_RUNNING; } /* Send an unsolicited advertisement packet * Please refer to rfc4861 / rfc3542 */ int send_ua(struct in6_addr* src_ip, char* if_name) { int status = -1; int fd; int ifindex; int hop; struct ifreq ifr; u_int8_t *payload = NULL; int payload_size; struct nd_neighbor_advert *na; struct nd_opt_hdr *opt; struct sockaddr_in6 src_sin6; struct sockaddr_in6 dst_sin6; if ((fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1) { cl_log(LOG_ERR, "socket(IPPROTO_ICMPV6) failed: %s", strerror(errno)); return status; } /* set the outgoing interface */ ifindex = if_nametoindex(if_name); if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF, &ifindex, sizeof(ifindex)) < 0) { cl_log(LOG_ERR, "setsockopt(IPV6_MULTICAST_IF) failed: %s", strerror(errno)); goto err; } /* set the hop limit */ hop = 255; /* 255 is required. 
see rfc4861 7.1.2 */ if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hop, sizeof(hop)) < 0) { cl_log(LOG_ERR, "setsockopt(IPV6_MULTICAST_HOPS) failed: %s", strerror(errno)); goto err; } /* set the source address */ memset(&src_sin6, 0, sizeof(src_sin6)); src_sin6.sin6_family = AF_INET6; src_sin6.sin6_addr = *src_ip; src_sin6.sin6_port = 0; if (IN6_IS_ADDR_LINKLOCAL(&src_sin6.sin6_addr) || IN6_IS_ADDR_MC_LINKLOCAL(&src_sin6.sin6_addr)) { src_sin6.sin6_scope_id = ifindex; } if (bind(fd, (struct sockaddr *)&src_sin6, sizeof(src_sin6)) < 0) { cl_log(LOG_ERR, "bind() failed: %s", strerror(errno)); goto err; } /* get the hardware address */ memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1); if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { cl_log(LOG_ERR, "ioctl(SIOCGIFHWADDR) failed: %s", strerror(errno)); goto err; } /* build a neighbor advertisement message */ payload_size = sizeof(struct nd_neighbor_advert) + sizeof(struct nd_opt_hdr) + HWADDR_LEN; payload = memalign(sysconf(_SC_PAGESIZE), payload_size); if (!payload) { cl_log(LOG_ERR, "malloc for payload failed"); goto err; } memset(payload, 0, payload_size); /* Ugly typecast from ia64 hell! 
*/ na = (struct nd_neighbor_advert *)((void *)payload); na->nd_na_type = ND_NEIGHBOR_ADVERT; na->nd_na_code = 0; na->nd_na_cksum = 0; /* calculated by kernel */ na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE; na->nd_na_target = *src_ip; /* options field; set the target link-layer address */ opt = (struct nd_opt_hdr *)(payload + sizeof(struct nd_neighbor_advert)); opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; opt->nd_opt_len = 1; /* The length of the option in units of 8 octets */ memcpy(payload + sizeof(struct nd_neighbor_advert) + sizeof(struct nd_opt_hdr), &ifr.ifr_hwaddr.sa_data, HWADDR_LEN); /* sending an unsolicited neighbor advertisement to all */ memset(&dst_sin6, 0, sizeof(dst_sin6)); dst_sin6.sin6_family = AF_INET6; inet_pton(AF_INET6, BCAST_ADDR, &dst_sin6.sin6_addr); /* should not fail */ if (sendto(fd, payload, payload_size, 0, (struct sockaddr *)&dst_sin6, sizeof(dst_sin6)) != payload_size) { cl_log(LOG_ERR, "sendto(%s) failed: %s", if_name, strerror(errno)); goto err; } status = 0; err: close(fd); free(payload); return status; } /* find the network interface associated with an address */ char* scan_if(struct in6_addr* addr_target, int* plen_target, int use_mask, char* prov_ifname) { FILE *f; static char devname[21]=""; struct in6_addr addr; struct in6_addr mask; unsigned int plen, scope, dad_status, if_idx; unsigned int addr6p[4]; /* open /proc/net/if_inet6 file */ if ((f = fopen(IF_INET6, "r")) == NULL) { return NULL; } /* Loop for each entry */ while (1) { int i; int n; int s; gboolean same = TRUE; i = fscanf(f, "%08x%08x%08x%08x %x %02x %02x %02x %20s\n", &addr6p[0], &addr6p[1], &addr6p[2], &addr6p[3], &if_idx, &plen, &scope, &dad_status, devname); if (i == EOF) { break; } else if (i != 9) { cl_log(LOG_INFO, "Error parsing %s, " "perhaps the format has changed\n", IF_INET6); break; } /* Consider link-local addresses (scope == 0x20) only when * the inerface name is provided, and global addresses * (scope == 0). Skip everything else. 
*/
		if (scope != 0) {
			if (scope != 0x20 || prov_ifname == 0
				|| *prov_ifname == 0)
				continue;
		}

		/* If specified prefix, only same prefix entry
		 * would be considered.  A *plen_target of 0 means
		 * "no prefix requested" and disables this filter.
		 */
		if (*plen_target!=0 && plen != *plen_target) {
			continue;
		}

		/* If interface name provided, only same devname entry
		 * would be considered
		 */
		if (prov_ifname!=0 && *prov_ifname!=0)
		{
			if (strcmp(devname, prov_ifname))
				continue;
		}

		/* Convert the four parsed 32-bit words to network order */
		for (i = 0; i< 4; i++) {
			addr.s6_addr32[i] = htonl(addr6p[i]);
		}

		/* Make the mask based on prefix length.
		 * The mask starts as all ones; it is narrowed only when
		 * use_mask is set and the prefix is shorter than 128 bits,
		 * so without use_mask the comparison below is an exact
		 * 128-bit match.
		 */
		memset(mask.s6_addr, 0xff, 16);
		if (use_mask && plen < 128) {
			/* n: index of the 32-bit word holding the prefix end */
			n = plen / 32;
			/* clear every word after word n */
			memset(mask.s6_addr32 + n + 1, 0, (3 - n) * 4);
			/* s: number of low (host) bits to clear in word n */
			s = 32 - plen % 32;
			if (s == 32)
				/* prefix ends on a word boundary: word n is
				 * all host bits (a 32-bit shift is avoided)
				 */
				mask.s6_addr32[n] = 0x0;
			else
				mask.s6_addr32[n] = 0xffffffff << s;
			mask.s6_addr32[n] = htonl(mask.s6_addr32[n]);
		}

		/* compare addr and addr_target */
		same = TRUE;
		for (i = 0; i < 4; i++) {
			if ((addr.s6_addr32[i]&mask.s6_addr32[i]) !=
			 (addr_target->s6_addr32[i]&mask.s6_addr32[i])) {
				same = FALSE;
				break;
			}
		}

		/* We found it!
		 * The entry's prefix length is written back through
		 * plen_target, and the returned name lives in the static
		 * devname buffer (overwritten by the next call).
		 */
		if (same) {
			fclose(f);
			*plen_target = plen;
			return devname;
		}
	}
	fclose(f);
	return NULL;
}
/* find a proper network interface to assign the address
 *
 * Runs scan_if() with masking enabled.  When nothing matches and the
 * caller supplied both an interface name and a non-zero prefix length,
 * the supplied interface name is returned instead.
 */
char*
find_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname)
{
	char *best_ifname = scan_if(addr_target, plen_target, 1, prov_ifname);

	/* use the provided ifname and prefix if the address did not match */
	if (best_ifname == NULL &&
	    prov_ifname != 0 &&  *prov_ifname != 0 && *plen_target != 0) {
		cl_log(LOG_INFO, "Could not find a proper interface by the ipv6addr.
Using the specified nic:'%s' and cidr_netmask:'%d'", prov_ifname, *plen_target); return prov_ifname; } return best_ifname; } /* get the device name and the plen_target of a special address */ char* get_if(struct in6_addr* addr_target, int* plen_target, char* prov_ifname) { return scan_if(addr_target, plen_target, 0, prov_ifname); } int assign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) { struct in6_ifreq ifr6; /* Get socket first */ int fd; struct ifreq ifr; fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd < 0) { return 1; } /* Query the index of the if */ strcpy(ifr.ifr_name, if_name); if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { return -1; } /* Assign the address to the if */ ifr6.ifr6_addr = *addr6; ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = prefix_len; if (ioctl(fd, SIOCSIFADDR, &ifr6) < 0) { return -1; } close (fd); return 0; } int unassign_addr6(struct in6_addr* addr6, int prefix_len, char* if_name) { int fd; struct ifreq ifr; struct in6_ifreq ifr6; /* Get socket first */ fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd < 0) { return 1; } /* Query the index of the if */ strcpy(ifr.ifr_name, if_name); if (ioctl(fd, SIOGIFINDEX, &ifr) < 0) { return -1; } /* Unassign the address to the if */ ifr6.ifr6_addr = *addr6; ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = prefix_len; if (ioctl(fd, SIOCDIFADDR, &ifr6) < 0) { return -1; } close (fd); return 0; } #define MINPACKSIZE 64 int is_addr6_available(struct in6_addr* addr6) { struct sockaddr_in6 addr; struct icmp6_hdr icmph; u_char outpack[MINPACKSIZE]; int icmp_sock; int ret; struct iovec iov; u_char packet[MINPACKSIZE]; struct msghdr msg; if ((icmp_sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1) { return -1; } memset(&icmph, 0, sizeof(icmph)); icmph.icmp6_type = ICMP6_ECHO_REQUEST; icmph.icmp6_code = 0; icmph.icmp6_cksum = 0; icmph.icmp6_seq = htons(0); icmph.icmp6_id = 0; memset(&outpack, 0, sizeof(outpack)); memcpy(&outpack, &icmph, sizeof(icmph)); memset(&addr, 0, 
sizeof(struct sockaddr_in6)); addr.sin6_family = AF_INET6; addr.sin6_port = htons(IPPROTO_ICMPV6); memcpy(&addr.sin6_addr,addr6,sizeof(struct in6_addr)); /* Only the first 8 bytes of outpack are meaningful... */ ret = sendto(icmp_sock, (char *)outpack, sizeof(outpack), 0, (struct sockaddr *) &addr, sizeof(struct sockaddr_in6)); if (0 >= ret) { return -1; } iov.iov_base = (char *)packet; iov.iov_len = sizeof(packet); msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; ret = recvmsg(icmp_sock, &msg, MSG_DONTWAIT); if (0 >= ret) { return -1; } return 0; } static void usage(const char* self) { printf("usage: %s {start|stop|status|monitor|validate-all|meta-data}\n",self); return; } /* Following code is copied from send_arp.c, linux-HA project. */ void byebye(int nsig) { (void)nsig; /* Avoid an "error exit" log message if we're killed */ exit(0); } int create_pid_directory(const char *pid_file) { int status; int return_status = -1; struct stat stat_buf; char* dir; dir = strdup(pid_file); if (!dir) { cl_log(LOG_INFO, "Memory allocation failure: %s", strerror(errno)); return -1; } dirname(dir); status = stat(dir, &stat_buf); if (status < 0 && errno != ENOENT && errno != ENOTDIR) { cl_log(LOG_INFO, "Could not stat pid-file directory " "[%s]: %s", dir, strerror(errno)); goto err; } if (!status) { if (S_ISDIR(stat_buf.st_mode)) { goto out; } cl_log(LOG_INFO, "Pid-File directory exists but is " "not a directory [%s]", dir); goto err; } if (mkdir(dir, S_IRUSR|S_IWUSR|S_IXUSR | S_IRGRP|S_IXGRP) < 0) { cl_log(LOG_INFO, "Could not create pid-file directory " "[%s]: %s", dir, strerror(errno)); goto err; } out: return_status = 0; err: free(dir); return return_status; } int write_pid_file(const char *pid_file) { int pidfilefd; char pidbuf[11]; unsigned long pid; ssize_t bytes; if (*pid_file != '/') { cl_log(LOG_INFO, "Invalid pid-file name, must begin with a " "'/' [%s]\n", pid_file); return -1; 
} if (create_pid_directory(pid_file) < 0) { return -1; } while (1) { pidfilefd = open(pid_file, O_CREAT|O_EXCL|O_RDWR, S_IRUSR|S_IWUSR); if (pidfilefd < 0) { if (errno != EEXIST) { /* Old PID file */ cl_log(LOG_INFO, "Could not open pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } } else { break; } pidfilefd = open(pid_file, O_RDONLY, S_IRUSR|S_IWUSR); if (pidfilefd < 0) { cl_log(LOG_INFO, "Could not open pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } while (1) { bytes = read(pidfilefd, pidbuf, sizeof(pidbuf)-1); if (bytes < 0) { if (errno == EINTR) { continue; } cl_log(LOG_INFO, "Could not read pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } pidbuf[bytes] = '\0'; break; } if(unlink(pid_file) < 0) { cl_log(LOG_INFO, "Could not delete pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } if (!bytes) { cl_log(LOG_INFO, "Invalid pid in pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } close(pidfilefd); pid = strtoul(pidbuf, NULL, 10); if (pid == ULONG_MAX && errno == ERANGE) { cl_log(LOG_INFO, "Invalid pid in pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } if (kill(pid, SIGKILL) < 0 && errno != ESRCH) { cl_log(LOG_INFO, "Error killing old proccess [%lu] " "from pid-file [%s]: %s", pid, pid_file, strerror(errno)); return -1; } cl_log(LOG_INFO, "Killed old send_arp process [%lu]", pid); } if (snprintf(pidbuf, sizeof(pidbuf), "%u" , getpid()) >= (int)sizeof(pidbuf)) { cl_log(LOG_INFO, "Pid too long for buffer [%u]", getpid()); return -1; } while (1) { bytes = write(pidfilefd, pidbuf, strlen(pidbuf)); if (bytes != strlen(pidbuf)) { if (bytes < 0 && errno == EINTR) { continue; } cl_log(LOG_INFO, "Could not write pid-file " "[%s]: %s", pid_file, strerror(errno)); return -1; } break; } close(pidfilefd); return 0; } static int meta_data_addr6(void) { const char* meta_data= "\n" "\n" "\n" " 1.0\n" " \n" " This script manages IPv6 alias IPv6 addresses,It can add an IP6\n" " alias, or remove 
one.\n" " \n" " Manages IPv6 aliases\n" " \n" " \n" " \n" " The IPv6 address this RA will manage \n" " \n" " IPv6 address\n" " \n" " \n" " \n" " \n" " The netmask for the interface in CIDR format. (ie, 24).\n" " The value of this parameter overwrites the value of _prefix_\n" " of ipv6addr parameter.\n" " \n" " Netmask\n" " \n" " \n" " \n" " \n" " The base network interface on which the IPv6 address will\n" " be brought online.\n" " \n" " Network interface\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" "\n"; printf("%s\n",meta_data); return OCF_SUCCESS; } ClusterLabs-resource-agents-dc69db5/heartbeat/LVM000077500000000000000000000170601203363223200220660ustar00rootroot00000000000000#!/bin/sh # # # LVM # # Description: Manages an LVM volume as an HA resource # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # This code significantly inspired by the LVM resource # in FailSafe by Lars Marowsky-Bree # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_volgrpname # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { methods=`LVM_methods` methods=`echo $methods | tr ' ' '|'` cat < 1.0 Resource script for LVM. It manages an Linux Volume Manager volume (LVM) as an HA resource. Controls the availability of an LVM Volume Group The name of volume group. Volume group name If set, the volume group will be activated exclusively. Exclusive activation If set, the volume group will be activated even only partial of the physical volumes available. 
It helps to set to true, when you are using mirroring logical volumes. Activate VG even with partial PV only EOF } # # methods: What methods/operations do we support? # LVM_methods() { cat <&1 | grep 'Volume group .* not found' >/dev/null && { ocf_log info "Volume group $1 not found" return $OCF_SUCCESS } ocf_log info "Deactivating volume group $1" ocf_run vgchange -a ln $1 || return $OCF_ERR_GENERIC if LVM_status $1 then ocf_log err "LVM: $1 did not stop correctly" return $OCF_ERR_GENERIC fi # TODO: This MUST run vgexport as well return $OCF_SUCCESS } # # Check whether the OCF instance parameters are valid # LVM_validate_all() { check_binary $AWK # Off-the-shelf tests... VGOUT=`vgck ${VOLUME} 2>&1` if [ $? -ne 0 ]; then ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi # Double-check if [ "$LVM_MAJOR" -eq "1" ] then VGOUT=`vgdisplay ${VOLUME} 2>&1` else VGOUT=`vgdisplay -v ${VOLUME} 2>&1` fi if [ $? -ne 0 ]; then ocf_log err "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}" exit $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS;; methods) LVM_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if [ -z "$OCF_RESKEY_volgrpname" ] then ocf_log err "You must identify the volume group name!" exit $OCF_ERR_CONFIGURED fi # Get the LVM version number, for this to work we assume(thanks to panjiam): # # LVM1 outputs like this # # # vgchange --version # vgchange: Logical Volume Manager 1.0.3 # Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10) # # LVM2 and higher versions output in this format # # # vgchange --version # LVM version: 2.00.15 (2004-04-19) # Library version: 1.00.09-ioctl (2004-03-31) # Driver version: 4.1.0 LVM_VERSION=`vgchange --version 2>&1 | \ $AWK '/Logical Volume Manager/ {print $5"\n"; exit; } /LVM version:/ {printf $3"\n"; exit;}'` rc=$? 
if ( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] ) then ocf_log err "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?" exit $OCF_ERR_INSTALLED fi LVM_MAJOR="${LVM_VERSION%%.*}" VOLUME=$OCF_RESKEY_volgrpname OP_METHOD=$1 # What kind of method was invoked? case "$1" in start) LVM_start $VOLUME exit $?;; stop) LVM_stop $VOLUME exit $?;; status) LVM_status $VOLUME $1 exit $?;; monitor) LVM_monitor $VOLUME exit $?;; validate-all) LVM_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/LinuxSCSI000077500000000000000000000174531203363223200232170ustar00rootroot00000000000000#!/bin/sh # # # LinuxSCSI # # Description: Enables/Disables SCSI devices to protect them from being # used by mistake # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 IBM # # CAVEATS: See the usage message for some important warnings # # usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods) # # OCF parameters are as below: # OCF_RESKEY_scsi # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 LinuxSCSI:0:0:11 # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### zeropat="[ 0]0" PROCSCSI=/proc/scsi/scsi usage() { cat < 1.0 Deprecation warning: This agent makes use of Linux SCSI hot-plug functionality which has been superseded by SCSI reservations. It is deprecated and may be removed from a future release. See the scsi2reservation and sfex agents for alternatives. -- This is a resource agent for LinuxSCSI. It manages the availability of a SCSI device from the point of view of the linux kernel. It make Linux believe the device has gone away, and it can make it come back again. 
Enables and disables SCSI devices through the kernel SCSI hot-plug subsystem (deprecated) The SCSI instance to be managed. SCSI instance If set to true, suppresses the deprecation warning for this agent. Suppress deprecation warning EOF } scsi_methods() { cat <>$PROCSCSI echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then return $OCF_SUCCESS else ocf_log err "SCSI device $1 not active!" return $OCF_ERR_GENERIC fi } # # stop: Disable the given SCSI device in the kernel # scsi_stop() { parseinst "$1" # [ $target = error ] && exit 1 echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI if scsi_status "$1" then ocf_log err "SCSI device $1 still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # scsi_status() { parseinst "$1" # [ $target = error ] && exit 1 [ $channel -eq 0 ] && channel=$zeropat [ $target -eq 0 ] && target=$zeropat [ $lun -eq 0 ] && lun=$zeropat greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun" grep -i "$greppat" $PROCSCSI >/dev/null if [ $? -eq 0 ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } # # validate_all: Check the OCF instance parameters # scsi_validate_all() { parseinst $instance return $OCF_SUCCESS } if ( [ $# -ne 1 ] ) then ocf_log err "Parameter number error." usage exit $OCF_ERR_GENERIC fi #if # [ -z "$OCF_RESKEY_scsi" ] && [ "X$1" = "Xmethods" ] #then # scsi_methods # exit #? #fi case $1 in methods) scsi_methods exit $OCF_SUCCESS ;; meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # Be obnoxious, log deprecation warning on every invocation (unless # suppressed by resource configuration). ocf_deprecated if [ -z "$OCF_RESKEY_scsi" ] then ocf_log err "You have to set a valid scsi id at least!" 
# usage exit $OCF_ERR_GENERIC fi instance=$OCF_RESKEY_scsi case $1 in start) scsi_start $instance ;; stop) scsi_stop $instance ;; status|monitor) if scsi_status $instance then ocf_log info "SCSI device $instance is running" return $OCF_SUCCESS else ocf_log info "SCSI device $instance is stopped" exit $OCF_NOT_RUNNING fi ;; validate-all) scsi_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/MailTo000077500000000000000000000076061203363223200226220ustar00rootroot00000000000000#!/bin/sh # # Resource script for MailTo # # Author: Alan Robertson # # Description: sends email to a sysadmin whenever a takeover occurs. # # Note: This command requires an argument, unlike normal init scripts. # # This can be given in the haresources file as: # # You can also give a mail subject line or even multiple addresses # MailTo::alanr@unix.sh::BigImportantWebServer # MailTo::alanr@unix.sh,spoppi@gmx.de::BigImportantWebServer # # This will then be put into the message subject and body. # # OCF parameters are as below: # OCF_RESKEY_email # OCF_RESKEY_subject # # License: GNU General Public License (GPL) # # Copyright: (C) 2005 International Business Machines ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### ARGS="$0 $*" us=`uname -n` usage() { echo "Usage: $0 {start|stop|status|monitor|meta-data|validate-all}" } meta_data() { cat < 1.0 This is a resource agent for MailTo. It sends email to a sysadmin whenever a takeover occurs. Notifies recipients by email in the event of resource takeover The email address of sysadmin. Email address The subject of the email. 
Subject END } MailProgram() { $MAILCMD -s "$1" "$email" < 1.00.2 Manages starting, stopping and monitoring of RAID devices which are preconfigured in /etc/conf.d/HB-ManageRAID. Manages RAID devices Name (case sensitive) of RAID to manage. (preconfigured in /etc/conf.d/HB-ManageRAID) RAID name END } # # start_raid() # start_raid() { declare -i retcode status_raid retcode=$? if [[ $retcode == $OCF_SUCCESS ]]; then return $OCF_SUCCESS elif [[ $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi for ldev in ${RAID_LOCALDISKS[@]}; do if [[ ! -b $ldev ]]; then ocf_log err "$ldev is not a (local) block device." return $OCF_ERR_ARGS fi done $MDADM -A $RAID_DEVPATH -a yes -u ${!RAID_UUID} ${RAID_LOCALDISKS[@]} &> /dev/null if [[ $? != 0 ]]; then ocf_log err "starting ${!RAID_DEV} with ${RAID_LOCALDISKS[@]} failed." return $OCF_ERR_GENERIC fi $MOUNT -o ${!RAID_MOUNTOPTIONS} $RAID_DEVPATH ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed as well as stopping the RAID itself." else ocf_log err "mounting ${!RAID_DEV} to ${!RAID_MOUNTPOINT} failed. RAID stopped again." fi return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # stop_raid() # stop_raid() { status_raid if [[ $? == $OCF_NOT_RUNNING ]]; then return $OCF_SUCCESS fi $UMOUNT ${!RAID_MOUNTPOINT} &> /dev/null if [[ $? != 0 ]]; then ocf_log err "unmounting ${!RAID_MOUNTPOINT} failed. not stopping ${!RAID_DEV}!" return $OCF_ERR_GENERIC fi $MDADM -S $RAID_DEVPATH &> /dev/null if [[ $? != 0 ]]; then ocf_log err "stopping RAID ${!RAID_DEV} failed." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # status_raid() # status_raid() { declare -i retcode_raidcheck declare -i retcode_uuidcheck $CAT $RAID_MDSTAT | $GREP -e "${!RAID_DEV}[\ ]*:[\ ]*active" &> /dev/null if [[ $? != 0 ]]; then return $OCF_NOT_RUNNING fi if [[ ! 
-e $RAID_DEVPATH ]]; then return $OCF_ERR_GENERIC fi $MDADM --detail -t $RAID_DEVPATH &> /dev/null retcode_raidcheck=$? $MDADM --detail -t $RAID_DEVPATH | $GREP -qEe "^[\ ]*UUID[\ ]*:[\ ]*${!RAID_UUID}" &> /dev/null retcode_uuidcheck=$? if [[ $retcode_raidcheck > 3 ]]; then ocf_log err "mdadm returned error code $retcode_raidcheck while checking ${!RAID_DEV}." return $OCF_ERR_GENERIC elif [[ $retcode_raidcheck == 3 ]]; then ocf_log err "${!RAID_DEV} has failed." return $OCF_ERR_GENERIC elif [[ $retcode_raidcheck < 3 && $retcode_uuidcheck != 0 ]]; then ocf_log err "active RAID ${!RAID_DEV} and configured UUID (!$RAID_UUID) do not match." return $OCF_ERR_GENERIC fi $MOUNT | $GREP -e "$RAID_DEVPATH on ${!RAID_MOUNTPOINT}" &> /dev/null if [[ $? != 0 ]]; then ocf_log err "${!RAID_DEV} seems to be no longer mounted at ${!RAID_MOUNTPOINT}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # validate_all_raid() # validate_all_raid() { # # since all parameters are checked every time ManageRAID is # invoked, there not much more to check... # # status_raid should cover the rest. # declare -i retcode status_ve retcode=$? if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac ## required configuration # [ -f /etc/conf.d/HB-ManageRAID ] || { ocf_log err "/etc/conf.d/HB-ManageRAID missing" exit $OCF_ERR_INSTALLED } . /etc/conf.d/HB-ManageRAID # ## # # check relevant environment variables for sanity and security # declare -i retcode_test declare -i retcode_grep $TEST -z "$OCF_RESKEY_raidname" retcode_test=$? echo "$OCF_RESKEY_raidname" | $GREP -qEe "^[[:alnum:]\_]+$" retcode_grep=$? if [[ $retcode_test != 1 || $retcode_grep != 0 ]]; then ocf_log err "OCF_RESKEY_raidname not set or invalid." 
exit $OCF_ERR_ARGS fi RAID_UUID=${OCF_RESKEY_raidname}_UUID echo ${!RAID_UUID} | $GREP -qEe "^[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}:[[:alnum:]]{8}$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_UUID is invalid." exit $OCF_ERR_ARGS fi RAID_DEV=${OCF_RESKEY_raidname}_DEV echo ${!RAID_DEV} | $GREP -qEe "^md[0-9]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_DEV is invalid." exit $OCF_ERR_ARGS fi RAID_DEVPATH=/dev/${!RAID_DEV/md/md\/} RAID_MOUNTPOINT=${OCF_RESKEY_raidname}_MOUNTPOINT echo ${!RAID_MOUNTPOINT} | $GREP -qEe "^[[:alnum:]\/\_\"\ ]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTPOINT is invalid." exit $OCF_ERR_ARGS fi RAID_MOUNTOPTIONS=${OCF_RESKEY_raidname}_MOUNTOPTIONS echo ${!RAID_MOUNTOPTIONS} | $GREP -qEe "^[[:alpha:]\,]+$" if [[ $? != 0 ]]; then ocf_log err "${OCF_RESKEY_raidname}_MOUNTOPTIONS is invalid." exit $OCF_ERR_ARGS fi RAID_LOCALDISKS=${OCF_RESKEY_raidname}_LOCALDISKS[@] RAID_LOCALDISKS=( "${!RAID_LOCALDISKS}" ) if [[ ${#RAID_LOCALDISKS[@]} < 1 ]]; then ocf_log err "you have to specify at least one local disk." exit $OCF_ERR_ARGS fi # # check that all relevant utilities are available # check_binary $MDADM check_binary $MOUNT check_binary $UMOUNT check_binary $GREP check_binary $CAT check_binary $TEST check_binary echo # # check that all relevant devices are available # check_file $RAID_MDSTAT # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_raid ;; stop) stop_raid ;; status|monitor) status_raid ;; validate-all) validate_all_raid ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/ManageVE000077500000000000000000000154021203363223200230510ustar00rootroot00000000000000#!/bin/bash # # ManageVE OCF RA. 
Manages OpenVZ Virtual Environments (VEs) # # (c) 2006-2010 Matthias Dahl, Florian Haas, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # This OCF compliant resource agent manages OpenVZ VEs and thus requires # a proper OpenVZ installation including a recent vzctl util. # # rev. 1.00.4 # # Changelog # # 21/Oct/10 1.00.4 implement migrate_from/migrate_to # 12/Sep/06 1.00.3 more cleanup # 12/Sep/06 1.00.2 fixed some logic in start_ve # general cleanup all over the place # 11/Sep/06 1.00.1 fixed some typos # 07/Sep/06 1.00.0 it's alive... muahaha... ALIVE... :-) # ### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ### # required utilities VZCTL=/usr/sbin/vzctl # # usage() # usage() { cat <<-EOF usage: $0 {start|stop|status|monitor|migrate_from|migrate_to|validate-all|usage|meta-data} EOF } # # meta_data() # meta_data() { cat < 1.00.4 This OCF compliant resource agent manages OpenVZ VEs and thus requires a proper OpenVZ installation including a recent vzctl util. 
Manages an OpenVZ Virtual Environment (VE) OpenVZ ID of virtual environment (see output of vzlist -a for all assigned IDs) OpenVZ ID of VE END } # # start_ve() # # Starts a VE, or simply logs a message if the VE is already running. # start_ve() { if status_ve; then ocf_log info "VE $VEID already running." return $OCF_SUCCESS fi ocf_run $VZCTL start $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # stop_ve() # # ATTENTION: The following code relies on vzctl's exit codes, especially: # # 0 : success # # In case any of those exit codes change, this function will need fixing. # stop_ve() { status_ve if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "VE $VEID already stopped." return $OCF_SUCCESS fi ocf_run $VZCTL stop $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, checkpoints the VE. For this # to work, vzctl must obviously create the dump file in a place which # the migration target has access to (an NFS mount, a DRBD device, # etc.). # migrate_to_ve() { if ! status_ve; then ocf_log err "VE $VEID is not running, aborting" exit $OCF_ERR_GENERIC fi ocf_run $VZCTL chkpnt $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # migrate_to_ve() # # In the process of a resource migration, restores the VE. For this to # work, vzctl must obviously have access to the dump file which was # created on the migration source (on an NFS mount, a DRBD device, # etc.). # migrate_from_ve() { ocf_run $VZCTL restore $VEID || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } # # status_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # status_ve() { declare -i retcode veexists=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $3}'` vestatus=`$VZCTL status $VEID 2>/dev/null | $AWK '{print $5}'` retcode=$? 
if [[ $retcode != 0 ]]; then ocf_log err "vzctl status $VEID returned: $retcode" exit $OCF_ERR_GENERIC fi if [[ $veexists != "exist" ]]; then ocf_log err "vzctl status $VEID returned: $VEID does not exist." return $OCF_NOT_RUNNING fi case "$vestatus" in running) return $OCF_SUCCESS ;; down) return $OCF_NOT_RUNNING ;; *) ocf_log err "vzctl status $VEID, wrong output format. (5th column: $vestatus)" exit $OCF_ERR_GENERIC ;; esac } # # validate_all_ve() # # ATTENTION: The following code relies on vzctl's status output. The fifth # column is interpreted as the VE status (either up or down). # # In case the output format should change, this function will need fixing. # validate_all_ve() { declare -i retcode # VEID should be a valid VE `status_ve` retcode=$? if [[ $retcode != $OCF_SUCCESS && $retcode != $OCF_NOT_RUNNING ]]; then return $retcode fi return $OCF_SUCCESS } if [[ $# != 1 ]]; then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac # # check relevant environment variables for sanity and security # # empty string? `test -z "$OCF_RESKEY_veid"` declare -i veidtest1=$? # really a number? `echo "$OCF_RESKEY_veid" | egrep -q '^[[:digit:]]+$'` if [[ $veidtest1 != 1 || $? != 0 ]]; then ocf_log err "OCF_RESKEY_veid not set or not a number." exit $OCF_ERR_ARGS fi declare -i VEID=$OCF_RESKEY_veid # # check that all relevant utilities are available # check_binary $VZCTL check_binary $AWK # # finally... let's see what we are ordered to do :-) # case "$1" in start) start_ve ;; stop) stop_ve ;; status|monitor) status_ve ;; migrate_to) migrate_to_ve ;; migrate_from) migrate_from_ve ;; validate-all) validate_all_ve ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
ClusterLabs-resource-agents-dc69db5/heartbeat/Pure-FTPd000077500000000000000000000126761203363223200231460ustar00rootroot00000000000000#!/bin/sh # # Resource script for Pure-FTPd # # Description: Manages Pure-FTPd as an OCF resource in # an Active-Passive High Availability setup. # # Author: Rajat Upadhyaya : Pure-FTPd script # Author: Raoul Bhatia : Minor Cleanup. Added Debian GNU/Linux Support # License: GNU General Public License (GPL) # # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg starts Pure-FTPd. # # The "stop" arg stops it. # # OCF parameters: # OCF_RESKEY_script # OCF_RESKEY_conffile # OCF_RESKEY_daemon_type # OCF_RESKEY_pidfile # ########################################################################## # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_script="/sbin/pure-config.pl"} script_basename=`basename $OCF_RESKEY_script` : ${OCF_RESKEY_conffile="/etc/pure-ftpd/pure-ftpd.conf"} : ${OCF_RESKEY_daemon_type=""} : ${OCF_RESKEY_pidfile="${HA_RSCTMP}/pure-ftpd-${OCF_RESOURCE_INSTANCE}.pid"} USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ########################################################################## usage() { echo $USAGE >&2 } meta_data() { cat < 1.0 This script manages Pure-FTPd in an Active-Passive setup Manages a Pure-FTPd FTP server instance The full path to the Pure-FTPd startup script. For example, "/sbin/pure-config.pl" Script name with full path The Pure-FTPd configuration file name with full path. For example, "/etc/pure-ftpd/pure-ftpd.conf" Configuration file name with full path The Pure-FTPd daemon to be called by pure-ftpd-wrapper. 
Valid options are "" for pure-ftpd, "mysql" for pure-ftpd-mysql, "postgresql" for pure-ftpd-postgresql and "ldap" for pure-ftpd-ldap Configuration file name with full path PID file PID file END exit $OCF_SUCCESS } isRunning() { kill -s 0 "$1" > /dev/null } PureFTPd_status() { if [ -f $OCF_RESKEY_pidfile ] then # Pure-FTPd is probably running PID=`head -n 1 $OCF_RESKEY_pidfile` if [ ! -z $PID ] ; then isRunning "$PID" && [ `ps -p $PID | grep pure-ftpd | wc -l` -eq 1 ] return $? fi fi # Pure-FTPd is not running false } PureFTPd_start() { # # make a few checks and start Pure-FTPd # if ocf_is_root ; then : ; else ocf_log err "You must be root." exit $OCF_ERR_PERM fi # if Pure-FTPd is running return success if PureFTPd_status ; then exit $OCF_SUCCESS fi # check that the Pure-FTPd script exists and can be executed if [ ! -x "$OCF_RESKEY_script" ]; then ocf_log err "Pure-FTPd script '$OCF_RESKEY_script' does not exist or cannot be executed" exit $OCF_ERR_GENERIC fi # test for pure-ftpd-wrapper (e.g. Debian GNU/Linux Systems) if [ "$script_basename" = "pure-ftpd-wrapper" ]; then # pure-ftpd-wrapper expects STANDALONE_OR_INETD to be set to standalone STANDALONE_OR_INETD=standalone $OCF_RESKEY_script $OCF_RESKEY_daemon_type else # check that the Pure-FTPd config file exist if [ ! -f "$OCF_RESKEY_conffile" ]; then ocf_log err "Pure_FTPd config file '$OCF_RESKEY_conffile' does not exist" exit $OCF_ERR_GENERIC fi $OCF_RESKEY_script $OCF_RESKEY_conffile -g $OCF_RESKEY_pidfile fi if [ $? -ne 0 ]; then ocf_log info "Pure-FTPd returned error" $? exit $OCF_ERR_GENERIC fi exit $OCF_SUCCESS } PureFTPd_stop() { if PureFTPd_status ; then PID=`head -n 1 $OCF_RESKEY_pidfile` if [ ! 
-z $PID ] ; then kill $PID fi fi exit $OCF_SUCCESS } PureFTPd_monitor() { if PureFTPd_status ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } PureFTPd_validate_all() { return $OCF_SUCCESS } # # Main # if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi case $1 in start) PureFTPd_start ;; stop) PureFTPd_stop ;; status) if PureFTPd_status then ocf_log info "Pure-FTPd is running" exit $OCF_SUCCESS else ocf_log info "Pure-FTPd is stopped" exit $OCF_NOT_RUNNING fi ;; monitor) PureFTPd_monitor exit $? ;; validate-all) PureFTPd_validate_all exit $? ;; meta-data) meta_data ;; usage) usage exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/README000066400000000000000000000013511203363223200223560ustar00rootroot00000000000000The OCF RA shared code directory If an RA is too big to be comfortably maintained, split it into several source files. Obviosuly, if two or more RAs share some code, move that code out to a file which can be shared. These files will be installed in $OCF_ROOT/lib/heartbeat with permissions 644. Naming practice Use names such as .sh or -check.sh or anything-else.sh where "anything-else" should be related to both the RA and the code it contains. By adding extension (.sh) it is going to be easier to notice that these files are not complete resource agents. For instance, oracle and oralsnr RA can both use code in ora-common.sh. Of course, if the RA is implemented in another programming language, use the appropriate extension. ClusterLabs-resource-agents-dc69db5/heartbeat/Raid1000077500000000000000000000254611203363223200223740ustar00rootroot00000000000000#!/bin/sh # # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # Raid1 # Description: Manages a Linux software RAID device on a shared storage medium. # Original Author: Eric Z. 
Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # RAID patches: http://people.redhat.com/mingo/raid-patches/ # Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3 # Sympathetic Ear: mailto:linux-raid@vger.kernel.org # # usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} # # # EXAMPLE config file /etc/raidtab.md0 # This file must exist on both machines! # # raiddev /dev/md0 # raid-level 1 # nr-raid-disks 2 # chunk-size 64k # persistent-superblock 1 # #nr-spare-disks 0 # device /dev/sda1 # raid-disk 0 # device /dev/sdb1 # raid-disk 1 # # EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf) # # DEVICE /dev/sdb1 /dev/sdc1 # ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799 ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data} EOT } meta_data() { cat < 1.0 This resource agent manages Linux software RAID (MD) devices on a shared storage medium. It uses mdadm(8) to start, stop, and monitor the MD devices. Raidtools are supported, but deprecated. See https://raid.wiki.kernel.org/index.php/Linux_Raid for more information. Manages Linux software RAID (MD) devices on shared storage The RAID configuration file, e.g. /etc/mdadm.conf. RAID config file One or more block devices to use, space separated. Alternatively, set to "auto" to manage all devices specified in raidconf. block device The value for the homehost directive; this is an mdadm feature to protect RAIDs against being activated by accident. It is recommended to create RAIDs managed by the cluster with "homehost" set to a special value, so they are not accidentially auto-assembled by nodes not supposed to own them. 
Homehost for mdadm If processes or kernel threads are using the array, it cannot be stopped. We will try to stop processes, first by sending TERM and then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL. The lsof(8) program is required to get the list of array users. Of course, the kernel threads cannot be stopped this way. If the processes are critical for data integrity, then set this parameter to false. Note that in that case the stop operation will fail and the node will be fenced. force stop processes using the array END } list_conf_arrays() { test -f $RAIDCONF || { ocf_log err "$RAIDCONF gone missing!" exit $OCF_ERR_GENERIC } grep ^ARRAY $RAIDCONF | awk '{print $2}' } forall() { local func=$1 local checkall=$2 local mddev rc=0 for mddev in $RAIDDEVS; do $func $mddev rc=$(($rc | $?)) [ "$checkall" = all ] && continue [ $rc -ne 0 ] && return $rc done return $rc } are_arrays_stopped() { local rc mddev for mddev in $RAIDDEVS; do raid1_monitor_one $mddev rc=$? [ $rc -ne $OCF_NOT_RUNNING ] && break done test $rc -eq $OCF_NOT_RUNNING } md_assemble() { local mddev=$1 $MDADM --assemble $mddev --config=$RAIDCONF $MDADM_HOMEHOST } # # START: Start up the RAID device # raid1_start() { local rc raid1_monitor rc=$? if [ $rc -eq $OCF_SUCCESS ]; then # md already online, nothing to do. return $OCF_SUCCESS fi if [ $rc -ne $OCF_NOT_RUNNING ]; then # If the array is in a broken state, this agent doesn't # know how to repair that. ocf_log err "$RAIDDEVS in a broken state; cannot start (rc=$rc)" return $OCF_ERR_GENERIC fi if [ $HAVE_RAIDTOOLS = "true" ]; then # Run raidstart to start up the RAID array $RAIDSTART --configfile $RAIDCONF $MDDEV else forall md_assemble all fi raid1_monitor if [ $? 
-eq $OCF_SUCCESS ]; then return $OCF_SUCCESS else ocf_log err "Couldn't start RAID for $RAIDDEVS" return $OCF_ERR_GENERIC fi } # # STOP: stop the RAID device # mark_readonly() { local mddev=$1 local rc ocf_log info "Attempting to mark array $mddev readonly" $MDADM --readonly $mddev --config=$RAIDCONF rc=$? if [ $rc -ne 0 ]; then ocf_log err "Failed to set $mddev readonly (rc=$rc)" fi return $rc } raid1_stop_one() { ocf_log info "Stopping array $1" $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W } get_users_pids() { local mddev=$1 local outp l ocf_log debug "running lsof to list $mddev users..." outp=`lsof $mddev | tail -n +2` echo "$outp" | awk '{print $2}' | sort -u echo "$outp" | while read l; do ocf_log warn "$l" done } stop_raid_users() { local pids pids=`forall get_users_pids all | sort -u` if [ -z "$pids" ]; then ocf_log warn "lsof reported no users holding arrays" return 2 else ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids fi } stop_arrays() { if [ $HAVE_RAIDTOOLS = "true" ]; then $RAIDSTOP --configfile $RAIDCONF $MDDEV else forall raid1_stop_one all fi } raid1_stop() { local rc # See if the MD device is already cleanly stopped: if are_arrays_stopped; then return $OCF_SUCCESS fi # Turn off raid if ! stop_arrays; then if ocf_is_true $FORCESTOP; then if have_binary lsof; then stop_raid_users case $? in 2) false;; *) stop_arrays;; esac else ocf_log warn "install lsof(8) to list users holding the disk" false fi else false fi fi rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)" if [ $HAVE_RAIDTOOLS != "true" ]; then forall mark_readonly all fi return $OCF_ERR_GENERIC fi if are_arrays_stopped; then return $OCF_SUCCESS fi ocf_log err "RAID $RAIDDEVS still active after stop command!" return $OCF_ERR_GENERIC } # # monitor: a less noisy status # raid1_monitor_one() { local mddev=$1 local md=`echo $mddev | sed 's,/dev/,,'` local rc local TRY_READD=0 local pbsize # check if the md device exists first if [ ! 
-b $mddev ]; then ocf_log info "$mddev is not a block device" return $OCF_NOT_RUNNING fi if ! grep -e "^$md[ \t:]" /proc/mdstat >/dev/null ; then ocf_log info "$md not found in /proc/mdstat" return $OCF_NOT_RUNNING fi if [ $HAVE_RAIDTOOLS != "true" ]; then $MDADM --detail --test $mddev >/dev/null 2>&1 ; rc=$? case $rc in 0) ;; 1) ocf_log warn "$mddev has at least one failed device." TRY_READD=1 ;; 2) ocf_log err "$mddev has failed." return $OCF_ERR_GENERIC ;; 4) ocf_log err "mdadm failed on $mddev." return $OCF_ERR_GENERIC ;; *) ocf_log err "mdadm returned an unknown result ($rc)." return $OCF_ERR_GENERIC ;; esac fi if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \ -a $TRY_READD -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then ocf_log info "Attempting recovery sequence to re-add devices on $mddev:" $MDADM $mddev --fail detached $MDADM $mddev --remove failed $MDADM $mddev --re-add missing # TODO: At this stage, there's nothing to actually do # here. Either this worked or it did not. fi pbsize=`(blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null` if [ -z "$pbsize" ]; then ocf_log warn "both blockdev and stat could not get the block size (will use 4k)" pbsize=4096 # try with 4k fi if ! dd if=$mddev count=1 bs=$pbsize of=/dev/null \ iflag=direct >/dev/null 2>&1 ; then ocf_log err "$mddev: I/O error on read" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } raid1_monitor() { forall raid1_monitor_one } # # STATUS: is the raid device online or offline? # raid1_status() { # See if the MD device is online local rc raid1_monitor rc=$? 
if [ $rc -ne $OCF_SUCCESS ]; then echo "stopped" else echo "running" fi return $rc } raid1_validate_all() { return $OCF_SUCCESS } PROC_CLEANUP_TIME=3 if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; *) ;; esac RAIDCONF="$OCF_RESKEY_raidconf" MDDEV="$OCF_RESKEY_raiddev" FORCESTOP="${OCF_RESKEY_force_stop:-1}" if [ -z "$RAIDCONF" ] ; then ocf_log err "Please set OCF_RESKEY_raidconf!" exit $OCF_ERR_CONFIGURED fi if [ ! -r "$RAIDCONF" ] ; then ocf_log err "Configuration file [$RAIDCONF] does not exist, or can not be opend!" exit $OCF_ERR_INSTALLED fi if [ -z "$MDDEV" ] ; then ocf_log err "Please set OCF_RESKEY_raiddev to the Raid device you want to control!" exit $OCF_ERR_CONFIGURED fi if ocf_is_true $FORCESTOP && ! have_binary lsof; then ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..." fi HAVE_RAIDTOOLS=false if have_binary $MDADM >/dev/null 2>&1 ; then if [ -n "$OCF_RESKEY_homehost" ]; then MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}" else MDADM_HOMEHOST="" fi else check_binary $RAIDSTART HAVE_RAIDTOOLS=true fi if [ $HAVE_RAIDTOOLS = true ]; then if [ "$MDDEV" = "auto" ]; then ocf_log err "autoconf supported only with mdadm!" exit $OCF_ERR_INSTALLED elif [ `echo $MDDEV|wc -w` -gt 1 ]; then ocf_log err "multiple devices supported only with mdadm!" exit $OCF_ERR_INSTALLED fi fi if [ "$MDDEV" = "auto" ]; then RAIDDEVS=`list_conf_arrays` else RAIDDEVS="$MDDEV" fi # At this stage, # [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM, # otherwise we have raidtools (raidstart and raidstop) # Look for how we are called case "$1" in start) raid1_start ;; stop) raid1_stop ;; status) raid1_status ;; monitor) raid1_monitor ;; validate-all) raid1_validate_all ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
ClusterLabs-resource-agents-dc69db5/heartbeat/Route000077500000000000000000000225271203363223200225320ustar00rootroot00000000000000#!/bin/sh # # Route OCF RA. Enables and disables network routes. # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 Enables and disables network routes. Supports host and net routes, routes via a gateway address, and routes using specific source addresses. This resource agent is useful if a node's routing table needs to be manipulated based on node role assignment. Consider the following example use case: - One cluster node serves as an IPsec tunnel endpoint. - All other nodes use the IPsec tunnel to reach hosts in a specific remote network. 
Then, here is how you would implement this scheme making use of the Route resource agent: - Configure an ipsec LSB resource. - Configure a cloned Route OCF resource. - Create an order constraint to ensure that ipsec is started before Route. - Create a colocation constraint between the ipsec and Route resources, to make sure no instance of your cloned Route resource is started on the tunnel endpoint itself. Manages network routes The destination network (or host) to be configured for the route. Specify the netmask suffix in CIDR notation (e.g. "/24"). If no suffix is given, a host route will be created. Specify "0.0.0.0/0" or "default" if you want this resource to set the system default route. Destination network The outgoing network device to use for this route. Outgoing network device The gateway IP address to use for this route. Gateway IP address The source IP address to be configured for the route. Source IP address The routing table to be configured for the route. Routing table END } ####################################################################### create_route_spec() { # Creates a route specification for use by "ip route (add|del|show)" route_spec="to ${OCF_RESKEY_destination}" if [ -n "${OCF_RESKEY_device}" ]; then route_spec="${route_spec} dev ${OCF_RESKEY_device}" fi if [ -n "${OCF_RESKEY_gateway}" ]; then route_spec="${route_spec} via ${OCF_RESKEY_gateway}" fi if [ -n "${OCF_RESKEY_source}" ]; then route_spec="${route_spec} src ${OCF_RESKEY_source}" fi if [ -n "${OCF_RESKEY_table}" ]; then route_spec="${route_spec} table ${OCF_RESKEY_table}" fi echo "$route_spec" } route_usage() { cat </dev/null 2>&1; then ocf_log error "Network device ${OCF_RESKEY_device} appears not to be available on this system." # OCF_ERR_ARGS prevents the resource from running anywhere at all, # maybe another node has the interface? # OCF_ERR_INSTALLED just prevents starting on this particular node. 
return $OCF_ERR_INSTALLED fi fi # The following tests must return $OCF_ERR_INSTALLED, but only if # the resource is actually running (i.e., not during probes) if ! ocf_is_probe; then # If a source address has been configured, is it available on # this system? if [ -n "${OCF_RESKEY_source}" ]; then if ! ip address show | grep -w ${OCF_RESKEY_source} >/dev/null 2>&1; then ocf_log error "Source address ${OCF_RESKEY_source} appears not to be available on this system." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi # If a gateway address has been configured, is it reachable? if [ -n "${OCF_RESKEY_gateway}" ]; then if ! ip route get ${OCF_RESKEY_gateway} >/dev/null 2>&1; then ocf_log error "Gateway address ${OCF_RESKEY_gateway} is unreachable." # same reason as with _device: return $OCF_ERR_INSTALLED fi fi fi return $OCF_SUCCESS } # These two actions must always succeed case $__OCF_ACTION in meta-data) meta_data # OCF variables are not set when querying meta-data exit 0 ;; usage|help) route_usage exit $OCF_SUCCESS ;; esac # Don't do anything if the necessary utilities aren't present for binary in ip grep; do check_binary $binary done route_validate || exit $? case $__OCF_ACTION in start) route_start;; stop) route_stop;; status|monitor) route_status;; reload) ocf_log info "Reloading..." route_start ;; validate-all) ;; *) route_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc" exit $rc ClusterLabs-resource-agents-dc69db5/heartbeat/SAPDatabase000077500000000000000000000323431203363223200235010ustar00rootroot00000000000000#!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. 
# # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_SID # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) # OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) # OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # Deprecated parameters: # OCF_RESKEY_NETSERVICENAME # OCF_RESKEY_DBJ2EE_ONLY # OCF_RESKEY_JAVA_HOME # OCF_RESKEY_DIR_BOOTSTRAP # OCF_RESKEY_DIR_SECSTORE # OCF_RESKEY_DB_JARS # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh usage() { methods=`sapdatabase_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages a SAP database of any type as an HA resource. Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. 
ABAP databases as well as JAVA only databases are supported. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 2.06 Manages a SAP database instance as an HA resource. Resource script for SAP databases. It manages a SAP database of any type as an HA resource. The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. The resource agent supports the following databases: - Oracle 10.2 and 11.2 - DB/2 UDB for Windows and Unix 9.x - SAP-DB / MaxDB 7.x - Sybase ASE 15.7 - SAP HANA Database since 1.00 - with SAP node 1625203 (http://sdn.sap.com) In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. Please follow SAP note 1031096 for the installation of SAPHostAgent. The required minimum version of SAPHostAgent is: Release: 7.20 Patch Number: 90 or compile time after: Dec 17 2011 The unique database system identifier. e.g. P01 Database system ID The full qualified path where to find saphostexec and saphostctrl. Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used. 
path of saphostexec and saphostctrl The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB database vendor Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) Database instance name, if not equal to SID Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. Activates application level monitoring If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. Enable or disable automatic startup recovery Defines which services are monitored by the SAPDatabase resource agent, if STRICT_MONITORING is set to true. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. Database services to monitor Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore The full qualified path where to find a script or program which should be executed before this resource gets started. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. 
path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. path to a post-start script END } # # methods: What methods/operations do we support? # sapdatabase_methods() { cat <<-! start stop status monitor recover validate-all methods meta-data usage ! } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { NAME="$1" VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi return $OCF_SUCCESS } # # saphostctrl_installed # saphostctrl_installed() { OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" : ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl" SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec" SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv" SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol" have_binary $SAPHOSTCTRL && have_binary $SAPHOSTEXEC } # # 'main' starts here... 
# if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) sapdatabase_methods exit $?;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # mandatory parameter check if [ -z "$OCF_RESKEY_SID" ]; then ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" exit $OCF_ERR_ARGS fi SID=`echo "$OCF_RESKEY_SID"` if [ -z "$OCF_RESKEY_DBTYPE" ]; then ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!" exit $OCF_ERR_ARGS fi DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'` # source functions and initialize global variables if saphostctrl_installed; then . ${OCF_FUNCTIONS_DIR}/sapdb.sh else . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh fi sapdatabase_init # we always want to fall to the faster status method in case of a probe by the cluster ACTION=$1 if ocf_is_probe then ACTION=status fi # What kind of method was invoked? case "$ACTION" in start|stop|status|recover) sapdatabase_$ACTION exit $?;; monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING exit $?;; validate-all) sapdatabase_validate exit $?;; *) sapdatabase_methods exit $OCF_ERR_UNIMPLEMENTED;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/SAPInstance000077500000000000000000001024671203363223200235460ustar00rootroot00000000000000#!/bin/sh # # SAPInstance # # Description: Manages a single SAP Instance as a High-Availability # resource. One SAP Instance is defined by one # SAP Instance-Profile. start/stop handels all services # of the START-Profile, status and monitor care only # about essential services. # # Author: Alexander Krauth, June 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006-2008 Alexander Krauth # # An example usage: # See usage() function below for more details... 
# # OCF instance parameters: # OCF_RESKEY_InstanceName # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only) # OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk) # OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Master/Slave configuration) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # # TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status) # - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque) # - Option for cleanup abandoned enqueue replication tables # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh sapinstance_usage() { methods=`sapinstance_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages a SAP Instance as an HA resource. The 'start' operation starts the instance or the ERS instance in a Master/Slave configuration The 'stop' operation stops the instance The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'promote' operation starts the primary instance in a Master/Slave configuration The 'demote' operation stops the primary instance and starts the ERS instance The 'notify' operation always returns SUCCESS The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } sapinstance_meta_data() { cat < 2.13 Manages a SAP instance as an HA resource. Usually a SAP system consists of one database and at least one or more SAP instances (sometimes called application servers). One SAP Instance is defined by having exactly one instance profile. The instance profiles can usually be found in the directory /sapmnt/SID/profile. Each instance must be configured as it's own resource in the cluster configuration. The resource agent supports the following SAP versions: - SAP WebAS ABAP Release 6.20 - 7.30 - SAP WebAS Java Release 6.40 - 7.30 - SAP WebAS ABAP + Java Add-In Release 6.20 - 7.30 (Java is not monitored by the cluster in that case) When using a SAP Kernel 6.40 please check and implement the actions from the section "Manual postprocessing" from SAP note 995116 (http://sdn.sap.com). All operations of the SAPInstance resource agent are done by using the startup framework called SAP Management Console or sapstartsrv that was introduced with SAP kernel release 6.40. Find more information about the SAP Management Console in SAP note 1014480. 
Using this framework defines a clear interface for the Heartbeat cluster, how it sees the SAP system. The options for monitoring the SAP system are also much better than other methods like just watching the ps command for running processes or doing some pings to the application. sapstartsrv uses SOAP messages to request the status of running SAP processes. Therefore it can actually ask a process itself what it's status is, independent from other problems that might exist at the same time. sapstartsrv knows 4 status colours: - GREEN = everything is fine - YELLOW = something is wrong, but the service is still working - RED = the service does not work - GRAY = the service has not been started The SAPInstance resource agent will interpret GREEN and YELLOW as OK. That means that minor problems will not be reported to the Heartbeat cluster. This prevents the cluster from doing an unwanted failover. The statuses RED and GRAY are reported as NOT_RUNNING to the cluster. Depending on the status the cluster expects from the resource, it will do a restart, failover or just nothing. The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci. Usually this is the name of the SAP instance profile. Instance name: SID_INSTANCE_VIR-HOSTNAME The full qualified path where to find sapstartsrv and sapcontrol. Specify this parameter, if you have changed the SAP kernel directory location after the default SAP installation. Path of sapstartsrv and sapcontrol The full qualified path where to find the SAP START profile. Specify this parameter, if you have changed the SAP profile directory location after the default SAP installation. Path of start profile The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. 
Start profile name After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start ishandled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.Usually the resource agent waits until all services are started and the SAP Management Console reports a GREEN status. A double stack installation (ABAP + Java AddIn) consists of an ABAP dispatcher and aJAVA instance. Normally the start of the JAVA instance takes much longer than the start of the ABAP instance. For a JAVA Instance you may need to configure a much higher timeout for the start operation of the resource in Heartbeat. The disadvantage here is, that the discovery of a failed start by the cluster takes longer. Somebody might say: For me it is important, that the ABAP instance is up and running. A failure of the JAVA instance shall not cause a failover of the SAP instance. Actually the SAP MC reports a YELLOW status, if the JAVA instance of a double stack system fails. From the resource agent point of view YELLOW means:everything is OK. Setting START_WAITTIME to a lower value determines the resource agent to check the status of the instance during a start operation after that time. As it would wait normally for a GREEN status, now it reports SUCCESS to the cluster in case of a YELLOW status already after the specified time. That is only useful for double stack systems. Check the successful start after that time (do not wait for J2EE-Addin) The SAPInstance resource agent tries to recover a failed start attempt automaticaly one time. This is done by killing runing instance processes, removing the kill.sap file and executing cleanipc. Sometimes a crashed SAP instance leaves some processes and/or shared memory segments behind. Setting this option to true will try to remove those leftovers during a start operation. That is to reduce manual work for the administrator. 
Enable or disable automatic startup recovery Within a SAP instance there can be several services. Usually you will find the defined services in the START profile of the related instance (Attention: with SAP Release 7.10 the START profile content was moved to the instance profile). Not all of those services are worth to monitor by the cluster. For example you properly do not like to failover your SAP instance, if the central syslog collector daemon fails. Those services are monitored within the SAPInstance resource agent: - disp+work - msg_server - enserver - enrepserver - jcontrol - jstart That names match the strings used in the output of the command 'sapcontrol -nr [Instance-Nr] -function GetProcessList'. The default should fit most cases where you want to manage a SAP Instance from the cluster. You may change this with this parameter, if you like to monitor more/less or other services that sapstartsrv supports. You may specify multiple services seperated by a | (pipe) sign in this parameter: disp+work|msg_server|enserver Services to monitor Usual a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the gracefull stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL) Only used in a Master/Slave resource configuration: The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. The enqueue replication instance must be installed, before you want to configure a master-slave cluster recource. 
The master-slave configuration in the cluster must use this properties: clone_max = 2 clone_node_max = 1 master_node_max = 1 master_max = 1 Enqueue replication instance name: SID_INSTANCE_VIR-HOSTNAME Only used in a Master/Slave resource configuration: The parameter ERS_InstanceName must also be set in this configuration. The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than. Enqueue replication start profile name The full qualified path where to find a script or program which should be executed before this resource gets started. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. Path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. Path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. Path to a post-start script END } # # methods: What methods/operations do we support? # sapinstance_methods() { cat <<-! start stop status monitor promote demote notify validate-all methods meta-data usage ! } # # is_clone : find out if we are configured to run in a Master/Slave configuration # is_clone() { if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] \ && [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ] then if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] || \ [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] || \ [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ] then ocf_log err "Clone options misconfigured. 
(expect: clone_max=2,clone_node_max=1,master_node_max=1,master_max=1)" exit $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_ERS_InstanceName" ] then ocf_log err "In a Master/Slave configuration the ERS_InstanceName parameter is mandatory." exit $OCF_ERR_ARGS fi else return 0 fi return 1 } # # abnormal_end : essential things are missing, but in the natur of a SAP installation - which can be very different # from customer to customer - we cannot handle this always as an error # This would be the case, if the software is installed on shared disks and not visible # to all cluster nodes at all times. # abnormal_end() { err_msg=$1 ocf_is_probe && { sapinstance_status exit $? } if [ "$ACTION" = "stop" ] then cleanup_instance exit $OCF_SUCCESS fi ocf_log err $err_msg exit $OCF_ERR_CONFIGURED } # # sapinstance_init : Define global variables with default values, if optional parameters are not set # # sapinstance_init() { myInstanceName="$1" SID=`echo "$myInstanceName" | cut -d_ -f1` InstanceName=`echo "$myInstanceName" | cut -d_ -f2` InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'` SAPVIRHOST=`echo "$myInstanceName" | cut -d_ -f3` # optional OCF parameters, we try to guess which directories are correct if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] then if have_binary /usr/sap/$SID/$InstanceName/exe/sapstartsrv && have_binary /usr/sap/$SID/$InstanceName/exe/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv" SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol" elif have_binary /usr/sap/$SID/SYS/exe/run/sapstartsrv && have_binary /usr/sap/$SID/SYS/exe/run/sapcontrol then DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run" SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv" SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol" fi else if have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" && have_binary "$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" then DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE" 
SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv" SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol" fi fi sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm" [ -z "$DIR_EXECUTABLE" ] && abnormal_end "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!" if [ -z "$OCF_RESKEY_DIR_PROFILE" ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" fi if [ "$myInstanceName" != "$OCF_RESKEY_InstanceName" ] then currentSTART_PROFILE=$OCF_RESKEY_ERS_START_PROFILE else currentSTART_PROFILE=$OCF_RESKEY_START_PROFILE fi if [ -z "$currentSTART_PROFILE" ] then SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" else SAPSTARTPROFILE="$currentSTART_PROFILE" fi if [ -z "$OCF_RESKEY_START_WAITTIME" ] then export OCF_RESKEY_START_WAITTIME=3600 fi if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] then export OCF_RESKEY_MONITOR_SERVICES="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart" fi # as root user we need the library path to the SAP kernel to be able to call sapcontrol if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH export LD_LIBRARY_PATH fi return $OCF_SUCCESS } # # check_sapstartsrv : Before using sapcontrol we make sure that the sapstartsrv is running for the correct instance. # We cannot use sapinit and the /usr/sap/sapservices file in case of an enquerep instance, # because then we have two instances with the same instance number. # check_sapstartsrv() { restart=0 runninginst="" chkrc=$OCF_SUCCESS output=`$SAPCONTROL -nr $InstanceNr -function ParameterValue INSTANCE_NAME -format script` if [ $? 
-eq 0 ] then runninginst=`echo "$output" | grep '^0 : ' | cut -d' ' -f3` if [ "$runninginst" != "$InstanceName" ] then ocf_log warn "sapstartsrv is running for instance $runninginst, that service will be killed" restart=1 fi else ocf_log warn "sapstartsrv is not running for instance $SID-$InstanceName, it will be started now" restart=1 fi if [ -z "$runninginst" ]; then runninginst=$InstanceName; fi if [ $restart -eq 1 ] then if [ -d /usr/sap/$SID/SYS/profile/ ] then DIR_PROFILE="/usr/sap/$SID/SYS/profile" else abnormal_end "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!" fi [ ! -r $SAPSTARTPROFILE ] && abnormal_end "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!" pkill -9 -f "sapstartsrv.*$runninginst" $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm # now make sure the daemon has been started and is able to respond srvrc=1 while [ $srvrc -eq 1 -a `pgrep -f "sapstartsrv.*$runninginst" | wc -l` -gt 0 ] do sleep 1 $SAPCONTROL -nr $InstanceNr -function GetProcessList > /dev/null 2>&1 srvrc=$? done if [ $srvrc -ne 1 ] then ocf_log info "sapstartsrv for instance $SID-$InstanceName was restarted !" chkrc=$OCF_SUCCESS else ocf_log error "sapstartsrv for instance $SID-$InstanceName could not be started!" chkrc=$OCF_ERR_GENERIC ocf_is_probe && chkrc=$OCF_NOT_RUNNING fi fi return $chkrc } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. 
#
sapuserexit() {
  # $1 = name of the user-exit attribute (used for logging only)
  # $2 = path of the customer script; nothing happens when it is empty
  # Always returns 0: the exit code of the customer script is logged, never propagated.
  NAME="$1"
  VALUE="$2"

  if [ -n "$VALUE" ]
  then
    if have_binary "$VALUE"
    then
      ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
      "$VALUE" >/dev/null 2>&1
      # $? below still holds the exit status of the customer script
      ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?"
    else
      ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
    fi
  fi
  return 0
}


#
# cleanup_instance : remove resources (processes and shared memory) from a crashed instance
#                    Uses the globals $sidadm, $SID, $InstanceName and $InstanceNr.
#                    Always returns 0.
#
cleanup_instance() {
  pkill -9 -f -U $sidadm $InstanceName
  ocf_log info "Terminated instance using 'pkill -9 -f -U $sidadm $InstanceName'"

  # it is necessary to call cleanipc as user sidadm if the system has 'vmcj/enable = ON' set -
  # otherwise SHM-segments in /dev/shm/SAP_ES2* cannot be removed
  su - $sidadm -c "cleanipc $InstanceNr remove"
  ocf_log info "Tried to remove shared memory resources using 'cleanipc $InstanceNr remove' as user $sidadm"

  # kill.sap is what sapinstance_status reads; remove it so that a later status check
  # does not pick up stale PIDs
  if [ -f "/usr/sap/$SID/$InstanceName/work/kill.sap" ]
  then
    rm -f "/usr/sap/$SID/$InstanceName/work/kill.sap"
    ocf_log info "Deleted /usr/sap/$SID/$InstanceName/work/kill.sap"
  fi

  return 0
}


#
# sapinstance_start : Start the SAP instance
#                     Runs the PRE_START user exit, makes sure sapstartsrv answers, issues
#                     'sapcontrol -function Start' and waits with 'WaitforStarted'.
#                     The outer loop runs at most twice: a second pass only happens when the
#                     first wait failed before START_WAITTIME elapsed and AUTOMATIC_RECOVER is true.
#                     Returns OCF_SUCCESS, OCF_ERR_GENERIC (start command failed) or OCF_NOT_RUNNING.
#
sapinstance_start() {

  sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"

  rc=$OCF_NOT_RUNNING
  loopcount=0

  while [ $loopcount -lt 2 ]
  do
    loopcount=$(($loopcount + 1))

    check_sapstartsrv
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
      output=`$SAPCONTROL -nr $InstanceNr -function Start`
      rc=$?
      ocf_log info "Starting SAP Instance $SID-$InstanceName: $output"
    fi

    if [ $rc -ne 0 ]
    then
      ocf_log err "SAP Instance $SID-$InstanceName start failed."
      return $OCF_ERR_GENERIC
    fi

    # startrc > 0 : keep waiting, 0 : started, -1 : give up this pass
    startrc=1
    while [ $startrc -gt 0 ]
    do
      waittime_start=`date +%s`
      output=`$SAPCONTROL -nr $InstanceNr -function WaitforStarted $OCF_RESKEY_START_WAITTIME 10`
      startrc=$?
      waittime_stop=`date +%s`

      if [ $startrc -ne 0 ]
      then
        if [ $(($waittime_stop - $waittime_start)) -ge $OCF_RESKEY_START_WAITTIME ]
        then
          # the wait ran into its timeout: let the monitor decide whether the instance is usable
          sapinstance_monitor NOLOG
          if [ $? -eq $OCF_SUCCESS ]
          then
            output="START_WAITTIME ($OCF_RESKEY_START_WAITTIME) has elapsed, but instance monitor returned SUCCESS. Instance considered running."
            startrc=0; loopcount=2
          fi
        else
          # the wait failed before the timeout: optionally clean up and try once more
          if [ $loopcount -eq 1 ] && ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER
          then
            ocf_log warn "SAP Instance $SID-$InstanceName start failed: $output"
            ocf_log warn "Try to recover $SID-$InstanceName"
            cleanup_instance
          else
            loopcount=2
          fi
          startrc=-1
        fi
      else
        loopcount=2
      fi
    done
  done

  if [ $startrc -eq 0 ]
  then
    ocf_log info "SAP Instance $SID-$InstanceName started: $output"
    rc=$OCF_SUCCESS
    sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
  else
    ocf_log err "SAP Instance $SID-$InstanceName start failed: $output"
    rc=$OCF_NOT_RUNNING
  fi

  return $rc
}


#
# sapinstance_recover: Try startup of failed instance by cleaning up resources
#                      Returns the result of sapinstance_start.
#
sapinstance_recover() {
  cleanup_instance
  sapinstance_start
  return $?
}


#
# sapinstance_stop: Stop the SAP instance
#                   With SHUTDOWN_METHOD=KILL the instance is only cleaned up and OCF_SUCCESS is
#                   returned. Otherwise 'sapcontrol -function Stop' is issued and the result of
#                   'WaitforStopped' decides between OCF_SUCCESS and OCF_ERR_GENERIC.
#                   The POST_STOP user exit is called in both cases of the regular path.
#
sapinstance_stop() {
  sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

  if [ "$OCF_RESKEY_SHUTDOWN_METHOD" = "KILL" ]
  then
    ocf_log info "Stopping SAP Instance $SID-$InstanceName with shutdown method KILL!"
    cleanup_instance
    return $OCF_SUCCESS
  fi

  check_sapstartsrv
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    output=`$SAPCONTROL -nr $InstanceNr -function Stop`
    rc=$?
    ocf_log info "Stopping SAP Instance $SID-$InstanceName: $output"
  fi

  # Test the saved return code, not $?: at this point $? would be the exit status of the
  # ocf_log call above (or of the 'if' itself), which hid failures of check_sapstartsrv
  # and of the Stop command.
  if [ $rc -eq 0 ]
  then
    output=`$SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1`
    if [ $? -eq 0 ]
    then
      ocf_log info "SAP Instance $SID-$InstanceName stopped: $output"
      rc=$OCF_SUCCESS
    else
      ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
      rc=$OCF_ERR_GENERIC
    fi
  else
    ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
    rc=$OCF_ERR_GENERIC
  fi

  sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

  return $rc
}


#
# sapinstance_monitor: Can the given SAP instance do anything useful?
#                      $1 = "NOLOG" suppresses the error log messages of this function.
#                      Returns the result of check_sapstartsrv if that fails, OCF_NOT_RUNNING if
#                      a monitored service is not GREEN/YELLOW, and - when no monitored service
#                      was found at all - OCF_NOT_RUNNING during a probe, OCF_ERR_GENERIC otherwise.
#
sapinstance_monitor() {
  MONLOG=$1
  check_sapstartsrv
  rc=$?

  if [ $rc -eq $OCF_SUCCESS ]
  then
    count=0
    output=`$SAPCONTROL -nr $InstanceNr -function GetProcessList -format script`

    # we have to parse the output, because the returncode doesn't tell anything about the instance status
    for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u`
    do
      COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3`
      SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3`
      STATE=0

      case $COLOR in
        GREEN|YELLOW)       STATE=$OCF_SUCCESS;;
        *)                  STATE=$OCF_NOT_RUNNING;;
      esac

      # MONITOR_SERVICES is an egrep alternation; escape '+' and '.' so that they match literally
      SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
      if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
      then
        if [ $STATE -eq $OCF_NOT_RUNNING ]
        then
          [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !"
          rc=$STATE
        fi
        count=1
      fi
    done

    if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
    then
      if ocf_is_probe
      then
        rc=$OCF_NOT_RUNNING
      else
        [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!"
        rc=$OCF_ERR_GENERIC
      fi
    fi
  fi

  return $rc
}


#
# sapinstance_status: Lightweight check of SAP instance only with OS tools
#                     Reads the PIDs from the instance's kill.sap file and returns OCF_SUCCESS as
#                     soon as one of them belongs to a process of $sidadm matching $InstanceName,
#                     OCF_NOT_RUNNING otherwise (also when kill.sap does not exist).
#
sapinstance_status() {
  [ ! -f "/usr/sap/$SID/$InstanceName/work/kill.sap" ] && return $OCF_NOT_RUNNING
  pids=`grep '^kill -[0-9]' "/usr/sap/$SID/$InstanceName/work/kill.sap" | awk '{print $3}'`
  for pid in $pids
  do
    # -x: compare whole lines, so that PID 12 does not match 123 or 4120
    [ `pgrep -f -U $sidadm $InstanceName | grep -cx "$pid"` -gt 0 ] && return $OCF_SUCCESS
  done
  return $OCF_NOT_RUNNING
}


#
# sapinstance_validate: Check the semantics of the input parameters
#                       Checks $SID, $InstanceName, $InstanceNr and $SAPVIRHOST against fixed
#                       patterns. Returns OCF_SUCCESS, or OCF_ERR_ARGS if any check failed.
#
sapinstance_validate() {
  rc=$OCF_SUCCESS
  if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$SID' is not a valid system ID!"
    rc=$OCF_ERR_ARGS
  fi

  if [ `echo "$InstanceName" | grep -c '^[A-Z].*[0-9][0-9]$'` -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$InstanceName' is not a valid instance name!"
    rc=$OCF_ERR_ARGS
  fi

  if [ `echo "$InstanceNr" | grep -c '^[0-9][0-9]$'` -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
    rc=$OCF_ERR_ARGS
  fi

  if [ `echo "$SAPVIRHOST" | grep -c '^[A-Za-z][A-Za-z0-9_-]*$'` -ne 1 ]
  then
    ocf_log err "Parsing instance profile name: '$SAPVIRHOST' is not a valid hostname!"
    rc=$OCF_ERR_ARGS
  fi

  return $rc
}


#
# sapinstance_start_clone: start the ERS instance and set the master preference to 50
#
sapinstance_start_clone() {
  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  ${HA_SBIN_DIR}/crm_master -v 50 -l reboot
  sapinstance_start
  return $?
}


#
# sapinstance_stop_clone: stop the ERS instance and set the master preference to 0
#
sapinstance_stop_clone() {
  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  ${HA_SBIN_DIR}/crm_master -v 0 -l reboot
  sapinstance_stop
  return $?
}


#
# sapinstance_monitor_clone: monitor in a Master/Slave configuration
#                            Returns OCF_RUNNING_MASTER or OCF_FAILED_MASTER when the instance
#                            named by InstanceName is found by sapinstance_status, otherwise the
#                            combined result of status and monitor for the ERS instance.
#
sapinstance_monitor_clone() {
  # first check with the status function (OS tools) if there could be something like a SAP instance running
  # as we do not know here, if we are in master or slave state we do not want to start our monitoring
  # agents (sapstartsrv) on the wrong host

  sapinstance_init $OCF_RESKEY_InstanceName
  if sapinstance_status; then
    if sapinstance_monitor; then
      ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
      return $OCF_RUNNING_MASTER
    fi
    # by nature of the SAP enqueue server we have to make sure
    # that we do a failover to the slave (enqueue replication server)
    # in case the enqueue process has failed. We signal this to the
    # cluster by setting our master preference to a lower value than the slave.
    ${HA_SBIN_DIR}/crm_master -v 10 -l reboot
    return $OCF_FAILED_MASTER
  fi

  sapinstance_init $OCF_RESKEY_ERS_InstanceName
  sapinstance_status && sapinstance_monitor
  rc=$?
  if [ $rc -eq $OCF_SUCCESS ]; then
    ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
  fi
  return $rc
}


#
# sapinstance_promote_clone: In a Master/Slave configuration get Master by starting the SCS instance and stopping the ERS instance
#                            The order is important here to behave correct from the application levels view
#
sapinstance_promote_clone() {
  sapinstance_init $OCF_RESKEY_InstanceName
  ocf_log info "Promoting $SID-$InstanceName to running Master."
  sapinstance_start
  rc=$?

  # the ERS instance is only stopped after the SCS instance has started successfully
  if [ $rc -eq $OCF_SUCCESS ]; then
    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_stop
    rc=$?
  fi

  return $rc
}


#
# sapinstance_demote_clone: In a Master/Slave configuration get Slave by stopping the SCS instance and starting the ERS instance
#
sapinstance_demote_clone() {
  sapinstance_init $OCF_RESKEY_InstanceName
  ocf_log info "Demoting $SID-$InstanceName to a slave."
  sapinstance_stop
  rc=$?

  # the ERS instance is only started after the SCS instance has stopped successfully
  if [ $rc -eq $OCF_SUCCESS ]; then
    sapinstance_init $OCF_RESKEY_ERS_InstanceName
    sapinstance_start
    rc=$?
  fi

  return $rc
}


#
# sapinstance_notify: Handle master scoring - to make sure a slave gets the next master
#                     Reacts on the notifications post_promote and pre_demote only.
#
sapinstance_notify() {
  local n_type="$OCF_RESKEY_CRM_meta_notify_type"
  local n_op="$OCF_RESKEY_CRM_meta_notify_operation"

  if [ "${n_type}_${n_op}" = "post_promote" ]; then
    # After promotion of one master in the cluster, we make sure that all clones reset their master
    # value back to 100. This is because a failed monitor on a master might have degraded one clone
    # instance to score 10.
    ${HA_SBIN_DIR}/crm_master -v 100 -l reboot
  elif [ "${n_type}_${n_op}" = "pre_demote" ]; then
    # if we are a slave and a demote event is announced, make sure we have the highest wish to become master
    # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down)
    # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY
    local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname"
    # quoted: an empty or multi-word value must not break the test expression
    if [ "${n_uname}" != "${HOSTNAME}" ]; then
      ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot
    fi
  fi
}


#
#       'main' starts here...
#

## GLOBALS
SID=""
sidadm=""
InstanceName=""
InstanceNr=""
SAPVIRHOST=""
DIR_EXECUTABLE=""
SAPSTARTSRV=""
SAPCONTROL=""
DIR_PROFILE=""
SAPSTARTPROFILE=""
CLONE=0


# exactly one argument (the action) is expected
if
  ( [ $# -ne 1 ] )
then
  sapinstance_usage
  exit $OCF_ERR_ARGS
fi

ACTION=$1
if [ "$ACTION" = "status" ]; then
  ACTION=monitor
fi

# These operations don't require OCF instance parameters to be set
case "$ACTION" in
  usage|methods)                sapinstance_$ACTION
                                exit $OCF_SUCCESS;;
  meta-data)                    sapinstance_meta_data
                                exit $OCF_SUCCESS;;
  notify)                       sapinstance_notify
                                exit $OCF_SUCCESS;;
  *);;
esac

if ! ocf_is_root
then
  ocf_log err "$0 must be run as root"
  exit $OCF_ERR_PERM
fi

# parameter check
if  [ -z "$OCF_RESKEY_InstanceName" ]
then
  ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!"
  exit $OCF_ERR_ARGS
fi

# is_clone returns 1 in a Master/Slave configuration; the *_clone functions call
# sapinstance_init themselves, so it is only called here for the plain configuration
is_clone; CLONE=$?
if [ ${CLONE} -eq 1 ]
then
  CLACT=_clone
else
  if [ "$ACTION" = "promote" -o "$ACTION" = "demote" ]
  then
    ocf_log err "$ACTION called in a non master/slave environment"
    exit $OCF_ERR_ARGS
  fi
  sapinstance_init $OCF_RESKEY_InstanceName
fi

# What kind of method was invoked?
case "$ACTION" in
  start|stop|monitor|promote|demote)      sapinstance_$ACTION$CLACT
                                          exit $?;;
  validate-all)                           sapinstance_validate
                                          exit $?;;
  *)                                      sapinstance_methods
                                          exit $OCF_ERR_UNIMPLEMENTED;;
esac
ClusterLabs-resource-agents-dc69db5/heartbeat/SendArp000077500000000000000000000145201203363223200227620ustar00rootroot00000000000000#!/bin/sh
#
#
# Copyright (c) 2006, Huang Zhen
#      Converting original heartbeat RA to OCF RA.
# # Copyright (C) 2004 Horms # # Based on IPaddr2: Copyright (C) 2003 Tuomo Soini # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # # This script send out gratuitous Arp for an IP address # # It can be used _instead_ of the IPaddr2 or IPaddr resource # to send gratuitous arp for an IP address on a given interface, # without adding the address to that interface. I.e. if for # some reason you want to send gratuitous arp for addresses # managed by IPaddr2 or IPaddr on an additional interface. # # OCF parameters are as below: # OCF_RESKEY_ip # OCF_RESKEY_nic # # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs SENDARP=$HA_BIN/send_arp SENDARPPIDDIR=${HA_RSCTMP} BASEIP="$OCF_RESKEY_ip" INTERFACE="$OCF_RESKEY_nic" RESIDUAL="" SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP" # Set default values : ${ARP_INTERVAL_MS=200} # milliseconds between ARPs : ${ARP_REPEAT=5} # repeat count : ${ARP_BACKGROUND=yes} # no to run in foreground : ${ARP_NETMASK=ffffffffffff} # netmask for ARP ####################################################################### sendarp_meta_data() { cat < 1.0 This RA can be used _instead_ of the IPaddr2 or IPaddr RA to send gratuitous ARP for an IP address on a given interface, without adding the address to that interface. For example, if for some resaon you wanted to send gratuitous ARP for addresses managed by IPaddr2 or IPaddr on an additional interface. Broadcasts unsolicited ARP announcements The IP address for sending ARP packet. IP address The NIC for sending ARP packet. NIC END } ####################################################################### sendarp_usage() { cat < 1.0 Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups. Enables and disables shared ServeRAID merge groups The adapter number of the ServeRAID adapter. serveraid The logical drive under consideration. mergegroup END } ServeRAID_methods() { cat <<-! start stop status validate-all methods usage meta-data ! } ServeRAIDSCSI="/proc/scsi/ips" IPS=ipssend proc_scsi=/proc/scsi/scsi parseinst() { sr_adapter=error sr_mergegroup=error hostid=error sr_logicaldrivenumber=error if [ $# -ne 2 ] then ocf_log err "Invalid ServeRAID instance: $*" exit $OCF_ERR_ARGS fi PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/' # Get the list of host ids of the ServeRAID host adapters hostlist=`$PERL -ne "${PerlScript}" <$proc_scsi` # Figure the host id of the desired ServeRAID adapter hostid=`echo $hostlist | cut -d' ' -f$1` if [ ! 
-f "$ServeRAIDSCSI/$hostid" ] then ocf_log err "No such ServeRAID adapter: $1" exit $OCF_ERR_ARGS fi case $2 in [1-8]);; *) ocf_log err "Invalid Shared Merge Group Number: $2" exit $OCF_ERR_ARGS;; esac sr_adapter=$1 sr_mergegroup=$2 CheckRaidLevel return $? } SRLogicalDriveConfig() { $IPS getconfig $sr_adapter ld } MergeGroupToSCSI_ID() { PerlScript="while (<>) { /logical drive number *([0-9]+)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\"; }" ID=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $ID in [0-9]*) echo "$ID"; return 0;; *) return 1;; esac } MergeGroupRaidLevel() { PerlScript="while (<>) { /RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1); /part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\"; }" Level=`SRLogicalDriveConfig | $PERL -e "$PerlScript"` case $Level in ?*) echo "$Level"; return 0;; *) return 1;; esac } CheckRaidLevel() { RAIDlevel=`MergeGroupRaidLevel` case $RAIDlevel in *5*) ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel" ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware." exit $OCF_ERR_GENERIC;; esac return $OCF_SUCCESS } ReleaseSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}remove-single-device $hostid 0 $targetid 0" > $proc_scsi } AddSCSI() { targetid=`MergeGroupToSCSI_ID` echo "${SCSI}add-single-device $hostid 0 $targetid 0" > $proc_scsi } # # start: Enable the given ServeRAID device # ServeRAID_start() { if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." return $OCF_SUCCESS else if # # Normally we do a MERGE PARTNER, but if we still own the drive for # some reason, then we'll need to do a MERGE OWN instead... # out=`$IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1` if [ $? -eq $srsuccess ] then ocf_log info "$out" else ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN fi then : OK All is well! 
targetid=`MergeGroupToSCSI_ID` sr_logicaldrivenumber=`expr $targetid + 1` #run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber & # This version of the SYNCH command requires the 6.10 or later # ServeRAID support CD. # To avoid issues when called by lrmd, redirect stdout->stderr. # Use () to create a subshell to make the redirection be synchronized. ( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2 AddSCSI else return $OCF_ERR_GENERIC fi fi if ServeRAID_status "$@" then return $OCF_SUCCESS else ocf_log err "ServeRAID device $1 not active!" exit $OCF_ERR_GENERIC fi } # # stop: Disable the given ServeRAID device # ServeRAID_stop() { parseinst "$@" ReleaseSCSI if ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup then : UNMERGE $sr_adapter $sr_mergegroup worked fi if ServeRAID_status "$@" then ocf_log err "ServeRAID device $@ is still active!" return $OCF_ERR_GENERIC else return $OCF_SUCCESS fi } # # status: is the given device now available? # ServeRAID_status() { parseinst "$@" # # The output we're looking for # Part of merge group : 2 # SRLogicalDriveConfig \ | grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null } # # validate_all: are the OCF instance parameters valid? # ServeRAID_validate_all() { check_binary $PERL # parseinst() will do all the work... parseinst "$@" return $? } if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; # # methods: What methods do we support? # methods) ServeRAID_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) ;; esac if ( [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ] ) then ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup\n enviroment virables before running $0 !" # usage exit $OCF_ERR_GENERIC fi : Right Number of arguments.. serveraid=$OCF_RESKEY_serveraid mergegroup=$OCF_RESKEY_mergegroup # Look for the start, stop, status, or methods calls... 
case "$1" in stop) ServeRAID_stop $serveraid $mergegroup exit $?;; start) ServeRAID_start $serveraid $mergegroup exit $?;; status|monitor) if ServeRAID_status $serveraid $mergegroup then ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running." exit $OCF_SUCCESS else ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped." exit $OCF_NOT_RUNNING fi exit $?;; validate-all) ServeRAID_validate_all $serveraid $mergegroup exit $?;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/SphinxSearchDaemon000077500000000000000000000135001203363223200251460ustar00rootroot00000000000000#!/bin/sh # # # Searchd OCF RA. # Manages the Sphinx search daemon # # Copyright (c) 2007 Christian Rishoj (christian@rishoj.net) # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a searchd Resource Agent. It manages the Sphinx Search Daemon. Manages the Sphinx search daemon. searchd configuration file Configuration file searchd binary searchd binary Search binary for functional testing in the monitor action. search binary Test query for functional testing in the monitor action. The query does not need to match any documents in the index. The purpose is merely to test whether the search daemon is is able to query its indices and respond properly. test query END } ####################################################################### searchd_usage() { cat < /dev/null && [ `ps -p "$1" | grep searchd | wc -l` -eq 1 ] } searchd_status() { pidfile=`grep -v "^#" "$OCF_RESKEY_config" | grep -w pid_file | awk -F "[ \t]*=[ \t]*" '{ print $2 }'` if [ -f "$pidfile" ] ; then PID=`head -n 1 $pidfile` if [ ! -z "$PID" ] ; then isRunning "$PID" if [ $? = 0 ] ; then return 0 fi fi fi false } searchd_check() { $OCF_RESKEY_search --config $OCF_RESKEY_config --noinfo "$OCF_RESKEY_testQuery" > /dev/null } searchd_monitor() { if ! searchd_validate ; then return $OCF_NOT_RUNNING fi if searchd_status ; then if searchd_check ; then return $OCF_SUCCESS else return $OCF_ERR_GENERIC fi else return $OCF_NOT_RUNNING fi } searchd_validate() { if [ ! -x "$OCF_RESKEY_search" ]; then ocf_log err "search binary '$OCF_RESKEY_search' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! -x "$OCF_RESKEY_searchd" ]; then ocf_log err "searchd binary '$OCF_RESKEY_searchd' does not exist or cannot be executed" return $OCF_ERR_ARGS fi if [ ! 
-f "$OCF_RESKEY_config" ]; then ocf_log err "config file '$OCF_RESKEY_config' does not exist" return $OCF_ERR_ARGS fi return $OCF_SUCCESS } : ${OCF_RESKEY_config=/etc/sphinx/sphinx.conf} : ${OCF_RESKEY_search=/usr/local/bin/search} : ${OCF_RESKEY_searchd=/usr/local/bin/searchd} : ${OCF_RESKEY_testQuery=Heartbeat_Monitor_Query_Match_string} case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) searchd_start;; stop) searchd_stop;; monitor) searchd_monitor;; validate-all) searchd_validate;; usage|help) searchd_usage exit $OCF_SUCCESS ;; *) searchd_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc ClusterLabs-resource-agents-dc69db5/heartbeat/Squid000077500000000000000000000247641203363223200225260ustar00rootroot00000000000000#!/bin/bash # # Description: Manages a Squid Server provided by NTT OSSC as an # OCF High-Availability resource under Heartbeat/LinuxHA control # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
# # Copyright (c) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # ####################################################################### # OCF parameters: # OCF_RESKEY_squid_exe : Executable file # OCF_RESKEY_squid_conf : Configuration file # OCF_RESKEY_squid_pidfile: Process id file # OCF_RESKEY_squid_port : Port number # OCF_RESKEY_debug_mode : Debug mode # OCF_RESKEY_debug_log : Debug log file # OCF_RESKEY_squid_stop_timeout: # Number of seconds to await to confirm a # normal stop method # # OCF_RESKEY_squid_exe, OCF_RESKEY_squid_conf, OCF_RESKEY_squid_pidfile # and OCF_RESKEY_squid_port must be specified. Each of the rests # has its default value or refers OCF_RESKEY_squid_conf to make # its value when no explicit value is given. ############################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs usage() { cat <<-! usage: $0 action action: start : start a new squid instance stop : stop the running squid instance status : return the status of squid, run or down monitor : return TRUE if the squid appears to be working. meta-data : show meta data message validate-all: validate the instance parameters ! return $OCF_ERR_ARGS } metadata_squid() { cat < 1.0 The resource agent of Squid. This manages a Squid instance as an HA resource. Manages a Squid proxy server instance This is a required parameter. This parameter specifies squid's executable file. Executable file This is a required parameter. This parameter specifies a configuration file for a squid instance managed by this RA. Configuration file This is a required parameter. This parameter specifies a process id file for a squid instance managed by this RA. Pidfile This is a required parameter. This parameter specifies a port number for a squid instance managed by this RA. If plural ports are used, you must specifiy the only one of them. Port number On stop, a squid shutdown is invoked first. 
If the resource doesn't stop within this timeout, we resort to stopping processes by sending signals and finally KILLing them. how long to wait for squid shutdown to stop the instance before resorting to kill This is an optional parameter. This RA runs in debug mode when this parameter includes 'x' or 'v'. If 'x' is included, both of STDOUT and STDERR redirect to the logfile specified by "debug_log", and then the builtin shell option 'x' is turned on. It is similar about 'v'. Debug mode This is an optional and omittable parameter. This parameter specifies a destination file for debug logs and works only if this RA run in debug mode. Refer to "debug_mode" about debug mode. If no value is given but it's requied, it's made by the following rules: "/var/log/" as a directory part, the basename of the configuration file given by "syslog_ng_conf" as a basename part, ".log" as a suffix. A destination of the debug log END return $OCF_SUCCESS } get_pids() { SQUID_PIDS=( ) # Seek by pattern SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN") # Seek by pidfile SQUID_PIDS[1]=$(awk '1{print $1}' $SQUID_PIDFILE 2>/dev/null) if [[ -n "${SQUID_PIDS[1]}" ]]; then typeset exe exe=$(ls -l "/proc/${SQUID_PIDS[1]}/exe") if [[ $? = 0 ]]; then exe=${exe##*-> } if ! 
[[ "$exe" = $SQUID_EXE ]]; then SQUID_PIDS[1]="" fi else SQUID_PIDS[1]="" fi fi # Seek by port SQUID_PIDS[2]=$( netstat -apn | awk '/tcp.*[0-9]+\.[0-9]+\.+[0-9]+\.[0-9]+:'$SQUID_PORT' / && $7~/^[1-9]/ { sub("\\/.*", "", $7); print $7; exit}') } are_all_pids_found() { if [[ -n "${SQUID_PIDS[0]}" ]] && [[ -n "${SQUID_PIDS[1]}" ]] && [[ -n "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } are_pids_sane() { if [[ "${SQUID_PIDS[1]}" = "${SQUID_PIDS[2]}" ]]; then return $OCF_SUCCESS else ocf_log err "$SQUID_NAME:Pid unmatch" return $OCF_ERR_GENERIC fi } is_squid_dead() { if [[ -z "${SQUID_PIDS[0]}" ]] && [[ -z "${SQUID_PIDS[2]}" ]] then return 0 else return 1 fi } monitor_squid() { typeset trialcount=0 while true; do get_pids if are_all_pids_found; then are_pids_sane return $OCF_SUCCESS fi if is_squid_dead; then return $OCF_NOT_RUNNING fi ocf_log info "$SQUID_NAME:Inconsistent processes:" \ "${SQUID_PIDS[0]},${SQUID_PIDS[1]},${SQUID_PIDS[2]}" (( trialcount = trialcount + 1 )) if (( trialcount > SQUID_CONFIRM_TRIALCOUNT )); then ocf_log err "$SQUID_NAME:Inconsistency of processes remains unsolved" return $OCF_ERR_GENERIC fi sleep 1 done } start_squid() { typeset status monitor_squid status=$? if [[ $status != $OCF_NOT_RUNNING ]]; then return $status fi set -- "$SQUID_OPTS" ocf_run $SQUID_EXE -f "$SQUID_CONF" "$@" status=$? 
if [[ $status != $OCF_SUCCESS ]]; then return $OCF_ERR_GENERIC fi while true; do get_pids if are_all_pids_found && are_pids_sane; then return $OCF_SUCCESS fi ocf_log info "$SQUID_NAME:Waiting for squid to be invoked" sleep 1 done return $OCF_ERR_GENERIC } stop_squid() { typeset lapse_sec if ocf_run $SQUID_EXE -f $SQUID_CONF -k shutdown; then lapse_sec=0 while true; do get_pids if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi (( lapse_sec = lapse_sec + 1 )) if (( lapse_sec > SQUID_STOP_TIMEOUT )); then break fi sleep 1 ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "stop NORM $lapse_sec/$SQUID_STOP_TIMEOUT" done fi while true; do get_pids ocf_log info "$SQUID_NAME:$FUNCNAME:$LINENO: " \ "try to stop by SIGKILL:${SQUID_PIDS[0]} ${SQUID_PIDS[2]}" kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]} sleep 1 if is_squid_dead; then rm -f $SQUID_PIDFILE return $OCF_SUCCESS fi done return $OCF_ERR_GENERIC } status_squid() { return $OCF_SUCCESS } validate_all_squid() { ocf_log info "validate_all_squid[$SQUID_NAME]" return $OCF_SUCCESS } : === Debug ${0##*/} $1 === if [[ "$1" = "meta-data" ]]; then metadata_squid exit $? fi SQUID_CONF="${OCF_RESKEY_squid_conf}" if [[ -z "$SQUID_CONF" ]]; then ocf_log err "SQUID_CONF is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_NAME="${SQUID_CONF##*/}" SQUID_NAME="${SQUID_NAME%.*}" DEBUG_LOG="${OCF_RESKEY_debug_log-/var/log/squid_${SQUID_NAME}_debug}.log" DEBUG_MODE="" case $OCF_RESKEY_debug_mode in *x*) DEBUG_MODE="${DEBUG_MODE}x";; esac case $OCF_RESKEY_debug_mode in *v*) DEBUG_MODE="${DEBUG_MODE}v";; esac if [ -n "$DEBUG_MODE" ]; then PS4='\d \t \h '"${1-unknown} " export PS4 exec 1>>$DEBUG_LOG 2>&1 set -$DEBUG_MODE fi SQUID_EXE="${OCF_RESKEY_squid_exe}" if [[ -z "$SQUID_EXE" ]]; then ocf_log err "SQUID_EXE is not defined" exit $OCF_ERR_CONFIGURED fi if [[ ! 
-x "$SQUID_EXE" ]]; then ocf_log err "$SQUID_EXE is not found" exit $OCF_ERR_CONFIGURED fi SQUID_PIDFILE="${OCF_RESKEY_squid_pidfile}" if [[ -z "$SQUID_PIDFILE" ]]; then ocf_log err "SQUID_PIDFILE is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_PORT="${OCF_RESKEY_squid_port}" if [[ -z "$SQUID_PORT" ]]; then ocf_log err "SQUID_PORT is not defined" exit $OCF_ERR_CONFIGURED fi SQUID_OPTS="${OCF_RESKEY_squid_opts}" SQUID_PIDS=( ) SQUID_CONFIRM_TRIALCOUNT="${OCF_RESKEY_squid_confirm_trialcount-3}" SQUID_STOP_TIMEOUT="${OCF_RESKEY_squid_stop_timeout-10}" SQUID_SUSPEND_TRIALCOUNT="${OCF_RESKEY_squid_suspend_trialcount-10}" PROCESS_PATTERN="$SQUID_EXE -f $SQUID_CONF" COMMAND=$1 case "$COMMAND" in start) ocf_log debug "[$SQUID_NAME] Enter squid start" start_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid start $func_status" exit $func_status ;; stop) ocf_log debug "[$SQUID_NAME] Enter squid stop" stop_squid func_status=$? ocf_log debug "[$SQUID_NAME] Leave squid stop $func_status" exit $func_status ;; status) status_squid exit $? ;; monitor) #ocf_log debug "[$SQUID_NAME] Enter squid monitor" monitor_squid func_status=$? #ocf_log debug "[$SQUID_NAME] Leave squid monitor $func_status" exit $func_status ;; validate-all) validate_all_squid exit $? ;; *) usage ;; esac # vim: set sw=4 ts=4 : ClusterLabs-resource-agents-dc69db5/heartbeat/Stateful000077500000000000000000000106511203363223200232160ustar00rootroot00000000000000#!/bin/sh # # # Example of a stateful OCF Resource Agent. # # Copyright (c) 2006 Andrew Beekhof # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" ####################################################################### meta_data() { cat < 1.0 This is an example resource agent that impliments two states Example stateful resource agent Location to store the resource state in State file END exit $OCF_SUCCESS } ####################################################################### stateful_usage() { cat < ${OCF_RESKEY_state} } stateful_check_state() { target=$1 if [ -f ${OCF_RESKEY_state} ]; then state=`cat ${OCF_RESKEY_state}` if [ "x$target" = "x$state" ]; then return 0 fi else if [ "x$target" = "x" ]; then return 0 fi fi return 1 } stateful_start() { stateful_check_state master if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi stateful_update slave $CRM_MASTER -v 5 return 0 } stateful_demote() { stateful_check_state if [ $? = 0 ]; then # CRM Error - Should never happen return $OCF_NOT_RUNNING fi stateful_update slave $CRM_MASTER -v 5 return 0 } stateful_promote() { stateful_check_state if [ $? = 0 ]; then return $OCF_NOT_RUNNING fi stateful_update master $CRM_MASTER -v 10 return 0 } stateful_stop() { $CRM_MASTER -D stateful_check_state master if [ $? 
= 0 ]; then # CRM Error - Should never happen return $OCF_RUNNING_MASTER fi if [ -f ${OCF_RESKEY_state} ]; then rm ${OCF_RESKEY_state} fi return 0 } stateful_monitor() { stateful_check_state "master" if [ $? = 0 ]; then return $OCF_RUNNING_MASTER fi stateful_check_state "slave" if [ $? = 0 ]; then return $OCF_SUCCESS fi if [ -f ${OCF_RESKEY_state} ]; then echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents" cat ${OCF_RESKEY_state} return $OCF_ERR_GENERIC fi return 7 } stateful_validate() { exit $OCF_SUCCESS } : ${OCF_RESKEY_state=${HA_RSCTMP}/Stateful-${OCF_RESOURCE_INSTANCE}.state} case $__OCF_ACTION in meta-data) meta_data;; start) stateful_start;; promote) stateful_promote;; demote) stateful_demote;; stop) stateful_stop;; monitor) stateful_monitor;; validate-all) stateful_validate;; usage|help) stateful_usage $OCF_SUCCESS;; *) stateful_usage $OCF_ERR_UNIMPLEMENTED;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/SysInfo000077500000000000000000000223241203363223200230210ustar00rootroot00000000000000#!/bin/bash # # # SysInfo OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. 
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 Units: free_swap: Mb ram_*: Mb root_free: Gb cpu_speed (Linux): bogomips cpu_speed (Darwin): Ghz Records various node attributes in the CIB PID file PID file Interval to allow values to stabilize Dampening Delay END } ####################################################################### UpdateStat() { name=$1; shift value="$*" echo -e "$name:\t$value" ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` cpu_type=`system_profiler SPHardwareDataType | grep "CPU Type:"` cpu_type=${cpu_type/*: /} cpu_speed=`system_profiler SPHardwareDataType | grep "CPU Speed:" | awk 
'{print $3}'` cpu_cores=`system_profiler SPHardwareDataType | grep "Number Of"` cpu_cores=${cpu_cores/*: /} ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=`grep "model name" /proc/cpuinfo | head -n 1` cpu_type=${cpu_type/*: /} cpu_speed=`grep "bogomips" /proc/cpuinfo | head -n 1` cpu_speed=${cpu_speed/*: /} cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! -z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi loads=`uptime` load15=`echo ${loads} | awk '{print $10}'` UpdateStat cpu_load $load15 if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o df: -h flag not available on Solaris 8. (OK on 9, 10, ...) #FIXME# # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. 
disk=`df -h / | tail -1 | awk '{print $4}'` if [ x != x"$disk" ]; then UpdateStat root_free `SysInfo_hdd_units $disk` fi } SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi memlen=`expr ${#mem} - 1` memlen_alt=`expr ${#mem} - 2` if [ ${mem:$memlen:1} = "G" ]; then mem="${mem:0:$memlen}" if [ $mem != ${mem/./} ]; then mem_before=${mem/.*/} mem_after=${mem/*./} mem=$[mem_before*1024] if [ ${#mem_after} = 0 ]; then : elif [ ${#mem_after} = 1 ]; then mem=$[mem+100*$mem_after] elif [ ${#mem_after} = 2 ]; then mem=$[mem+10*$mem_after] elif [ ${#mem_after} = 3 ]; then mem=$[mem+$mem_after] else mem_after=${mem_after:0:3} mem=$[mem+$mem_after] fi fi elif [ ${mem:$memlen:1} = "M" ]; then mem=${mem/.*/} mem="${mem:0:$memlen}" elif [ ${mem:$memlen:1} = "k" ]; then mem="${mem:0:$memlen}" mem=${mem/.*/} mem=`expr $mem / 1024` elif [ ${mem:$memlen_alt:2} = "kB" ]; then mem="${mem:0:$memlen_alt}" mem=${mem/.*/} mem=`expr $mem / 1024` elif [ ${mem:$memlen_alt:2} = "Mb" ]; then mem="${mem:0:$memlen_alt}" mem=${mem/.*/} elif [ ${mem:$memlen_alt:2} = "MB" ]; then mem="${mem:0:$memlen_alt}" mem=${mem/.*/} fi # Round to the next multiple of 50 memlen=`expr ${#mem} - 2` mem_round="${mem:$memlen:2}" if [ x$mem_round = x ]; then : elif [ $mem_round = "00" ]; then : else mem_round=`echo $mem_round | sed 's/^0//'` if [ $mem_round -lt "50" ]; then mem=$[mem+50] mem=$[mem-$mem_round] else mem=$[mem+100] mem=$[mem-$mem_round] fi fi echo $mem } SysInfo_hdd_units() { disk=$1 disklen=`expr ${#disk} - 1` disklen_alt=`expr ${#disk} - 2` if [ ${disk:$disklen:1} = "G" ]; then disk="${disk:0:$disklen}" elif [ ${disk:$disklen:1} = "M" ]; then disk="${disk:0:$disklen}" disk=${disk/.*/} disk=`expr $disk / 1024` elif [ ${disk:$disklen:1} = "k" ]; then disk="${disk:0:$disklen}" disk=${disk/.*/} disk=`expr $disk / 1048576` elif [ ${disk:$disklen_alt:2} = "kB" ]; then disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1048576` elif [ ${disk:$disklen_alt:2} = "Mb" ]; then 
disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1024` elif [ ${disk:$disklen_alt:2} = "MB" ]; then disk="${disk:0:$disklen_alt}" disk=${disk/.*/} disk=`expr $disk / 1024` fi echo $disk } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_pidfile:="$HA_RSCTMP/SysInfo-${OCF_RESOURCE_INSTANCE}"} : ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? ClusterLabs-resource-agents-dc69db5/heartbeat/VIPArip000077500000000000000000000147331203363223200227060ustar00rootroot00000000000000#!/bin/sh # # License: GNU General Public License (GPL) # Support: linux-ha@lists.linux-ha.org # Author: Huang Zhen # Copyright (c) 2006 International Business Machines # # Virtual IP Address by RIP2 protocol. # This script manages IP alias in different subnet with quagga/ripd. # It can add an IP alias, or remove one. 
# # The quagga package should be installed to run this RA # # usage: $0 {start|stop|status|monitor|validate-all|meta-data} # # The "start" arg adds an IP alias. # Surprisingly, the "stop" arg removes one. :-) # # OCF parameters are as below # OCF_RESKEY_ip The IP address in different subnet # OCF_RESKEY_nic The nic for broadcast the route information # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs RIPDCONF=$HA_RSCTMP/VIPArip-ripd.conf ZEBRA=/usr/sbin/zebra RIPD=/usr/sbin/ripd USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; ####################################################################### meta_data() { cat < 1.0 Virtual IP Address by RIP2 protocol. This script manages IP alias in different subnet with quagga/ripd. It can add an IP alias, or remove one. Manages a virtual IP address through RIP2 The IPv4 address in different subnet, for example "192.168.1.1". The IP address in different subnet The nic for broadcast the route information. The ripd uses this nic to broadcast the route informaton to others The nic for broadcast the route information Absolute path to the zebra binary. zebra binary Absolute path to the ripd binary. 
ripd binary END exit $OCF_SUCCESS } usage() { echo $USAGE >&2 } new_config_file() { echo new_config_file $1 $2 $3 cat >$RIPDCONF < $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } add_ip() { echo add_ip $1 sed "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF } del_ip() { echo del_ip $1 sed "/$1/d" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF if $GREP "access-list private permit" $RIPDCONF>/dev/null then echo some other IP is running reload_config else stop_quagga echo remove $RIPDCONF rm $RIPDCONF fi } add_nic() { echo add_nic $1 if $GREP "network $1" $RIPDCONF >/dev/null then echo the nic is already in the config file else sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp cp $RIPDCONF.tmp $RIPDCONF fi } reload_config() { echo reload_config echo $RIPDCONF: cat $RIPDCONF echo killall -SIGHUP ripd killall -SIGHUP ripd } start_quagga() { echo start_quagga echo $RIPDCONF: cat $RIPDCONF echo $ZEBRA -d $ZEBRA -d echo $RIPD -d -f $RIPDCONF $RIPD -d -f $RIPDCONF } stop_quagga() { echo stop_quagga echo $RIPDCONF: cat $RIPDCONF echo killall -SIGTERM ripd killall -SIGTERM ripd echo killall -SIGTERM zebra killall -SIGTERM zebra } start_rip_ip() { echo start_rip_ip check_params if [ x"$OCF_RESKEY_nic" = x ] then echo OCF_RESKEY_nic is null, set to eth0 OCF_RESKEY_nic="eth0" fi status_rip_ip case $? in $OCF_SUCCESS) ocf_log info "already running" exit $OCF_SUCCESS ;; $OCF_NOT_RUNNING) ;; *) ocf_log info "state undefined, stopping first" stop_rip_ip ;; esac $IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo if [ -f "$RIPDCONF" ] then # there is a config file, add new data(IP,nic,metric) # to the existing config file. 
add_ip $OCF_RESKEY_ip add_nic $OCF_RESKEY_nic set_metric 1 reload_config echo sleep 3 sleep 3 set_metric 3 reload_config else new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1 start_quagga echo sleep 3 sleep 3 set_metric 3 reload_config fi return $OCF_SUCCESS } stop_rip_ip() { echo stop_rip_ip check_params status_rip_ip if [ $? = $OCF_NOT_RUNNING ] then exit $OCF_SUCCESS fi $IP2UTIL addr del $OCF_RESKEY_ip dev lo echo sleep 2 sleep 2 del_ip $OCF_RESKEY_ip return $OCF_SUCCESS } status_rip_ip() { check_params if $IP2UTIL addr | $GREP $OCF_RESKEY_ip >/dev/null then if $GREP $OCF_RESKEY_ip $RIPDCONF >/dev/null then if pidof ripd >/dev/null then return $OCF_SUCCESS fi fi return $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } if [ $# -ne 1 ] then usage exit $OCF_ERR_ARGS fi [ x != x"$OCF_RESKEY_zebra_binary" ] && ZEBRA=$OCF_RESKEY_zebra_binary [ x != x"$OCF_RESKEY_ripd_binary" ] && RIPD=$OCF_RESKEY_ripd_binary case $1 in start) start_rip_ip;; stop) stop_rip_ip;; status) status_rip_ip;; monitor) status_rip_ip;; validate-all) check_binary $IP2UTIL exit $OCF_SUCCESS;; meta-data) meta_data;; usage) usage; exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/VirtualDomain000077500000000000000000000447461203363223200242210ustar00rootroot00000000000000#!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_force_stop_default=0 OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)" OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } meta_data() { cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport Use a dedicated migration network. 
The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolveable and the associated IP is reachable through the favored network. Migration network host name suffix To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the cpu utilization of the resource when the monitor is executed. Enable auto setting the cpu utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. 
Enable auto setting the hv_memory utilization of the resource EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ "$cval" != "$val" ]; then outp=`crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1` || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # A state file where we record the domain name: STATEFILE="${HA_RSCTMP}/VirtualDomain-${OCF_RESOURCE_INSTANCE}.state" VirtualDomain_Define() { local virsh_output local domain_name # Note: passing in the domain name from outside the script is # intended for testing and debugging purposes only. Don't do this # in production, instead let the script figure out the domain name # from the config file. You have been warned. if [ -z "$DOMAIN_NAME" ]; then # Spin until we have a domain name while true; do virsh_output=`virsh ${VIRSH_OPTIONS} define ${OCF_RESKEY_config}` domain_name=`echo "$virsh_output" | sed -e 's/Domain \(.*\) defined from .*$/\1/'` if [ -n "$domain_name" ]; then break; fi ocf_log debug "Domain not defined yet, probably unable to connect to hypervisor. Retrying." sleep 1 done echo "$domain_name" > $STATEFILE ocf_log info "Domain name \"$domain_name\" saved to $STATEFILE." else ocf_log warn "Domain name ${DOMAIN_NAME} already defined, overriding configuration file ${OCF_RESKEY_config}. 
You should do this for testing only." fi } VirtualDomain_Cleanup_Statefile() { rm -f $STATEFILE || ocf_log warn "Failed to remove $STATEFILE during $__OCF_ACTION." } VirtualDomain_Status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status="`virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME`" case "$status" in "shut off") # shut off: domain is defined, but not started ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." sleep 1 fi ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" ;; esac done return $rc } VirtualDomain_Start() { if VirtualDomain_Status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." 
return $OCF_SUCCESS fi virsh $VIRSH_OPTIONS start ${DOMAIN_NAME} rc=$? if [ $rc -ne 0 ]; then ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_Monitor; do sleep 1 done return $OCF_SUCCESS } VirtualDomain_Stop() { local i local status local shutdown_timeout local out ex VirtualDomain_Status status=$? case $status in $OCF_SUCCESS) if ! ocf_is_true $OCF_RESKEY_force_stop; then # Issue a graceful shutdown request ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_Status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. Clean # up and return. VirtualDomain_Cleanup_Statefile return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done fi ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1) ex=$? echo >&2 "$out" # unconditionally clean up. VirtualDomain_Cleanup_Statefile case $ex$out in *"error:"*"domain is not running"*) : ;; # unexpected path to the intended outcome, all is well [!0]*) return $OCF_ERR_GENERIC ;; 0*) while [ $status != $OCF_NOT_RUNNING ]; do VirtualDomain_Status status=$? 
done ;;
    esac
    return $OCF_SUCCESS
}

#
# VirtualDomain_Migrate_To: live-migrate the domain $DOMAIN_NAME to the
# node named in $OCF_RESKEY_CRM_meta_migrate_target.
#
# The remote hypervisor URI is derived from $OCF_RESKEY_hypervisor by
# substituting the target node for the host part and appending the
# optional transport ($OCF_RESKEY_migration_transport).  If
# $OCF_RESKEY_migration_network_suffix is set, a dedicated migration URI
# is passed to virsh as well (qemu and xen only).
#
# Returns $OCF_SUCCESS if virsh reports success, $OCF_ERR_GENERIC if the
# migration fails or the domain is not active locally.
#
VirtualDomain_Migrate_To() {
    local target_node
    local remoteuri
    local transport_suffix
    local migrateuri
    local migrateport
    local migrate_target
    local hypervisor
    local rc

    target_node="$OCF_RESKEY_CRM_meta_migrate_target"

    if ! VirtualDomain_Status; then
        ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
        return $OCF_ERR_GENERIC
    fi

    # Find out the remote hypervisor to connect to. That is, turn
    # something like "qemu://foo:9999/system" into
    # "qemu+tcp://bar:9999/system"
    if [ -n "${OCF_RESKEY_migration_transport}" ]; then
        transport_suffix="+${OCF_RESKEY_migration_transport}"
    fi

    # A typical migration URI via a special migration network looks
    # like "tcp://bar-mig:49152". The port would be randomly chosen
    # by libvirt from the range 49152-49215 if omitted, at least since
    # version 0.7.4 ...
    if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
        hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
        # Hostname might be a FQDN: the suffix goes after the first label.
        migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
        case $hypervisor in
            qemu)
                # For quite ancient libvirt versions a migration port is
                # needed and the URI must not contain the "//". Newer
                # versions can handle the "bad" URI.
                migrateport=$(( 49152 + $(ocf_maybe_random) % 64 ))
                migrateuri="tcp:${migrate_target}:${migrateport}"
                ;;
            xen)
                migrateuri="xenmigr://${migrate_target}"
                ;;
            *)
                # migrateuri stays empty; the migration is still attempted
                # over the regular connection.
                ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
                ;;
        esac
    fi

    # Scared of that sed expression? So am I. :-)
    remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")

    # OK, we know where to connect to. Now do the actual migration.
    ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})."
    virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri}
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
    VirtualDomain_Cleanup_Statefile
    return $OCF_SUCCESS
}

#
# VirtualDomain_Migrate_From: wait on the target node until the migrated
# domain passes VirtualDomain_Monitor, polling once per second.
#
# The loop itself has no upper bound; it only ends when the monitor
# succeeds.  Always returns $OCF_SUCCESS once it does.
#
VirtualDomain_Migrate_From() {
    while ! VirtualDomain_Monitor; do
        sleep 1
    done
    ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
    return $OCF_SUCCESS
}

#
# VirtualDomain_Monitor: check the domain status and, if it is fine, run
# every command listed in $OCF_RESKEY_monitor_scripts.
#
# Returns the status of VirtualDomain_Status if that is not
# $OCF_SUCCESS, $OCF_ERR_GENERIC as soon as one monitor script exits
# with a non-success code, and $OCF_SUCCESS otherwise.
# update_utilization is called in every case before returning.
#
VirtualDomain_Monitor() {
    local rc
    local script
    local script_output
    local script_rc

    # First, check the domain status. If that returns anything other
    # than $OCF_SUCCESS, something is definitely wrong.
    VirtualDomain_Status
    rc=$?
    if [ ${rc} -eq ${OCF_SUCCESS} ]; then
        # OK, the generic status check turned out fine. Now, if we
        # have monitor scripts defined, run them one after another.
        for script in ${OCF_RESKEY_monitor_scripts}; do
            # Assignment is kept separate from "local" so that $? is
            # the exit code of the script, not of the declaration.
            script_output="$($script 2>&1)"
            script_rc=$?
            if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then
                # A monitor script returned a non-success exit
                # code. Stop iterating over the list of scripts, log a
                # warning message, and propagate $OCF_ERR_GENERIC.
                ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
                rc=$OCF_ERR_GENERIC
                break
            else
                ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
            fi
        done
    fi

    update_utilization

    return ${rc}
}

#
# VirtualDomain_Validate_All: verify that the required binaries exist
# and that the "config" parameter is set and readable.
#
# Returns $OCF_ERR_CONFIGURED if "config" is empty and
# $OCF_ERR_INSTALLED if the file is unreadable outside of a probe or
# stop action.  An unreadable file during a probe or stop is only
# logged.  Returns $OCF_SUCCESS otherwise.
#
VirtualDomain_Validate_All() {
    local binary

    # Required binaries:
    for binary in virsh sed; do
        check_binary $binary
    done

    if [ -z "$OCF_RESKEY_config" ]; then
        ocf_log err "Missing configuration parameter \"config\"."
        return $OCF_ERR_CONFIGURED
    fi

    # check if we can read the config file (otherwise we're unable to
    # deduce $DOMAIN_NAME from it, see below)
    if [ ! -r "$OCF_RESKEY_config" ]; then
        if ocf_is_probe; then
            ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
        elif [ "$__OCF_ACTION" = "stop" ]; then
            ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
        else
            ocf_log err "Configuration file $OCF_RESKEY_config does not exist or is not readable."
            return $OCF_ERR_INSTALLED
        fi
    fi

    # Explicit, so the result does not depend on the exit status of the
    # last logging call above.
    return $OCF_SUCCESS
}

#
# Main: exactly one argument (the action) is expected.
#
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

case $1 in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Everything except usage and meta-data must pass the validate test
VirtualDomain_Validate_All || exit $?

# During a probe, it is permissible for the config file to not be
# readable (it might be on shared storage not available during the
# probe). In that case, VirtualDomain_Define can't work and we're
# unable to get the domain name. Thus, we also can't check whether the
# domain is running. The only thing we can do here is to assume that
# it is not running.
if [ ! -r "$OCF_RESKEY_config" ]; then
    ocf_is_probe && exit $OCF_NOT_RUNNING
    [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
fi

# Define the domain on startup, and re-define whenever someone deleted
# the state file, or touched the config.
if [ ! -e "$STATEFILE" ] || [ "$OCF_RESKEY_config" -nt "$STATEFILE" ]; then
    VirtualDomain_Define
fi

# By now, we should definitely be able to read from the state file.
# If not, something went wrong.
if [ ! -r "$STATEFILE" ]; then
    ocf_log err "$STATEFILE not found or unreadable. This is unexpected. Cannot determine domain name."
    exit $OCF_ERR_GENERIC
fi

# Finally, retrieve the domain name from the state file.
DOMAIN_NAME=`cat "$STATEFILE" 2>/dev/null`
if [ -z "$DOMAIN_NAME" ]; then
    ocf_log err "$STATEFILE is empty. This is unexpected. Cannot determine domain name."
    exit $OCF_ERR_GENERIC
fi

# Dispatch the requested action; its exit status becomes ours.
case $1 in
    start)
        VirtualDomain_Start
        ;;
    stop)
        VirtualDomain_Stop
        ;;
    migrate_to)
        VirtualDomain_Migrate_To
        ;;
    migrate_from)
        VirtualDomain_Migrate_From
        ;;
    status)
        VirtualDomain_Status
        ;;
    monitor)
        VirtualDomain_Monitor
        ;;
    validate-all)
        # Validation already ran above.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
ClusterLabs-resource-agents-dc69db5/heartbeat/WAS000077500000000000000000000305201203363223200220560ustar00rootroot00000000000000#!/bin/sh # # # WAS # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Alan Robertson # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2002 - 2005 International Business Machines, Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_config # (WAS-configuration file, used for the single server edition of WAS) # OCF_RESKEY_port # (WAS--port-number, used for the advanced edition of WAS) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WASDIR=/opt/WebSphere/AppServer if [ ! -d $WASDIR ] then WASDIR=/usr/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WASBIN=$WASDIR/bin DEFAULT=$WASDIR/config/server-cfg.xml # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_config (WAS-configuration file) For the advanced edition of WAS, you have to set the following enviroment virable: OCF_RESKEY_port (WAS--port-number) $0 manages a Websphere Application Server (WAS) as an HA resource The 'start' operation starts WAS. The 'stop' operation stops WAS. 
The 'status' operation reports whether WAS is running The 'monitor' operation reports whether the WAS seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere, and is believed to work with the Advanced edition too. Since the Advanced Edition has no configuration file (it's in a the database) you need to give a port number instead of a configuration file for this config parameter. The default configuration file for the single server edition is: $DEFAULT The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. In this case, the port specification we need from the XML config file has to be on the same line as the first part of the tag. We run servlet/snoop on the first transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS. It manages a Websphere Application Server (WAS) as an HA resource. Manages a WebSphere Application Server instance The WAS-configuration file. configration file The WAS-(snoop)-port-number. port END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... 
#
#
#
#
#
#
#	It's not really skanky if we can find xmllint on the system, because it
#	reformats tags so they are all on one line, which is all we need...
#

#
#	Get the numbers of the ports WAS should be listening on...
#
#	If we don't have xmllint around, then the applicationserver and the
#	port= specification have to be on the same line in the XML config file.
#
#	$1 is either a comma-separated list of port numbers (anything that
#	starts with a digit) or the path of a WAS configuration file.
#	Prints one port per line.
#
GetWASPorts() {
  case $1 in
    [0-9]*)
      echo "$1" | tr ',' '\012';;
    *)
      # Delete up to port=, throw away optional quote and optional
      #	white space.
      # Throw away everything after the first non-digit.
      # This should leave us the port number all by itself...
      xmlcat $1 |
        grep -i 'transports.*applicationserver:HTTPTransport' |
        grep port= |
        sed -e 's%.*port= *"* *%%' \
            -e 's%[^0-9][^0-9]*.*$%%'
      ;;
  esac
}

#
#	We assume that the first port listed in the configuration
#	is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
  GetWASPorts "$@" | head -n1
}

#
#	Return information on the processname/id for the WAS ports
#
#	pid/java is the expected output.  Several lines, one per port...
#
#	Arguments are the port numbers to look for.  With no ports nothing
#	is printed: an empty alternation would otherwise match every
#	listening socket on the machine.  The leading ':' ties the match
#	to the whole port field, so 9080 does not also match 19080.
#
WASPortInfo() {
  pat=""
  for j in $*
  do
    case $pat in
      "")	pat="$j";;
      *)	pat="$pat|$j";;
    esac
  done
  [ -n "$pat" ] || return 0
  netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}

#
#	Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  count=`WASPortInfo "$@" | wc -l`
  echo $count
}

#
#	Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  WASPortInfo "$@" | sort -u | cut -f1 -d/
}

#
#	The version of ps that returns all processes and their (long) args
#	It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
  ps axww
}

#
#	The total set of WAS processes (single server only)
#
WAS_procs() {
  ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1
}

#
# methods: What methods/operations do we support?
#
#	Prints one method name per line; "monitor" is only offered when
#	the $WGET binary is available.
#
WAS_methods() {
  printf '%s\n' start stop status methods validate-all meta-data usage
  if
    have_binary $WGET
  then
    echo monitor
  fi
}

#
#	Return WAS status (silently)
#
#	Succeeds when at least one of the ports of server $1 is listening.
#
WAS_status() {
  WASPorts=`GetWASPorts $1`
  PortsInUse=`CheckWASPortsInUse $WASPorts`
  case $PortsInUse in
    0)	false;;
    *)	true;;
  esac
}

#
#	Report on WAS status to stdout...
#
#	Returns $OCF_NOT_RUNNING when no port of server $1 is listening
#	and $OCF_SUCCESS otherwise (even if only some ports are open).
#
WAS_report_status() {
  WASPorts=`GetWASPorts $1`
  PortCount=`echo $WASPorts | wc -w`
  # Re-echo unquoted to strip any padding wc may have added.
  PortCount=`echo $PortCount`
  PortsInUse=`CheckWASPortsInUse $WASPorts`
  case $PortsInUse in
    0)
      ocf_log debug "WAS: server $1 is stopped."
      return $OCF_NOT_RUNNING;;
    *)
      pids=`WASPIDs $WASPorts`
      if [ $PortsInUse -ge $PortCount ]
      then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
      else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
      fi
      return $OCF_SUCCESS;;
  esac
}

#
#	Monitor WAS - does it really seem to be working?
#
#	For this we invoke the snoop applet via wget.
#
#	This is actually faster than WAS_status above...
#
#	Returns 0 when wget succeeds and the snoop output echoes back the
#	Wget user-agent, $OCF_ERR_GENERIC otherwise, and 1 if no temporary
#	file could be created.
#
WAS_monitor() {
  # Remove the temporary file when the shell exits.
  trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
  tmpfile=`maketempfile` || return 1
  SnoopPort=`GetWASSnoopPort $1`
  output=`$WGET -nv -O$tmpfile  http://localhost:$SnoopPort/servlet/snoop 2>&1`
  rc=$?
  if [ $rc -ne 0 ]
  then
    ocf_log "err" "WAS: $1: wget failure: $output"
    rc=$OCF_ERR_GENERIC
  elif grep -i 'user-agent.*Wget' $tmpfile >/dev/null
  then
    : OK
  else
    ocf_log "err" "WAS: $1: no user-agent from snoop application"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
#	Start WAS instance
#
#	$1 is the configuration file (or port list) of the server.
#	Returns $OCF_SUCCESS once the server is listening, and
#	$OCF_ERR_GENERIC if the start script fails or the server does not
#	come up within $STARTTIME polls.
#
WAS_start() {
# Launch Arguments:
#
#       -configFile
#       -nodeName
#       -serverName
#       -oltEnabled
#       -oltHost
#       -oltPort
#       -debugEnabled
#       -jdwpPort
#       -debugSource
#       -serverTrace
#       -serverTraceFile
#       -script []
#       -platform
#       -noExecute
#       -help
  if [ -x $WASBIN/startServer.sh ]
  then
    cmd="$WASBIN/startServer.sh -configFile $1"
  else
    cmd="$WASBIN/startupServer.sh"
  fi

  if ! ocf_run $cmd
  then
    #false
    return $OCF_ERR_GENERIC
  fi

  if WAS_wait_4_start $STARTTIME "$@"
  then
    #true
    return $OCF_SUCCESS
  fi
  ocf_log "err" "WAS server $1 did not start correctly"
  return $OCF_ERR_GENERIC
}

#
#	Wait for WAS to actually start up.
#
#	It seems to take between 30 and 60 seconds for it to
#	start up on a trivial WAS instance.
#
#	$1 is the maximum number of one-second polls; the remaining
#	arguments are handed to WAS_status.  Returns the result of a final
#	WAS_status call if the limit is reached.
#
WAS_wait_4_start() {
  max=$1
  retries=0
  shift
  while
    [ $retries -lt $max ]
  do
    if
      WAS_status "$@"
    then
      return $OCF_SUCCESS
    else
      sleep 1
    fi
    retries=`expr $retries + 1`
  done
  WAS_status "$@"
}

#
#	Shut down WAS
#
#	Uses stopServer.sh when it is executable, otherwise kills the
#	processes that hold the WAS ports.  Returns $OCF_SUCCESS when no
#	WAS port is listening afterwards, $OCF_ERR_GENERIC otherwise.
#
WAS_stop() {
  # They don't return good return codes...
  # And, they seem to allow anyone to stop WAS (!)
  if [ -x $WASBIN/stopServer.sh ]
  then
    ocf_run $WASBIN/stopServer.sh -configFile $1
  else
    WASPorts=`GetWASPorts $1`
    pids=`WASPIDs $WASPorts`
    # Nothing is listening: do not call kill without arguments.
    [ -z "$pids" ] || kill $pids
  fi
  if
    WAS_status $1
  then
    ocf_log "err" "WAS: $1 did not stop correctly"
    #false
    return $OCF_ERR_GENERIC
  else
    #true
    return $OCF_SUCCESS
  fi
}

#
#	Check if the port is valid
#
#	Succeeds when $1 is a decimal number greater than zero.
#
CheckPort() {
  ocf_is_decimal "$1" && [ $1 -gt 0 ]
}

#
#	Validate the instance parameter held in the global $arg.
#
#	With startServer.sh present $arg must be an existing configuration
#	file that yields at least one valid port; with only
#	startupServer.sh present $arg must itself be a valid port number.
#	Exits with $OCF_ERR_ARGS or $OCF_ERR_CONFIGURED on failure and
#	returns $OCF_SUCCESS otherwise.
#
WAS_validate_all() {
  if [ -x $WASBIN/startServer.sh ]; then
    # $arg should be config file
    if [ ! -f "$arg" ]; then
      ocf_log err "Configuration file [$arg] does not exist"
      exit $OCF_ERR_ARGS
    fi

    # $arg should specify a valid port number at the very least
    local WASPorts=`GetWASPorts $arg`
    if [ -z "$WASPorts" ]; then
      ocf_log err "No port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi

    local port
    local have_valid_port=false
    for port in $WASPorts; do
      if CheckPort $port; then
        have_valid_port=true
        break
      fi
    done
    if [ "false" = "$have_valid_port" ]; then
      ocf_log err "No valid port number specified in configuration file [$arg]"
      exit $OCF_ERR_CONFIGURED
    fi

  elif [ -x $WASBIN/startupServer.sh ]; then
    # $arg should be port number; complain only when it is NOT one.
    if ! CheckPort "$arg"; then
      ocf_log err "Port number is required but [$arg] is not valid port number"
      exit $OCF_ERR_ARGS
    fi

  else
    # Do not know how to validate_all
    ocf_log warn "Do not know how to validate-all, assuming validation OK"
    return $OCF_SUCCESS
  fi

  return $OCF_SUCCESS
}

#
#	'main' starts here...
#

if [ $# -ne 1 ]
then
  usage
  exit $OCF_ERR_ARGS
fi

#
# Supply default configuration parameter(s)
#

if [ -z "$OCF_RESKEY_config" ] && [ -z "$OCF_RESKEY_port" ]
then
  if [ -f $DEFAULT ]
  then
    arg=$DEFAULT
  else
    arg=$DEFAULT_WASPORTS
  fi
elif [ -n "$OCF_RESKEY_config" ]
then
  arg=$OCF_RESKEY_config
else
  arg=$OCF_RESKEY_port
fi

if [ ! -f "$arg" ]
then
  case $arg in
    [0-9]*)	;;	# ignore port numbers...
    *)
      ocf_log "err" "WAS configuration file $arg does not exist!"
      usage
      exit $OCF_ERR_ARGS;;
  esac
fi

# What kind of method was invoked?
case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; start) WAS_start $arg exit $?;; stop) WAS_stop $arg exit $?;; status) WAS_report_status $arg exit $?;; monitor) WAS_monitor $arg exit $?;; validate-all) WAS_validate_all $arg exit $?;; methods) WAS_methods exit $?;; usage) usage exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac ClusterLabs-resource-agents-dc69db5/heartbeat/WAS6000077500000000000000000000306201203363223200221450ustar00rootroot00000000000000#!/bin/sh # WAS6 # # Description: Manages a Websphere Application Server as an HA resource # # # Author: Ru Xiang Min # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2006 International Business Machines China, Ltd., Inc. # # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml # # See usage() function below for more details... # # OCF parameters are as below: # OCF_RESKEY_profile # (WAS profile name, used for the single server edition of WAS6) ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### WAS_DIR=/opt/IBM/WebSphere/AppServer if [ ! -d $WAS_DIR ] then WAS_DIR=/usr/IBM/WebSphere/AppServer fi STARTTIME=300 # 5 minutes DEFAULT_WASPORTS="9080" # # WAS_BIN=$WAS_DIR/bin DEFAULT=default # # Print usage message # usage() { methods=`WAS_methods | grep -v methods` methods=`echo $methods | tr ' ' '|'` cat <<-END usage: $0 ($methods) For the single server edition of WAS6, you have to set the following enviroment virable: OCF_RESKEY_profile (WAS profile name) $0 manages a Websphere Application Server 6(WAS6) as an HA resource The 'start' operation starts WAS6. The 'stop' operation stops WAS6. 
The 'status' operation reports whether WAS6 is running The 'monitor' operation reports whether the WAS6 seems to be working (httpd also needs to be working for this case) The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profileName ) is valid The 'methods' operation reports on the methods $0 supports This is known to work with the Single Server edition of Websphere. The default profile name for the single server edition is: $DEFAULT The start and stop operations must be run as root. The status operation will report a pid of "-" for the WAS root process using unless it is run as root. If you don't have xmllint on your system, parsing of WAS configuration files is very primitive. We run servlet/snoop on the seventh transport port listed in the config file for the "monitor" operation. END } meta_data() { cat < 1.0 Resource script for WAS6. It manages a Websphere Application Server (WAS6) as an HA resource. Manages a WebSphere Application Server 6 instance The WAS profile name. profile name END } # # Reformat the XML document in a sort of canonical form # if we can. If we don't have xmllint, we just cat it out # and hope for the best ;-) # xmlcat() { if [ "X$XMLcat" = X ] then XMLcat=`which xmllint 2>/dev/null` if [ "X${XMLcat}" = X -o ! -x "${XMLcat}" ] then XMLcat=cat else XMLcat="$XMLcat --recover --format" fi fi for j in "$@" do ${XMLcat} "$j" done } # #This is a bit skanky, but it works anyway... # # It's not really skanky if we can find xmllint on the system, because it # reformats tags so they are all on one line, which is all we we need... # # # Get the numbers of the ports WAS should be listening on... # # If we don't have xmllint around, then the applicationserver and the # port= specification have to be on the same line in the XML config file. 
#
#	$1 is either a comma-separated list of port numbers (anything that
#	starts with a digit), which is printed one port per line, or any
#	other value, in which case the ports are read from the
#	serverindex.xml located via $WAS_DIR, $WAS_PROFILE_NAME, $WAS_CELL
#	and $WAS_NODE.
#
GetWASPorts() {
  case $1 in
    [0-9]*)
      echo "$1" | tr ',' '\012';;
    *)
      # Delete up to port=, throw away optional quote and optional
      #	white space.
      # Throw away everything after the first non-digit.
      # This should leave us the port number all by itself...
      xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml |
        grep port= |
        sed -e 's%.*port= *"* *%%' \
            -e 's%[^0-9][^0-9]*.*$%%'
      ;;
  esac
}

#
#	We assume that the seventh port listed in the serverindex.xml
#	is the one we should run servlet/snoop on.
#
GetWASSnoopPort() {
  GetWASPorts "$@" | sed -n '7p'
}

#
#	Return information on the processname/id for the WAS ports
#
#	pid/java is the expected output.  Several lines, one per port...
#
#	Arguments are the port numbers to look for.  With no ports nothing
#	is printed: an empty alternation would otherwise match every
#	listening socket on the machine.  The leading ':' ties the match
#	to the whole port field, so 9080 does not also match 19080.
#
WASPortInfo() {
  pat=""
  for j in $*
  do
    case $pat in
      "")	pat="$j";;
      *)	pat="$pat|$j";;
    esac
  done
  [ -n "$pat" ] || return 0
  netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}

#
#	Return the number of WAS ports which are open
#
CheckWASPortsInUse() {
  count=`WASPortInfo "$@" | wc -l`
  echo $count
}

#
#	Return the pid(s) of the processes that have WAS ports open
#
WASPIDs() {
  WASPortInfo "$@" | sort -u | cut -f1 -d/
}

#
#	The version of ps that returns all processes and their (long) args
#	It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
  ps axww
}

#
#	The total set of WAS processes (single server only)
#
WAS_procs() {
  ps_long | grep -i "config=$1" | grep -i java | cut -d' ' -f1
}

#
# methods: What methods/operations do we support?
#
#	Prints one method name per line; " monitor" is only offered when
#	the $WGET binary is available.
#
WAS_methods() {
  printf '%s\n' start stop status methods validate-all meta-data usage
  if
    have_binary $WGET
  then
    echo "	monitor"
  fi
}

#
#	Return WAS status (silently)
#
#	Succeeds when at least one of the ports of server $1 is listening.
#
WAS_status() {
  WASPorts=`GetWASPorts $1`
  PortsInUse=`CheckWASPortsInUse $WASPorts`
  case $PortsInUse in
    0)	false;;
    *)	true;;
  esac
}

#
#	Report on WAS status to stdout...
# WAS_report_status() { WASPorts=`GetWASPorts $1` PortCount=`echo $WASPorts | wc -w` PortCount=`echo $PortCount` PortsInUse=`CheckWASPortsInUse $WASPorts` case $PortsInUse in 0) ocf_log debug "WAS: server $1 is stopped."; return $OCF_NOT_RUNNING;; *) pids=`WASPIDs $WASPorts` if [ $PortsInUse -ge $PortCount ] then ocf_log debug "WAS: server $1 is running (pid" $pids "et al)." else ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports." fi return $OCF_SUCCESS;; esac } # # Monitor WAS - does it really seem to be working? # # For this we invoke the snoop applet via wget. # # This is actually faster than WAS_status above... # WAS_monitor() { trap '[ -z "$tmpfile" || rmtempfile "$tmpfile"' 0 tmpfile=`maketempfile` || exit 1 SnoopPort=`GetWASSnoopPort $1` output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1` rc=$? if [ $rc -eq 0 ] then if grep -i 'user-agent.*Wget' $tmpfile >/dev/null then : OK else ocf_log "err" "WAS: $1: no user-agent from snoop application" rc=$OCF_ERR_GENERIC fi else ocf_log "err" "WAS: $1: wget failure: $output" rc=$OCF_ERR_GENERIC fi return $rc } # # Start WAS instance # WAS_start() { # Launch Arguments: # -nowait # -quiet # -logfile # -replacelog # -trace # -script [