pax_global_header00006660000000000000000000000064141561452730014522gustar00rootroot0000000000000052 comment=456fd203c98f6edca91e9b272cd01e6a5569fdb9 raft-0.11.3/000077500000000000000000000000001415614527300125405ustar00rootroot00000000000000raft-0.11.3/.clang-format000066400000000000000000000002171415614527300151130ustar00rootroot00000000000000BasedOnStyle: Chromium BreakBeforeBraces: Custom BraceWrapping: AfterFunction: true AfterStruct: true IndentWidth: 4 PointerAlignment: Right raft-0.11.3/.dir-locals.el000066400000000000000000000004231415614527300151700ustar00rootroot00000000000000((nil . ((fill-column . 80))) (c-mode . ((flycheck-clang-definitions . ("HAVE_LINUX_AIO_ABI_H" "HAVE_LINUX_IO_URING_H" "_GNU_SOURCE")) (flycheck-clang-args . ("-Wpedantic" "-Wall" "-Wextra")) (flycheck-gcc-definitions . ("HAVE_LINUX_IO_URING_H" "_GNU_SOURCE"))))) raft-0.11.3/.github/000077500000000000000000000000001415614527300141005ustar00rootroot00000000000000raft-0.11.3/.github/workflows/000077500000000000000000000000001415614527300161355ustar00rootroot00000000000000raft-0.11.3/.github/workflows/build-and-test.yml000066400000000000000000000066241415614527300215040ustar00rootroot00000000000000name: CI Tests on: - push - pull_request jobs: build-and-test: strategy: fail-fast: false matrix: os: - ubuntu-18.04 - ubuntu-20.04 compiler: - gcc - clang tracing: - LIBRAFT_TRACE=1 - NOLIBRAFT_TRACE=1 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - name: Setup dependencies run: | sudo apt-get update -qq sudo apt-get install -qq lcov linux-libc-dev liblz4-dev libuv1-dev btrfs-progs xfsprogs zfsutils-linux - name: Build env: CC: ${{ matrix.compiler }} run: | git clone --depth 1 https://github.com/edlund/amalgamate.git export PATH=$PATH:$PWD/amalgamate autoreconf -i ./configure --enable-example --enable-debug --enable-code-coverage --enable-sanitize amalgamate.py --config=amalgamation.json --source=$(pwd) $CC raft.c -c -D_GNU_SOURCE -DHAVE_LINUX_AIO_ABI_H -Wall -Wextra -Wpedantic 
-fpic - name: Test env: CC: ${{ matrix.compiler }} run: | export ${{ matrix.tracing }} ./test/lib/fs.sh setup make check $(./test/lib/fs.sh detect) || (cat ./test-suite.log && false) ./test/lib/fs.sh teardown - name: Coverage env: CC: ${{ matrix.compiler }} run: if [ "${CC}" = "gcc" ]; then make code-coverage-capture; fi - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: verbose: true build-and-test-nolz4: strategy: fail-fast: false matrix: os: - ubuntu-18.04 - ubuntu-20.04 compiler: - gcc - clang runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - name: Setup dependencies run: | sudo apt-get update -qq sudo apt-get install -qq lcov linux-libc-dev libuv1-dev btrfs-progs xfsprogs zfsutils-linux - name: Build env: CC: ${{ matrix.compiler }} run: | git clone --depth 1 https://github.com/edlund/amalgamate.git export PATH=$PATH:$PWD/amalgamate autoreconf -i ./configure --enable-example --enable-debug --enable-code-coverage --enable-sanitize --disable-lz4 amalgamate.py --config=amalgamation.json --source=$(pwd) $CC raft.c -c -D_GNU_SOURCE -DHAVE_LINUX_AIO_ABI_H -Wall -Wextra -Wpedantic -fpic - name: Test env: CC: ${{ matrix.compiler }} run: | ./test/lib/fs.sh setup make check CFLAGS=-O0 $(./test/lib/fs.sh detect) || (cat ./test-suite.log && false) ./test/lib/fs.sh teardown - name: Coverage env: CC: ${{ matrix.compiler }} run: if [ "${CC}" = "gcc" ]; then make code-coverage-capture; fi - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: verbose: true build-nolz4-fail: runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 - name: Setup dependencies run: | sudo apt-get update -qq sudo apt-get install -qq lcov linux-libc-dev libuv1-dev btrfs-progs xfsprogs zfsutils-linux # Expect the configure step to fail - name: Build env: CC: gcc run: | autoreconf -i ! 
./configure --enable-example --enable-debug --enable-code-coverage --enable-sanitize raft-0.11.3/.github/workflows/cla-check.yml000066400000000000000000000002711415614527300204720ustar00rootroot00000000000000name: Canonical CLA on: - pull_request jobs: cla-check: runs-on: ubuntu-20.04 steps: - name: Check if CLA signed uses: canonical/has-signed-canonical-cla@v1 raft-0.11.3/.github/workflows/coverity.yml000066400000000000000000000027511415614527300205310ustar00rootroot00000000000000name: Coverity on: push: branches: - master jobs: test: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v2 - name: Download Coverity Build Tool run: | wget -q https://scan.coverity.com/download/cxx/linux64 --post-data "token=$TOKEN&project=canonical/raft" -O cov-analysis-linux64.tar.gz mkdir cov-analysis-linux64 tar xzf cov-analysis-linux64.tar.gz --strip 1 -C cov-analysis-linux64 env: TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} - name: Install dependencies run: | sudo apt-get update -qq sudo apt-get install -qq lcov linux-libc-dev liblz4-dev libuv1-dev btrfs-progs xfsprogs zfsutils-linux - name: Run coverity run: | export PATH="$(pwd)/cov-analysis-linux64/bin:${PATH}" # Configure autoreconf -i mkdir build cd build ../configure # Build cov-build --dir cov-int make -j4 tar czvf raft.tgz cov-int # Submit the results curl \ --form project=canonical/raft \ --form token=${TOKEN} \ --form email=mathieu.bordere@canonical.com \ --form file=@raft.tgz \ --form version=master \ --form description="${GITHUB_SHA}" \ https://scan.coverity.com/builds?project=canonical/raft env: TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} raft-0.11.3/.github/workflows/packages.yml000066400000000000000000000024471415614527300204450ustar00rootroot00000000000000name: Build PPA source packages on: - push jobs: build: strategy: fail-fast: false matrix: target: - bionic - focal - impish runs-on: ubuntu-20.04 steps: - name: Clone the repositories run: | git clone https://github.com/canonical/raft git 
clone https://github.com/canonical/dqlite-ppa -b raft --depth 1 - name: Setup dependencies run: | sudo apt-get update -qq sudo apt-get install -qq debhelper devscripts - name: Build source package env: DEBFULLNAME: "Github Actions" DEBEMAIL: "noreply@linuxcontainers.org" TARGET: ${{ matrix.target }} run: | cp -R dqlite-ppa/debian raft/ cd raft/ VERSION="$(git describe --tags | sed -e "s/^v//" -e "s/-/+git/")" dch --create \ --distribution ${TARGET} \ --package raft \ --newversion ${VERSION}~${TARGET}1 \ "Automatic build from Github" debuild -S -sa -us -uc -d - name: Upload artifacts uses: actions/upload-artifact@v2 with: name: debian-${{ matrix.target }} if-no-files-found: error path: | *.buildinfo *.changes *.dsc *.tar.* raft-0.11.3/.gitignore000066400000000000000000000006771415614527300145420ustar00rootroot00000000000000*.o *.gcno *.gcda *~ Makefile.in aclocal.m4 aminclude_static.am autom4te.cache/ config.h.in configure Makefile config.h config.log config.status libtool raft.pc stamp-h1 *.lo *.la .dirstamp .deps/ .libs/ test/unit/core test/unit/uv test/integration/core test/integration/uv test/fuzzy/core test/*/*.log test/*/*.trs os-test* test-suite.log coverage/ coverage.info TAGS example/server example/cluster benchmark/os-disk-write tmp conftest* doc/buildraft-0.11.3/.travis.yml000066400000000000000000000020561415614527300146540ustar00rootroot00000000000000language: c addons: apt: packages: - lcov - linux-libc-dev - libuv1-dev - liblz4-dev - btrfs-progs - xfsprogs - zfsutils-linux jobs: include: - if: type != pull_request compiler: gcc dist: bionic arch: s390x - if: type == pull_request compiler: gcc dist: bionic arch: arm64 - if: type != pull_request compiler: clang dist: bionic arch: ppc64le before_script: - git clone --depth 1 https://github.com/edlund/amalgamate.git - export PATH=$PATH:$PWD/amalgamate script: - autoreconf -i - | if [ $TRAVIS_CPU_ARCH = "s390x" ] || [ $TRAVIS_CPU_ARCH = "arm64" ]; then ./configure --enable-example --enable-debug else 
./configure --enable-example --enable-debug --enable-sanitize fi - amalgamate.py --config=amalgamation.json --source=$(pwd) - $CC raft.c -c -D_GNU_SOURCE -DHAVE_LINUX_AIO_ABI_H -Wall -Wextra -Wpedantic -fpic - ./test/lib/fs.sh setup - make check $(./test/lib/fs.sh detect) || (cat ./test-suite.log && false) - ./test/lib/fs.sh teardown raft-0.11.3/AUTHORS000066400000000000000000000003601415614527300136070ustar00rootroot00000000000000Unless mentioned otherwise in a specific file's header, all code in this project is released under the LGPL v3 license. The list of authors and contributors can be retrieved from the git commit history and in some cases, the file headers. raft-0.11.3/LICENSE000066400000000000000000000216111415614527300135460ustar00rootroot00000000000000All files in this repository are licensed as follows. If you contribute to this repository, it is assumed that you license your contribution under the same license unless you state otherwise. All files Copyright (C) 2019 Canonical Ltd. unless otherwise specified in the file. This software is licensed under the LGPLv3, included below. As a special exception to the GNU Lesser General Public License version 3 ("LGPL3"), the copyright holders of this Library give you permission to convey to a third party a Combined Work that links statically or dynamically to this Library without providing any Minimal Corresponding Source or Minimal Application Code as set out in 4d or providing the installation information set out in section 4e, provided that you comply with the other provisions of LGPL3 and provided that you meet, for the Application the terms and conditions of the license(s) which apply to the Application. Except as stated in this special exception, the provisions of LGPL3 will continue to comply in full to this Library. If you modify this Library, you may apply this exception to your version of this Library, but you are not obliged to do so. 
If you do not wish to do so, delete this exception statement from your version. This exception does not (and cannot) modify any license terms which apply to the Application, with which you must still comply. SPDX-License-Identifier: LGPL-3.0-only WITH LGPL-3.0-linking-exception GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. 
The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. 
You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. 
If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. 
If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. raft-0.11.3/Makefile.am000066400000000000000000000146051415614527300146020ustar00rootroot00000000000000ACLOCAL_AMFLAGS = -I m4 AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) include_HEADERS = include/raft.h raftincludedir = $(includedir)/raft raftinclude_HEADERS = lib_LTLIBRARIES = libraft.la libraft_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden libraft_la_LDFLAGS = -version-info 0:7:0 libraft_la_SOURCES = \ src/byte.c \ src/client.c \ src/compress.c \ src/configuration.c \ src/convert.c \ src/election.c \ src/entry.c \ src/err.c \ src/heap.c \ src/log.c \ src/membership.c \ src/progress.c \ src/raft.c \ src/recv.c \ src/recv_append_entries.c \ src/recv_append_entries_result.c \ src/recv_request_vote.c \ src/recv_request_vote_result.c \ src/recv_install_snapshot.c \ src/recv_timeout_now.c \ src/replication.c \ src/snapshot.c \ src/start.c \ src/state.c \ src/syscall.c \ src/tick.c \ src/tracing.c bin_PROGRAMS = check_PROGRAMS = \ test/unit/core TESTS = $(check_PROGRAMS) check_LTLIBRARIES = libtest.la libtest_la_CFLAGS = $(AM_CFLAGS) -DMUNIT_TEST_NAME_LEN=60 -Wno-unused-result -Wno-conversion libtest_la_SOURCES = \ test/lib/fault.c \ test/lib/fsm.c \ test/lib/heap.c \ test/lib/munit.c \ test/lib/tracer.c \ test/lib/tcp.c test_unit_core_SOURCES = \ src/byte.c \ src/compress.c \ src/configuration.c \ src/err.c \ src/heap.c \ src/log.c \ test/unit/main_core.c \ test/unit/test_byte.c \ test/unit/test_compress.c \ test/unit/test_configuration.c \ test/unit/test_err.c \ test/unit/test_log.c \ test/unit/test_queue.c test_unit_core_CFLAGS = $(AM_CFLAGS) -Wno-conversion test_unit_core_LDADD = libtest.la if LZ4_AVAILABLE test_unit_core_CFLAGS += -DLZ4_AVAILABLE test_unit_core_LDFLAGS = $(LZ4_LIBS) 
libraft_la_CFLAGS += -DLZ4_AVAILABLE libraft_la_LDFLAGS += $(LZ4_LIBS) endif # LZ4_AVAILABLE if LZ4_ENABLED test_unit_core_CFLAGS += -DLZ4_ENABLED libraft_la_CFLAGS += -DLZ4_ENABLED endif # LZ4_ENABLED if FIXTURE_ENABLED libraft_la_SOURCES += src/fixture.c raftinclude_HEADERS += include/raft/fixture.h check_PROGRAMS += \ test/integration/core \ test/fuzzy/core libtest_la_SOURCES += \ test/lib/cluster.c test_integration_core_SOURCES = \ test/integration/main_core.c \ test/integration/test_apply.c \ test/integration/test_assign.c \ test/integration/test_barrier.c \ test/integration/test_bootstrap.c \ test/integration/test_digest.c \ test/integration/test_election.c \ test/integration/test_fixture.c \ test/integration/test_heap.c \ test/integration/test_membership.c \ test/integration/test_recover.c \ test/integration/test_replication.c \ test/integration/test_snapshot.c \ test/integration/test_strerror.c \ test/integration/test_tick.c \ test/integration/test_transfer.c \ test/integration/test_start.c test_integration_core_CFLAGS = $(AM_CFLAGS) -Wno-conversion test_integration_core_LDFLAGS = -no-install test_integration_core_LDADD = libtest.la libraft.la test_fuzzy_core_SOURCES = \ test/fuzzy/main_core.c \ test/fuzzy/test_election.c \ test/fuzzy/test_liveness.c \ test/fuzzy/test_membership.c \ test/fuzzy/test_replication.c test_fuzzy_core_CFLAGS = $(AM_CFLAGS) -Wno-conversion test_fuzzy_core_LDFLAGS = -no-install test_fuzzy_core_LDADD = libtest.la libraft.la endif # FIXTURE_ENABLED if UV_ENABLED libraft_la_SOURCES += \ src/uv.c \ src/uv_append.c \ src/uv_encoding.c \ src/uv_finalize.c \ src/uv_fs.c \ src/uv_ip.c \ src/uv_list.c \ src/uv_metadata.c \ src/uv_os.c \ src/uv_prepare.c \ src/uv_recv.c \ src/uv_segment.c \ src/uv_send.c \ src/uv_snapshot.c \ src/uv_tcp.c \ src/uv_tcp_listen.c \ src/uv_tcp_connect.c \ src/uv_truncate.c \ src/uv_writer.c libraft_la_LDFLAGS += $(UV_LIBS) raftinclude_HEADERS += include/raft/uv.h check_PROGRAMS += \ test/unit/uv \ 
test/integration/uv libtest_la_SOURCES += \ test/lib/aio.c \ test/lib/dir.c \ test/lib/tcp.c \ test/lib/loop.c test_unit_uv_SOURCES = \ src/err.c \ src/heap.c \ src/syscall.c \ src/tracing.c \ src/uv_fs.c \ src/uv_os.c \ src/uv_writer.c \ test/unit/main_uv.c \ test/unit/test_uv_fs.c \ test/unit/test_uv_writer.c test_unit_uv_LDFLAGS = $(UV_LIBS) test_unit_uv_CFLAGS = $(AM_CFLAGS) -Wno-conversion test_unit_uv_LDADD = libtest.la # The integration/uv test is not linked to libraft, but built # directly against the libraft sources in order to test some # non-visible, non-API functions. test_integration_uv_SOURCES = \ ${libraft_la_SOURCES} \ test/integration/main_uv.c \ test/integration/test_uv_init.c \ test/integration/test_uv_append.c \ test/integration/test_uv_bootstrap.c \ test/integration/test_uv_load.c \ test/integration/test_uv_recover.c \ test/integration/test_uv_recv.c \ test/integration/test_uv_send.c \ test/integration/test_uv_set_term.c \ test/integration/test_uv_tcp_connect.c \ test/integration/test_uv_tcp_listen.c \ test/integration/test_uv_snapshot_put.c \ test/integration/test_uv_truncate.c test_integration_uv_CFLAGS = $(AM_CFLAGS) -Wno-type-limits -Wno-conversion test_integration_uv_LDFLAGS = -no-install $(UV_LIBS) test_integration_uv_LDADD = libtest.la AM_CFLAGS += $(UV_CFLAGS) if LZ4_AVAILABLE test_integration_uv_CFLAGS += -DLZ4_AVAILABLE test_integration_uv_LDFLAGS += $(LZ4_LIBS) endif # LZ4_AVAILABLE if LZ4_ENABLED test_integration_uv_CFLAGS += -DLZ4_ENABLED endif # LZ4_ENABLED endif # UV_ENABLED if EXAMPLE_ENABLED bin_PROGRAMS += \ example/server \ example/cluster example_server_SOURCES = example/server.c example_server_LDFLAGS = -no-install $(UV_LIBS) example_server_LDADD = libraft.la example_cluster_SOURCES = example/cluster.c endif # EXAMPLE_ENABLED if BENCHMARK_ENABLED bin_PROGRAMS += \ benchmark/os-disk-write benchmark_os_disk_write_SOURCES = benchmark/os_disk_write.c benchmark_os_disk_write_LDFLAGS = -luring endif # BENCHMARK_ENABLED if 
DEBUG_ENABLED AM_CFLAGS += -Werror -Wall else AM_CFLAGS += -DNDEBUG endif if SANITIZE_ENABLED AM_CFLAGS += -fsanitize=address endif if CODE_COVERAGE_ENABLED include $(top_srcdir)/aminclude_static.am CODE_COVERAGE_DIRECTORY=./src CODE_COVERAGE_OUTPUT_DIRECTORY=coverage CODE_COVERAGE_OUTPUT_FILE=coverage.info CODE_COVERAGE_IGNORE_PATTERN="/usr/include/*" CODE_COVERAGE_BRANCH_COVERAGE=1 CODE_COVERAGE_LCOV_OPTIONS=$(CODE_COVERAGE_LCOV_OPTIONS_DEFAULT) --rc lcov_excl_br_line="assert\(" clean-local: code-coverage-clean distclean-local: code-coverage-dist-clean endif # CODE_COVERAGE_ENABLED pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = @PACKAGE_NAME@.pc raft-0.11.3/README.md000066400000000000000000000145361415614527300140300ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/canonical/raft.png)](https://travis-ci.org/canonical/raft) [![codecov](https://codecov.io/gh/canonical/raft/branch/master/graph/badge.svg)](https://codecov.io/gh/canonical/raft) [![Documentation Status](https://readthedocs.org/projects/raft/badge/?version=latest)](https://raft.readthedocs.io/en/latest/?badge=latest) Fully asynchronous C implementation of the Raft consensus protocol. The library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking (send/receive RPC messages) and disk persistence (store log entries and snapshots). A stock implementation of the I/O interface is provided when building the library with default options. It is based on [libuv](http://libuv.org) and should fit the vast majority of use cases. The only catch is that it currently requires Linux, since it uses the Linux [AIO](http://man7.org/linux/man-pages/man2/io_submit.2.html) API for disk I/O. Patches are welcome to add support for more platforms. See [raft.h](https://github.com/canonical/raft/blob/master/include/raft.h) for full documentation. 
License ------- This raft C library is released under a slightly modified version of LGPLv3, that includes a copyright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full [license](https://github.com/canonical/raft/blob/LICENSE) text. Features -------- This implementation includes all the basic features described in the Raft dissertation: - Leader election - Log replication - Log compaction - Membership changes It also includes a few optional enhancements: - Optimistic pipelining to reduce log replication latency - Writing to leader's disk in parallel - Automatic stepping down when the leader loses quorum - Leadership transfer extension - Pre-vote protocol Install ------- If you are on a Debian-based system, you can get the latest development release from dqlite's [dev PPA](https://launchpad.net/~dqlite/+archive/ubuntu/dev): ``` sudo add-apt-repository ppa:dqlite/dev sudo apt-get update sudo apt-get install libraft-dev ``` Building -------- To build ``libraft`` from source you'll need: * A reasonably recent version of [libuv](https://libuv.org/) (v1.18.0 or beyond). * Optionally, but recommended, a reasonably recent version of [liblz4](https://lz4.github.io/lz4/) (v1.7.1 or beyond). ```bash sudo apt-get install libuv1-dev liblz4-dev autoreconf -i ./configure --enable-example make ``` Example ------- The best way to understand how to use the library is probably reading the code of the [example server](https://github.com/canonical/raft/blob/master/example/server.c) included in the source code. You can also see the example server in action by running: ```bash ./example/cluster ``` which spawns a little cluster of 3 servers, runs a sample workload, and randomly stops and restarts a server from time to time. 
Quick guide ----------- It is recommended that you read [raft.h](https://github.com/canonical/raft/blob/master/include/raft.h) for documentation details, but here's a quick high-level guide of what you'll need to do (error handling is omitted for brevity). Create an instance of the stock ```raft_io``` interface implementation (or implement your own one if the one that comes with the library really does not fit): ```C const char *dir = "/your/raft/data"; struct uv_loop_s loop; struct raft_uv_transport transport; struct raft_io io; uv_loop_init(&loop); raft_uv_tcp_init(&transport, &loop); raft_uv_init(&io, &loop, dir, &transport); ``` Define your application Raft FSM, implementing the ```raft_fsm``` interface: ```C struct raft_fsm { void *data; int (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result); int (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs); int (*restore)(struct raft_fsm *fsm, struct raft_buffer *buf); } ``` Pick a unique ID and address for each server and initialize the raft object: ```C unsigned id = 1; const char *address = "192.168.1.1:9999"; struct raft raft; raft_init(&raft, &io, &fsm, id, address); ``` If it's the first time you start the cluster, create a configuration object containing each server that should be present in the cluster (typically just one, since you can grow your cluster at a later point using ```raft_add``` and ```raft_promote```) and bootstrap: ```C struct raft_configuration configuration; raft_configuration_init(&configuration); raft_configuration_add(&configuration, 1, "192.168.1.1:9999", true); raft_bootstrap(&raft, &configuration); ``` Start the raft server: ```C raft_start(&raft); uv_run(&loop, UV_RUN_DEFAULT); ``` Asynchronously submit requests to apply new commands to your application FSM: ```C static void apply_callback(struct raft_apply *req, int status, void *result) { /* ... 
*/ } struct raft_apply req; struct raft_buffer buf; buf.len = ...; /* The length of your FSM entry data */ buf.base = ...; /* Your FSM entry data */ raft_apply(&raft, &req, &buf, 1, apply_callback); ``` To add more servers to the cluster use the ```raft_add()``` and ```raft_promote``` APIs. Usage Notes ----------- The default [libuv](http://libuv.org) based ```raft_io``` implementation compresses the raft snapshots using the ```liblz4``` library. Next to saving disk space, the lz4 compressed snapshots offer additional data integrity checks in the form of a [Content Checksum](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md), this allows raft to detect corruptions that occurred during storage. It is therefore recommended to not disable lz4 compression by means of the ```--disable-lz4``` configure flag. Detailed tracing will be enabled when the environment variable `LIBRAFT_TRACE` is set upon startup. Notable users ------------- - [dqlite](https://github.com/canonical/dqlite) Credits ------- Of course the biggest thanks goes to Diego Ongaro :) (the original author of the Raft dissertation) A lot of ideas and inspiration was taken from other Raft implementations such as: - CoreOS' Go implementation for [etcd](https://github.com/etcd-io/etcd/tree/master/raft) - Hashicorp's Go [raft](https://github.com/hashicorp/raft) - Willem's [C implementation](https://github.com/willemt/raft) - LogCabin's [C++ implementation](https://github.com/logcabin/logcabin) raft-0.11.3/ac/000077500000000000000000000000001415614527300131235ustar00rootroot00000000000000raft-0.11.3/ac/.gitignore000066400000000000000000000000161415614527300151100ustar00rootroot00000000000000* !.gitignore raft-0.11.3/amalgamation.json000066400000000000000000000017361415614527300160740ustar00rootroot00000000000000{ "target": "raft.c", "sources": [ "src/byte.c", "src/client.c", "src/configuration.c", "src/convert.c", "src/election.c", "src/entry.c", "src/err.c", "src/fixture.c", "src/heap.c", "src/log.c", 
"src/membership.c", "src/progress.c", "src/raft.c", "src/recv.c", "src/recv_append_entries.c", "src/recv_append_entries_result.c", "src/recv_install_snapshot.c", "src/recv_request_vote.c", "src/recv_request_vote_result.c", "src/replication.c", "src/snapshot.c", "src/start.c", "src/state.c", "src/syscall.c", "src/tick.c", "src/tracing.c", "src/uv.c", "src/uv_append.c", "src/uv_encoding.c", "src/uv_finalize.c", "src/uv_fs.c", "src/uv_ip.c", "src/uv_list.c", "src/uv_metadata.c", "src/uv_os.c", "src/uv_prepare.c", "src/uv_recv.c", "src/uv_segment.c", "src/uv_send.c", "src/uv_snapshot.c", "src/uv_tcp.c", "src/uv_tcp_connect.c", "src/uv_tcp_listen.c", "src/uv_truncate.c", "src/uv_writer.c" ], "include_paths": [ "include" ] } raft-0.11.3/benchmark/000077500000000000000000000000001415614527300144725ustar00rootroot00000000000000raft-0.11.3/benchmark/os_disk_write.c000066400000000000000000000253421415614527300175110ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include #include #include static char doc[] = "Benchmark operating system disk write performance"; /* Minimum buffer size to benchmark. */ #define MIN_BUF_SIZE 64 /* Maximum buffer size to benchmark. */ #define MAX_BUF_SIZE 4096 /* Minimum physical block size for direct I/O that we expect to detect. */ #define MIN_BLOCK_SIZE 512 /* Maximum physical block size for direct I/O that we expect to detect. 
*/ #define MAX_BLOCK_SIZE 4096 /* Engines */ #define PWRITEV2 0 #define URING 1 /* Modes */ #define BUFFERED 0 #define DIRECT 1 static const char *engines[] = {[PWRITEV2] = "pwritev2", [URING] = "uring", NULL}; static const char *modes[] = {[BUFFERED] = "buffered", [DIRECT] = "direct", NULL}; /* Order of fields: {NAME, KEY, ARG, FLAGS, DOC, GROUP}.*/ static struct argp_option options[] = { {"dir", 'd', "DIR", 0, "Directory to use for temp files (default /tmp)", 0}, {"buf", 'b', "BUF", 0, "Write buffer size (default st_blksize)", 0}, {"writes", 'n', "N", 0, "Number of writes to perform (default 1024)", 0}, {"engine", 'e', "ENGINE", 0, "I/O engine to use (default all)", 0}, {"mode", 'm', "MODE", 0, "Use 'buffered' or 'direct' I/O", 0}, {0}}; struct arguments { char *dir; int buf; int n; int engine; int mode; }; static int engineCode(const char *engine) { int i = 0; while (engines[i] != NULL) { if (strcmp(engines[i], engine) == 0) { return i; } i++; } return -1; } static int modeCode(const char *mode) { int i = 0; while (modes[i] != NULL) { if (strcmp(modes[i], mode) == 0) { return i; } i++; } return -1; } static error_t argumentsParse(int key, char *arg, struct argp_state *state) { struct arguments *arguments = state->input; switch (key) { case 'd': arguments->dir = arg; break; case 'b': arguments->buf = atoi(arg); break; case 'n': arguments->n = atoi(arg); break; case 'e': arguments->engine = engineCode(arg); if (arguments->engine == -1) { return ARGP_ERR_UNKNOWN; } break; case 'm': arguments->mode = modeCode(arg); if (arguments->mode == -1) { return ARGP_ERR_UNKNOWN; } break; default: return ARGP_ERR_UNKNOWN; } return 0; } static char *makeTempFileTemplate(const char *dir) { char *path; path = malloc(strlen(dir) + strlen("/bench-XXXXXX") + 1); assert(path != NULL); sprintf(path, "%s/bench-XXXXXX", dir); return path; } static int createTempFile(const char *dir, int size, char **path, int *fd) { int dirfd; int rv; *path = makeTempFileTemplate(dir); *fd = 
mkstemp(*path); if (*fd == -1) { printf("mstemp '%s': %s\n", *path, strerror(errno)); return -1; } rv = posix_fallocate(*fd, 0, size); if (rv != 0) { errno = rv; printf("posix_fallocate: %s\n", strerror(errno)); return -1; } /* Sync the file and its directory. */ rv = fsync(*fd); assert(rv == 0); dirfd = open(dir, O_RDONLY | O_DIRECTORY); assert(dirfd != -1); rv = fsync(dirfd); assert(rv == 0); close(dirfd); return 0; } /* Allocate a buffer of the given size. */ static void allocBuffer(struct iovec *iov, int size) { iov->iov_len = size; iov->iov_base = aligned_alloc(iov->iov_len, iov->iov_len); assert(iov->iov_base != NULL); } static void setDirectIO(int fd) { int flags; /* Current fcntl flags */ int rv; flags = fcntl(fd, F_GETFL); rv = fcntl(fd, F_SETFL, flags | O_DIRECT); assert(rv == 0); } /* Detect all suitable block size we can use to write to the underlying device * using direct I/O. */ static int detectSuitableBlockSizesForDirectIO(const char *dir, int **block_size, int *n_block_size) { char *path; int fd; int size; int rv; rv = createTempFile(dir, MAX_BLOCK_SIZE, &path, &fd); if (rv != 0) { unlink(path); return -1; } setDirectIO(fd); *block_size = NULL; *n_block_size = 0; for (size = MIN_BLOCK_SIZE; size <= MAX_BLOCK_SIZE; size *= 2) { struct iovec iov; allocBuffer(&iov, size); rv = pwritev2(fd, &iov, 1, 0, RWF_DSYNC | RWF_HIPRI); free(iov.iov_base); if (rv == -1) { assert(errno == EINVAL); continue; /* Try with a bigger buffer size */ } assert(rv == size); *n_block_size += 1; *block_size = realloc(*block_size, *n_block_size * sizeof **block_size); assert(*block_size != NULL); (*block_size)[*n_block_size - 1] = size; } close(fd); unlink(path); return 0; } /* Save current time in 'time'. */ static void timeNow(struct timespec *time) { int rv; rv = clock_gettime(CLOCK_MONOTONIC, time); assert(rv == 0); } /* Calculate how much time has elapsed since 'start', in microseconds. 
*/ static int timeSince(struct timespec *start) { struct timespec now; long nsecs; timeNow(&now); if (start->tv_sec == now.tv_sec) { nsecs = now.tv_nsec - start->tv_nsec; } else { nsecs = (now.tv_sec - start->tv_sec) * 1000 * 1000 * 1000 - start->tv_nsec + now.tv_nsec; } return nsecs / 1000; } static int writeWithPwriteV2(int fd, struct iovec *iov, int i) { int rv; rv = pwritev2(fd, iov, 1, i * iov->iov_len, RWF_DSYNC | RWF_HIPRI); if (rv == -1) { perror("pwritev2"); return -1; } assert(rv == (int)iov->iov_len); return 0; } static struct io_uring uring; static void initUring(int fd, struct iovec *iov) { int rv; rv = io_uring_queue_init(4, &uring, 0); assert(rv == 0); rv = io_uring_register_files(&uring, &fd, 1); assert(rv == 0); rv = io_uring_register_buffers(&uring, iov, 1); assert(rv == 0); } static int writeWithUring(int fd, struct iovec *iov, int i) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int rv; if (i == 0) { initUring(fd, iov); } sqe = io_uring_get_sqe(&uring); io_uring_prep_write_fixed(sqe, 0, iov->iov_base, iov->iov_len, i * iov->iov_len, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); sqe->rw_flags = RWF_DSYNC; rv = io_uring_submit(&uring); assert(rv == 1); io_uring_wait_cqe(&uring, &cqe); if (cqe->res < 0) { printf("sqe failed: %s\n", strerror(-cqe->res)); return -1; } assert(cqe->res == (int)iov->iov_len); io_uring_cqe_seen(&uring, cqe); return 0; } /* Benchmark the performance of a single disk write. 
*/ int benchmarkWritePerformance(const char *dir, int buf, int n, int engine, int mode) { char *path; int fd; struct iovec iov; struct timespec start; int i; int rv; rv = createTempFile(dir, n * buf, &path, &fd); if (rv != 0) { unlink(path); return -1; } allocBuffer(&iov, buf); if (mode == DIRECT) { setDirectIO(fd); } timeNow(&start); for (i = 0; i < n; i++) { switch (engine) { case PWRITEV2: rv = writeWithPwriteV2(fd, &iov, i); break; case URING: rv = writeWithUring(fd, &iov, i); break; default: assert(0); } } if (rv != 0) { return -1; } printf("%-8s: %8s writes of %4d bytes take %4d microsecs on average\n", engines[engine], modes[mode], buf, timeSince(&start) / n); if (engine == URING) { io_uring_queue_exit(&uring); } close(fd); unlink(path); free(path); return 0; } static bool isSuitableBufSizeForDirectIO(int *block_size, int n_block_size, int buf) { int i; for (i = 0; i < n_block_size; i++) { if (block_size[i] == buf) { return true; } } return false; } int main(int argc, char *argv[]) { struct argp argp = {options, argumentsParse, NULL, doc, 0, 0, 0}; struct arguments arguments; struct stat st; int engine; int mode; int buf; int *block_size; int n_block_size; int i; int rv; arguments.dir = "/tmp"; arguments.buf = -1; arguments.n = 1024; arguments.engine = -1; arguments.mode = -1; argp_parse(&argp, argc, argv, 0, 0, &arguments); rv = stat(arguments.dir, &st); if (rv != 0) { printf("stat '%s': %s\n", arguments.dir, strerror(errno)); return -1; } rv = detectSuitableBlockSizesForDirectIO(arguments.dir, &block_size, &n_block_size); if (rv != 0) { return rv; } if (arguments.buf != -1) { if (arguments.mode == -1 || arguments.mode == DIRECT) { if (!isSuitableBufSizeForDirectIO(block_size, n_block_size, arguments.buf)) { printf("suitable buffer sizes for direct I/O:"); for (i = 0; i < n_block_size; i++) { printf(" %4d", block_size[i]); } printf("\n"); return -1; } } } for (engine = PWRITEV2; engine <= URING; engine++) { if (arguments.engine != engine && arguments.engine 
!= -1) { continue; } for (mode = BUFFERED; mode <= DIRECT; mode++) { if (arguments.mode != mode && arguments.mode != -1) { continue; } for (buf = MIN_BUF_SIZE; buf <= MAX_BUF_SIZE; buf *= 2) { if (arguments.buf != buf && arguments.buf != -1) { continue; } if (mode == DIRECT) { if (!isSuitableBufSizeForDirectIO(block_size, n_block_size, buf)) { continue; } } rv = benchmarkWritePerformance(arguments.dir, buf, arguments.n, engine, mode); if (rv != 0) { return -1; } } } } return 0; } raft-0.11.3/configure.ac000066400000000000000000000134411415614527300150310ustar00rootroot00000000000000AC_PREREQ(2.60) AC_INIT([raft], [0.11.2]) AC_LANG([C]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([ac]) AM_INIT_AUTOMAKE([subdir-objects -Wall -Wno-portability foreign]) AM_SILENT_RULES([yes]) AC_USE_SYSTEM_EXTENSIONS # Defines _GNU_SOURCE and similar LT_INIT # The libuv raft_io implementation is built by default if libuv is found, unless # explicitly disabled. AC_ARG_ENABLE(uv, AS_HELP_STRING([--disable-uv], [do not build the libuv-based raft_io implementation])) AS_IF([test "x$enable_uv" != "xno"], [PKG_CHECK_MODULES(UV, [libuv >= 1.18.0], [have_uv=yes], [have_uv=no])], [have_uv=no]) AS_IF([test "x$enable_uv" = "xyes" -a "x$have_uv" = "xno"], [AC_MSG_ERROR([libuv required but not found])], []) AM_CONDITIONAL(UV_ENABLED, test "x$have_uv" = "xyes") # The libuv raft_io implementation is built by default to compress snapshots if liblz4 is found, unless # explicitly disabled. AC_ARG_ENABLE(lz4, AS_HELP_STRING([--disable-lz4], [do not use lz4 compression])) # Thanks to the OpenVPN configure.ac file for this part. # If this fails, we will do another test next. 
# We also add set LZ4_LIBS otherwise linker will not know about the lz4 library PKG_CHECK_MODULES(LZ4, [liblz4 >= 1.7.1], [have_lz4="yes"], [LZ4_LIBS="-llz4"]) if test "${have_lz4}" != "yes" ; then AC_CHECK_HEADERS([lz4.h], [have_lz4h="yes"], []) if test "${have_lz4h}" = "yes" ; then AC_MSG_CHECKING([additionally if system LZ4 version >= 1.7.1]) AC_COMPILE_IFELSE( [AC_LANG_PROGRAM([[ #include ]], [[ /* Version encoding: MMNNPP (Major miNor Patch) - see lz4.h for details */ #if LZ4_VERSION_NUMBER < 10701L #error LZ4 is too old #endif ]] )], [ AC_MSG_RESULT([ok]) have_lz4="yes" ], [ AC_MSG_RESULT([system LZ4 library is too old]) have_lz4="no" ] ) fi fi AS_IF([test "x$enable_lz4" != "xno" -a "x$have_lz4" != "xyes"], [AC_MSG_ERROR([liblz4 required but not found])], []) # LZ4 Can be available without being enabled, this allows a user to activate # it at a later stage through an API call. AM_CONDITIONAL(LZ4_AVAILABLE, test "x$have_lz4" = "xyes") # `LZ4_ENABLED` will cause the libuv snapshot implementation to use lz4 # compression by default. AM_CONDITIONAL(LZ4_ENABLED, test "x$enable_lz4" != "xno" -a "x$have_lz4" = "xyes") # The fake I/O implementation and associated fixture is built by default, unless # explicitly disabled. AC_ARG_ENABLE(fixture, AS_HELP_STRING([--disable-fixture], [do not build the raft_fixture test helper])) AM_CONDITIONAL(FIXTURE_ENABLED, test "x$enable_fixture" != "xno") # The example program is optional. AC_ARG_ENABLE(example, AS_HELP_STRING([--enable-example[=ARG]], [build the example program [default=no]])) AS_IF([test "x$enable_example" = "xyes" -a "x$have_uv" = "xno"], [AC_MSG_ERROR([example program requires libuv])], []) AM_CONDITIONAL(EXAMPLE_ENABLED, test "x$enable_example" = "xyes") # The benchmark programs are optional. AC_ARG_ENABLE(benchmark, AS_HELP_STRING([--enable-benchmark[=ARG]], [build the benchmark programs [default=no]])) AM_CONDITIONAL(BENCHMARK_ENABLED, test "x$enable_benchmark" = "xyes") # Whether to enable debugging code. 
AC_ARG_ENABLE(debug, AS_HELP_STRING([--enable-debug[=ARG]], [enable debugging [default=no]])) AM_CONDITIONAL(DEBUG_ENABLED, test "x$enable_debug" = "xyes") # Whether to enable memory sanitizer. AC_ARG_ENABLE(sanitize, AS_HELP_STRING([--enable-sanitize[=ARG]], [enable code sanitizers [default=no]])) AM_CONDITIONAL(SANITIZE_ENABLED, test x"$enable_sanitize" = x"yes") # Whether to enable code coverage. AX_CODE_COVERAGE # Checks for header files. AC_CHECK_HEADERS([stdlib.h string.h stdio.h assert.h unistd.h linux/io_uring.h linux/aio_abi.h]) # Check if zfs >= 0.8.0 is available (for direct I/O support). AC_CHECK_PROG(have_zfs, zfs, yes) AS_IF([test x"$have_zfs" = x"yes"], [AX_COMPARE_VERSION($(cat /sys/module/zfs/version | cut -f 1 -d -), [ge], [0.8.0], [AC_DEFINE(RAFT_HAVE_ZFS_WITH_DIRECT_IO)], []) ], []) # Checks for typedefs, structures, and compiler characteristics. AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_TYPE_UINT8_T AC_TYPE_UINT16_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T # Checks for library functions and definitions. AC_CHECK_DECLS([UV_FS_O_CREAT], [], [], [[#include ]]) # Enable large file support. This is mandatory in order to interoperate with # libuv, which enables large file support by default, making the size of 'off_t' # on 32-bit architecture be 8 bytes instead of the normal 4. 
AC_SYS_LARGEFILE CC_CHECK_FLAGS_APPEND([AM_CFLAGS],[CFLAGS],[ \ -std=c11 \ -g \ --mcet \ -fcf-protection \ --param=ssp-buffer-size=4 \ -pipe \ -fno-strict-aliasing \ -fdiagnostics-color \ -fexceptions \ -fstack-clash-protection \ -fstack-protector-strong \ -fasynchronous-unwind-tables \ -fdiagnostics-show-option \ -Wall \ -Wextra \ -Wpedantic \ -Wimplicit-fallthrough=5 \ -Wcast-align \ -Wstrict-prototypes \ -Wlogical-op \ -Wmissing-include-dirs \ -Wold-style-definition \ -Winit-self \ -Wfloat-equal \ -Wsuggest-attribute=noreturn \ -Wformat=2 \ -Wendif-labels \ -Wdate-time \ -Wnested-externs \ -Wconversion \ -Werror=implicit-function-declaration \ -Wunused-but-set-variable \ -Werror=return-type \ -Werror=incompatible-pointer-types \ -Wshadow \ -Werror=overflow \ -Werror=shift-count-overflow \ -Werror=shift-overflow=2 \ -Warray-bounds \ -Wrestrict \ -Wreturn-local-addr \ -Wstringop-overflow \ ]) AC_SUBST(AM_CFLAGS) CC_CHECK_FLAGS_APPEND([AM_LDFLAGS],[LDFLAGS],[ \ -z relro \ -z now \ -fstack-protector-strong \ --param=ssp-buffer-size=4 \ ]) AC_SUBST(AM_LDLAGS) AC_CONFIG_FILES([raft.pc Makefile]) AC_OUTPUT raft-0.11.3/docs/000077500000000000000000000000001415614527300134705ustar00rootroot00000000000000raft-0.11.3/docs/_themes/000077500000000000000000000000001415614527300151145ustar00rootroot00000000000000raft-0.11.3/docs/_themes/material/000077500000000000000000000000001415614527300167125ustar00rootroot00000000000000raft-0.11.3/docs/_themes/material/genindex.html000066400000000000000000000003211415614527300213750ustar00rootroot00000000000000{# basic/genindex.html ~~~~~~~~~~~~~~~~~~~ Template for an "all-in-one" index. :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. 
#} raft-0.11.3/docs/_themes/material/layout.html000066400000000000000000000115401415614527300211160ustar00rootroot00000000000000 {%- macro css() %} {%- for css in css_files %} {%- if css|attr("filename") %} {{ css_tag(css) }} {%- else %} {%- endif %} {%- endfor %} {%- endmacro %} {%- macro header() %}
{%- endmacro %} {%- macro navigation() %}
{%- endmacro %} {%- macro localtoc() %}
{%- endmacro %} {%- block htmltitle %} {{ title|striptags|e }}{{ titlesuffix }} {%- endblock %} {%- block css %} {{- css() }} {%- endblock %} {%- block header %}{{ header() }}{% endblock %}
{%- block navigation %}{{ navigation() }}{% endblock %} {%- block localtoc %}{{ localtoc() }}{% endblock %} {% block body %} {% endblock %}
raft-0.11.3/docs/_themes/material/page.html000066400000000000000000000002521415614527300205130ustar00rootroot00000000000000{%- extends "layout.html" %} {% block body %}
{{ body }}
{% endblock %} raft-0.11.3/docs/_themes/material/relations.html000066400000000000000000000011361415614527300216010ustar00rootroot00000000000000{# basic/relations.html ~~~~~~~~~~~~~~~~~~~~ Sphinx sidebar template: relation links. :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. #} {%- if prev %}

{{ _('Previous topic') }}

{{ prev.title }}

{%- endif %} {%- if next %}

{{ _('Next topic') }}

{{ next.title }}

{%- endif %} raft-0.11.3/docs/_themes/material/search.html000066400000000000000000000003071415614527300210450ustar00rootroot00000000000000{# basic/search.html ~~~~~~~~~~~~~~~~~ Template for the search page. :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. #} raft-0.11.3/docs/_themes/material/searchbox.html000066400000000000000000000014331415614527300215570ustar00rootroot00000000000000{# basic/searchbox.html ~~~~~~~~~~~~~~~~~~~~ Sphinx sidebar template: quick search box. :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. #} {%- if pagename != "search" and builder != "singlehtml" %} {%- endif %} raft-0.11.3/docs/_themes/material/sourcelink.html000066400000000000000000000010401415614527300217510ustar00rootroot00000000000000{# basic/sourcelink.html ~~~~~~~~~~~~~~~~~~~~~ Sphinx sidebar template: "show source" link. :copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. #} {%- if show_source and has_source and sourcename %}

{{ _('This Page') }}

{%- endif %} raft-0.11.3/docs/_themes/material/static/000077500000000000000000000000001415614527300202015ustar00rootroot00000000000000raft-0.11.3/docs/_themes/material/static/fonts.css_t000066400000000000000000000252361415614527300223770ustar00rootroot00000000000000/* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7qsDJT9g.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7jsDJT9g.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7rsDJT9g.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7ksDJT9g.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7osDJT9g.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source 
Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7psDJT9g.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7nsDI.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmhduz8A.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwkxduz8A.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmxduz8A.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), 
url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwlBduz8A.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmBduz8A.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmRduz8A.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwlxdu.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNa7lqDY.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qPK7lqDY.woff2) format('woff2'); 
unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNK7lqDY.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qO67lqDY.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qN67lqDY.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNq7lqDY.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro 
Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmhduz8A.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwkxduz8A.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmxduz8A.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlBduz8A.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmBduz8A.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmRduz8A.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; 
font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } @font-face { font-family: 'Material Icons'; font-style: normal; font-weight: 400; src: url(https://fonts.gstatic.com/s/materialicons/v48/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.woff2) format('woff2'); } .material-icons { font-family: 'Material Icons'; font-weight: normal; font-style: normal; font-size: 24px; line-height: 1; letter-spacing: normal; text-transform: none; display: inline-block; white-space: nowrap; word-wrap: normal; direction: ltr; -moz-font-feature-settings: 'liga'; -moz-osx-font-smoothing: grayscale; }raft-0.11.3/docs/_themes/material/static/material.css_t000066400000000000000000000420771415614527300230460ustar00rootroot00000000000000/* * material.css_t */ @import url("fonts.css"); html { -webkit-text-size-adjust: none; -moz-text-size-adjust: none; -ms-text-size-adjust: none; text-size-adjust: none; font-size: 137.5%; overflow-x: hidden; box-sizing: border-box; } *, ::after, ::before { box-sizing: inherit; } body, html { height: 100%; } body, input { color: rgba(0,0,0,.87); font-family: "Source Sans Pro","Helvetica Neue",Helvetica,Arial,sans-serif; font-feature-settings: "kern","liga"; } body { position: relative; font-size: .5rem; margin: 0; } a { color: inherit; text-decoration: none; } td, th { font-weight: 400; vertical-align: top; } input { border: 0; outline: 0; } .md-header, .md-hero { background-color: #3f51b5; } .md-header[data-md-state="shadow"] { -webkit-transition: background-color .25s,color .25s,box-shadow .25s; transition: background-color .25s,color .25s,box-shadow .25s; box-shadow: 0 0 .2rem rgba(0,0,0,.1),0 .2rem .4rem rgba(0,0,0,.2); } .md-header { position: 
fixed; top: 0; right: 0; left: 0; height: 2.4rem; color: #fff; z-index: 2; backface-visibility: hidden; } .md-header-nav { padding: 0 .2rem; } .md-header-nav__button { position: relative; -webkit-transition: opacity .25s; transition: opacity .25s; z-index: 1; } .md-header-nav__button.md-logo * { display: block; } .md-clipboard::before, .md-icon, .md-nav__button, .md-nav__link::after, .md-nav__title::before, .md-search-result__article--document::before, .md-source-file::before, .md-typeset .admonition > .admonition-title::before, .md-typeset .admonition > summary::before, .md-typeset .critic.comment::before, .md-typeset .footnote-backref, .md-typeset .task-list-control .task-list-indicator::before, .md-typeset details > .admonition-title::before, .md-typeset details > summary::before, .md-typeset summary::after { font-family: Material Icons; font-style: normal; font-variant: normal; font-weight: 400; line-height: 1; text-transform: none; white-space: nowrap; speak: none; word-wrap: normal; direction: ltr; } .md-content__icon, .md-footer-nav__button, .md-header-nav__button, .md-nav__button, .md-nav__title::before, .md-search-result__article--document::before { display: inline-block; margin: .2rem; padding: .4rem; font-size: 1.2rem; cursor: pointer; } .md-header-nav__title { padding: 0 1rem; font-size: .9rem; line-height: 2.4rem; } .md-header-nav__topic { display: block; position: absolute; -webkit-transition: opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); transition: opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); transition: transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s; transition: transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); text-overflow: ellipsis; white-space: nowrap; overflow: hidden; } .md-header-nav__button.md-icon--search { display: none; } .md-header-nav__button { position: relative; -webkit-transition: opacity .25s; transition: opacity .25s; z-index: 1; } 
.md-content__icon, .md-footer-nav__button, .md-header-nav__button, .md-nav__button, .md-nav__title::before, .md-search-result__article--document::before { display: inline-block; margin: .2rem; padding: .4rem; font-size: 1.2rem; cursor: pointer; } .md-search { padding: .2rem; } .md-search__overlay { position: fixed; top: 0; left: 0; width: 0; height: 0; -webkit-transition: width 0s .25s,height 0s .25s,opacity .25s; transition: width 0s .25s,height 0s .25s,opacity .25s; background-color: rgba(0,0,0,.54); cursor: pointer; } .md-search__inner { position: relative; width: 11.5rem; margin-right: .8rem; padding: .1rem 0; float: right; -webkit-transition: width .25s cubic-bezier(.1,.7,.1,1); transition: width .25s cubic-bezier(.1,.7,.1,1); } .md-search__inner { margin-right: 1.2rem; } .md-search__overlay { opacity: 0; z-index: 1; } .md-search__form, .md-search__input { border-radius: .1rem; } .md-search__form { position: relative; } .md-search__input { position: relative; padding: 0 2.2rem 0 3.6rem; text-overflow: ellipsis; z-index: 2; } .md-search__form, .md-search__input { border-radius: .1rem; } .md-search__input { width: 100%; height: 1.8rem; padding-left: 2.2rem; -webkit-transition: background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1); transition: background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1); background-color: rgba(0,0,0,.26); color: inherit; font-size: .8rem; } .md-search__input + .md-search__icon { color: inherit; } .md-search__icon[for="__search"] { top: .3rem; left: .5rem; } .md-search__icon { position: absolute; -webkit-transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; font-size: 1.2rem; cursor: pointer; z-index: 2; } .md-search__icon[type="reset"] { top: .3rem; right: .5rem; -webkit-transform: scale(.125); transform: scale(.125); -webkit-transition: opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); transition: 
opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); transition: transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s; transition: transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); opacity: 0; } .md-search__icon[for="__search"]::before { content: "\E8B6"; } .md-search__icon { position: absolute; -webkit-transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; font-size: 1.2rem; cursor: pointer; z-index: 2; } .md-search__output { top: 1.9rem; -webkit-transition: opacity .4s; transition: opacity .4s; opacity: 0; } .md-search__output { position: absolute; width: 100%; border-radius: 0 0 .1rem .1rem; overflow: hidden; z-index: 1; } .md-search__scrollwrap, [data-md-toggle="search"]:checked ~ .md-header .md-search__inner { width: 34.4rem; } .md-search__scrollwrap { max-height: 0; } .md-search__scrollwrap { height: 100%; background-color: #fff; box-shadow: inset 0 .05rem 0 rgba(0,0,0,.07); overflow-y: auto; -webkit-overflow-scrolling: touch; } .md-container { display: table; width: 100%; height: 100%; padding-top: 2.4rem; table-layout: fixed; } .md-container, .md-main { overflow: auto; } .md-main { display: table-row; height: 100%; } .md-main__inner { height: 100%; padding-top: 1.5rem; padding-bottom: .05rem; } .md-sidebar { position: absolute; width: 12.1rem; padding: 1.2rem 0; overflow: hidden; } .md-sidebar__scrollwrap { max-height: 100%; margin: 0 .2rem; overflow-y: auto; -webkit-backface-visibility: hidden; backface-visibility: hidden; } .md-nav { font-size: .7rem; line-height: 1.3; } .md-nav__title { display: block; padding: 0 .6rem; font-weight: 700; text-overflow: ellipsis; overflow: hidden; } .md-nav ul { margin: 0; padding: 0; list-style: none; } .md-nav ul li { padding: 0 .6rem; } .md-nav ul li a { display: block; margin-top: .625em; -webkit-transition: color .125s; transition: color .125s; text-overflow: ellipsis; cursor: pointer; 
overflow: hidden; } .md-nav ul li a.current { color: #fb8c00; } /* Hide sub-sub sections */ .md-nav--secondary ul ul ul { display: none; } /* Don't indent sub items */ .md-nav--secondary ul ul li { padding: 0; } /* Hide top-level local toc item */ .md-nav--secondary ul li a[href="#"] { display: none; } .md-sidebar--secondary { display: block; margin-left: 100%; -webkit-transform: translate(-100%); transform: translate(-100%); } .md-sidebar--secondary { margin-left: 70rem; } .md-content { margin-left: 12.1rem; } .md-content { margin-right: 12.1rem; } .md-content__inner { margin-right: 1.2rem; margin-left: 1.2rem; } .md-content__inner { margin: 0 .8rem 1.2rem; margin-right: 0.8rem; margin-left: 0.8rem; padding-top: .6rem; } #toc.section { display: none; } .md-typeset { font-size: .7rem; line-height: 1.4; -webkit-print-color-adjust: exact; } .md-typeset a { color: #3f51b5; word-break: break-word; } .md-typeset h1, .md-typeset h2 { font-weight: 300; letter-spacing: -.01em; } .md-typeset h1 { margin: 0 0 1rem; color: rgb(33,34,36); font-size: 1.5625rem; line-height: 1.3; font-weight: 500; } .md-typeset h2 { font-size: 1.2rem; border-bottom: 1px solid #eaecef; font-weight: 400; } .md-typeset h3 { margin: 1rem 0 .8rem; font-size: 0.9rem; font-weight: 400; letter-spacing: -.01em; line-height: 1.5; } .md-typeset blockquote, .md-typeset ol, .md-typeset p, .md-typeset ul { margin: 1em 0; margin-left: 0px; } .md-typeset blockquote, .md-typeset ol, .md-typeset p, .md-typeset ul { margin: 1em 0; } .md-typeset ul { list-style-type: disc; } .md-typeset ol, .md-typeset ul { margin-left: .625em; padding: 0; } .md-typeset ol li, .md-typeset ul li { margin-bottom: .5em; margin-left: 1.25em; } .md-typeset ol li p, .md-typeset ul li p { margin: 0; } .md-typeset .headerlink { color: rgba(0,0,0,.26); } .md-typeset .headerlink { display: inline-block; margin-left: .5rem; -webkit-transform: translateY(.25rem); transform: translateY(.25rem); -webkit-transition: color .25s,opacity .125s 
.25s,-webkit-transform .25s .25s; transition: color .25s,opacity .125s .25s,-webkit-transform .25s .25s; transition: transform .25s .25s,color .25s,opacity .125s .25s; transition: transform .25s .25s,color .25s,opacity .125s .25s,-webkit-transform .25s .25s; opacity: 0; } [dir=rtl] .md-typeset .headerlink { margin-right: .5rem; margin-left: 0 } .md-typeset h1 .headerlink:focus, .md-typeset h1:hover .headerlink, .md-typeset h1:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h1 .headerlink:focus, .md-typeset h1:hover .headerlink:hover, .md-typeset h1:target .headerlink { color: #536dfe } .md-typeset h2:before { display: block; margin-top: -0px; padding-top: 0px; content: "" } .md-typeset h2:target:before { margin-top: -3.4rem; padding-top: 3.4rem } .md-typeset h2 .headerlink:focus, .md-typeset h2:hover .headerlink, .md-typeset h2:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h2 .headerlink:focus, .md-typeset h2:hover .headerlink:hover, .md-typeset h2:target .headerlink { color: #536dfe } .md-typeset h3:before { display: block; margin-top: -9px; content: "" } .md-typeset h3:target:before { margin-top: -3.45rem; padding-top: 3.45rem } .md-typeset h3 .headerlink:focus, .md-typeset h3:hover .headerlink, .md-typeset h3:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h3 .headerlink:focus, .md-typeset h3:hover .headerlink:hover, .md-typeset h3:target .headerlink { color: #536dfe } .md-typeset h4:before { display: block; margin-top: -9px; padding-top: 9px; content: "" } .md-typeset h4:target:before { margin-top: -3.45rem; padding-top: 3.45rem } .md-typeset h4 .headerlink:focus, .md-typeset h4:hover .headerlink, .md-typeset h4:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h4 .headerlink:focus, .md-typeset h4:hover .headerlink:hover, .md-typeset 
h4:target .headerlink { color: #536dfe } .md-typeset h5:before { display: block; margin-top: -11px; padding-top: 11px; content: "" } .md-typeset h5:target:before { margin-top: -3.55rem; padding-top: 3.55rem } .md-typeset h5 .headerlink:focus, .md-typeset h5:hover .headerlink, .md-typeset h5:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h5 .headerlink:focus, .md-typeset h5:hover .headerlink:hover, .md-typeset h5:target .headerlink { color: #536dfe } .md-typeset h6:before { display: block; margin-top: -11px; padding-top: 11px; content: "" } .md-typeset h6:target:before { margin-top: -3.55rem; padding-top: 3.55rem } .md-typeset h6 .headerlink:focus, .md-typeset h6:hover .headerlink, .md-typeset h6:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h6 .headerlink:focus, .md-typeset h6:hover .headerlink:hover, .md-typeset h6:target .headerlink { color: #536dfe } .md-typeset .literal .pre { color: #a61717; } .md-typeset .literal { color: #37474f; line-height: 1.4; } .md-typeset code { word-break: break-word; } .md-typeset code { color: #37474f; font-size: 85%; direction: ltr; } .md-typeset code { margin: 0 .29412em; padding: .07353em 0; } .md-typeset code { position: relative; margin: 1em 0; padding: 0; border-radius: .1rem; background-color: hsla(0,0%,92.5%,.5); color: #37474f; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset code { margin: 0 .29412em; padding: .07353em 0; border-radius: .1rem; box-shadow: .29412em 0 0 hsla(0,0%,92.5%,.5),-.29412em 0 0 hsla(0,0%,92.5%,.5); word-break: break-word; -webkit-box-decoration-break: clone; box-decoration-break: clone; } .md-typeset .highlight code, .md-typeset .highlight pre { display: block; margin: 0; padding: .525rem .6rem; background-color: transparent; overflow: auto; vertical-align: top; } .md-typeset .highlighttable .highlight, .md-typeset .highlighttable .linenodiv { margin: 0; border-radius: 
0; } .md-typeset .highlight { position: relative; margin: 1em 0; padding: 0; border-radius: .1rem; background-color: hsla(0,0%,92.5%,.5); color: #37474f; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset .highlighttable .code { -webkit-box-flex: 1; flex: 1; overflow: hidden; } .md-typeset .highlighttable tbody, .md-typeset .highlighttable td { display: block; padding: 0; } .md-typeset pre { line-height: 1.4; } .md-typeset pre { position: relative; margin: 1em 0; border-radius: .1rem; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset code, .md-typeset pre { background-color: hsla(0,0%,92.5%,.5); color: #37474f; font-size: 85%; direction: ltr; } .md-typeset .highlighttable .linenos pre { margin: 0; padding: 0; background-color: transparent; color: inherit; text-align: right; } .md-typeset .highlighttable .linenos { background-color: rgba(0,0,0,.07); color: rgba(0,0,0,.26); -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; } .md-typeset .highlighttable tbody, .md-typeset .highlighttable td { display: block; padding: 0; } .md-typeset .highlighttable .linenodiv { padding: .525rem .6rem; } .md-typeset .highlighttable .linenodiv { margin: 0; border-radius: 0; } .md-typeset .highlighttable tr { display: -webkit-box; display: flex; } .md-typeset .highlight .s, .md-typeset .highlight .sb, .md-typeset .highlight .sc { color: #0d904f; } .md-typeset .highlight .na, .md-typeset .highlight .nb { color: #c2185b; } .md-typeset .highlight .cp { color: #666; } .md-typeset .highlight .k { color: #3b78e7; } .md-typeset .highlight .il, .md-typeset .highlight .m, .md-typeset .highlight .mf, .md-typeset .highlight .mh, .md-typeset .highlight .mi, .md-typeset .highlight .mo { color: #e74c3c; } .md-typeset .highlight .ne, .md-typeset .highlight .nf { color: #c2185b; } .md-typeset .highlight .kr, .md-typeset .highlight .kt { color: #3e61a2; } code, kbd, pre { font-family: "","Courier New",Courier,monospace; } code, 
kbd, pre { color: rgba(0,0,0,.87); font-feature-settings: "kern"; font-family: Courier New,Courier,monospace; } .md-grid { max-width: 70rem; margin-right: auto; margin-left: auto; } .md-flex { display: table; } .md-flex__cell { display: table-cell; position: relative; vertical-align: top; } .md-flex__cell--shrink { width: 0; } .md-flex__cell--stretch { display: table; width: 100%; table-layout: fixed; } .md-flex__ellipsis { display: table-cell; text-overflow: ellipsis; white-space: nowrap; overflow: hidden; } .md-typeset .function .sig-name, .md-typeset .type .sig-name, .md-typeset .member .sig-name { background-color: transparent; color: #222; font-weight: bold; box-shadow: none; } dd > p:first-child { margin-top: 0px; } raft-0.11.3/docs/_themes/material/theme.conf000066400000000000000000000000611415614527300206600ustar00rootroot00000000000000[theme] inherit = none stylesheet = material.css raft-0.11.3/docs/api.rst000066400000000000000000000001341415614527300147710ustar00rootroot00000000000000.. _api: API reference ============= .. toctree:: :maxdepth: 1 server fsm io raft-0.11.3/docs/build/000077500000000000000000000000001415614527300145675ustar00rootroot00000000000000raft-0.11.3/docs/build/.buildinfo000066400000000000000000000003461415614527300165460ustar00rootroot00000000000000# Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. config: 0d585cdd238672c6713aabbb411aa1e7 tags: 645f666f9bcd5a90fca523b33c5a78b7 raft-0.11.3/docs/build/.doctrees/000077500000000000000000000000001415614527300164555ustar00rootroot00000000000000raft-0.11.3/docs/build/.doctrees/api.doctree000066400000000000000000000057071415614527300206060ustar00rootroot00000000000000€•¼ Œdocutils.nodes”Œdocument”“”)”}”(Œ rawsource”Œ”Œchildren”]”(hŒtarget”“”)”}”(hŒ.. 
_api:”h]”Œ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œrefid”Œapi”uŒtagname”h Œline”KŒparent”hhhŒsource”Œ/srv/src/c/raft/doc/api.rst”ubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒ API reference”h]”hŒText”“”Œ API reference”…””}”(hh+hh)hhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hh$hhh h!hKubhŒcompound”“”)”}”(hhh]”Œsphinx.addnodes”Œtoctree”“”)”}”(hhh]”h}”(h]”h]”h]”h]”h]”hŒapi”Œentries”]”(NŒserver”†”NŒfsm”†”NŒio”†”eŒ includefiles”]”(hMhOhQeŒmaxdepth”KŒcaption”NŒglob”‰Œhidden”‰Œ includehidden”‰Œnumbered”KŒ titlesonly”‰Œ rawentries”]”uhh?h h!hKhh;ubah}”(h]”h]”Œtoctree-wrapper”ah]”h]”h]”uhh9hh$hhh h!hNubeh}”(h]”(Œ api-reference”heh]”h]”(Œ api reference”Œapi”eh]”h]”uhh"hhhhh h!hKŒexpect_referenced_by_name”}”hkh sŒexpect_referenced_by_id”}”hh subeh}”(h]”h]”h]”h]”h]”Œsource”h!uhhŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h'NŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”h•Œerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h!Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ 
docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”h]”h asŒnameids”}”(hkhhjhguŒ nametypes”}”(hkˆhjNuh}”(hh$hgh$uŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”hŒsystem_message”“”)”}”(hhh]”hŒ paragraph”“”)”}”(hhh]”h.Œ)Hyperlink target "api" is not referenced.”…””}”(hhhhöubah}”(h]”h]”h]”h]”h]”uhhôhhñubah}”(h]”h]”h]”h]”h]”Œlevel”KŒtype”ŒINFO”Œsource”h!Œline”KuhhïubaŒ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/.doctrees/environment.pickle000066400000000000000000000542141415614527300222200ustar00rootroot00000000000000€•XŒsphinx.environment”ŒBuildEnvironment”“”)”}”(Œapp”NŒ doctreedir”Œ#/srv/src/c/raft/doc/build/.doctrees”Œsrcdir”Œ/srv/src/c/raft/doc”Œconfig”Œ sphinx.config”ŒConfig”“”)”}”(Œ overrides”}”Œsetup”NŒ extensions”]”Œ master_doc”Œindex”Œproject”ŒC-Raft documentation”Œ copyright”Œ2019-present, Canonical”Œversion”Œ0.9”Œrelease”hŒ html_theme”Œmaterial”Œhtml_theme_path”]”Œ_themes”aŒ html_title”hŒhtml_short_title”ŒC-Raft”Œhtmlhelp_basename”Œraft”Œepub_css_files”]”Œhtml_css_files”]”Œ html_js_files”]”Œ source_suffix”Œ collections”Œ OrderedDict”“”)R”Œ.rst”Œrestructuredtext”sŒ numfig_format”}”(Œsection”Œ Section %s”Œfigure”ŒFig. 
%s”Œtable”ŒTable %s”Œ code-block”Œ Listing %s”uŒvalues”}”(hhŒenv”N‡”Œauthor”Œunknown”hAN‡”hhŒhtml”N‡”hhhAN‡”hhhAN‡”Œtoday”Œ”hAN‡”Œ today_fmt”NhAN‡”Œlanguage”NhAN‡”Œ locale_dirs”]”Œlocales”ahAN‡”Œfigure_language_filename”Œ{root}.{language}{ext}”hAN‡”hhhAN‡”h.h2hAN‡”Œsource_encoding”Œ utf-8-sig”hAN‡”Œsource_parsers”}”hAN‡”Œexclude_patterns”]”hAN‡”Œ default_role”NhAN‡”Œadd_function_parentheses”ˆhAN‡”Œadd_module_names”ˆhAN‡”Œtrim_footnote_reference_space”‰hAN‡”Œ show_authors”‰hAN‡”Œpygments_style”NhFN‡”Œhighlight_language”Œdefault”hAN‡”Œhighlight_options”}”hAN‡”Œtemplates_path”]”hFN‡”Œtemplate_bridge”NhFN‡”Œ keep_warnings”‰hAN‡”Œsuppress_warnings”]”hAN‡”Œmodindex_common_prefix”]”hFN‡”Œ rst_epilog”NhAN‡”Œ rst_prolog”NhAN‡”Œtrim_doctest_flags”ˆhAN‡”Œprimary_domain”Œpy”hAN‡”Œ needs_sphinx”NNN‡”Œneeds_extensions”}”NN‡”Œ manpages_url”NhAN‡”Œnitpicky”‰NN‡”Œnitpick_ignore”]”NN‡”Œnumfig”‰hAN‡”Œnumfig_secnum_depth”KhAN‡”h5h6hAN‡”Œmath_number_all”‰hAN‡”Œmath_eqref_format”NhAN‡”Œ math_numfig”ˆhAN‡”Œ tls_verify”ˆhAN‡”Œ tls_cacerts”NhAN‡”Œ user_agent”NhAN‡”Œ smartquotes”ˆhAN‡”Œsmartquotes_action”ŒqDe”hAN‡”Œsmartquotes_excludes”}”(Œ languages”]”Œja”aŒbuilders”]”(Œman”Œtext”euhAN‡”Œ epub_basename”ŒC-Raftdocumentation”NN‡”Œ epub_version”G@Œepub”N‡”Œ epub_theme”h»h»N‡”Œepub_theme_options”}”h»N‡”Œ epub_title”hh»N‡”Œ epub_author”hDh»N‡”Œ epub_language”Œen”h»N‡”Œepub_publisher”hDh»N‡”Œepub_copyright”hh»N‡”Œepub_identifier”hDh»N‡”Œ epub_scheme”hDh»N‡”Œepub_uid”hDhAN‡”Œ epub_cover”)hAN‡”Œ epub_guide”)hAN‡”Œepub_pre_files”]”hAN‡”Œepub_post_files”]”hAN‡”h(h)h»N‡”Œepub_exclude_files”]”hAN‡”Œ epub_tocdepth”KhAN‡”Œ epub_tocdup”ˆhAN‡”Œ epub_tocscope”hphAN‡”Œepub_fix_images”‰hAN‡”Œepub_max_image_width”KhAN‡”Œepub_show_urls”Œinline”h»N‡”Œepub_use_index”ˆh»N‡”Œepub_description”hDh»N‡”Œepub_contributor”hDh»N‡”Œepub_writing_mode”Œ horizontal”h»N‡”hhhFN‡”h h!hFN‡”Œhtml_theme_options”}”hFN‡”h#hhFN‡”h$h%hFN‡”Œ html_style”NhFN‡”Œ html_logo”NhFN‡”Œ 
html_favicon”NhFN‡”h*h+hFN‡”h,h-hFN‡”Œhtml_static_path”]”hFN‡”Œhtml_extra_path”]”hFN‡”Œhtml_last_updated_fmt”NhFN‡”Œ html_sidebars”}”hFN‡”Œhtml_additional_pages”}”hFN‡”Œhtml_domain_indices”ˆhFN‡”Œhtml_add_permalinks”Œ¶”hFN‡”Œhtml_use_index”ˆhFN‡”Œhtml_split_index”‰hFN‡”Œhtml_copy_source”ˆhFN‡”Œhtml_show_sourcelink”ˆhFN‡”Œhtml_sourcelink_suffix”Œ.txt”hFN‡”Œhtml_use_opensearch”hKhFN‡”Œhtml_file_suffix”NhFN‡”Œhtml_link_suffix”NhFN‡”Œhtml_show_copyright”ˆhFN‡”Œhtml_show_sphinx”ˆhFN‡”Œ html_context”}”hFN‡”Œhtml_output_encoding”Œutf-8”hFN‡”Œhtml_compact_lists”ˆhFN‡”Œhtml_secnumber_suffix”Œ. ”hFN‡”Œhtml_search_language”NhFN‡”Œhtml_search_options”}”hFN‡”Œhtml_search_scorer”hKNN‡”Œhtml_scaled_image_link”ˆhFN‡”Œ html_baseurl”hKhFN‡”Œhtml_math_renderer”NhAN‡”Œ html4_writer”‰hFN‡”Œ mathjax_path”Œ[https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML”hFN‡”Œmathjax_options”}”hFN‡”Œmathjax_inline”]”(Œ\(”Œ\)”ehFN‡”Œmathjax_display”]”(Œ\[”Œ\]”ehFN‡”Œmathjax_config”NhFN‡”Œgettext_compact”ˆŒgettext”N‡”Œgettext_location”ˆj[N‡”Œ gettext_uuid”‰j[N‡”Œgettext_auto_build”ˆhAN‡”Œgettext_additional_targets”]”hAN‡”Œgettext_last_translator”ŒFULL NAME ”j[N‡”Œgettext_language_team”ŒLANGUAGE ”j[N‡”Œ latex_engine”Œpdflatex”NN‡”Œlatex_documents”]”(hŒc-raftdocumentation.tex”ŒC-Raft documentation”Œunknown”Œmanual”t”aNN‡”Œ latex_logo”NNN‡”Œlatex_appendices”]”NN‡”Œlatex_use_latex_multicolumn”‰NN‡”Œlatex_use_xindy”‰NN‡”Œlatex_toplevel_sectioning”NNN‡”Œlatex_domain_indices”ˆNN‡”Œlatex_show_urls”Œno”NN‡”Œlatex_show_pagerefs”‰NN‡”Œlatex_elements”}”NN‡”Œlatex_additional_files”]”NN‡”Œlatex_docclass”}”NN‡”Œlinkcheck_ignore”]”NN‡”Œlinkcheck_auth”]”NN‡”Œlinkcheck_retries”KNN‡”Œlinkcheck_timeout”NNN‡”Œlinkcheck_workers”KNN‡”Œlinkcheck_anchors”ˆNN‡”Œlinkcheck_anchors_ignore”]”Œ^!”aNN‡”Œ man_pages”]”(hŒc-raftdocumentation”ŒC-Raft documentation 0.9”]”hDaKt”aNN‡”Œ man_show_urls”‰NN‡”Œsinglehtml_sidebars”jhFN‡”Œtexinfo_documents”]”(hŒc-raftdocumentation”hhDj±ŒOne line description 
of project”Œ Miscellaneous”t”aNN‡”Œtexinfo_appendices”]”NN‡”Œtexinfo_elements”}”NN‡”Œtexinfo_domain_indices”ˆNN‡”Œtexinfo_show_urls”Œfootnote”NN‡”Œtexinfo_no_detailmenu”‰NN‡”Œtext_sectionchars”Œ*=-~"+`”hAN‡”Œ text_newlines”Œunix”hAN‡”Œtext_add_secnumbers”ˆhAN‡”Œtext_secnumber_suffix”Œ. ”hAN‡”Œ xml_pretty”ˆhAN‡”Œcpp_index_common_prefix”]”hAN‡”Œcpp_id_attributes”]”hAN‡”Œcpp_paren_attributes”]”hAN‡”Œapplehelp_bundle_name”ŒC-Raftdocumentation”Œ applehelp”N‡”Œapplehelp_bundle_id”NjÛN‡”Œapplehelp_dev_region”Œen-us”jÛN‡”Œapplehelp_bundle_version”Œ1”jÛN‡”Œapplehelp_icon”NjÛN‡”Œapplehelp_kb_product”ŒC-Raftdocumentation-0.9”jÛN‡”Œapplehelp_kb_url”NjÛN‡”Œapplehelp_remote_url”NjÛN‡”Œapplehelp_index_anchors”‰jÛN‡”Œapplehelp_min_term_length”NjÛN‡”Œapplehelp_stopwords”hÇjÛN‡”Œapplehelp_locale”hÇjÛN‡”Œapplehelp_title”ŒC-Raft documentation Help”jÛN‡”Œapplehelp_codesign_identity”NjÛN‡”Œapplehelp_codesign_flags”]”jÛN‡”Œapplehelp_indexer_path”Œ/usr/bin/hiutil”jÛN‡”Œapplehelp_codesign_path”Œ/usr/bin/codesign”jÛN‡”Œ applehelp_disable_external_tools”‰NN‡”Œdevhelp_basename”ŒC-Raftdocumentation”NN‡”h&h'NN‡”Œhtmlhelp_file_suffix”NhFN‡”Œhtmlhelp_link_suffix”NhFN‡”Œqthelp_basename”ŒC-Raftdocumentation”NN‡”Œqthelp_namespace”NhFN‡”Œ qthelp_theme”Œnonav”hFN‡”Œqthelp_theme_options”}”hFN‡”uubŒ config_status”KŒconfig_status_extra”hKŒevents”NhŒsphinx.project”ŒProject”“”)”}”(hh h.h2Œdocnames””(Œgetting-started”Œfsm”Œindex”Œapi”Œserver”Œio”ubh}”(Œsphinx.domains.c”KŒsphinx.domains.changeset”KŒsphinx.domains.citation”KŒsphinx.domains.cpp”KŒsphinx.domains.javascript”KŒsphinx.domains.math”KŒsphinx.domains.python”KŒsphinx.domains.rst”KŒsphinx.domains.std”KŒsphinx”K8uŒversioning_condition”‰Œversioning_compare”‰Œdomains”}”Œsettings”}”(Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œrfc_references”NŒinput_encoding”h[Œdoctitle_xform”‰Œsectsubtitle_xform”‰Œ 
halt_level”KŒfile_insertion_enabled”ˆŒsmartquotes_locales”]”hAhhi‰Œ language_code”hÇŒ smart_quotes”ˆuŒall_docs”}”(j&GAׄ””BùÎj$GAׄ””C×@j#GAׄ””Døj%GAׄ””E&§j(GAׄ””G†šj'GAׄ””HŸuŒ dependencies”h/Œ defaultdict”“”Œbuiltins”Œset”“”…”R”Œincluded”jOjR…”R”Œ reread_always””Œmetadata”jOjPŒdict”“”…”R”Œtitles”}”(j&Œdocutils.nodes”Œtitle”“”)”}”(Œ rawsource”hKŒchildren”]”jaŒText”“”Œ API reference”…””}”(jfŒ API reference”Œparent”jdubaŒ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”uŒtagname”jbubj$jc)”}”(jfhKjg]”(jaŒliteral”“”)”}”(jfŒstruct raft_fsm”jg]”jjŒstruct raft_fsm”…””}”(jfhKjpjƒubajq}”(js]”ju]”(Œxref”Œc”Œc-type”ejw]”jy]”j{]”uj}jŒsource”NŒline”Njpj~ubjjŒ — Application state machine”…””}”(jfŒ --- Application state machine”jpj~ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jbubj#jc)”}”(jfhKjg]”jjŒGetting started”…””}”(jfŒGetting started”jpj¡ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jbubj%jc)”}”(jfhKjg]”jjŒC-Raft”…””}”(jfŒC-Raft”jpj¯ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jbubj(jc)”}”(jfhKjg]”(j‚)”}”(jfŒstruct raft_io”jg]”jjŒstruct raft_io”…””}”(jfhKjpjÀubajq}”(js]”ju]”(jŽjŒc-type”ejw]”jy]”j{]”uj}jj”Nj•Njpj½ubjjŒ — I/O backend interface”…””}”(jfŒ --- I/O backend interface”jpj½ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jbubj'jc)”}”(jfhKjg]”(j‚)”}”(jfŒ struct raft”jg]”jjŒ struct raft”…””}”(jfhKjpjÝubajq}”(js]”ju]”(jŽjŒc-type”ejw]”jy]”j{]”uj}jj”Nj•NjpjÚubjjŒ — Raft server”…””}”(jfŒ --- Raft server”jpjÚubejq}”(js]”ju]”jw]”jy]”j{]”uj}jbubuŒ longtitles”}”(j&jdj$j~j#j¡j%j¯j(j½j'jÚuŒtocs”}”(j&jaŒ bullet_list”“”)”}”(jfhKjg]”jaŒ list_item”“”)”}”(jfhKjg]”(Œsphinx.addnodes”Œcompact_paragraph”“”)”}”(jfhKjg]”jaŒ reference”“”)”}”(jfhKjg]”jjŒ API reference”…””}”(jfjojpj ubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j&Œ anchorname”hKuj}j jpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjubjü)”}”(jfhKjg]”jŒtoctree”“”)”}”(jfhKjg]”jq}”(js]”ju]”jw]”jy]”j{]”Œparent”j&Œentries”]”(NŒserver”†”NŒfsm”†”NŒio”†”eŒ includefiles”]”(j4j6j8eŒmaxdepth”KŒcaption”NŒglob”‰Œhidden”‰Œ includehidden”‰Œnumbered”KŒ titlesonly”‰Œ 
rawentries”]”uj}j&j”Œ/srv/src/c/raft/doc/api.rst”j•Kjpj#ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpjubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjýubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubj$jü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”(j‚)”}”(jfj…jg]”jjŒstruct raft_fsm”…””}”(jfhKjpjdubajq}”(js]”ju]”(jŽjjejw]”jy]”j{]”uj}jj”Nj•NjpjaubjjŒ — Application state machine”…””}”(jfjšjpjaubejq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j$Œ anchorname”hKuj}j jpj^ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj[ubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒ Data types”…””}”(jfŒ Data types”jpjubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j$Œ anchorname”Œ #data-types”uj}j jpjŠubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj‡ubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒPublic members”…””}”(jfŒPublic members”jpj®ubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j$Œ anchorname”Œ#public-members”uj}j jpj«ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj¨ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj¥ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpj‡ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj„ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpj[ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjXubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubj#jü)”}”(jfhKjg]”j)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒGetting started”…””}”(jfj¨jpjóubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j#Œ anchorname”hKuj}j jpjðubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjíubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjêubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubj%jü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒC-Raft”…””}”(jfj¶jpjubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”hKuj}j jpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjubjü)”}”(jfhKjg]”(j)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒDesign”…””}”(jfŒDesign”jpj=ubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”Œ#design”uj}j jpj:ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj7ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj4ubj)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒFeatures”…””}”(jfŒFeatures”jpjaubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”Œ 
#features”uj}j jpj^ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj[ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj4ubj)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒ Quick start”…””}”(jfŒ Quick start”jpj…ubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”Œ #quick-start”uj}j jpj‚ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj4ubj)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒLicence”…””}”(jfŒLicence”jpj©ubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”Œ#licence”uj}j jpj¦ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj£ubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒtoc”…””}”(jfŒtoc”jpjÊubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j%Œ anchorname”Œ#toc”uj}j jpjÇubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjÄubjü)”}”(jfhKjg]”j')”}”(jfhKjg]”jq}”(js]”ju]”jw]”jy]”j{]”Œparent”j%Œentries”]”(NŒself”†”NŒgetting-started”†”NŒapi”†”eŒ includefiles”]”(jójõeŒmaxdepth”KŒcaption”NŒglob”‰Œhidden”‰Œ includehidden”‰Œnumbered”KŒ titlesonly”‰Œ rawentries”]”uj}j&j”Œ/srv/src/c/raft/doc/index.rst”j•Kjpjâubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpjÄubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjÁubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpj£ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj4ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpjubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubj(jü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”(j‚)”}”(jfjÂjg]”jjŒstruct raft_io”…””}”(jfhKjpj9ubajq}”(js]”ju]”(jŽjjËejw]”jy]”j{]”uj}jj”Nj•Njpj6ubjjŒ — I/O backend interface”…””}”(jfjÓjpj6ubejq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j(Œ anchorname”hKuj}j jpj3ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj0ubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒ Data types”…””}”(jfŒ Data types”jpjbubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j(Œ anchorname”Œ #data-types”uj}j jpj_ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj\ubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒPublic members”…””}”(jfŒPublic members”jpjƒubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j(Œ anchorname”Œ#public-members”uj}j 
jpj€ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj}ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjzubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpj\ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjYubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpj0ubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj-ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubj'jü)”}”(jfhKjg]”j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”(j‚)”}”(jfjßjg]”jjŒ struct raft”…””}”(jfhKjpjËubajq}”(js]”ju]”(jŽjjèejw]”jy]”j{]”uj}jj”Nj•NjpjÈubjjŒ — Raft server”…””}”(jfjðjpjÈubejq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j'Œ anchorname”hKuj}j jpjÅubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjÂubjü)”}”(jfhKjg]”(j)”}”(jfhKjg]”(j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒ Data types”…””}”(jfŒ Data types”jpjôubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j'Œ anchorname”Œ #data-types”uj}j jpjñubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjîubjü)”}”(jfhKjg]”j)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒPublic members”…””}”(jfŒPublic members”jpjubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j'Œ anchorname”Œ#public-members”uj}j jpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpjîubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjëubj)”}”(jfhKjg]”j)”}”(jfhKjg]”j )”}”(jfhKjg]”jjŒAPI”…””}”(jfŒAPI”jpjEubajq}”(js]”ju]”jw]”jy]”j{]”Œinternal”ˆŒrefuri”j'Œ anchorname”Œ#api”uj}j jpjBubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj?ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jjpjëubejq}”(js]”ju]”jw]”jy]”j{]”uj}jûjpjÂubejq}”(js]”ju]”jw]”jy]”j{]”uj}jjpj¿ubajq}”(js]”ju]”jw]”jy]”j{]”uj}jûubuŒtoc_num_entries”}”(j&Kj$Kj#Kj%Kj(Kj'KuŒtoc_secnumbers”}”Œtoc_fignumbers”}”Œtoctree_includes”}”(j&]”(j4j6j8ej%]”(jójõeuŒfiles_to_rebuild”}”(j4”(j&j6”(j&j8”(j&jó”(j%jõ”(j%uŒ glob_toctrees””Œnumbered_toctrees””Œ domaindata”}”(j}”(Œobjects”}”(Œraft_fsm”j$Œtype”†”Œdata”j'Œmember”†”Œversion”j(Œmember”†”Œapply”j$Œmember”†”Œsnapshot”j$Œmember”†”Œrestore”j$Œmember”†”Œraft_io”j(Œtype”†”Œraft_io_close_cb”j(Œtype”†”Œraft_io_tick_cb”j(Œtype”†”Œraft_io_recv_cb”j(Œtype”†”Œimpl”j(Œmember”†”Œinit”j(Œmember”†”Œclose”j(Œmember”†”Œload”j(Œmember”†”Œstart”j(Œmember”†”Œ 
bootstrap”j(Œmember”†”Œrecover”j(Œmember”†”Œset_term”j(Œmember”†”Œset_vote”j(Œmember”†”Œsend”j(Œmember”†”Œappend”j(Œmember”†”Œtruncate”j(Œmember”†”Œ snapshot_put”j(Œmember”†”Œ snapshot_get”j(Œmember”†”Œtime”j(Œmember”†”Œrandom”j(Œmember”†”Œraft”j'Œtype”†”Œraft_id”j'Œtype”†”Œ raft_close_cb”j'Œtype”†”Œid”j'Œmember”†”Œ raft_init”j'Œfunction”†”Œ raft_close”j'Œfunction”†”Œ raft_start”j'Œfunction”†”uhKuŒ changeset”}”(Œchanges”}”hKuŒcitation”}”(hKŒ citations”}”Œ citation_refs”}”uŒcpp”}”(Œ root_symbol”j-ŒSymbol”“”)”}”(jpNŒ identOrOp”NŒtemplateParams”NŒ templateArgs”NŒ declaration”NŒdocname”NŒisRedeclaration”‰Œ _children”]”Œ _anonChildren”]”ubjw}”hKuŒjs”}”(j}”Œmodules”}”hKuŒmath”}”(j}”Œ has_equations”}”(j&‰j$‰j#‰j%‰j(‰j'‰uhKuh‰}”(j}”j}”hKuŒrst”}”(j}”hKuŒstd”}”(Œ progoptions”}”j}”Œlabels”}”(Œgenindex”j%hKŒ sphinx.locale”Œ_TranslationProxy”“”(j&Œ_lazy_translate”“”j3Œgeneral”ŒIndex”t””j*j3j+j,‡”†”b‡”Œmodindex”Œ py-modindex”hKj((j*j3j+Œ Module Index”t””j*j3j+j4‡”†”b‡”Œsearch”j:hKj((j*j3j+Œ Search Page”t””j*j3j+j;‡”†”b‡”Œapi”j&Œapi”Œ API reference”‡”Œfsm”j$Œfsm”Œ-struct raft_fsm — Application state machine”‡”Œio”j(Œio”Œ(struct raft_io — I/O backend interface”‡”Œserver”j'Œserver”Œstruct raft — Raft server”‡”uŒ anonlabels”}”(j%j%hK†”j2Œ py-modindex”hK†”j:j:hK†”jAj&jB†”jEj$jF†”jIj(jJ†”jMj'jN†”uhKuuŒ indexentries”}”(j&]”j$]”((Œsingle”Œraft_fsm (C type)”Œ c.raft_fsm”hKNt”(j_Œdata (C member)”Œc.data”hKNt”(j_Œversion (C member)”Œ c.version”hKNt”(j_Œapply (C member)”Œc.apply”hKNt”(j_Œsnapshot (C member)”Œ c.snapshot”hKNt”(j_Œrestore (C member)”Œ c.restore”hKNt”ej#]”j%]”j(]”((j_Œraft_io (C type)”Œ c.raft_io”hKNt”(j_Œraft_io_close_cb (C type)”Œc.raft_io_close_cb”hKNt”(j_Œraft_io_tick_cb (C type)”Œc.raft_io_tick_cb”hKNt”(j_Œraft_io_recv_cb (C type)”Œc.raft_io_recv_cb”hKNt”(j_Œdata (C member)”Œc.data”hKNt”(j_Œversion (C member)”Œ c.version”hKNt”(j_Œimpl (C member)”Œc.impl”hKNt”(j_Œinit (C member)”Œc.init”hKNt”(j_Œclose (C member)”Œc.close”hKNt”(j_Œload (C member)”Œc.load”hKNt”(j_Œstart (C 
member)”Œc.start”hKNt”(j_Œbootstrap (C member)”Œ c.bootstrap”hKNt”(j_Œrecover (C member)”Œ c.recover”hKNt”(j_Œset_term (C member)”Œ c.set_term”hKNt”(j_Œset_vote (C member)”Œ c.set_vote”hKNt”(j_Œsend (C member)”Œc.send”hKNt”(j_Œappend (C member)”Œc.append”hKNt”(j_Œtruncate (C member)”Œ c.truncate”hKNt”(j_Œsnapshot_put (C member)”Œc.snapshot_put”hKNt”(j_Œsnapshot_get (C member)”Œc.snapshot_get”hKNt”(j_Œtime (C member)”Œc.time”hKNt”(j_Œrandom (C member)”Œc.random”hKNt”ej']”((j_Œ raft (C type)”Œc.raft”hKNt”(j_Œraft_id (C type)”Œ c.raft_id”hKNt”(j_Œraft_close_cb (C type)”Œc.raft_close_cb”hKNt”(j_Œdata (C member)”Œc.data”hKNt”(j_Œ id (C member)”Œc.id”hKNt”(j_Œraft_init (C function)”Œ c.raft_init”hKNt”(j_Œraft_close (C function)”Œ c.raft_close”hKNt”(j_Œraft_start (C function)”Œ c.raft_start”hKNt”euŒimages”Œ sphinx.util”ŒFilenameUniqDict”“”)””bŒdlfiles”jÑŒ DownloadFiles”“”)”Œoriginal_image_uri”}”Œ temp_data”}”Œ ref_context”}”ub.raft-0.11.3/docs/build/.doctrees/fsm.doctree000066400000000000000000000320231415614527300206110ustar00rootroot00000000000000€•4Œdocutils.nodes”Œdocument”“”)”}”(Œ rawsource”Œ”Œchildren”]”(hŒtarget”“”)”}”(hŒ.. 
_fsm:”h]”Œ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œrefid”Œfsm”uŒtagname”h Œline”KŒparent”hhhŒsource”Œ/srv/src/c/raft/doc/fsm.rst”ubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒ7:c:type:`struct raft_fsm` --- Application state machine”h]”(Œsphinx.addnodes”Œ pending_xref”“”)”}”(hŒ:c:type:`struct raft_fsm`”h]”hŒliteral”“”)”}”(hŒstruct raft_fsm”h]”hŒText”“”Œstruct raft_fsm”…””}”(hhhh6ubah}”(h]”h]”(Œxref”Œc”Œc-type”eh]”h]”h]”uhh4hh0ubah}”(h]”h]”h]”h]”h]”Œrefdoc”Œfsm”Œ refdomain”hDŒreftype”Œtype”Œ refexplicit”‰Œrefwarn”‰Œ reftarget”h8uhh.h h!hKhh)ubh;Œ — Application state machine”…””}”(hŒ --- Application state machine”hh)hhh NhNubeh}”(h]”h]”h]”h]”h]”uhh'hh$hhh h!hKubhŒ paragraph”“”)”}”(hŒ{The FSM struct defines the interface that the application's state machine must implement in order to be replicated by Raft.”h]”h;Œ}The FSM struct defines the interface that the application’s state machine must implement in order to be replicated by Raft.”…””}”(hhfhhdhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhh$hhubh#)”}”(hhh]”(h()”}”(hŒ Data types”h]”h;Œ Data types”…””}”(hhwhhuhhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hhrhhh h!hK ubh-Œindex”“”)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(Œsingle”Œraft_fsm (C type)”Œ c.raft_fsm”hNt”auhhƒhhrhhh h!hNubh-Œdesc”“”)”}”(hhh]”(h-Œdesc_signature”“”)”}”(hŒstruct raft_fsm”h]”(h-Œ desc_type”“”)”}”(hŒstruct ”h]”(h;Œstruct”…””}”(hŒstruct”hh¡hhh NhNubh;Œ ”…””}”(hŒ ”hh¡hhh NhNubeh}”(h]”h]”h]”h]”h]”Œ xml:space”Œpreserve”uhhŸhh›hhh h!hKubh-Œ desc_name”“”)”}”(hŒraft_fsm”h]”h;Œraft_fsm”…””}”(hhhh¹hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hh›hhh h!hKubeh}”(h]”h’ah]”h]”h’ah]”h]”Œfirst”‰uhh™hh–hhh h!hKubh-Œ desc_content”“”)”}”(hhh]”hc)”}”(hŒ?Hold pointers to an actual implementation of the FSM interface.”h]”h;Œ?Hold pointers to an actual implementation of the FSM interface.”…””}”(hhÕhhÓhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhhÐhhubah}”(h]”h]”h]”h]”h]”uhhÎhh–hhh h!hKubeh}”(h]”h]”h]”h]”h]”Œdomain”hDŒobjtype”Œtype”Œdesctype”hïŒnoindex”‰uhh”hhhhrh 
h!hNubh#)”}”(hhh]”(h()”}”(hŒPublic members”h]”h;ŒPublic members”…””}”(hh÷hhõhhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hhòhhh h!hKubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒdata (C member)”Œc.data”hNt”auhhƒhhòhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ void* data”h]”(h )”}”(hŒvoid* ”h]”(h;Œvoid”…””}”(hŒvoid”hjhhh NhNubh;Œ* ”…””}”(hŒ* ”hjhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjhhh h!hKubh¸)”}”(hŒdata”h]”h;Œdata”…””}”(hhhj,hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjhhh h!hKubeh}”(h]”jah]”h]”jah]”h]”h͉uhh™hjhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒYSpace for user-defined arbitrary data. C-Raft does not use and does not touch this field.”h]”h;ŒYSpace for user-defined arbitrary data. C-Raft does not use and does not touch this field.”…””}”(hjEhjChhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhj@hhubah}”(h]”h]”h]”h]”h]”uhhÎhjhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj]hñ‰uhh”hhhhòh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒversion (C member)”Œ c.version”hNt”auhhƒhhòhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ int version”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjshhh NhNubh;Œ ”…””}”(hh®hjshhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjohhh h!hKubh¸)”}”(hŒversion”h]”h;Œversion”…””}”(hhhj†hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjohhh h!hKubeh}”(h]”jjah]”h]”jjah]”h]”h͉uhh™hjlhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒ6API version implemented by this instance. Currently 1.”h]”h;Œ6API version implemented by this instance. 
Currently 1.”…””}”(hjŸhjhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhjšhhubah}”(h]”h]”h]”h]”h]”uhhÎhjlhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj·hñ‰uhh”hhhhòh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒapply (C member)”Œc.apply”hNt”auhhƒhhòhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒQint (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjÍhhh NhNubh;Œ ”…””}”(hh®hjÍhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÉhhh h!hKubh¸)”}”(hŒ(*apply)”h]”h;Œ(*apply)”…””}”(hhhjàhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÉhhh h!hKubh-Œdesc_parameterlist”“”)”}”(hŒEstruct raft_fsm *fsm, const struct raft_buffer *buf, void **result”h]”(h-Œdesc_parameter”“”)”}”(hŒstruct raft_fsm *fsm”h]”(h;Œstruct”…””}”(hŒstruct”hjöubh;Œ ”…””}”(hh®hjöubh/)”}”(hhh]”h;Œraft_fsm”…””}”(hŒraft_fsm”hjubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”Œtype”Œ reftarget”j Œmodname”NŒ classname”Nuhh.hjöubhŒemphasis”“”)”}”(hŒ *fsm”h]”h;Œ *fsm”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjhjöubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjðubjõ)”}”(hŒconst struct raft_buffer *buf”h]”(h;Œconst”…””}”(hŒconst”hj.ubh;Œ ”…””}”(hh®hj.ubh;Œstruct”…””}”(hŒstruct”hj.ubh;Œ ”…””}”(hh®hj.ubh/)”}”(hhh]”h;Œ raft_buffer”…””}”(hŒ raft_buffer”hjDubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jŒ reftarget”jKŒmodname”NŒ classname”Nuhh.hj.ubj)”}”(hŒ *buf”h]”h;Œ *buf”…””}”(hhhjWubah}”(h]”h]”h]”h]”h]”uhjhj.ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjðubjõ)”}”(hŒvoid **result”h]”(h;Œvoid”…””}”(hŒvoid”hjlubj)”}”(hŒ **result”h]”h;Œ  **result”…””}”(hhhjuubah}”(h]”h]”h]”h]”h]”uhjhjlubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjðubeh}”(h]”h]”h]”h]”h]”hµh¶uhjîhjÉhhh h!hKubeh}”(h]”jÄah]”h]”jÄah]”h]”h͉uhh™hjÆhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒ:Apply a committed RAFT_COMMAND entry to the state machine.”h]”h;Œ:Apply a committed RAFT_COMMAND entry to the state machine.”…””}”(hj›hj™hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhj–hhubah}”(h]”h]”h]”h]”h]”uhhÎhjÆhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj³hñ‰uhh”hhhhòh 
h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒsnapshot (C member)”Œ c.snapshot”hNt”auhhƒhhòhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒSint (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjÉhhh NhNubh;Œ ”…””}”(hh®hjÉhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÅhhh h!hK#ubh¸)”}”(hŒ (*snapshot)”h]”h;Œ (*snapshot)”…””}”(hhhjÜhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÅhhh h!hK#ubjï)”}”(hŒEstruct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs”h]”(jõ)”}”(hŒstruct raft_fsm *fsm”h]”(h;Œstruct”…””}”(hŒstruct”hjîubh;Œ ”…””}”(hh®hjîubh/)”}”(hhh]”h;Œraft_fsm”…””}”(hŒraft_fsm”hjûubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jŒ reftarget”jŒmodname”NŒ classname”Nuhh.hjîubj)”}”(hŒ *fsm”h]”h;Œ *fsm”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjhjîubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjêubjõ)”}”(hŒstruct raft_buffer *bufs[]”h]”(h;Œstruct”…””}”(hŒstruct”hj#ubh;Œ ”…””}”(hh®hj#ubh/)”}”(hhh]”h;Œ raft_buffer”…””}”(hŒ raft_buffer”hj0ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jŒ reftarget”j7Œmodname”NŒ classname”Nuhh.hj#ubj)”}”(hŒ *bufs[]”h]”h;Œ  *bufs[]”…””}”(hhhjCubah}”(h]”h]”h]”h]”h]”uhjhj#ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjêubjõ)”}”(hŒunsigned *n_bufs”h]”(h;Œunsigned”…””}”(hŒunsigned”hjXubj)”}”(hŒ *n_bufs”h]”h;Œ  *n_bufs”…””}”(hhhjaubah}”(h]”h]”h]”h]”h]”uhjhjXubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjêubeh}”(h]”h]”h]”h]”h]”hµh¶uhjîhjÅhhh h!hK#ubeh}”(h]”jÀah]”h]”jÀah]”h]”h͉uhh™hjÂhhh h!hK#ubhÏ)”}”(hhh]”hc)”}”(hŒ%Take a snapshot of the state machine.”h]”h;Œ%Take a snapshot of the state machine.”…””}”(hj‡hj…hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK"hj‚hhubah}”(h]”h]”h]”h]”h]”uhhÎhjÂhhh h!hK#ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjŸhñ‰uhh”hhhhòh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒrestore (C member)”Œ c.restore”hNt”auhhƒhhòhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ=int (*restore)(struct raft_fsm *fsm, struct raft_buffer *buf)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjµhhh NhNubh;Œ ”…””}”(hh®hjµhhh 
NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj±hhh h!hK&ubh¸)”}”(hŒ (*restore)”h]”h;Œ (*restore)”…””}”(hhhjÈhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj±hhh h!hK&ubjï)”}”(hŒ/struct raft_fsm *fsm, struct raft_buffer *buf”h]”(jõ)”}”(hŒstruct raft_fsm *fsm”h]”(h;Œstruct”…””}”(hŒstruct”hjÚubh;Œ ”…””}”(hh®hjÚubh/)”}”(hhh]”h;Œraft_fsm”…””}”(hŒraft_fsm”hjçubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jŒ reftarget”jîŒmodname”NŒ classname”Nuhh.hjÚubj)”}”(hŒ *fsm”h]”h;Œ *fsm”…””}”(hhhjúubah}”(h]”h]”h]”h]”h]”uhjhjÚubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjÖubjõ)”}”(hŒstruct raft_buffer *buf”h]”(h;Œstruct”…””}”(hŒstruct”hjubh;Œ ”…””}”(hh®hjubh/)”}”(hhh]”h;Œ raft_buffer”…””}”(hŒ raft_buffer”hjubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jŒ reftarget”j#Œmodname”NŒ classname”Nuhh.hjubj)”}”(hŒ *buf”h]”h;Œ *buf”…””}”(hhhj/ubah}”(h]”h]”h]”h]”h]”uhjhjubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhjôhjÖubeh}”(h]”h]”h]”h]”h]”hµh¶uhjîhj±hhh h!hK&ubeh}”(h]”j¬ah]”h]”j¬ah]”h]”h͉uhh™hj®hhh h!hK&ubhÏ)”}”(hhh]”hc)”}”(hŒ(Restore a snapshot of the state machine.”h]”h;Œ(Restore a snapshot of the state machine.”…””}”(hjUhjShhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK&hjPhhubah}”(h]”h]”h]”h]”h]”uhhÎhj®hhh h!hK&ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjmhñ‰uhh”hhhhòh h!hNubeh}”(h]”Œpublic-members”ah]”h]”Œpublic members”ah]”h]”uhh"hhrhhh h!hKubeh}”(h]”Œ data-types”ah]”h]”Œ data types”ah]”h]”uhh"hh$hhh h!hK ubeh}”(h]”(Œ)struct-raft-fsm-application-state-machine”heh]”h]”(Œ-struct raft_fsm --- application state machine”Œfsm”eh]”h]”uhh"hhhhh h!hKŒexpect_referenced_by_name”}”j„h sŒexpect_referenced_by_id”}”hh subeh}”(h]”h]”h]”h]”h]”Œsource”h!uhhŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h'NŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ 
utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”j®Œerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h!Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”h]”h asŒnameids”}”(j„hjƒj€j{jxh’h’jsjpjjjjjjjÄjÄjÀjÀj¬j¬uŒ nametypes”}”(j„ˆjƒNj{Nh’ˆjsNjˆjjˆjĈjÀˆj¬ˆuh}”(hh$j€h$jxhrh’h›jphòjjjjjojÄjÉjÀjÅj¬j±uŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”hŒsystem_message”“”)”}”(hhh]”hc)”}”(hhh]”h;Œ)Hyperlink target "fsm" is not referenced.”…””}”(hhhj ubah}”(h]”h]”h]”h]”h]”uhhbhj ubah}”(h]”h]”h]”h]”h]”Œlevel”KŒtype”ŒINFO”Œsource”h!Œline”KuhjubaŒ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/.doctrees/getting-started.doctree000066400000000000000000000042671415614527300231420ustar00rootroot00000000000000€•¬Œdocutils.nodes”Œdocument”“”)”}”(Œ rawsource”Œ”Œchildren”]”hŒsection”“”)”}”(hhh]”hŒtitle”“”)”}”(hŒGetting started”h]”hŒText”“”ŒGetting started”…””}”(hhŒparent”hhhŒsource”NŒline”NubaŒ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”uŒtagname”hhh 
hhhŒ'/srv/src/c/raft/doc/getting-started.rst”hKubah}”(h]”Œgetting-started”ah!]”h#]”Œgetting started”ah%]”h']”uh)h hhhhhh*hKubah}”(h]”h!]”h#]”h%]”h']”Œsource”h*uh)hŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(hNŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”hVŒerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h*Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”Œnameids”}”h0h-sŒ nametypes”}”h0Nsh}”h-h sŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”Œ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/.doctrees/index.doctree000066400000000000000000000337221415614527300211420ustar00rootroot00000000000000€•Ç7Œdocutils.nodes”Œdocument”“”)”}”(Œ 
rawsource”Œ”Œchildren”]”hŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒC-Raft”h]”hŒText”“”ŒC-Raft”…””}”(hhŒparent”hhhŒsource”NŒline”NubaŒ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”uŒtagname”hhh hhhŒ/srv/src/c/raft/doc/index.rst”hKubhŒ paragraph”“”)”}”(hŒOC-Raft is a fully asynchronous C implementation of the Raft consensus protocol.”h]”hŒOC-Raft is a fully asynchronous C implementation of the Raft consensus protocol.”…””}”(hh/hh-hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhh hhubh )”}”(hhh]”(h)”}”(hŒDesign”h]”hŒDesign”…””}”(hh@hh>hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)hhh;hhhh*hKubh,)”}”(hŒëThe library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking and disk persistence.”h]”hŒëThe library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking and disk persistence.”…””}”(hhNhhLhhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK hh;hhubh,)”}”(hŒ®A stock implementation of the I/O interface is provided when building the library with default options. It is based on `libuv`_ and should fit the vast majority of use cases.”h]”(hŒwA stock implementation of the I/O interface is provided when building the library with default options. It is based on ”…””}”(hŒwA stock implementation of the I/O interface is provided when building the library with default options. It is based on ”hhZhhhNhNubhŒ reference”“”)”}”(hŒ`libuv`_”h]”hŒlibuv”…””}”(hŒlibuv”hheubah}”(h]”h!]”h#]”h%]”h']”Œname”hmŒrefuri”Œhttp://libuv.org”uh)hchhZŒresolved”KubhŒ/ and should fit the vast majority of use cases.”…””}”(hŒ/ and should fit the vast majority of use cases.”hhZhhhNhNubeh}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhh;hhubhŒtarget”“”)”}”(hŒ.. 
_libuv: http://libuv.org”h]”h}”(h]”Œlibuv”ah!]”h#]”Œlibuv”ah%]”h']”huhvuh)hƒhKhh;hhhh*Œ referenced”Kubeh}”(h]”Œdesign”ah!]”h#]”Œdesign”ah%]”h']”uh)h hh hhhh*hKubh )”}”(hhh]”(h)”}”(hŒFeatures”h]”hŒFeatures”…””}”(hhŸhhhhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)hhhšhhhh*hKubh,)”}”(hŒLC-Raft implements all the basic features described in the Raft dissertation:”h]”hŒLC-Raft implements all the basic features described in the Raft dissertation:”…””}”(hh­hh«hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhhšhhubhŒ bullet_list”“”)”}”(hhh]”(hŒ list_item”“”)”}”(hŒLeader election”h]”h,)”}”(hhÂh]”hŒLeader election”…””}”(hhÂhhÄubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhhÀubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hh»hhhh*hNubh¿)”}”(hŒLog replication”h]”h,)”}”(hhÙh]”hŒLog replication”…””}”(hhÙhhÛubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhh×ubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hh»hhhh*hNubh¿)”}”(hŒLog compaction”h]”h,)”}”(hhðh]”hŒLog compaction”…””}”(hhðhhòubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhhîubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hh»hhhh*hNubh¿)”}”(hŒMembership changes ”h]”h,)”}”(hŒMembership changes”h]”hŒMembership changes”…””}”(hj hj ubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhjubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hh»hhhh*hNubeh}”(h]”h!]”h#]”h%]”h']”Œbullet”Œ*”uh)h¹hh*hKhhšhhubh,)”}”(hŒ-It also includes a few optional enhancements:”h]”hŒ-It also includes a few optional enhancements:”…””}”(hj'hj%hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKhhšhhubhº)”}”(hhh]”(h¿)”}”(hŒ7Optimistic pipelining to reduce log replication latency”h]”h,)”}”(hj8h]”hŒ7Optimistic pipelining to reduce log replication latency”…””}”(hj8hj:ubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK!hj6ubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hj3hhhh*hNubh¿)”}”(hŒ$Writing to leader's disk in parallel”h]”h,)”}”(hjOh]”hŒ&Writing to leader’s disk in parallel”…””}”(hjOhjQubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK"hjMubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hj3hhhh*hNubh¿)”}”(hŒ4Automatic stepping down when the leader loses quorum”h]”h,)”}”(hjfh]”hŒ4Automatic stepping down when the leader loses 
quorum”…””}”(hjfhjhubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK#hjdubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hj3hhhh*hNubh¿)”}”(hŒLeadership transfer extension”h]”h,)”}”(hj}h]”hŒLeadership transfer extension”…””}”(hj}hjubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK$hj{ubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hj3hhhh*hNubh¿)”}”(hŒNon-voting servers ”h]”h,)”}”(hŒNon-voting servers”h]”hŒNon-voting servers”…””}”(hj˜hj–ubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK%hj’ubah}”(h]”h!]”h#]”h%]”h']”uh)h¾hj3hhhh*hNubeh}”(h]”h!]”h#]”h%]”h']”j#j$uh)h¹hh*hK!hhšhhubeh}”(h]”Œfeatures”ah!]”h#]”Œfeatures”ah%]”h']”uh)h hh hhhh*hKubh )”}”(hhh]”(h)”}”(hŒ Quick start”h]”hŒ Quick start”…””}”(hj½hj»hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)hhj¸hhhh*hK(ubh,)”}”(hŒ>Make sure that `libuv`_ is installed on your system, then run:”h]”(hŒMake sure that ”…””}”(hŒMake sure that ”hjÉhhhNhNubhd)”}”(hŒ`libuv`_”h]”hŒlibuv”…””}”(hŒlibuv”hjÒubah}”(h]”h!]”h#]”h%]”h']”Œname”jÚhuhvuh)hchjÉhwKubhŒ' is installed on your system, then run:”…””}”(hŒ' is installed on your system, then run:”hjÉhhhNhNubeh}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK*hj¸hhubhŒ literal_block”“”)”}”(hŒ/autoreconf -i ./configure --enable-example make”h]”hŒ/autoreconf -i ./configure --enable-example make”…””}”(hhhjïubah}”(h]”h!]”h#]”h%]”h']”Œ xml:space”Œpreserve”Œlinenos”ˆŒforce”‰Œlanguage”Œbash”Œhighlight_args”}”uh)jíhh*hK,hj¸hhubh,)”}”(hŒ¤Then create a :file:`main.c` file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a counter:”h]”(hŒThen create a ”…””}”(hŒThen create a ”hjhhhNhNubhŒliteral”“”)”}”(hŒ:file:`main.c`”h]”hŒmain.c”…””}”(hŒmain.c”hjubah}”(h]”h!]”Œfile”ah#]”h%]”h']”Œrole”Œfile”uh)jhjubhŒˆ file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a counter:”…””}”(hŒˆ file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a 
counter:”hjhhhNhNubeh}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hK3hj¸hhubjî)”}”(hX”#include #include static raft_id id = 12345; static const char *address = "127.0.0.1:8080"; static const char *dir = "/tmp/raft-quick-start"; static struct uv_loop_s loop; static struct raft_uv_transport transport; static struct raft_io io; static struct raft_fsm fsm; static struct raft raft; static struct raft_configuration conf; static struct uv_timer_s timer; static struct raft_apply apply; static unsigned counter = 0; static uint64_t command; static int applyCommand(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { counter += *(uint64_t *)buf->base; printf("counter: %u\n", counter); return 0; } static void submitCommand(uv_timer_t *timer) { struct raft_buffer buf; command = uv_now(timer->loop) % 10; buf.len = sizeof command; buf.base = &command; raft_apply(&raft, &apply, &buf, 1, NULL); } int main() { mkdir(dir, 0755); uv_loop_init(&loop); raft_uv_tcp_init(&transport, &loop); raft_uv_init(&io, &loop, dir, &transport); fsm.apply = applyCommand; raft_init(&raft, &io, &fsm, id, address); raft_configuration_init(&conf); raft_configuration_add(&conf, id, address, RAFT_VOTER); raft_bootstrap(&raft, &conf); raft_start(&raft); uv_timer_init(&loop, &timer); uv_timer_start(&timer, submitCommand, 0, 1000); uv_run(&loop, UV_RUN_DEFAULT); }”h]”hX”#include #include static raft_id id = 12345; static const char *address = "127.0.0.1:8080"; static const char *dir = "/tmp/raft-quick-start"; static struct uv_loop_s loop; static struct raft_uv_transport transport; static struct raft_io io; static struct raft_fsm fsm; static struct raft raft; static struct raft_configuration conf; static struct uv_timer_s timer; static struct raft_apply apply; static unsigned counter = 0; static uint64_t command; static int applyCommand(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { counter += *(uint64_t *)buf->base; printf("counter: %u\n", counter); return 0; } static void 
submitCommand(uv_timer_t *timer) { struct raft_buffer buf; command = uv_now(timer->loop) % 10; buf.len = sizeof command; buf.base = &command; raft_apply(&raft, &apply, &buf, 1, NULL); } int main() { mkdir(dir, 0755); uv_loop_init(&loop); raft_uv_tcp_init(&transport, &loop); raft_uv_init(&io, &loop, dir, &transport); fsm.apply = applyCommand; raft_init(&raft, &io, &fsm, id, address); raft_configuration_init(&conf); raft_configuration_add(&conf, id, address, RAFT_VOTER); raft_bootstrap(&raft, &conf); raft_start(&raft); uv_timer_init(&loop, &timer); uv_timer_start(&timer, submitCommand, 0, 1000); uv_run(&loop, UV_RUN_DEFAULT); }”…””}”(hhhj-ubah}”(h]”h!]”h#]”h%]”h']”jýjþjÿˆj‰jŒC”j}”uh)jíhh*hK7hj¸hhubh,)”}”(hŒ You can compile and run it with:”h]”hŒ You can compile and run it with:”…””}”(hj?hj=hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKkhj¸hhubjî)”}”(hŒ'cc main.c -o main -lraft -luv && ./main”h]”hŒ'cc main.c -o main -lraft -luv && ./main”…””}”(hhhjKubah}”(h]”h!]”h#]”h%]”h']”jýjþjÿˆj‰jŒbash”j}”uh)jíhh*hKmhj¸hhubeh}”(h]”Œ quick-start”ah!]”h#]”Œ quick start”ah%]”h']”uh)h hh hhhh*hK(ubh )”}”(hhh]”(h)”}”(hŒLicence”h]”hŒLicence”…””}”(hjhhjfhhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)hhjchhhh*hKsubh,)”}”(hXThis raft C library is released under a slightly modified version of LGPLv3, that includes a copiright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full `license`_ text.”h]”(hŒóThis raft C library is released under a slightly modified version of LGPLv3, that includes a copiright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full ”…””}”(hŒóThis raft C library is released under a slightly modified version of LGPLv3, that includes a copiright exception letting users to statically link the library code in their project and release the final work under their own terms. 
See the full ”hjthhhNhNubhd)”}”(hŒ `license`_”h]”hŒlicense”…””}”(hŒlicense”hj}ubah}”(h]”h!]”h#]”h%]”h']”Œname”j…huŒ5https://github.com/canonical/raft/blob/master/LICENSE”uh)hchjthwKubhŒ text.”…””}”(hŒ text.”hjthhhNhNubeh}”(h]”h!]”h#]”h%]”h']”uh)h+hh*hKuhjchhubh„)”}”(hŒB.. _license: https://github.com/canonical/raft/blob/master/LICENSE”h]”h}”(h]”Œlicense”ah!]”h#]”Œlicense”ah%]”h']”hujuh)hƒhKzhjchhhh*h‘Kubh )”}”(hhh]”(h)”}”(hŒtoc”h]”hŒtoc”…””}”(hjªhj¨hhhNhNubah}”(h]”h!]”h#]”h%]”h']”uh)hhj¥hhhh*hK}ubhŒcompound”“”)”}”(hhh]”Œsphinx.addnodes”Œtoctree”“”)”}”(hhh]”h}”(h]”h!]”h#]”h%]”h']”hŒindex”Œentries”]”(NŒself”†”NŒgetting-started”†”NŒapi”†”eŒ includefiles”]”(jÌjÎeŒmaxdepth”KŒcaption”NŒglob”‰Œhidden”‰Œ includehidden”‰Œnumbered”KŒ titlesonly”‰Œ rawentries”]”uh)j¼hh*hKhj¸ubah}”(h]”h!]”Œtoctree-wrapper”ah#]”h%]”h']”uh)j¶hj¥hhhh*hNubeh}”(h]”Œtoc”ah!]”h#]”Œtoc”ah%]”h']”uh)h hjchhhh*hK}ubeh}”(h]”Œlicence”ah!]”h#]”Œlicence”ah%]”h']”uh)h hh hhhh*hKsubeh}”(h]”Œc-raft”ah!]”h#]”Œc-raft”ah%]”h']”uh)h hhhhhh*hKubah}”(h]”h!]”h#]”h%]”h']”Œsource”h*uh)hŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(hNŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jŒerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h*Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ 
rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”(Œlibuv”]”(hejÒeŒlicense”]”j}auŒrefids”}”Œnameids”}”(j÷jôh—h”hŽh‹jµj²j`j]jïjìj¢jŸjçjäuŒ nametypes”}”(j÷Nh—NhŽˆjµNj`NjïNj¢ˆjçNuh}”(jôh h”h;h‹h…j²hšj]j¸jìjcjŸj™jäj¥uŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”Œ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/.doctrees/io.doctree000066400000000000000000001652451415614527300204500ustar00rootroot00000000000000€•—êŒdocutils.nodes”Œdocument”“”)”}”(Œ rawsource”Œ”Œchildren”]”(hŒtarget”“”)”}”(hŒ.. 
_io:”h]”Œ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œrefid”Œio”uŒtagname”h Œline”KŒparent”hhhŒsource”Œ/srv/src/c/raft/doc/io.rst”ubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒ2:c:type:`struct raft_io` --- I/O backend interface”h]”(Œsphinx.addnodes”Œ pending_xref”“”)”}”(hŒ:c:type:`struct raft_io`”h]”hŒliteral”“”)”}”(hŒstruct raft_io”h]”hŒText”“”Œstruct raft_io”…””}”(hhhh6ubah}”(h]”h]”(Œxref”Œc”Œc-type”eh]”h]”h]”uhh4hh0ubah}”(h]”h]”h]”h]”h]”Œrefdoc”Œio”Œ refdomain”hDŒreftype”Œtype”Œ refexplicit”‰Œrefwarn”‰Œ reftarget”h8uhh.h h!hKhh)ubh;Œ — I/O backend interface”…””}”(hŒ --- I/O backend interface”hh)hhh NhNubeh}”(h]”h]”h]”h]”h]”uhh'hh$hhh h!hKubhŒ paragraph”“”)”}”(hŒThe I/O backend struct defines an interface for performing periodic ticks, log store read/write and send/receive of network RPCs.”h]”h;ŒThe I/O backend struct defines an interface for performing periodic ticks, log store read/write and send/receive of network RPCs.”…””}”(hhfhhdhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhh$hhubh#)”}”(hhh]”(h()”}”(hŒ Data types”h]”h;Œ Data types”…””}”(hhwhhuhhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hhrhhh h!hK ubh-Œindex”“”)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(Œsingle”Œraft_io (C type)”Œ c.raft_io”hNt”auhhƒhhrhhh h!hNubh-Œdesc”“”)”}”(hhh]”(h-Œdesc_signature”“”)”}”(hŒstruct raft_io”h]”(h-Œ desc_type”“”)”}”(hŒstruct ”h]”(h;Œstruct”…””}”(hŒstruct”hh¡hhh NhNubh;Œ ”…””}”(hŒ ”hh¡hhh NhNubeh}”(h]”h]”h]”h]”h]”Œ xml:space”Œpreserve”uhhŸhh›hhh h!hKubh-Œ desc_name”“”)”}”(hŒraft_io”h]”h;Œraft_io”…””}”(hhhh¹hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hh›hhh h!hKubeh}”(h]”h’ah]”h]”h’ah]”h]”Œfirst”‰uhh™hh–hhh h!hKubh-Œ desc_content”“”)”}”(hhh]”hc)”}”(hŒGHold pointers to an actual implementation of the I/O backend interface.”h]”h;ŒGHold pointers to an actual implementation of the I/O backend interface.”…””}”(hhÕhhÓhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhhÐhhubah}”(h]”h]”h]”h]”h]”uhhÎhh–hhh h!hKubeh}”(h]”h]”h]”h]”h]”Œdomain”hDŒobjtype”Œtype”Œdesctype”hïŒnoindex”‰uhh”hhhhrh 
h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒraft_io_close_cb (C type)”Œc.raft_io_close_cb”hNt”auhhƒhhrhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ,void (*raft_io_close_cb)(struct raft_io *io)”h]”(h )”}”(hŒvoid ”h]”(h;Œvoid”…””}”(hŒvoid”hjhhh NhNubh;Œ ”…””}”(hh®hjhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjhhh h!hKubh¸)”}”(hŒ(*raft_io_close_cb)”h]”h;Œ(*raft_io_close_cb)”…””}”(hhhjhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjhhh h!hKubh-Œdesc_parameterlist”“”)”}”(hŒstruct raft_io *io”h]”h-Œdesc_parameter”“”)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj0ubh;Œ ”…””}”(hh®hj0ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj=ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”Œtype”Œ reftarget”jDŒmodname”NŒ classname”Nuhh.hj0ubhŒemphasis”“”)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjSubah}”(h]”h]”h]”h]”h]”uhjQhj0ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj*ubah}”(h]”h]”h]”h]”h]”hµh¶uhj(hjhhh h!hKubeh}”(h]”hþah]”h]”hþah]”h]”h͉uhh™hjhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒAType definition for callback passed to :c:func:`raft_io.close()`.”h]”(h;Œ'Type definition for callback passed to ”…””}”(hŒ'Type definition for callback passed to ”hjwhhh NhNubh/)”}”(hŒ:c:func:`raft_io.close()`”h]”h5)”}”(hŒraft_io.close()”h]”h;Œraft_io.close()”…””}”(hhhj„ubah}”(h]”h]”(hChDŒc-func”eh]”h]”h]”uhh4hj€ubah}”(h]”h]”h]”h]”h]”Œrefdoc”hPŒ refdomain”hDŒreftype”Œfunc”Œ refexplicit”‰Œrefwarn”‰hVŒ raft_io.close”uhh.h h!hKhjwubh;Œ.”…””}”(hŒ.”hjwhhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKhjthhubah}”(h]”h]”h]”h]”h]”uhhÎhjhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒtype”hðj·hñ‰uhh”hhhhrh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒraft_io_tick_cb (C type)”Œc.raft_io_tick_cb”hNt”auhhƒhhrhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ+void (*raft_io_tick_cb)(struct raft_io *io)”h]”(h )”}”(hŒvoid ”h]”(h;Œvoid”…””}”(hŒvoid”hjÍhhh NhNubh;Œ ”…””}”(hh®hjÍhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÉhhh h!hKubh¸)”}”(hŒ(*raft_io_tick_cb)”h]”h;Œ(*raft_io_tick_cb)”…””}”(hhhjàhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÉhhh h!hKubj))”}”(hŒstruct raft_io *io”h]”j/)”}”(hŒstruct raft_io 
*io”h]”(h;Œstruct”…””}”(hŒstruct”hjòubh;Œ ”…””}”(hh®hjòubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjÿubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŒmodname”NŒ classname”Nuhh.hjòubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhjòubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjîubah}”(h]”h]”h]”h]”h]”hµh¶uhj(hjÉhhh h!hKubeh}”(h]”jÄah]”h]”jÄah]”h]”h͉uhh™hjÆhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒ@Callback invoked by the I/O implementation at regular intervals.”h]”h;Œ@Callback invoked by the I/O implementation at regular intervals.”…””}”(hj8hj6hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhj3hhubah}”(h]”h]”h]”h]”h]”uhhÎhjÆhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒtype”hðjPhñ‰uhh”hhhhrh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒraft_io_recv_cb (C type)”Œc.raft_io_recv_cb”hNt”auhhƒhhrhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒEvoid (*raft_io_recv_cb)(struct raft_io *io, struct raft_message *msg)”h]”(h )”}”(hŒvoid ”h]”(h;Œvoid”…””}”(hŒvoid”hjfhhh NhNubh;Œ ”…””}”(hh®hjfhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjbhhh h!hKubh¸)”}”(hŒ(*raft_io_recv_cb)”h]”h;Œ(*raft_io_recv_cb)”…””}”(hhhjyhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjbhhh h!hKubj))”}”(hŒ.struct raft_io *io, struct raft_message *msg”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj‹ubh;Œ ”…””}”(hh®hj‹ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj˜ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŸŒmodname”NŒ classname”Nuhh.hj‹ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhj«ubah}”(h]”h]”h]”h]”h]”uhjQhj‹ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj‡ubj/)”}”(hŒstruct raft_message *msg”h]”(h;Œstruct”…””}”(hŒstruct”hjÀubh;Œ ”…””}”(hh®hjÀubh/)”}”(hhh]”h;Œ raft_message”…””}”(hŒ raft_message”hjÍubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÔŒmodname”NŒ classname”Nuhh.hjÀubjR)”}”(hŒ *msg”h]”h;Œ *msg”…””}”(hhhjàubah}”(h]”h]”h]”h]”h]”uhjQhjÀubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj‡ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjbhhh h!hKubeh}”(h]”j]ah]”h]”j]ah]”h]”h͉uhh™hj_hhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒKCallback invoked by the 
I/O implementation when an RPC message is received.”h]”h;ŒKCallback invoked by the I/O implementation when an RPC message is received.”…””}”(hjhjhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhjhhubah}”(h]”h]”h]”h]”h]”uhhÎhj_hhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒtype”hðjhñ‰uhh”hhhhrh h!hNubh#)”}”(hhh]”(h()”}”(hŒPublic members”h]”h;ŒPublic members”…””}”(hj$hj"hhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hjhhh h!hKubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒdata (C member)”Œc.data”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ void* data”h]”(h )”}”(hŒvoid* ”h]”(h;Œvoid”…””}”(hŒvoid”hjEhhh NhNubh;Œ* ”…””}”(hŒ* ”hjEhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjAhhh h!hK$ubh¸)”}”(hŒdata”h]”h;Œdata”…””}”(hhhjYhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjAhhh h!hK$ubeh}”(h]”j<ah]”h]”j<ah]”h]”h͉uhh™hj>hhh h!hK$ubhÏ)”}”(hhh]”hc)”}”(hŒYSpace for user-defined arbitrary data. C-Raft does not use and does not touch this field.”h]”h;ŒYSpace for user-defined arbitrary data. C-Raft does not use and does not touch this field.”…””}”(hjrhjphhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK"hjmhhubah}”(h]”h]”h]”h]”h]”uhhÎhj>hhh h!hK$ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjŠhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒversion (C member)”Œ c.version”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ int version”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj hhh NhNubh;Œ ”…””}”(hh®hj hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjœhhh h!hK(ubh¸)”}”(hŒversion”h]”h;Œversion”…””}”(hhhj³hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjœhhh h!hK(ubeh}”(h]”j—ah]”h]”j—ah]”h]”h͉uhh™hj™hhh h!hK(ubhÏ)”}”(hhh]”hc)”}”(hŒ6API version implemented by this instance. Currently 1.”h]”h;Œ6API version implemented by this instance. 
Currently 1.”…””}”(hjÌhjÊhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK'hjÇhhubah}”(h]”h]”h]”h]”h]”uhhÎhj™hhh h!hK(ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjähñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒimpl (C member)”Œc.impl”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ void* impl”h]”(h )”}”(hŒvoid* ”h]”(h;Œvoid”…””}”(hŒvoid”hjúhhh NhNubh;Œ* ”…””}”(hŒ* ”hjúhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjöhhh h!hK,ubh¸)”}”(hŒimpl”h]”h;Œimpl”…””}”(hhhjhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjöhhh h!hK,ubeh}”(h]”jñah]”h]”jñah]”h]”h͉uhh™hjóhhh h!hK,ubhÏ)”}”(hhh]”hc)”}”(hŒImplementation-defined state.”h]”h;ŒImplementation-defined state.”…””}”(hj'hj%hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK+hj"hhubah}”(h]”h]”h]”h]”h]”uhhÎhjóhhh h!hK,ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj?hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”uhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ!char errmsg[RAFT_ERRMSG_BUF_SIZE]”h]”h¸)”}”(hjPh]”h;Œ!char errmsg[RAFT_ERRMSG_BUF_SIZE]”…””}”(hhhjRhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjNhhh h!hK1ubah}”(h]”h]”h]”h]”h]”h͉uhh™hjKhhh h!hK1ubhÏ)”}”(hhh]”hc)”}”(hŒVHuman-readable message providing diagnostic information about the last error occurred.”h]”h;ŒVHuman-readable message providing diagnostic information about the last error occurred.”…””}”(hjjhjhhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK/hjehhubah}”(h]”h]”h]”h]”h]”uhhÎhjKhhh h!hK1ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj‚hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒinit (C member)”Œc.init”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ@int (*init)(struct raft_io *io, raft_id id, const char *address)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj˜hhh NhNubh;Œ ”…””}”(hh®hj˜hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj”hhh h!hK6ubh¸)”}”(hŒ(*init)”h]”h;Œ(*init)”…””}”(hhhj«hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj”hhh h!hK6ubj))”}”(hŒ6struct raft_io *io, raft_id id, const char *address”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj½ubh;Œ 
”…””}”(hh®hj½ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjÊubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÑŒmodname”NŒ classname”Nuhh.hj½ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjÝubah}”(h]”h]”h]”h]”h]”uhjQhj½ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¹ubj/)”}”(hŒ raft_id id”h]”(h/)”}”(hhh]”h;Œraft_id”…””}”(hŒraft_id”hjöubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jýŒmodname”NŒ classname”Nuhh.hjòubjR)”}”(hŒ id”h]”h;Œ id”…””}”(hhhj ubah}”(h]”h]”h]”h]”h]”uhjQhjòubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¹ubj/)”}”(hŒconst char *address”h]”(h;Œconst”…””}”(hŒconst”hjubh;Œ ”…””}”(hh®hjubh;Œchar”…””}”(hŒchar”hjubjR)”}”(hŒ *address”h]”h;Œ  *address”…””}”(hhhj0ubah}”(h]”h]”h]”h]”h]”uhjQhjubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¹ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj”hhh h!hK6ubeh}”(h]”jah]”h]”jah]”h]”h͉uhh™hj‘hhh h!hK6ubhÏ)”}”(hhh]”hc)”}”(hŒQInitialize the backend with operational parameters such as server ID and address.”h]”h;ŒQInitialize the backend with operational parameters such as server ID and address.”…””}”(hjVhjThhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK4hjQhhubah}”(h]”h]”h]”h]”h]”uhhÎhj‘hhh h!hK6ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjnhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒclose (C member)”Œc.close”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ6void (*close)(struct raft_io *io, raft_io_close_cb cb)”h]”(h )”}”(hŒvoid ”h]”(h;Œvoid”…””}”(hŒvoid”hj„hhh NhNubh;Œ ”…””}”(hh®hj„hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj€hhh h!hK?ubh¸)”}”(hŒ(*close)”h]”h;Œ(*close)”…””}”(hhhj—hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj€hhh h!hK?ubj))”}”(hŒ)struct raft_io *io, raft_io_close_cb cb”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj©ubh;Œ ”…””}”(hh®hj©ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj¶ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j½Œmodname”NŒ classname”Nuhh.hj©ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjÉubah}”(h]”h]”h]”h]”h]”uhjQhj©ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¥ubj/)”}”(hŒraft_io_close_cb 
cb”h]”(h/)”}”(hhh]”h;Œraft_io_close_cb”…””}”(hŒraft_io_close_cb”hjâubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jéŒmodname”NŒ classname”Nuhh.hjÞubjR)”}”(hŒ cb”h]”h;Œ cb”…””}”(hhhjõubah}”(h]”h]”h]”h]”h]”uhjQhjÞubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¥ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj€hhh h!hK?ubeh}”(h]”j{ah]”h]”j{ah]”h]”h͉uhh™hj}hhh h!hK?ubhÏ)”}”(hhh]”(hc)”}”(hŒ*Release all resources used by the backend.”h]”h;Œ*Release all resources used by the backend.”…””}”(hjhjhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK9hjhhubhc)”}”(hŒäThe :code:`tick` and :code:`recv` callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. Invoke the close callback once the :c:type:`raft_io` instance can be freed.”h]”(h;ŒThe ”…””}”(hŒThe ”hj'hhh NhNubh5)”}”(hŒ :code:`tick`”h]”h;Œtick”…””}”(hŒtick”hj0ubah}”(h]”h]”Œcode”ah]”h]”h]”uhh4hj'ubh;Œ and ”…””}”(hŒ and ”hj'hhh NhNubh5)”}”(hŒ :code:`recv`”h]”h;Œrecv”…””}”(hŒrecv”hjEubah}”(h]”h]”j<ah]”h]”h]”uhh4hj'ubh;Œ› callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. Invoke the close callback once the ”…””}”(hŒ› callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. 
Invoke the close callback once the ”hj'hhh NhNubh/)”}”(hŒ:c:type:`raft_io`”h]”h5)”}”(hŒraft_io”h]”h;Œraft_io”…””}”(hhhj]ubah}”(h]”h]”(hChDŒc-type”eh]”h]”h]”uhh4hjYubah}”(h]”h]”h]”h]”h]”Œrefdoc”hPŒ refdomain”hDŒreftype”Œtype”Œ refexplicit”‰Œrefwarn”‰hVj_uhh.h h!hK;hj'ubh;Œ instance can be freed.”…””}”(hŒ instance can be freed.”hj'hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hK;hjhhubeh}”(h]”h]”h]”h]”h]”uhhÎhj}hhh h!hK?ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒload (C member)”Œc.load”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ°int (*load)(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj¥hhh NhNubh;Œ ”…””}”(hh®hj¥hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj¡hhh h!hKOubh¸)”}”(hŒ(*load)”h]”h;Œ(*load)”…””}”(hhhj¸hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj¡hhh h!hKOubj))”}”(hŒ©struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjÊubh;Œ ”…””}”(hh®hjÊubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj×ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÞŒmodname”NŒ classname”Nuhh.hjÊubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjêubah}”(h]”h]”h]”h]”h]”uhjQhjÊubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒraft_term *term”h]”(h/)”}”(hhh]”h;Œ raft_term”…””}”(hŒ raft_term”hjubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j Œmodname”NŒ classname”Nuhh.hjÿubjR)”}”(hŒ *term”h]”h;Œ *term”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhjÿubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒraft_id *voted_for”h]”(h/)”}”(hhh]”h;Œraft_id”…””}”(hŒraft_id”hj/ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j6Œmodname”NŒ classname”Nuhh.hj+ubjR)”}”(hŒ *voted_for”h]”h;Œ  
*voted_for”…””}”(hhhjBubah}”(h]”h]”h]”h]”h]”uhjQhj+ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒ struct raft_snapshot **snapshot”h]”(h;Œstruct”…””}”(hŒstruct”hjWubh;Œ ”…””}”(hh®hjWubh/)”}”(hhh]”h;Œ raft_snapshot”…””}”(hŒ raft_snapshot”hjdubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jkŒmodname”NŒ classname”Nuhh.hjWubjR)”}”(hŒ **snapshot”h]”h;Œ  **snapshot”…””}”(hhhjwubah}”(h]”h]”h]”h]”h]”uhjQhjWubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒraft_index *start_index”h]”(h/)”}”(hhh]”h;Œ raft_index”…””}”(hŒ raft_index”hjubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j—Œmodname”NŒ classname”Nuhh.hjŒubjR)”}”(hŒ *start_index”h]”h;Œ *start_index”…””}”(hhhj£ubah}”(h]”h]”h]”h]”h]”uhjQhjŒubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒstruct raft_entry *entries[]”h]”(h;Œstruct”…””}”(hŒstruct”hj¸ubh;Œ ”…””}”(hh®hj¸ubh/)”}”(hhh]”h;Œ raft_entry”…””}”(hŒ raft_entry”hjÅubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÌŒmodname”NŒ classname”Nuhh.hj¸ubjR)”}”(hŒ *entries[]”h]”h;Œ  *entries[]”…””}”(hhhjØubah}”(h]”h]”h]”h]”h]”uhjQhj¸ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubj/)”}”(hŒsize_t *n_entries”h]”(h;Œsize_t”…””}”(hŒsize_t”hjíubjR)”}”(hŒ *n_entries”h]”h;Œ  *n_entries”…””}”(hhhjöubah}”(h]”h]”h]”h]”h]”uhjQhjíubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjÆubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj¡hhh h!hKOubeh}”(h]”jœah]”h]”jœah]”h]”h͉uhh™hjžhhh h!hKOubhÏ)”}”(hhh]”(hc)”}”(hŒ"Load persisted state from storage.”h]”h;Œ"Load persisted state from storage.”…””}”(hjhjhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKBhjhhubhc)”}”(hŒ‘The implementation must synchronously load the current state from its storage backend and return information about it through the given pointers.”h]”h;Œ‘The implementation must synchronously load the current state from its storage backend and return information about it through the given pointers.”…””}”(hj*hj(hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKDhjhhubhc)”}”(hŒÆThe implementation can safely assume that this method will be 
invoked exactly one time, before any call to :c:func:`raft_io.append()` or c:func:`raft_io.truncate()`, and then won't be invoked again.”h]”(h;ŒkThe implementation can safely assume that this method will be invoked exactly one time, before any call to ”…””}”(hŒkThe implementation can safely assume that this method will be invoked exactly one time, before any call to ”hj6hhh NhNubh/)”}”(hŒ:c:func:`raft_io.append()`”h]”h5)”}”(hŒraft_io.append()”h]”h;Œraft_io.append()”…””}”(hhhjCubah}”(h]”h]”(hChDŒc-func”eh]”h]”h]”uhh4hj?ubah}”(h]”h]”h]”h]”h]”Œrefdoc”hPŒ refdomain”hDŒreftype”Œfunc”Œ refexplicit”‰Œrefwarn”‰hVŒraft_io.append”uhh.h h!hKHhj6ubh;Œ or c:func:”…””}”(hŒ or c:func:”hj6hhh NhNubhŒtitle_reference”“”)”}”(hŒ`raft_io.truncate()`”h]”h;Œraft_io.truncate()”…””}”(hŒraft_io.truncate()”hjfubah}”(h]”h]”h]”h]”h]”uhjdhj6ubh;Œ$, and then won’t be invoked again.”…””}”(hŒ", and then won't be invoked again.”hj6hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKHhjhhubhc)”}”(hŒÁThe snapshot object and entries array must be allocated and populated using :c:func:`raft_malloc`. If this function completes successfully, ownership of such memory is transferred to the caller.”h]”(h;ŒLThe snapshot object and entries array must be allocated and populated using ”…””}”(hŒLThe snapshot object and entries array must be allocated and populated using ”hj€hhh NhNubh/)”}”(hŒ:c:func:`raft_malloc`”h]”h5)”}”(hŒ raft_malloc”h]”h;Œ raft_malloc()”…””}”(hhhjubah}”(h]”h]”(hChDŒc-func”eh]”h]”h]”uhh4hj‰ubah}”(h]”h]”h]”h]”h]”Œrefdoc”hPŒ refdomain”hDŒreftype”Œfunc”Œ refexplicit”‰Œrefwarn”‰hVjuhh.h h!hKLhj€ubh;Œ`. If this function completes successfully, ownership of such memory is transferred to the caller.”…””}”(hŒ`. 
If this function completes successfully, ownership of such memory is transferred to the caller.”hj€hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKLhjhhubeh}”(h]”h]”h]”h]”h]”uhhÎhjžhhh h!hKOubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj¿hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒstart (C member)”Œc.start”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ\int (*start)(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjÕhhh NhNubh;Œ ”…””}”(hh®hjÕhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÑhhh h!hKWubh¸)”}”(hŒ(*start)”h]”h;Œ(*start)”…””}”(hhhjèhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÑhhh h!hKWubj))”}”(hŒRstruct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjúubh;Œ ”…””}”(hh®hjúubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j Œmodname”NŒ classname”Nuhh.hjúubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhj ubah}”(h]”h]”h]”h]”h]”uhjQhjúubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjöubj/)”}”(hŒunsigned msecs”h]”(h;Œunsigned”…””}”(hŒunsigned”hj/ ubjR)”}”(hŒ msecs”h]”h;Œ msecs”…””}”(hhhj8 ubah}”(h]”h]”h]”h]”h]”uhjQhj/ ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjöubj/)”}”(hŒraft_io_tick_cb tick”h]”(h/)”}”(hhh]”h;Œraft_io_tick_cb”…””}”(hŒraft_io_tick_cb”hjQ ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jX Œmodname”NŒ classname”Nuhh.hjM ubjR)”}”(hŒ tick”h]”h;Œ tick”…””}”(hhhjd ubah}”(h]”h]”h]”h]”h]”uhjQhjM ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjöubj/)”}”(hŒraft_io_recv_cb recv”h]”(h/)”}”(hhh]”h;Œraft_io_recv_cb”…””}”(hŒraft_io_recv_cb”hj} ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j„ Œmodname”NŒ classname”Nuhh.hjy ubjR)”}”(hŒ recv”h]”h;Œ recv”…””}”(hhhj ubah}”(h]”h]”h]”h]”h]”uhjQhjy ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjöubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjÑhhh h!hKWubeh}”(h]”jÌah]”h]”jÌah]”h]”h͉uhh™hjÎhhh h!hKWubhÏ)”}”(hhh]”(hc)”}”(hŒStart the 
backend.”h]”h;ŒStart the backend.”…””}”(hj¶ hj´ hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKRhj± hhubhc)”}”(hŒÐFrom now on the implementation must start accepting RPC requests and must invoke the :code:`tick` callback every :code:`msecs` milliseconds. The :code:`recv` callback must be invoked when receiving a message.”h]”(h;ŒUFrom now on the implementation must start accepting RPC requests and must invoke the ”…””}”(hŒUFrom now on the implementation must start accepting RPC requests and must invoke the ”hj hhh NhNubh5)”}”(hŒ :code:`tick`”h]”h;Œtick”…””}”(hŒtick”hjË ubah}”(h]”h]”j<ah]”h]”h]”uhh4hj ubh;Œ callback every ”…””}”(hŒ callback every ”hj hhh NhNubh5)”}”(hŒ :code:`msecs`”h]”h;Œmsecs”…””}”(hŒmsecs”hjß ubah}”(h]”h]”j<ah]”h]”h]”uhh4hj ubh;Œ milliseconds. The ”…””}”(hŒ milliseconds. The ”hj hhh NhNubh5)”}”(hŒ :code:`recv`”h]”h;Œrecv”…””}”(hŒrecv”hjó ubah}”(h]”h]”j<ah]”h]”h]”uhh4hj ubh;Œ3 callback must be invoked when receiving a message.”…””}”(hŒ3 callback must be invoked when receiving a message.”hj hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKThj± hhubeh}”(h]”h]”h]”h]”h]”uhhÎhjÎhhh h!hKWubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒbootstrap (C member)”Œ c.bootstrap”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒKint (*bootstrap)(struct raft_io *io, const struct raft_configuration *conf)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj/ hhh NhNubh;Œ ”…””}”(hh®hj/ hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj+ hhh h!hKbubh¸)”}”(hŒ (*bootstrap)”h]”h;Œ (*bootstrap)”…””}”(hhhjB hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj+ hhh h!hKbubj))”}”(hŒ;struct raft_io *io, const struct raft_configuration *conf”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjT ubh;Œ ”…””}”(hh®hjT ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hja ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jh Œmodname”NŒ classname”Nuhh.hjT ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjt ubah}”(h]”h]”h]”h]”h]”uhjQhjT ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjP 
ubj/)”}”(hŒ&const struct raft_configuration *conf”h]”(h;Œconst”…””}”(hŒconst”hj‰ ubh;Œ ”…””}”(hh®hj‰ ubh;Œstruct”…””}”(hŒstruct”hj‰ ubh;Œ ”…””}”(hh®hj‰ ubh/)”}”(hhh]”h;Œraft_configuration”…””}”(hŒraft_configuration”hjŸ ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j¦ Œmodname”NŒ classname”Nuhh.hj‰ ubjR)”}”(hŒ *conf”h]”h;Œ *conf”…””}”(hhhj² ubah}”(h]”h]”h]”h]”h]”uhjQhj‰ ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjP ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj+ hhh h!hKbubeh}”(h]”j& ah]”h]”j& ah]”h]”h͉uhh™hj( hhh h!hKbubhÏ)”}”(hhh]”(hc)”}”(hŒ.Bootstrap a server belonging to a new cluster.”h]”h;Œ.Bootstrap a server belonging to a new cluster.”…””}”(hjØ hjÖ hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKZhjÓ hhubhc)”}”(hŒ¥The implementation must synchronously persist the given configuration as the first entry of the log. The current persisted term must be set to 1 and the vote to nil.”h]”h;Œ¥The implementation must synchronously persist the given configuration as the first entry of the log. The current persisted term must be set to 1 and the vote to nil.”…””}”(hjæ hjä hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK\hjÓ hhubhc)”}”(hŒrIf an attempt is made to bootstrap a server that has already some state, then RAFT_CANTBOOTSTRAP must be returned.”h]”h;ŒrIf an attempt is made to bootstrap a server that has already some state, then RAFT_CANTBOOTSTRAP must be returned.”…””}”(hjô hjò hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK`hjÓ hhubeh}”(h]”h]”h]”h]”h]”uhhÎhj( hhh h!hKbubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒrecover (C member)”Œ c.recover”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒIint (*recover)(struct raft_io *io, const struct raft_configuration *conf)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj" hhh NhNubh;Œ ”…””}”(hh®hj" hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj hhh h!hKfubh¸)”}”(hŒ (*recover)”h]”h;Œ (*recover)”…””}”(hhhj5 hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj hhh h!hKfubj))”}”(hŒ;struct raft_io *io, const struct 
raft_configuration *conf”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjG ubh;Œ ”…””}”(hh®hjG ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjT ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j[ Œmodname”NŒ classname”Nuhh.hjG ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjg ubah}”(h]”h]”h]”h]”h]”uhjQhjG ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjC ubj/)”}”(hŒ&const struct raft_configuration *conf”h]”(h;Œconst”…””}”(hŒconst”hj| ubh;Œ ”…””}”(hh®hj| ubh;Œstruct”…””}”(hŒstruct”hj| ubh;Œ ”…””}”(hh®hj| ubh/)”}”(hhh]”h;Œraft_configuration”…””}”(hŒraft_configuration”hj’ ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j™ Œmodname”NŒ classname”Nuhh.hj| ubjR)”}”(hŒ *conf”h]”h;Œ *conf”…””}”(hhhj¥ ubah}”(h]”h]”h]”h]”h]”uhjQhj| ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjC ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj hhh h!hKfubeh}”(h]”j ah]”h]”j ah]”h]”h͉uhh™hj hhh h!hKfubhÏ)”}”(hhh]”hc)”}”(hŒ=Force appending a new configuration as last entry of the log.”h]”h;Œ=Force appending a new configuration as last entry of the log.”…””}”(hjË hjÉ hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKehjÆ hhubah}”(h]”h]”h]”h]”h]”uhhÎhj hhh h!hKfubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjã hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒset_term (C member)”Œ c.set_term”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ3int (*set_term)(struct raft_io *io, raft_term term)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjù hhh NhNubh;Œ ”…””}”(hh®hjù hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjõ hhh h!hKmubh¸)”}”(hŒ (*set_term)”h]”h;Œ (*set_term)”…””}”(hhhj hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjõ hhh h!hKmubj))”}”(hŒ$struct raft_io *io, raft_term term”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj ubh;Œ ”…””}”(hh®hj ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj+ ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j2 Œmodname”NŒ classname”Nuhh.hj ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhj> ubah}”(h]”h]”h]”h]”h]”uhjQhj ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj ubj/)”}”(hŒraft_term 
term”h]”(h/)”}”(hhh]”h;Œ raft_term”…””}”(hŒ raft_term”hjW ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j^ Œmodname”NŒ classname”Nuhh.hjS ubjR)”}”(hŒ term”h]”h;Œ term”…””}”(hhhjj ubah}”(h]”h]”h]”h]”h]”uhjQhjS ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjõ hhh h!hKmubeh}”(h]”jð ah]”h]”jð ah]”h]”h͉uhh™hjò hhh h!hKmubhÏ)”}”(hhh]”(hc)”}”(hŒ2Synchronously persist current term (and nil vote).”h]”h;Œ2Synchronously persist current term (and nil vote).”…””}”(hj hjŽ hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKihj‹ hhubhc)”}”(hŒwThe implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC).”h]”(h;ŒWThe implementation MUST ensure that the change is durable before returning (e.g. using ”…””}”(hŒWThe implementation MUST ensure that the change is durable before returning (e.g. using ”hjœ hhh NhNubh5)”}”(hŒ:code:`fdatasync()`”h]”h;Œ fdatasync()”…””}”(hŒ fdatasync()”hj¥ ubah}”(h]”h]”j<ah]”h]”h]”uhh4hjœ ubh;Œ or O_DSYNC).”…””}”(hŒ or O_DSYNC).”hjœ hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKkhj‹ hhubeh}”(h]”h]”h]”h]”h]”uhhÎhjò hhh h!hKmubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjË hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒset_vote (C member)”Œ c.set_vote”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ6int (*set_vote)(struct raft_io *io, raft_id server_id)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjá hhh NhNubh;Œ ”…””}”(hh®hjá hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÝ hhh h!hKtubh¸)”}”(hŒ (*set_vote)”h]”h;Œ (*set_vote)”…””}”(hhhjô hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÝ hhh h!hKtubj))”}”(hŒ'struct raft_io *io, raft_id server_id”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj ubh;Œ ”…””}”(hh®hj ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j Œmodname”NŒ classname”Nuhh.hj ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhj& ubah}”(h]”h]”h]”h]”h]”uhjQhj ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj ubj/)”}”(hŒraft_id 
server_id”h]”(h/)”}”(hhh]”h;Œraft_id”…””}”(hŒraft_id”hj? ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jF Œmodname”NŒ classname”Nuhh.hj; ubjR)”}”(hŒ server_id”h]”h;Œ  server_id”…””}”(hhhjR ubah}”(h]”h]”h]”h]”h]”uhjQhj; ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjÝ hhh h!hKtubeh}”(h]”jØ ah]”h]”jØ ah]”h]”h͉uhh™hjÚ hhh h!hKtubhÏ)”}”(hhh]”(hc)”}”(hŒ'Synchronously persist who we voted for.”h]”h;Œ'Synchronously persist who we voted for.”…””}”(hjx hjv hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKphjs hhubhc)”}”(hŒwThe implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC).”h]”(h;ŒWThe implementation MUST ensure that the change is durable before returning (e.g. using ”…””}”(hŒWThe implementation MUST ensure that the change is durable before returning (e.g. using ”hj„ hhh NhNubh5)”}”(hŒ:code:`fdatasync()`”h]”h;Œ fdatasync()”…””}”(hŒ fdatasync()”hj ubah}”(h]”h]”j<ah]”h]”h]”uhh4hj„ ubh;Œ or O_DSYNC).”…””}”(hŒ or O_DSYNC).”hj„ hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKrhjs hhubeh}”(h]”h]”h]”h]”h]”uhhÎhjÚ hhh h!hKtubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj³ hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒsend (C member)”Œc.send”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒqint (*send)(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjÉ hhh NhNubh;Œ ”…””}”(hh®hjÉ hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÅ hhh h!hK{ubh¸)”}”(hŒ(*send)”h]”h;Œ(*send)”…””}”(hhhjÜ hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÅ hhh h!hK{ubj))”}”(hŒhstruct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjî ubh;Œ ”…””}”(hh®hjî ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjû ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŒmodname”NŒ classname”Nuhh.hjî ubjR)”}”(hŒ *io”h]”h;Œ 
*io”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhjî ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjê ubj/)”}”(hŒstruct raft_io_send *req”h]”(h;Œstruct”…””}”(hŒstruct”hj#ubh;Œ ”…””}”(hh®hj#ubh/)”}”(hhh]”h;Œ raft_io_send”…””}”(hŒ raft_io_send”hj0ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j7Œmodname”NŒ classname”Nuhh.hj#ubjR)”}”(hŒ *req”h]”h;Œ *req”…””}”(hhhjCubah}”(h]”h]”h]”h]”h]”uhjQhj#ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjê ubj/)”}”(hŒ#const struct raft_message *message”h]”(h;Œconst”…””}”(hŒconst”hjXubh;Œ ”…””}”(hh®hjXubh;Œstruct”…””}”(hŒstruct”hjXubh;Œ ”…””}”(hh®hjXubh/)”}”(hhh]”h;Œ raft_message”…””}”(hŒ raft_message”hjnubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”juŒmodname”NŒ classname”Nuhh.hjXubjR)”}”(hŒ *message”h]”h;Œ  *message”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhjXubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjê ubj/)”}”(hŒraft_io_send_cb cb”h]”(h/)”}”(hhh]”h;Œraft_io_send_cb”…””}”(hŒraft_io_send_cb”hjšubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j¡Œmodname”NŒ classname”Nuhh.hj–ubjR)”}”(hŒ cb”h]”h;Œ cb”…””}”(hhhj­ubah}”(h]”h]”h]”h]”h]”uhjQhj–ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjê ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjÅ hhh h!hK{ubeh}”(h]”jÀ ah]”h]”jÀ ah]”h]”h͉uhh™hj hhh h!hK{ubhÏ)”}”(hhh]”(hc)”}”(hŒ#Asynchronously send an RPC message.”h]”h;Œ#Asynchronously send an RPC message.”…””}”(hjÓhjÑhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKwhjÎhhubhc)”}”(hŒThe implementation is guaranteed that the memory referenced in the given message will not be released until the :code:`cb` callback is invoked.”h]”(h;ŒpThe implementation is guaranteed that the memory referenced in the given message will not be released until the ”…””}”(hŒpThe implementation is guaranteed that the memory referenced in the given message will not be released until the ”hjßhhh NhNubh5)”}”(hŒ :code:`cb`”h]”h;Œcb”…””}”(hŒcb”hjèubah}”(h]”h]”j<ah]”h]”h]”uhh4hjßubh;Œ callback is invoked.”…””}”(hŒ callback is invoked.”hjßhhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh 
h!hKyhjÎhhubeh}”(h]”h]”h]”h]”h]”uhhÎhj hhh h!hK{ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒappend (C member)”Œc.append”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ‚int (*append)(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj$hhh NhNubh;Œ ”…””}”(hh®hj$hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj hhh h!hK‚ubh¸)”}”(hŒ (*append)”h]”h;Œ (*append)”…””}”(hhhj7hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj hhh h!hK‚ubj))”}”(hŒxstruct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjIubh;Œ ”…””}”(hh®hjIubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjVubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j]Œmodname”NŒ classname”Nuhh.hjIubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjiubah}”(h]”h]”h]”h]”h]”uhjQhjIubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjEubj/)”}”(hŒstruct raft_io_append *req”h]”(h;Œstruct”…””}”(hŒstruct”hj~ubh;Œ ”…””}”(hh®hj~ubh/)”}”(hhh]”h;Œraft_io_append”…””}”(hŒraft_io_append”hj‹ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j’Œmodname”NŒ classname”Nuhh.hj~ubjR)”}”(hŒ *req”h]”h;Œ *req”…””}”(hhhjžubah}”(h]”h]”h]”h]”h]”uhjQhj~ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjEubj/)”}”(hŒ"const struct raft_entry entries[]”h]”(h;Œconst”…””}”(hŒconst”hj³ubh;Œ ”…””}”(hh®hj³ubh;Œstruct”…””}”(hŒstruct”hj³ubh;Œ ”…””}”(hh®hj³ubh/)”}”(hhh]”h;Œ raft_entry”…””}”(hŒ raft_entry”hjÉubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÐŒmodname”NŒ classname”Nuhh.hj³ubjR)”}”(hŒ entries[]”h]”h;Œ  entries[]”…””}”(hhhjÜubah}”(h]”h]”h]”h]”h]”uhjQhj³ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjEubj/)”}”(hŒ unsigned n”h]”(h;Œunsigned”…””}”(hŒunsigned”hjñubjR)”}”(hŒ n”h]”h;Œ n”…””}”(hhhjúubah}”(h]”h]”h]”h]”h]”uhjQhjñubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjEubj/)”}”(hŒraft_io_append_cb 
cb”h]”(h/)”}”(hhh]”h;Œraft_io_append_cb”…””}”(hŒraft_io_append_cb”hjubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŒmodname”NŒ classname”Nuhh.hjubjR)”}”(hŒ cb”h]”h;Œ cb”…””}”(hhhj&ubah}”(h]”h]”h]”h]”h]”uhjQhjubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjEubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj hhh h!hK‚ubeh}”(h]”jah]”h]”jah]”h]”h͉uhh™hjhhh h!hK‚ubhÏ)”}”(hhh]”(hc)”}”(hŒ3Asynchronously append the given entries to the log.”h]”h;Œ3Asynchronously append the given entries to the log.”…””}”(hjLhjJhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK~hjGhhubhc)”}”(hŒ‰The implementation is guaranteed that the memory holding the given entries will not be released until the :code:`cb` callback is invoked.”h]”(h;ŒjThe implementation is guaranteed that the memory holding the given entries will not be released until the ”…””}”(hŒjThe implementation is guaranteed that the memory holding the given entries will not be released until the ”hjXhhh NhNubh5)”}”(hŒ :code:`cb`”h]”h;Œcb”…””}”(hŒcb”hjaubah}”(h]”h]”j<ah]”h]”h]”uhh4hjXubh;Œ callback is invoked.”…””}”(hŒ callback is invoked.”hjXhhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hK€hjGhhubeh}”(h]”h]”h]”h]”h]”uhhÎhjhhh h!hK‚ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj‡hñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒtruncate (C member)”Œ c.truncate”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ5int (*truncate)(struct raft_io *io, raft_index index)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjhhh NhNubh;Œ ”…””}”(hh®hjhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj™hhh h!hK†ubh¸)”}”(hŒ (*truncate)”h]”h;Œ (*truncate)”…””}”(hhhj°hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj™hhh h!hK†ubj))”}”(hŒ&struct raft_io *io, raft_index index”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjÂubh;Œ ”…””}”(hh®hjÂubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjÏubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÖŒmodname”NŒ classname”Nuhh.hjÂubjR)”}”(hŒ *io”h]”h;Œ 
*io”…””}”(hhhjâubah}”(h]”h]”h]”h]”h]”uhjQhjÂubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¾ubj/)”}”(hŒraft_index index”h]”(h/)”}”(hhh]”h;Œ raft_index”…””}”(hŒ raft_index”hjûubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŒmodname”NŒ classname”Nuhh.hj÷ubjR)”}”(hŒ index”h]”h;Œ index”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhj÷ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hj¾ubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj™hhh h!hK†ubeh}”(h]”j”ah]”h]”j”ah]”h]”h͉uhh™hj–hhh h!hK†ubhÏ)”}”(hhh]”hc)”}”(hŒEAsynchronously truncate all log entries from the given index onwards.”h]”h;ŒEAsynchronously truncate all log entries from the given index onwards.”…””}”(hj4hj2hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK…hj/hhubah}”(h]”h]”h]”h]”h]”uhhÎhj–hhh h!hK†ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjLhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒsnapshot_put (C member)”Œc.snapshot_put”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒžint (*snapshot_put)(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjbhhh NhNubh;Œ ”…””}”(hh®hjbhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj^hhh h!hK’ubh¸)”}”(hŒ(*snapshot_put)”h]”h;Œ(*snapshot_put)”…””}”(hhhjuhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj^hhh h!hK’ubj))”}”(hŒŽstruct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hj‡ubh;Œ ”…””}”(hh®hj‡ubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj”ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j›Œmodname”NŒ classname”Nuhh.hj‡ubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhj§ubah}”(h]”h]”h]”h]”h]”uhjQhj‡ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjƒubj/)”}”(hŒunsigned trailing”h]”(h;Œunsigned”…””}”(hŒunsigned”hj¼ubjR)”}”(hŒ trailing”h]”h;Œ  trailing”…””}”(hhhjÅubah}”(h]”h]”h]”h]”h]”uhjQhj¼ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjƒubj/)”}”(hŒ!struct 
raft_io_snapshot_put *req”h]”(h;Œstruct”…””}”(hŒstruct”hjÚubh;Œ ”…””}”(hh®hjÚubh/)”}”(hhh]”h;Œraft_io_snapshot_put”…””}”(hŒraft_io_snapshot_put”hjçubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jîŒmodname”NŒ classname”Nuhh.hjÚubjR)”}”(hŒ *req”h]”h;Œ *req”…””}”(hhhjúubah}”(h]”h]”h]”h]”h]”uhjQhjÚubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjƒubj/)”}”(hŒ%const struct raft_snapshot *snapshot”h]”(h;Œconst”…””}”(hŒconst”hjubh;Œ ”…””}”(hh®hjubh;Œstruct”…””}”(hŒstruct”hjubh;Œ ”…””}”(hh®hjubh/)”}”(hhh]”h;Œ raft_snapshot”…””}”(hŒ raft_snapshot”hj%ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j,Œmodname”NŒ classname”Nuhh.hjubjR)”}”(hŒ *snapshot”h]”h;Œ  *snapshot”…””}”(hhhj8ubah}”(h]”h]”h]”h]”h]”uhjQhjubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjƒubj/)”}”(hŒraft_io_snapshot_put_cb cb”h]”(h/)”}”(hhh]”h;Œraft_io_snapshot_put_cb”…””}”(hŒraft_io_snapshot_put_cb”hjQubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jXŒmodname”NŒ classname”Nuhh.hjMubjR)”}”(hŒ cb”h]”h;Œ cb”…””}”(hhhjdubah}”(h]”h]”h]”h]”h]”uhjQhjMubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjƒubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj^hhh h!hK’ubeh}”(h]”jYah]”h]”jYah]”h]”h͉uhh™hj[hhh h!hK’ubhÏ)”}”(hhh]”(hc)”}”(hX™Asynchronously persist a new snapshot. If the :code:`trailing` parameter is greater than zero, then all entries older that :code:`snapshot->index - trailing` must be deleted. If the :code:`trailing` parameter is :code:`0`, then the snapshot completely replaces all existing entries, which should all be deleted. Subsequent calls to append() should append entries starting at index :code:`snapshot->index + 1`.”h]”(h;Œ.Asynchronously persist a new snapshot. If the ”…””}”(hŒ.Asynchronously persist a new snapshot. 
If the ”hjˆhhh NhNubh5)”}”(hŒ:code:`trailing`”h]”h;Œtrailing”…””}”(hŒtrailing”hj‘ubah}”(h]”h]”j<ah]”h]”h]”uhh4hjˆubh;Œ= parameter is greater than zero, then all entries older that ”…””}”(hŒ= parameter is greater than zero, then all entries older that ”hjˆhhh NhNubh5)”}”(hŒ":code:`snapshot->index - trailing`”h]”h;Œsnapshot->index - trailing”…””}”(hŒsnapshot->index - trailing”hj¥ubah}”(h]”h]”j<ah]”h]”h]”uhh4hjˆubh;Œ must be deleted. If the ”…””}”(hŒ must be deleted. If the ”hjˆhhh NhNubh5)”}”(hŒ:code:`trailing`”h]”h;Œtrailing”…””}”(hŒtrailing”hj¹ubah}”(h]”h]”j<ah]”h]”h]”uhh4hjˆubh;Œ parameter is ”…””}”(hŒ parameter is ”hjˆhhh NhNubh5)”}”(hŒ :code:`0`”h]”h;Œ0”…””}”(hŒ0”hjÍubah}”(h]”h]”j<ah]”h]”h]”uhh4hjˆubh;Œ , then the snapshot completely replaces all existing entries, which should all be deleted. Subsequent calls to append() should append entries starting at index ”…””}”(hŒ , then the snapshot completely replaces all existing entries, which should all be deleted. Subsequent calls to append() should append entries starting at index ”hjˆhhh NhNubh5)”}”(hŒ:code:`snapshot->index + 1`”h]”h;Œsnapshot->index + 1”…””}”(hŒsnapshot->index + 1”hjáubah}”(h]”h]”j<ah]”h]”h]”uhh4hjˆubh;Œ.”…””}”(hj¤hjˆhhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hK‰hj…hhubhc)”}”(hŒoIf a request is submitted, the raft engine won't submit any other request until the original one has completed.”h]”h;ŒqIf a request is submitted, the raft engine won’t submit any other request until the original one has completed.”…””}”(hjühjúhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhj…hhubeh}”(h]”h]”h]”h]”h]”uhhÎhj[hhh h!hK’ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒsnapshot_get (C member)”Œc.snapshot_get”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒeint (*snapshot_get)(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hj*hhh NhNubh;Œ ”…””}”(hh®hj*hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj&hhh 
h!hK–ubh¸)”}”(hŒ(*snapshot_get)”h]”h;Œ(*snapshot_get)”…””}”(hhhj=hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj&hhh h!hK–ubj))”}”(hŒSstruct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjOubh;Œ ”…””}”(hh®hjOubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hj\ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jcŒmodname”NŒ classname”Nuhh.hjOubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjoubah}”(h]”h]”h]”h]”h]”uhjQhjOubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjKubj/)”}”(hŒ!struct raft_io_snapshot_get *req”h]”(h;Œstruct”…””}”(hŒstruct”hj„ubh;Œ ”…””}”(hh®hj„ubh/)”}”(hhh]”h;Œraft_io_snapshot_get”…””}”(hŒraft_io_snapshot_get”hj‘ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j˜Œmodname”NŒ classname”Nuhh.hj„ubjR)”}”(hŒ *req”h]”h;Œ *req”…””}”(hhhj¤ubah}”(h]”h]”h]”h]”h]”uhjQhj„ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjKubj/)”}”(hŒraft_io_snapshot_get_cb cb”h]”(h/)”}”(hhh]”h;Œraft_io_snapshot_get_cb”…””}”(hŒraft_io_snapshot_get_cb”hj½ubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jÄŒmodname”NŒ classname”Nuhh.hj¹ubjR)”}”(hŒ cb”h]”h;Œ cb”…””}”(hhhjÐubah}”(h]”h]”h]”h]”h]”uhjQhj¹ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjKubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hj&hhh h!hK–ubeh}”(h]”j!ah]”h]”j!ah]”h]”h͉uhh™hj#hhh h!hK–ubhÏ)”}”(hhh]”hc)”}”(hŒ&Asynchronously load the last snapshot.”h]”h;Œ&Asynchronously load the last snapshot.”…””}”(hjöhjôhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK•hjñhhubah}”(h]”h]”h]”h]”h]”uhhÎhj#hhh h!hK–ubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒtime (C member)”Œc.time”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ%raft_time (*time)(struct raft_io *io)”h]”(h )”}”(hŒ raft_time ”h]”(h/)”}”(hhh]”h;Œ raft_time”…””}”(hŒ raft_time”hj(hhh NhNubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”j/Œmodname”NŒ classname”Nuhh.hj$hhh h!hKšubh;Œ ”…””}”(hh®hj$hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj hhh 
h!hKšubh¸)”}”(hŒ(*time)”h]”h;Œ(*time)”…””}”(hhhjEhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj hhh h!hKšubj))”}”(hŒstruct raft_io *io”h]”j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjWubh;Œ ”…””}”(hh®hjWubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjdubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jkŒmodname”NŒ classname”Nuhh.hjWubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjwubah}”(h]”h]”h]”h]”h]”uhjQhjWubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjSubah}”(h]”h]”h]”h]”h]”hµh¶uhj(hj hhh h!hKšubeh}”(h]”jah]”h]”jah]”h]”h͉uhh™hjhhh h!hKšubhÏ)”}”(hhh]”hc)”}”(hŒ3Return the current time, expressed in milliseconds.”h]”h;Œ3Return the current time, expressed in milliseconds.”…””}”(hjhj›hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK™hj˜hhubah}”(h]”h]”h]”h]”h]”uhhÎhjhhh h!hKšubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðjµhñ‰uhh”hhhjh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒrandom (C member)”Œc.random”hNt”auhhƒhjhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ3int (*random)(struct raft_io *io, int min, int max)”h]”(h )”}”(hŒint ”h]”(h;Œint”…””}”(hŒint”hjËhhh NhNubh;Œ ”…””}”(hh®hjËhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjÇhhh h!hKubh¸)”}”(hŒ (*random)”h]”h;Œ (*random)”…””}”(hhhjÞhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjÇhhh h!hKubj))”}”(hŒ'struct raft_io *io, int min, int max”h]”(j/)”}”(hŒstruct raft_io *io”h]”(h;Œstruct”…””}”(hŒstruct”hjðubh;Œ ”…””}”(hh®hjðubh/)”}”(hhh]”h;Œraft_io”…””}”(hŒraft_io”hjýubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jMŒ reftarget”jŒmodname”NŒ classname”Nuhh.hjðubjR)”}”(hŒ *io”h]”h;Œ *io”…””}”(hhhjubah}”(h]”h]”h]”h]”h]”uhjQhjðubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjìubj/)”}”(hŒint min”h]”(h;Œint”…””}”(hŒint”hj%ubjR)”}”(hŒ min”h]”h;Œ min”…””}”(hhhj.ubah}”(h]”h]”h]”h]”h]”uhjQhj%ubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjìubj/)”}”(hŒint max”h]”(h;Œint”…””}”(hŒint”hjCubjR)”}”(hŒ max”h]”h;Œ max”…””}”(hhhjLubah}”(h]”h]”h]”h]”h]”uhjQhjCubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj.hjìubeh}”(h]”h]”h]”h]”h]”hµh¶uhj(hjÇhhh h!hKubeh}”(h]”jÂah]”h]”jÂah]”h]”h͉uhh™hjÄhhh 
h!hKubhÏ)”}”(hhh]”hc)”}”(hŒ>Generate a random integer between :code:`min` and :code:`max`.”h]”(h;Œ"Generate a random integer between ”…””}”(hŒ"Generate a random integer between ”hjphhh NhNubh5)”}”(hŒ :code:`min`”h]”h;Œmin”…””}”(hŒmin”hjyubah}”(h]”h]”j<ah]”h]”h]”uhh4hjpubh;Œ and ”…””}”(hŒ and ”hjphhh NhNubh5)”}”(hŒ :code:`max`”h]”h;Œmax”…””}”(hŒmax”hjubah}”(h]”h]”j<ah]”h]”h]”uhh4hjpubh;Œ.”…””}”(hj¤hjphhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hKhjmhhubah}”(h]”h]”h]”h]”h]”uhhÎhjÄhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒmember”hðj²hñ‰uhh”hhhjh h!hNubeh}”(h]”Œpublic-members”ah]”h]”Œpublic members”ah]”h]”uhh"hhrhhh h!hKubeh}”(h]”Œ data-types”ah]”h]”Œ data types”ah]”h]”uhh"hh$hhh h!hK ubeh}”(h]”(Œ$struct-raft-io-i-o-backend-interface”heh]”h]”(Œ(struct raft_io --- i/o backend interface”Œio”eh]”h]”uhh"hhhhh h!hKŒexpect_referenced_by_name”}”jÉh sŒexpect_referenced_by_id”}”hh subeh}”(h]”h]”h]”h]”h]”Œsource”h!uhhŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h'NŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jóŒerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h!Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ 
smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”h]”h asŒnameids”}”(jÉhjÈjÅjÀj½h’h’hþhþjÄjÄj]j]j¸jµj<j<j—j—jñjñjjj{j{jœjœjÌjÌj& j& j j jð jð jØ jØ jÀ jÀ jjj”j”jYjYj!j!jjjÂjÂuŒ nametypes”}”(jɈjÈNjÀNh’ˆhþˆjĈj]ˆj¸Nj<ˆj—ˆjñˆjˆj{ˆjœˆj̈j& ˆj ˆjð ˆjØ ˆjÀ ˆjˆj”ˆjYˆj!ˆjˆjˆuh}”(hh$jÅh$j½hrh’h›hþjjÄjÉj]jbjµjj<jAj—jœjñjöjj”j{j€jœj¡jÌjÑj& j+ j j jð jõ jØ jÝ jÀ jÅ jj j”j™jYj^j!j&jj jÂjÇuŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”hŒsystem_message”“”)”}”(hhh]”hc)”}”(hhh]”h;Œ(Hyperlink target "io" is not referenced.”…””}”(hhhjRubah}”(h]”h]”h]”h]”h]”uhhbhjOubah}”(h]”h]”h]”h]”h]”Œlevel”KŒtype”ŒINFO”Œsource”h!Œline”KuhjMubaŒ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/.doctrees/server.doctree000066400000000000000000000421511415614527300213350ustar00rootroot00000000000000€•^DŒdocutils.nodes”Œdocument”“”)”}”(Œ rawsource”Œ”Œchildren”]”(hŒtarget”“”)”}”(hŒ .. 
_server:”h]”Œ attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œrefid”Œserver”uŒtagname”h Œline”KŒparent”hhhŒsource”Œ/srv/src/c/raft/doc/server.rst”ubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒ%:c:type:`struct raft` --- Raft server”h]”(Œsphinx.addnodes”Œ pending_xref”“”)”}”(hŒ:c:type:`struct raft`”h]”hŒliteral”“”)”}”(hŒ struct raft”h]”hŒText”“”Œ struct raft”…””}”(hhhh6ubah}”(h]”h]”(Œxref”Œc”Œc-type”eh]”h]”h]”uhh4hh0ubah}”(h]”h]”h]”h]”h]”Œrefdoc”Œserver”Œ refdomain”hDŒreftype”Œtype”Œ refexplicit”‰Œrefwarn”‰Œ reftarget”h8uhh.h h!hKhh)ubh;Œ — Raft server”…””}”(hŒ --- Raft server”hh)hhh NhNubeh}”(h]”h]”h]”h]”h]”uhh'hh$hhh h!hKubhŒ paragraph”“”)”}”(hŒxThe raft server struct is the central part of C-Raft. It holds and drive the state of a single raft server in a cluster.”h]”h;ŒxThe raft server struct is the central part of C-Raft. It holds and drive the state of a single raft server in a cluster.”…””}”(hhfhhdhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhh$hhubh#)”}”(hhh]”(h()”}”(hŒ Data types”h]”h;Œ Data types”…””}”(hhwhhuhhh NhNubah}”(h]”h]”h]”h]”h]”uhh'hhrhhh h!hK ubh-Œindex”“”)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(Œsingle”Œ raft (C type)”Œc.raft”hNt”auhhƒhhrhhh h!hNubh-Œdesc”“”)”}”(hhh]”(h-Œdesc_signature”“”)”}”(hŒ struct raft”h]”(h-Œ desc_type”“”)”}”(hŒstruct ”h]”(h;Œstruct”…””}”(hŒstruct”hh¡hhh NhNubh;Œ ”…””}”(hŒ ”hh¡hhh NhNubeh}”(h]”h]”h]”h]”h]”Œ xml:space”Œpreserve”uhhŸhh›hhh h!hKubh-Œ desc_name”“”)”}”(hŒraft”h]”h;Œraft”…””}”(hhhh¹hhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hh›hhh h!hKubeh}”(h]”h’ah]”h]”h’ah]”h]”Œfirst”‰uhh™hh–hhh h!hKubh-Œ desc_content”“”)”}”(hhh]”hc)”}”(hŒ"A single raft server in a cluster.”h]”h;Œ"A single raft server in a cluster.”…””}”(hhÕhhÓhhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhhÐhhubah}”(h]”h]”h]”h]”h]”uhhÎhh–hhh h!hKubeh}”(h]”h]”h]”h]”h]”Œdomain”hDŒobjtype”Œtype”Œdesctype”hïŒnoindex”‰uhh”hhhhrh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒraft_id (C type)”Œ c.raft_id”hNt”auhhƒhhrhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒraft_id”h]”(h 
)”}”(hhh]”h}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjhhh h!hKubh¸)”}”(hjh]”h;Œraft_id”…””}”(hhhjhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjhhh h!hKubeh}”(h]”hþah]”h]”hþah]”h]”h͉uhh™hjhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒJHold the value of a raft server ID. Guaranteed to be at least 64-bit long.”h]”h;ŒJHold the value of a raft server ID. Guaranteed to be at least 64-bit long.”…””}”(hj(hj&hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hKhj#hhubah}”(h]”h]”h]”h]”h]”uhhÎhjhhh h!hKubeh}”(h]”h]”h]”h]”h]”híhDhîŒtype”hðj@hñ‰uhh”hhhhrh h!hNubh„)”}”(hhh]”h}”(h]”h]”h]”h]”h]”Œentries”]”(hŒraft_close_cb (C type)”Œc.raft_close_cb”hNt”auhhƒhhrhhh h!hNubh•)”}”(hhh]”(hš)”}”(hŒ%void (*raft_close_cb)(struct raft *r)”h]”(h )”}”(hŒvoid ”h]”(h;Œvoid”…””}”(hŒvoid”hjVhhh NhNubh;Œ ”…””}”(hh®hjVhhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhjRhhh h!hKubh¸)”}”(hŒ(*raft_close_cb)”h]”h;Œ(*raft_close_cb)”…””}”(hhhjihhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hjRhhh h!hKubh-Œdesc_parameterlist”“”)”}”(hŒstruct raft *r”h]”h-Œdesc_parameter”“”)”}”(hŒstruct raft *r”h]”(h;Œstruct”…””}”(hŒstruct”hjubh;Œ ”…””}”(hh®hjubh/)”}”(hhh]”h;Œraft”…””}”(hŒraft”hjŒubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”Œtype”Œ reftarget”j“Œmodname”NŒ classname”Nuhh.hjubhŒemphasis”“”)”}”(hŒ *r”h]”h;Œ *r”…””}”(hhhj¢ubah}”(h]”h]”h]”h]”h]”uhj hjubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj}hjyubah}”(h]”h]”h]”h]”h]”hµh¶uhjwhjRhhh h!hKubeh}”(h]”jMah]”h]”jMah]”h]”h͉uhh™hjOhhh h!hKubhÏ)”}”(hhh]”hc)”}”(hŒhhh NhNubh;Œ ”…””}”(hh®hj>hhh NhNubeh}”(h]”h]”h]”h]”h]”hµh¶uhhŸhj:hhh h!hK:ubh¸)”}”(hŒ raft_start”h]”h;Œ raft_start”…””}”(hhhjQhhh NhNubah}”(h]”h]”h]”h]”h]”hµh¶uhh·hj:hhh h!hK:ubjx)”}”(hŒstruct raft* r”h]”j~)”}”(hŒstruct raft* r”h]”(h;Œstruct”…””}”(hŒstruct”hjcubh;Œ ”…””}”(hh®hjcubh/)”}”(hhh]”h;Œraft”…””}”(hŒraft”hjpubah}”(h]”h]”h]”h]”h]”Œ refdomain”hDŒreftype”jœŒ reftarget”jwŒmodname”NŒ classname”Nuhh.hjcubh;Œ*”…””}”(hj¯hjcubj¡)”}”(hŒ r”h]”h;Œ r”…””}”(hhhj‡ubah}”(h]”h]”h]”h]”h]”uhj hjcubeh}”(h]”h]”h]”h]”h]”Œnoemph”ˆhµh¶uhj}hj_ubah}”(h]”h]”h]”h]”h]”hµh¶uhjwhj:hhh 
h!hK:ubeh}”(h]”j5ah]”h]”j5ah]”h]”h͉uhh™hj7hhh h!hK:ubhÏ)”}”(hhh]”(hc)”}”(hŒStart a raft server.”h]”h;ŒStart a raft server.”…””}”(hj­hj«hhh NhNubah}”(h]”h]”h]”h]”h]”uhhbh h!hK5hj¨hhubhc)”}”(hX The initial term, vote, snapshot and entries will be loaded from disk using the :c:func:`raft_io->load()` method. The instance will start as follower, unless it's the only voting server in the cluster, in which case it will automatically elect itself and become leader.”h]”(h;ŒPThe initial term, vote, snapshot and entries will be loaded from disk using the ”…””}”(hŒPThe initial term, vote, snapshot and entries will be loaded from disk using the ”hj¹hhh NhNubh/)”}”(hŒ:c:func:`raft_io->load()`”h]”h5)”}”(hŒraft_io->load()”h]”h;Œraft_io->load()”…””}”(hhhjÆubah}”(h]”h]”(hChDŒc-func”eh]”h]”h]”uhh4hjÂubah}”(h]”h]”h]”h]”h]”Œrefdoc”hPŒ refdomain”hDŒreftype”Œfunc”Œ refexplicit”‰Œrefwarn”‰hVŒ raft_io->load”uhh.h h!hK7hj¹ubh;Œ¦ method. The instance will start as follower, unless it’s the only voting server in the cluster, in which case it will automatically elect itself and become leader.”…””}”(hŒ¤ method. 
The instance will start as follower, unless it's the only voting server in the cluster, in which case it will automatically elect itself and become leader.”hj¹hhh NhNubeh}”(h]”h]”h]”h]”h]”uhhbh h!hK7hj¨hhubeh}”(h]”h]”h]”h]”h]”uhhÎhj7hhh h!hK:ubeh}”(h]”h]”h]”h]”h]”híhDhîŒfunction”hðjùhñ‰uhh”hhhjêh h!hNubeh}”(h]”Œapi”ah]”h]”Œapi”ah]”h]”uhh"hh$hhh h!hK&ubeh}”(h]”(Œstruct-raft-raft-server”heh]”h]”(Œstruct raft --- raft server”Œserver”eh]”h]”uhh"hhhhh h!hKŒexpect_referenced_by_name”}”jh sŒexpect_referenced_by_id”}”hh subeh}”(h]”h]”h]”h]”h]”Œsource”h!uhhŒcurrent_source”NŒ current_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h'NŒ generator”NŒ datestamp”NŒ source_link”NŒ source_url”NŒ toc_backlinks”Œentry”Œfootnote_backlinks”KŒ sectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒ strip_classes”NŒ report_level”KŒ halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ traceback”ˆŒinput_encoding”Œ utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”j2Œerror_encoding”ŒUTF-8”Œerror_encoding_error_handler”Œbackslashreplace”Œ language_code”Œen”Œrecord_dependencies”NŒconfig”NŒ id_prefix”hŒauto_id_prefix”Œid”Œ dump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h!Œ _destination”NŒ _config_files”]”Œpep_references”NŒ pep_base_url”Œ https://www.python.org/dev/peps/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒ rfc_base_url”Œhttps://tools.ietf.org/html/”Œ tab_width”KŒtrim_footnote_reference_space”‰Œfile_insertion_enabled”ˆŒ raw_enabled”KŒsyntax_highlight”Œlong”Œ smart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œ docinfo_xform”KŒsectsubtitle_xform”‰Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”h]”h asŒnameids”}”(jhjjjçjäh’h’hþhþjMjMjßjÜj#j#j~j~jÿjüjjj]j]j5j5uŒ 
nametypes”}”(jˆjNjçNh’ˆhþˆjMˆjßNj#ˆj~ˆjÿNjˆj]ˆj5ˆuh}”(hh$jh$jährh’h›hþjjMjRjÜjj#j(j~jƒjüjêjj j]jbj5j:uŒ footnote_refs”}”Œ citation_refs”}”Œ autofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ footnotes”]”Œ citations”]”Œautofootnote_start”KŒsymbol_footnote_start”KŒid_start”KŒparse_messages”]”Œtransform_messages”]”hŒsystem_message”“”)”}”(hhh]”hc)”}”(hhh]”h;Œ,Hyperlink target "server" is not referenced.”…””}”(hhhj‘ubah}”(h]”h]”h]”h]”h]”uhhbhjŽubah}”(h]”h]”h]”h]”h]”Œlevel”KŒtype”ŒINFO”Œsource”h!Œline”KuhjŒubaŒ transformer”NŒ decoration”Nhhub.raft-0.11.3/docs/build/_sources/000077500000000000000000000000001415614527300164115ustar00rootroot00000000000000raft-0.11.3/docs/build/_sources/api.rst.txt000066400000000000000000000001341415614527300205300ustar00rootroot00000000000000.. _api: API reference ============= .. toctree:: :maxdepth: 1 server fsm io raft-0.11.3/docs/build/_sources/fsm.rst.txt000066400000000000000000000020121415614527300205410ustar00rootroot00000000000000.. _fsm: :c:type:`struct raft_fsm` --- Application state machine ======================================================= The FSM struct defines the interface that the application's state machine must implement in order to be replicated by Raft. Data types ---------- .. c:type:: struct raft_fsm Hold pointers to an actual implementation of the FSM interface. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: int version API version implemented by this instance. Currently 1. .. c:member:: int (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) Apply a committed RAFT_COMMAND entry to the state machine. .. c:member:: int (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs) Take a snapshot of the state machine. .. 
c:member:: int (*restore)(struct raft_fsm *fsm, struct raft_buffer *buf) Restore a snapshot of the state machine. raft-0.11.3/docs/build/_sources/getting-started.rst.txt000066400000000000000000000000401415614527300230600ustar00rootroot00000000000000Getting started =============== raft-0.11.3/docs/build/_sources/index.rst.txt000066400000000000000000000066111415614527300210740ustar00rootroot00000000000000C-Raft ====== C-Raft is a fully asynchronous C implementation of the Raft consensus protocol. Design ------ The library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking and disk persistence. A stock implementation of the I/O interface is provided when building the library with default options. It is based on `libuv`_ and should fit the vast majority of use cases. .. _libuv: http://libuv.org Features -------- C-Raft implements all the basic features described in the Raft dissertation: * Leader election * Log replication * Log compaction * Membership changes It also includes a few optional enhancements: * Optimistic pipelining to reduce log replication latency * Writing to leader's disk in parallel * Automatic stepping down when the leader loses quorum * Leadership transfer extension * Non-voting servers Quick start ----------- Make sure that `libuv`_ is installed on your system, then run: .. code-block:: bash :linenos: autoreconf -i ./configure --enable-example make Then create a :file:`main.c` file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a counter: .. 
code-block:: C :linenos: #include #include static raft_id id = 12345; static const char *address = "127.0.0.1:8080"; static const char *dir = "/tmp/raft-quick-start"; static struct uv_loop_s loop; static struct raft_uv_transport transport; static struct raft_io io; static struct raft_fsm fsm; static struct raft raft; static struct raft_configuration conf; static struct uv_timer_s timer; static struct raft_apply apply; static unsigned counter = 0; static uint64_t command; static int applyCommand(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { counter += *(uint64_t *)buf->base; printf("counter: %u\n", counter); return 0; } static void submitCommand(uv_timer_t *timer) { struct raft_buffer buf; command = uv_now(timer->loop) % 10; buf.len = sizeof command; buf.base = &command; raft_apply(&raft, &apply, &buf, 1, NULL); } int main() { mkdir(dir, 0755); uv_loop_init(&loop); raft_uv_tcp_init(&transport, &loop); raft_uv_init(&io, &loop, dir, &transport); fsm.apply = applyCommand; raft_init(&raft, &io, &fsm, id, address); raft_configuration_init(&conf); raft_configuration_add(&conf, id, address, RAFT_VOTER); raft_bootstrap(&raft, &conf); raft_start(&raft); uv_timer_init(&loop, &timer); uv_timer_start(&timer, submitCommand, 0, 1000); uv_run(&loop, UV_RUN_DEFAULT); } You can compile and run it with: .. code-block:: bash :linenos: cc main.c -o main -lraft -luv && ./main Licence ------- This raft C library is released under a slightly modified version of LGPLv3, that includes a copiright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full `license`_ text. .. _license: https://github.com/canonical/raft/blob/master/LICENSE toc ~~~ .. toctree:: :maxdepth: 1 self getting-started api raft-0.11.3/docs/build/_sources/io.rst.txt000066400000000000000000000135111415614527300203710ustar00rootroot00000000000000.. 
_io: :c:type:`struct raft_io` --- I/O backend interface ================================================== The I/O backend struct defines an interface for performing periodic ticks, log store read/write and send/receive of network RPCs. Data types ---------- .. c:type:: struct raft_io Hold pointers to an actual implementation of the I/O backend interface. .. c:type:: void (*raft_io_close_cb)(struct raft_io *io) Type definition for callback passed to :c:func:`raft_io.close()`. .. c:type:: void (*raft_io_tick_cb)(struct raft_io *io) Callback invoked by the I/O implementation at regular intervals. .. c:type:: void (*raft_io_recv_cb)(struct raft_io *io, struct raft_message *msg) Callback invoked by the I/O implementation when an RPC message is received. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: int version API version implemented by this instance. Currently 1. .. c:member:: void* impl Implementation-defined state. .. c:member:: char errmsg[RAFT_ERRMSG_BUF_SIZE] Human-readable message providing diagnostic information about the last error occurred. .. c:member:: int (*init)(struct raft_io *io, raft_id id, const char *address) Initialize the backend with operational parameters such as server ID and address. .. c:member:: void (*close)(struct raft_io *io, raft_io_close_cb cb) Release all resources used by the backend. The :code:`tick` and :code:`recv` callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. Invoke the close callback once the :c:type:`raft_io` instance can be freed. .. c:member:: int (*load)(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries) Load persisted state from storage. 
The implementation must synchronously load the current state from its storage backend and return information about it through the given pointers. The implementation can safely assume that this method will be invoked exactly one time, before any call to :c:func:`raft_io.append()` or c:func:`raft_io.truncate()`, and then won't be invoked again. The snapshot object and entries array must be allocated and populated using :c:func:`raft_malloc`. If this function completes successfully, ownership of such memory is transferred to the caller. .. c:member:: int (*start)(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv) Start the backend. From now on the implementation must start accepting RPC requests and must invoke the :code:`tick` callback every :code:`msecs` milliseconds. The :code:`recv` callback must be invoked when receiving a message. .. c:member:: int (*bootstrap)(struct raft_io *io, const struct raft_configuration *conf) Bootstrap a server belonging to a new cluster. The implementation must synchronously persist the given configuration as the first entry of the log. The current persisted term must be set to 1 and the vote to nil. If an attempt is made to bootstrap a server that has already some state, then RAFT_CANTBOOTSTRAP must be returned. .. c:member:: int (*recover)(struct raft_io *io, const struct raft_configuration *conf) Force appending a new configuration as last entry of the log. .. c:member:: int (*set_term)(struct raft_io *io, raft_term term) Synchronously persist current term (and nil vote). The implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC). .. c:member:: int (*set_vote)(struct raft_io *io, raft_id server_id) Synchronously persist who we voted for. The implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC). .. 
c:member:: int (*send)(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb) Asynchronously send an RPC message. The implementation is guaranteed that the memory referenced in the given message will not be released until the :code:`cb` callback is invoked. .. c:member:: int (*append)(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb) Asynchronously append the given entries to the log. The implementation is guaranteed that the memory holding the given entries will not be released until the :code:`cb` callback is invoked. .. c:member:: int (*truncate)(struct raft_io *io, raft_index index) Asynchronously truncate all log entries from the given index onwards. .. c:member:: int (*snapshot_put)(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb) Asynchronously persist a new snapshot. If the :code:`trailing` parameter is greater than zero, then all entries older that :code:`snapshot->index - trailing` must be deleted. If the :code:`trailing` parameter is :code:`0`, then the snapshot completely replaces all existing entries, which should all be deleted. Subsequent calls to append() should append entries starting at index :code:`snapshot->index + 1`. If a request is submitted, the raft engine won't submit any other request until the original one has completed. .. c:member:: int (*snapshot_get)(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb) Asynchronously load the last snapshot. .. c:member:: raft_time (*time)(struct raft_io *io) Return the current time, expressed in milliseconds. .. c:member:: int (*random)(struct raft_io *io, int min, int max) Generate a random integer between :code:`min` and :code:`max`. raft-0.11.3/docs/build/_sources/server.rst.txt000066400000000000000000000027211415614527300212710ustar00rootroot00000000000000.. 
_server: :c:type:`struct raft` --- Raft server ===================================== The raft server struct is the central part of C-Raft. It holds and drive the state of a single raft server in a cluster. Data types ---------- .. c:type:: struct raft A single raft server in a cluster. .. c:type:: raft_id Hold the value of a raft server ID. Guaranteed to be at least 64-bit long. .. c:type:: void (*raft_close_cb)(struct raft *r) Type definition for callback passed to :c:func:`raft_close`. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: raft_id id Server ID. Readonly. API --- .. c:function:: int raft_init(struct raft *r, struct raft_io *io, struct raft_fsm *fsm, raft_id id, const char *address) Initialize a raft server object. .. c:function:: int raft_close(struct raft* r, raft_close_cb cb) Close a raft server object, releasing all used resources. The memory of the object itself can be released only once the given close callback has been invoked. .. c:function:: int raft_start(struct raft* r) Start a raft server. The initial term, vote, snapshot and entries will be loaded from disk using the :c:func:`raft_io->load()` method. The instance will start as follower, unless it's the only voting server in the cluster, in which case it will automatically elect itself and become leader. 
raft-0.11.3/docs/build/_static/000077500000000000000000000000001415614527300162155ustar00rootroot00000000000000raft-0.11.3/docs/build/_static/fonts.css000066400000000000000000000252361415614527300200700ustar00rootroot00000000000000/* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7qsDJT9g.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7jsDJT9g.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7rsDJT9g.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7ksDJT9g.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7osDJT9g.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; 
font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7psDJT9g.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: italic; font-weight: 400; src: local('Source Sans Pro Italic'), local('SourceSansPro-Italic'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK1dSBYKcSV-LCoeQqfX1RYOo3qPZ7nsDI.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmhduz8A.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwkxduz8A.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmxduz8A.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), 
url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwlBduz8A.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmBduz8A.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwmRduz8A.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 300; src: local('Source Sans Pro Light'), local('SourceSansPro-Light'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ik4zwlxdu.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNa7lqDY.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qPK7lqDY.woff2) format('woff2'); 
unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNK7lqDY.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qO67lqDY.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qN67lqDY.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qNq7lqDY.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 400; src: local('Source Sans Pro Regular'), local('SourceSansPro-Regular'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } /* cyrillic-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro 
Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmhduz8A.woff2) format('woff2'); unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F; } /* cyrillic */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwkxduz8A.woff2) format('woff2'); unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; } /* greek-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmxduz8A.woff2) format('woff2'); unicode-range: U+1F00-1FFF; } /* greek */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlBduz8A.woff2) format('woff2'); unicode-range: U+0370-03FF; } /* vietnamese */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmBduz8A.woff2) format('woff2'); unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB; } /* latin-ext */ @font-face { font-family: 'Source Sans Pro'; font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwmRduz8A.woff2) format('woff2'); unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; } /* latin */ @font-face { font-family: 'Source Sans Pro'; 
font-style: normal; font-weight: 700; src: local('Source Sans Pro Bold'), local('SourceSansPro-Bold'), url(https://fonts.gstatic.com/s/sourcesanspro/v13/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2) format('woff2'); unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; } @font-face { font-family: 'Material Icons'; font-style: normal; font-weight: 400; src: url(https://fonts.gstatic.com/s/materialicons/v48/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.woff2) format('woff2'); } .material-icons { font-family: 'Material Icons'; font-weight: normal; font-style: normal; font-size: 24px; line-height: 1; letter-spacing: normal; text-transform: none; display: inline-block; white-space: nowrap; word-wrap: normal; direction: ltr; -moz-font-feature-settings: 'liga'; -moz-osx-font-smoothing: grayscale; }raft-0.11.3/docs/build/_static/material.css000066400000000000000000000420761415614527300205360ustar00rootroot00000000000000/* * material.css_t */ @import url("fonts.css"); html { -webkit-text-size-adjust: none; -moz-text-size-adjust: none; -ms-text-size-adjust: none; text-size-adjust: none; font-size: 137.5%; overflow-x: hidden; box-sizing: border-box; } *, ::after, ::before { box-sizing: inherit; } body, html { height: 100%; } body, input { color: rgba(0,0,0,.87); font-family: "Source Sans Pro","Helvetica Neue",Helvetica,Arial,sans-serif; font-feature-settings: "kern","liga"; } body { position: relative; font-size: .5rem; margin: 0; } a { color: inherit; text-decoration: none; } td, th { font-weight: 400; vertical-align: top; } input { border: 0; outline: 0; } .md-header, .md-hero { background-color: #3f51b5; } .md-header[data-md-state="shadow"] { -webkit-transition: background-color .25s,color .25s,box-shadow .25s; transition: background-color .25s,color .25s,box-shadow .25s; box-shadow: 0 0 .2rem rgba(0,0,0,.1),0 .2rem .4rem rgba(0,0,0,.2); } .md-header { position: fixed; top: 0; 
right: 0; left: 0; height: 2.4rem; color: #fff; z-index: 2; backface-visibility: hidden; } .md-header-nav { padding: 0 .2rem; } .md-header-nav__button { position: relative; -webkit-transition: opacity .25s; transition: opacity .25s; z-index: 1; } .md-header-nav__button.md-logo * { display: block; } .md-clipboard::before, .md-icon, .md-nav__button, .md-nav__link::after, .md-nav__title::before, .md-search-result__article--document::before, .md-source-file::before, .md-typeset .admonition > .admonition-title::before, .md-typeset .admonition > summary::before, .md-typeset .critic.comment::before, .md-typeset .footnote-backref, .md-typeset .task-list-control .task-list-indicator::before, .md-typeset details > .admonition-title::before, .md-typeset details > summary::before, .md-typeset summary::after { font-family: Material Icons; font-style: normal; font-variant: normal; font-weight: 400; line-height: 1; text-transform: none; white-space: nowrap; speak: none; word-wrap: normal; direction: ltr; } .md-content__icon, .md-footer-nav__button, .md-header-nav__button, .md-nav__button, .md-nav__title::before, .md-search-result__article--document::before { display: inline-block; margin: .2rem; padding: .4rem; font-size: 1.2rem; cursor: pointer; } .md-header-nav__title { padding: 0 1rem; font-size: .9rem; line-height: 2.4rem; } .md-header-nav__topic { display: block; position: absolute; -webkit-transition: opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); transition: opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); transition: transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s; transition: transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1); text-overflow: ellipsis; white-space: nowrap; overflow: hidden; } .md-header-nav__button.md-icon--search { display: none; } .md-header-nav__button { position: relative; -webkit-transition: opacity .25s; transition: opacity .25s; z-index: 1; } .md-content__icon, 
.md-footer-nav__button, .md-header-nav__button, .md-nav__button, .md-nav__title::before, .md-search-result__article--document::before { display: inline-block; margin: .2rem; padding: .4rem; font-size: 1.2rem; cursor: pointer; } .md-search { padding: .2rem; } .md-search__overlay { position: fixed; top: 0; left: 0; width: 0; height: 0; -webkit-transition: width 0s .25s,height 0s .25s,opacity .25s; transition: width 0s .25s,height 0s .25s,opacity .25s; background-color: rgba(0,0,0,.54); cursor: pointer; } .md-search__inner { position: relative; width: 11.5rem; margin-right: .8rem; padding: .1rem 0; float: right; -webkit-transition: width .25s cubic-bezier(.1,.7,.1,1); transition: width .25s cubic-bezier(.1,.7,.1,1); } .md-search__inner { margin-right: 1.2rem; } .md-search__overlay { opacity: 0; z-index: 1; } .md-search__form, .md-search__input { border-radius: .1rem; } .md-search__form { position: relative; } .md-search__input { position: relative; padding: 0 2.2rem 0 3.6rem; text-overflow: ellipsis; z-index: 2; } .md-search__form, .md-search__input { border-radius: .1rem; } .md-search__input { width: 100%; height: 1.8rem; padding-left: 2.2rem; -webkit-transition: background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1); transition: background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1); background-color: rgba(0,0,0,.26); color: inherit; font-size: .8rem; } .md-search__input + .md-search__icon { color: inherit; } .md-search__icon[for="__search"] { top: .3rem; left: .5rem; } .md-search__icon { position: absolute; -webkit-transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; font-size: 1.2rem; cursor: pointer; z-index: 2; } .md-search__icon[type="reset"] { top: .3rem; right: .5rem; -webkit-transform: scale(.125); transform: scale(.125); -webkit-transition: opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); transition: opacity 
.15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); transition: transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s; transition: transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1); opacity: 0; } .md-search__icon[for="__search"]::before { content: "\E8B6"; } .md-search__icon { position: absolute; -webkit-transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; transition: color .25s cubic-bezier(.1,.7,.1,1),opacity .25s; font-size: 1.2rem; cursor: pointer; z-index: 2; } .md-search__output { top: 1.9rem; -webkit-transition: opacity .4s; transition: opacity .4s; opacity: 0; } .md-search__output { position: absolute; width: 100%; border-radius: 0 0 .1rem .1rem; overflow: hidden; z-index: 1; } .md-search__scrollwrap, [data-md-toggle="search"]:checked ~ .md-header .md-search__inner { width: 34.4rem; } .md-search__scrollwrap { max-height: 0; } .md-search__scrollwrap { height: 100%; background-color: #fff; box-shadow: inset 0 .05rem 0 rgba(0,0,0,.07); overflow-y: auto; -webkit-overflow-scrolling: touch; } .md-container { display: table; width: 100%; height: 100%; padding-top: 2.4rem; table-layout: fixed; } .md-container, .md-main { overflow: auto; } .md-main { display: table-row; height: 100%; } .md-main__inner { height: 100%; padding-top: 1.5rem; padding-bottom: .05rem; } .md-sidebar { position: absolute; width: 12.1rem; padding: 1.2rem 0; overflow: hidden; } .md-sidebar__scrollwrap { max-height: 100%; margin: 0 .2rem; overflow-y: auto; -webkit-backface-visibility: hidden; backface-visibility: hidden; } .md-nav { font-size: .7rem; line-height: 1.3; } .md-nav__title { display: block; padding: 0 .6rem; font-weight: 700; text-overflow: ellipsis; overflow: hidden; } .md-nav ul { margin: 0; padding: 0; list-style: none; } .md-nav ul li { padding: 0 .6rem; } .md-nav ul li a { display: block; margin-top: .625em; -webkit-transition: color .125s; transition: color .125s; text-overflow: ellipsis; cursor: pointer; overflow: 
hidden; } .md-nav ul li a.current { color: #fb8c00; } /* Hide sub-sub sections */ .md-nav--secondary ul ul ul { display: none; } /* Don't indent sub items */ .md-nav--secondary ul ul li { padding: 0; } /* Hide top-level local toc item */ .md-nav--secondary ul li a[href="#"] { display: none; } .md-sidebar--secondary { display: block; margin-left: 100%; -webkit-transform: translate(-100%); transform: translate(-100%); } .md-sidebar--secondary { margin-left: 70rem; } .md-content { margin-left: 12.1rem; } .md-content { margin-right: 12.1rem; } .md-content__inner { margin-right: 1.2rem; margin-left: 1.2rem; } .md-content__inner { margin: 0 .8rem 1.2rem; margin-right: 0.8rem; margin-left: 0.8rem; padding-top: .6rem; } #toc.section { display: none; } .md-typeset { font-size: .7rem; line-height: 1.4; -webkit-print-color-adjust: exact; } .md-typeset a { color: #3f51b5; word-break: break-word; } .md-typeset h1, .md-typeset h2 { font-weight: 300; letter-spacing: -.01em; } .md-typeset h1 { margin: 0 0 1rem; color: rgb(33,34,36); font-size: 1.5625rem; line-height: 1.3; font-weight: 500; } .md-typeset h2 { font-size: 1.2rem; border-bottom: 1px solid #eaecef; font-weight: 400; } .md-typeset h3 { margin: 1rem 0 .8rem; font-size: 0.9rem; font-weight: 400; letter-spacing: -.01em; line-height: 1.5; } .md-typeset blockquote, .md-typeset ol, .md-typeset p, .md-typeset ul { margin: 1em 0; margin-left: 0px; } .md-typeset blockquote, .md-typeset ol, .md-typeset p, .md-typeset ul { margin: 1em 0; } .md-typeset ul { list-style-type: disc; } .md-typeset ol, .md-typeset ul { margin-left: .625em; padding: 0; } .md-typeset ol li, .md-typeset ul li { margin-bottom: .5em; margin-left: 1.25em; } .md-typeset ol li p, .md-typeset ul li p { margin: 0; } .md-typeset .headerlink { color: rgba(0,0,0,.26); } .md-typeset .headerlink { display: inline-block; margin-left: .5rem; -webkit-transform: translateY(.25rem); transform: translateY(.25rem); -webkit-transition: color .25s,opacity .125s 
.25s,-webkit-transform .25s .25s; transition: color .25s,opacity .125s .25s,-webkit-transform .25s .25s; transition: transform .25s .25s,color .25s,opacity .125s .25s; transition: transform .25s .25s,color .25s,opacity .125s .25s,-webkit-transform .25s .25s; opacity: 0; } [dir=rtl] .md-typeset .headerlink { margin-right: .5rem; margin-left: 0 } .md-typeset h1 .headerlink:focus, .md-typeset h1:hover .headerlink, .md-typeset h1:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h1 .headerlink:focus, .md-typeset h1:hover .headerlink:hover, .md-typeset h1:target .headerlink { color: #536dfe } .md-typeset h2:before { display: block; margin-top: -0px; padding-top: 0px; content: "" } .md-typeset h2:target:before { margin-top: -3.4rem; padding-top: 3.4rem } .md-typeset h2 .headerlink:focus, .md-typeset h2:hover .headerlink, .md-typeset h2:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h2 .headerlink:focus, .md-typeset h2:hover .headerlink:hover, .md-typeset h2:target .headerlink { color: #536dfe } .md-typeset h3:before { display: block; margin-top: -9px; content: "" } .md-typeset h3:target:before { margin-top: -3.45rem; padding-top: 3.45rem } .md-typeset h3 .headerlink:focus, .md-typeset h3:hover .headerlink, .md-typeset h3:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h3 .headerlink:focus, .md-typeset h3:hover .headerlink:hover, .md-typeset h3:target .headerlink { color: #536dfe } .md-typeset h4:before { display: block; margin-top: -9px; padding-top: 9px; content: "" } .md-typeset h4:target:before { margin-top: -3.45rem; padding-top: 3.45rem } .md-typeset h4 .headerlink:focus, .md-typeset h4:hover .headerlink, .md-typeset h4:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h4 .headerlink:focus, .md-typeset h4:hover .headerlink:hover, .md-typeset 
h4:target .headerlink { color: #536dfe } .md-typeset h5:before { display: block; margin-top: -11px; padding-top: 11px; content: "" } .md-typeset h5:target:before { margin-top: -3.55rem; padding-top: 3.55rem } .md-typeset h5 .headerlink:focus, .md-typeset h5:hover .headerlink, .md-typeset h5:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h5 .headerlink:focus, .md-typeset h5:hover .headerlink:hover, .md-typeset h5:target .headerlink { color: #536dfe } .md-typeset h6:before { display: block; margin-top: -11px; padding-top: 11px; content: "" } .md-typeset h6:target:before { margin-top: -3.55rem; padding-top: 3.55rem } .md-typeset h6 .headerlink:focus, .md-typeset h6:hover .headerlink, .md-typeset h6:target .headerlink { -webkit-transform: translate(0); transform: translate(0); opacity: 1 } .md-typeset h6 .headerlink:focus, .md-typeset h6:hover .headerlink:hover, .md-typeset h6:target .headerlink { color: #536dfe } .md-typeset .literal .pre { color: #a61717; } .md-typeset .literal { color: #37474f; line-height: 1.4; } .md-typeset code { word-break: break-word; } .md-typeset code { color: #37474f; font-size: 85%; direction: ltr; } .md-typeset code { margin: 0 .29412em; padding: .07353em 0; } .md-typeset code { position: relative; margin: 1em 0; padding: 0; border-radius: .1rem; background-color: hsla(0,0%,92.5%,.5); color: #37474f; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset code { margin: 0 .29412em; padding: .07353em 0; border-radius: .1rem; box-shadow: .29412em 0 0 hsla(0,0%,92.5%,.5),-.29412em 0 0 hsla(0,0%,92.5%,.5); word-break: break-word; -webkit-box-decoration-break: clone; box-decoration-break: clone; } .md-typeset .highlight code, .md-typeset .highlight pre { display: block; margin: 0; padding: .525rem .6rem; background-color: transparent; overflow: auto; vertical-align: top; } .md-typeset .highlighttable .highlight, .md-typeset .highlighttable .linenodiv { margin: 0; border-radius: 
0; } .md-typeset .highlight { position: relative; margin: 1em 0; padding: 0; border-radius: .1rem; background-color: hsla(0,0%,92.5%,.5); color: #37474f; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset .highlighttable .code { -webkit-box-flex: 1; flex: 1; overflow: hidden; } .md-typeset .highlighttable tbody, .md-typeset .highlighttable td { display: block; padding: 0; } .md-typeset pre { line-height: 1.4; } .md-typeset pre { position: relative; margin: 1em 0; border-radius: .1rem; line-height: 1.4; -webkit-overflow-scrolling: touch; } .md-typeset code, .md-typeset pre { background-color: hsla(0,0%,92.5%,.5); color: #37474f; font-size: 85%; direction: ltr; } .md-typeset .highlighttable .linenos pre { margin: 0; padding: 0; background-color: transparent; color: inherit; text-align: right; } .md-typeset .highlighttable .linenos { background-color: rgba(0,0,0,.07); color: rgba(0,0,0,.26); -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; } .md-typeset .highlighttable tbody, .md-typeset .highlighttable td { display: block; padding: 0; } .md-typeset .highlighttable .linenodiv { padding: .525rem .6rem; } .md-typeset .highlighttable .linenodiv { margin: 0; border-radius: 0; } .md-typeset .highlighttable tr { display: -webkit-box; display: flex; } .md-typeset .highlight .s, .md-typeset .highlight .sb, .md-typeset .highlight .sc { color: #0d904f; } .md-typeset .highlight .na, .md-typeset .highlight .nb { color: #c2185b; } .md-typeset .highlight .cp { color: #666; } .md-typeset .highlight .k { color: #3b78e7; } .md-typeset .highlight .il, .md-typeset .highlight .m, .md-typeset .highlight .mf, .md-typeset .highlight .mh, .md-typeset .highlight .mi, .md-typeset .highlight .mo { color: #e74c3c; } .md-typeset .highlight .ne, .md-typeset .highlight .nf { color: #c2185b; } .md-typeset .highlight .kr, .md-typeset .highlight .kt { color: #3e61a2; } code, kbd, pre { font-family: "","Courier New",Courier,monospace; } code, 
kbd, pre { color: rgba(0,0,0,.87); font-feature-settings: "kern"; font-family: Courier New,Courier,monospace; } .md-grid { max-width: 70rem; margin-right: auto; margin-left: auto; } .md-flex { display: table; } .md-flex__cell { display: table-cell; position: relative; vertical-align: top; } .md-flex__cell--shrink { width: 0; } .md-flex__cell--stretch { display: table; width: 100%; table-layout: fixed; } .md-flex__ellipsis { display: table-cell; text-overflow: ellipsis; white-space: nowrap; overflow: hidden; } .md-typeset .function .sig-name, .md-typeset .type .sig-name, .md-typeset .member .sig-name { background-color: transparent; color: #222; font-weight: bold; box-shadow: none; } dd > p:first-child { margin-top: 0px; }raft-0.11.3/docs/build/_static/pygments.css000066400000000000000000000001221415614527300205700ustar00rootroot00000000000000.highlight .hll { background-color: #ffffcc } .highlight { background: #ffffff; }raft-0.11.3/docs/build/api.html000066400000000000000000000127131415614527300162320ustar00rootroot00000000000000 API reference
raft-0.11.3/docs/build/fsm.html000066400000000000000000000175541415614527300162560ustar00rootroot00000000000000 struct raft_fsm — Application state machine

struct raft_fsm — Application state machine¶

The FSM struct defines the interface that the application’s state machine must implement in order to be replicated by Raft.

Data types¶

struct raft_fsm¶

Hold pointers to an actual implementation of the FSM interface.

Public members¶

void* data¶

Space for user-defined arbitrary data. C-Raft does not use and does not touch this field.

int version¶

API version implemented by this instance. Currently 1.

int (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result)¶

Apply a committed RAFT_COMMAND entry to the state machine.

int (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs)¶

Take a snapshot of the state machine.

int (*restore)(struct raft_fsm *fsm, struct raft_buffer *buf)¶

Restore a snapshot of the state machine.

raft-0.11.3/docs/build/genindex.html000066400000000000000000000000001415614527300172440ustar00rootroot00000000000000raft-0.11.3/docs/build/getting-started.html000066400000000000000000000113051415614527300205620ustar00rootroot00000000000000 Getting started

Getting started¶

raft-0.11.3/docs/build/index.html000066400000000000000000000416261415614527300165750ustar00rootroot00000000000000 C-Raft

C-Raft¶

C-Raft is a fully asynchronous C implementation of the Raft consensus protocol.

Design¶

The library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking and disk persistence.

A stock implementation of the I/O interface is provided when building the library with default options. It is based on libuv and should fit the vast majority of use cases.

Features¶

C-Raft implements all the basic features described in the Raft dissertation:

  • Leader election

  • Log replication

  • Log compaction

  • Membership changes

It also includes a few optional enhancements:

  • Optimistic pipelining to reduce log replication latency

  • Writing to leader’s disk in parallel

  • Automatic stepping down when the leader loses quorum

  • Leadership transfer extension

  • Non-voting servers

Quick start¶

Make sure that libuv is installed on your system, then run:

1
2
3
autoreconf -i
./configure --enable-example
make

Then create a main.c file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a counter:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include <raft.h>
#include <raft/uv.h>

static raft_id id = 12345;
static const char *address = "127.0.0.1:8080";
static const char *dir = "/tmp/raft-quick-start";
static struct uv_loop_s loop;
static struct raft_uv_transport transport;
static struct raft_io io;
static struct raft_fsm fsm;
static struct raft raft;
static struct raft_configuration conf;
static struct uv_timer_s timer;
static struct raft_apply apply;
static unsigned counter = 0;
static uint64_t command;

static int applyCommand(struct raft_fsm *fsm,
                        const struct raft_buffer *buf,
                        void **result) {
    counter += *(uint64_t *)buf->base;
    printf("counter: %u\n", counter);
    return 0;
}

static void submitCommand(uv_timer_t *timer) {
    struct raft_buffer buf;
    command = uv_now(timer->loop) % 10;
    buf.len = sizeof command;
    buf.base = &command;
    raft_apply(&raft, &apply, &buf, 1, NULL);
}

int main() {
    mkdir(dir, 0755);
    uv_loop_init(&loop);
    raft_uv_tcp_init(&transport, &loop);
    raft_uv_init(&io, &loop, dir, &transport);
    fsm.apply = applyCommand;
    raft_init(&raft, &io, &fsm, id, address);
    raft_configuration_init(&conf);
    raft_configuration_add(&conf, id, address, RAFT_VOTER);
    raft_bootstrap(&raft, &conf);
    raft_start(&raft);
    uv_timer_init(&loop, &timer);
    uv_timer_start(&timer, submitCommand, 0, 1000);
    uv_run(&loop, UV_RUN_DEFAULT);
}

You can compile and run it with:

1
cc main.c -o main -lraft -luv && ./main

Licence¶

This raft C library is released under a slightly modified version of LGPLv3, that includes a copiright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full license text.

raft-0.11.3/docs/build/io.html000066400000000000000000000474361415614527300161020ustar00rootroot00000000000000 struct raft_io — I/O backend interface

struct raft_io — I/O backend interface¶

The I/O backend struct defines an interface for performing periodic ticks, log store read/write and send/receive of network RPCs.

Data types¶

struct raft_io¶

Hold pointers to an actual implementation of the I/O backend interface.

void (*raft_io_close_cb)(struct raft_io *io)¶

Type definition for callback passed to raft_io.close().

void (*raft_io_tick_cb)(struct raft_io *io)¶

Callback invoked by the I/O implementation at regular intervals.

void (*raft_io_recv_cb)(struct raft_io *io, struct raft_message *msg)¶

Callback invoked by the I/O implementation when an RPC message is received.

Public members¶

void* data¶

Space for user-defined arbitrary data. C-Raft does not use and does not touch this field.

int version¶

API version implemented by this instance. Currently 1.

void* impl¶

Implementation-defined state.

char errmsg[RAFT_ERRMSG_BUF_SIZE]

Human-readable message providing diagnostic information about the last error occurred.

int (*init)(struct raft_io *io, raft_id id, const char *address)¶

Initialize the backend with operational parameters such as server ID and address.

void (*close)(struct raft_io *io, raft_io_close_cb cb)¶

Release all resources used by the backend.

The tick and recv callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. Invoke the close callback once the raft_io instance can be freed.

int (*load)(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries)¶

Load persisted state from storage.

The implementation must synchronously load the current state from its storage backend and return information about it through the given pointers.

The implementation can safely assume that this method will be invoked exactly one time, before any call to raft_io.append() or c:func:raft_io.truncate(), and then won’t be invoked again.

The snapshot object and entries array must be allocated and populated using raft_malloc(). If this function completes successfully, ownership of such memory is transferred to the caller.

int (*start)(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv)¶

Start the backend.

From now on the implementation must start accepting RPC requests and must invoke the tick callback every msecs milliseconds. The recv callback must be invoked when receiving a message.

int (*bootstrap)(struct raft_io *io, const struct raft_configuration *conf)¶

Bootstrap a server belonging to a new cluster.

The implementation must synchronously persist the given configuration as the first entry of the log. The current persisted term must be set to 1 and the vote to nil.

If an attempt is made to bootstrap a server that has already some state, then RAFT_CANTBOOTSTRAP must be returned.

int (*recover)(struct raft_io *io, const struct raft_configuration *conf)¶

Force appending a new configuration as last entry of the log.

int (*set_term)(struct raft_io *io, raft_term term)¶

Synchronously persist current term (and nil vote).

The implementation MUST ensure that the change is durable before returning (e.g. using fdatasync() or O_DSYNC).

int (*set_vote)(struct raft_io *io, raft_id server_id)¶

Synchronously persist who we voted for.

The implementation MUST ensure that the change is durable before returning (e.g. using fdatasync() or O_DSYNC).

int (*send)(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb)¶

Asynchronously send an RPC message.

The implementation is guaranteed that the memory referenced in the given message will not be released until the cb callback is invoked.

int (*append)(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb)¶

Asynchronously append the given entries to the log.

The implementation is guaranteed that the memory holding the given entries will not be released until the cb callback is invoked.

int (*truncate)(struct raft_io *io, raft_index index)¶

Asynchronously truncate all log entries from the given index onwards.

int (*snapshot_put)(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb)¶

Asynchronously persist a new snapshot. If the trailing parameter is greater than zero, then all entries older that snapshot->index - trailing must be deleted. If the trailing parameter is 0, then the snapshot completely replaces all existing entries, which should all be deleted. Subsequent calls to append() should append entries starting at index snapshot->index + 1.

If a request is submitted, the raft engine won’t submit any other request until the original one has completed.

int (*snapshot_get)(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb)¶

Asynchronously load the last snapshot.

raft_time (*time)(struct raft_io *io)¶

Return the current time, expressed in milliseconds.

int (*random)(struct raft_io *io, int min, int max)¶

Generate a random integer between min and max.

raft-0.11.3/docs/build/objects.inv000066400000000000000000000011271415614527300167370ustar00rootroot00000000000000# Sphinx inventory version 2 # Project: C-Raft documentation # Version: 0.9 # The remainder of this file is compressed using zlib. xÚ•TËnÛ0¼û+DW¹È5· ‡À‡¢AúE®l"âäÚ¨oùˆ|a¿¤K=iǢܛęÎÎ’äΑLstruct raft_fsm \u2014 Application state machine","Getting started","C-Raft","struct raft_io \u2014 I/O backend interface","struct raft \u2014 Raft server"],titleterms:{"public":[1,4,5],api:[0,5],applic:1,backend:4,data:[1,4,5],design:3,featur:3,get:2,interfac:4,licenc:3,machin:1,member:[1,4,5],quick:3,raft:[3,5],raft_fsm:1,raft_io:4,refer:0,server:5,start:[2,3],state:1,struct:[1,4,5],toc:3,type:[1,4,5]}})raft-0.11.3/docs/build/server.html000066400000000000000000000227201415614527300167660ustar00rootroot00000000000000 struct raft — Raft server

struct raft — Raft server¶

The raft server struct is the central part of C-Raft. It holds and drive the state of a single raft server in a cluster.

Data types¶

struct raft¶

A single raft server in a cluster.

raft_id¶

Hold the value of a raft server ID. Guaranteed to be at least 64-bit long.

void (*raft_close_cb)(struct raft *r)¶

Type definition for callback passed to raft_close().

Public members¶

void* data¶

Space for user-defined arbitrary data. C-Raft does not use and does not touch this field.

raft_id id¶

Server ID. Readonly.

API¶

int raft_init(struct raft *r, struct raft_io *io, struct raft_fsm *fsm, raft_id id, const char *address)¶

Initialize a raft server object.

int raft_close(struct raft* r, raft_close_cb cb)¶

Close a raft server object, releasing all used resources.

The memory of the object itself can be released only once the given close callback has been invoked.

int raft_start(struct raft* r)¶

Start a raft server.

The initial term, vote, snapshot and entries will be loaded from disk using the raft_io->load() method. The instance will start as follower, unless it’s the only voting server in the cluster, in which case it will automatically elect itself and become leader.

raft-0.11.3/docs/conf.py000066400000000000000000000040021415614527300147630ustar00rootroot00000000000000# The master toctree document. master_doc = 'index' # General information about the project. project = u'C-Raft documentation' copyright = u'2019-present, Canonical' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = u'0.9' # The full version, including alpha/beta/rc tags. release = version # The name of the Pygments (syntax highlighting) style to use. # pygments_style = 'sphinx' # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. #html_theme = 'material' # html_theme = 'material' html_theme = 'material' html_theme_path = ['_themes'] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". html_title = 'C-Raft documentation' # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = 'C-Raft' # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = 'static/logo.png' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = 'static/favicon.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". #html_static_path = ['_static'] # Output file base name for HTML help builder. htmlhelp_basename = 'raft' raft-0.11.3/docs/disk-format.rst000066400000000000000000000063331415614527300164470ustar00rootroot00000000000000Disk format =========== The implementation of metadata and log persistency is virtually the same as the one found in `LogCabin`_. The disk files consist of metadata files, closed segments, and open segments. Metadata files are used to track Raft metadata, such as the server's current term, vote, and log's start index. Segments contain contiguous entries that are part of the log. Closed segments are never written to again (but may be renamed and truncated if a suffix of the log is truncated). Open segments are where newly appended entries go. Once an open segment reaches the maximum allowed size, it is closed and a new one is used. There are usually about 3 open segments at any given time, the one with the lower index is the one actively being written, and the other ones have been fallocate'd and are ready to be used as soon as the active one gets closed. Metadata files are named "metadata1" and "metadata2". The code alternates between these so that there is always at least one readable metadata file. On startup, the readable metadata file with the higher version number is used. The format of a metadata file is: * [8 bytes] Format (currently 1). * [8 bytes] Incremental version number. * [8 bytes] Current term. * [8 bytes] ID of server we voted for. All values are in little endian encoding. Closed segments are named by the format string "%lu-%lu" with their start and end indexes, both inclusive. Closed segments always contain at least one entry, and the end index is always at least as large as the start index. 
Closed segment files may occasionally include data past their filename's end index (these are ignored but a warning is logged). This can happen if the suffix of the segment is truncated and a crash occurs at an inopportune time (the segment file is first renamed, then truncated, and a crash occurs in between). Open segments are named by the format string "open-%lu" with a unique number. These should not exist when the server shuts down cleanly, but they exist while the server is running and may be left around during a crash. Open segments either contain entries which come after the last closed segment or are full of zeros. When the server crashes while appending to an open segment, the end of that file may be corrupt. We can't distinguish between a corrupt file and a partially written entry. The code assumes it's a partially written entry, logs a warning, and ignores it. Truncating a suffix of the log will remove all entries that are no longer part of the log. Truncating a prefix of the log will only remove complete segments that are before the new log start index. For example, if a segment has entries 10 through 20 and the prefix of the log is truncated to start at entry 15, that entire segment will be retained. Each segment file starts with a segment header, which currently contains just an 8-byte version number for the format of that segment. The current format (version 1) is just a concatenation of serialized entry batches. Each batch has the following format: * [4 bytes] CRC32 checksum of the batch header, little endian. * [4 bytes] CRC32 checksum of the batch data, little endian. * [ ... ] Batch of one or more entries. .. _LogCabin: https://github.com/logcabin/logcabin/blob/master/Storage/SegmentedLog.h raft-0.11.3/docs/fsm.rst000066400000000000000000000020121415614527300150020ustar00rootroot00000000000000.. 
_fsm: :c:type:`struct raft_fsm` --- Application state machine ======================================================= The FSM struct defines the interface that the application's state machine must implement in order to be replicated by Raft. Data types ---------- .. c:type:: struct raft_fsm Hold pointers to an actual implementation of the FSM interface. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: int version API version implemented by this instance. Currently 1. .. c:member:: int (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) Apply a committed RAFT_COMMAND entry to the state machine. .. c:member:: int (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs) Take a snapshot of the state machine. .. c:member:: int (*restore)(struct raft_fsm *fsm, struct raft_buffer *buf) Restore a snapshot of the state machine. raft-0.11.3/docs/getting-started.rst000066400000000000000000000000401415614527300173210ustar00rootroot00000000000000Getting started =============== raft-0.11.3/docs/index.rst000066400000000000000000000066301415614527300153360ustar00rootroot00000000000000C-Raft ====== C-Raft is a fully asynchronous C implementation of the Raft consensus protocol. Design ------ The library has modular design: its core part implements only the core Raft algorithm logic, in a fully platform independent way. On top of that, a pluggable interface defines the I/O implementation for networking and disk persistence. A stock implementation of the I/O interface is provided when building the library with default options. It is based on `libuv`_ and should fit the vast majority of use cases. .. 
_libuv: http://libuv.org Features -------- C-Raft implements all the basic features described in the Raft dissertation: * Leader election * Log replication * Log compaction * Membership changes It also includes a few optional enhancements: * Optimistic pipelining to reduce log replication latency * Writing to leader's disk in parallel * Automatic stepping down when the leader loses quorum * Leadership transfer extension * Non-voting servers Quick start ----------- Make sure that `libuv`_ is installed on your system, then run: .. code-block:: bash :linenos: autoreconf -i ./configure --enable-example make Then create a :file:`main.c` file with this simple test program that just runs a single raft server and implements a basic state machine for incrementing a counter: .. code-block:: C :linenos: #include #include static raft_id id = 12345; static const char *address = "127.0.0.1:8080"; static const char *dir = "/tmp/raft-quick-start"; static struct uv_loop_s loop; static struct raft_uv_transport transport; static struct raft_io io; static struct raft_fsm fsm; static struct raft raft; static struct raft_configuration conf; static struct uv_timer_s timer; static struct raft_apply apply; static unsigned counter = 0; static uint64_t command; static int applyCommand(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { counter += *(uint64_t *)buf->base; printf("counter: %u\n", counter); return 0; } static void submitCommand(uv_timer_t *timer) { struct raft_buffer buf; command = uv_now(timer->loop) % 10; buf.len = sizeof command; buf.base = &command; raft_apply(&raft, &apply, &buf, 1, NULL); } int main() { mkdir(dir, 0755); uv_loop_init(&loop); raft_uv_tcp_init(&transport, &loop); raft_uv_init(&io, &loop, dir, &transport); fsm.apply = applyCommand; raft_init(&raft, &io, &fsm, id, address); raft_configuration_init(&conf); raft_configuration_add(&conf, id, address, RAFT_VOTER); raft_bootstrap(&raft, &conf); raft_start(&raft); uv_timer_init(&loop, &timer); 
uv_timer_start(&timer, submitCommand, 0, 1000); uv_run(&loop, UV_RUN_DEFAULT); } You can compile and run it with: .. code-block:: bash :linenos: cc main.c -o main -lraft -luv && ./main Licence ------- This raft C library is released under a slightly modified version of LGPLv3, that includes a copyright exception letting users to statically link the library code in their project and release the final work under their own terms. See the full `license`_ text. .. _license: https://github.com/canonical/raft/blob/master/LICENSE toc ~~~ .. toctree:: :maxdepth: 1 self getting-started api disk-format raft-0.11.3/docs/io.rst000066400000000000000000000135111415614527300146320ustar00rootroot00000000000000.. _io: :c:type:`struct raft_io` --- I/O backend interface ================================================== The I/O backend struct defines an interface for performing periodic ticks, log store read/write and send/receive of network RPCs. Data types ---------- .. c:type:: struct raft_io Hold pointers to an actual implementation of the I/O backend interface. .. c:type:: void (*raft_io_close_cb)(struct raft_io *io) Type definition for callback passed to :c:func:`raft_io.close()`. .. c:type:: void (*raft_io_tick_cb)(struct raft_io *io) Callback invoked by the I/O implementation at regular intervals. .. c:type:: void (*raft_io_recv_cb)(struct raft_io *io, struct raft_message *msg) Callback invoked by the I/O implementation when an RPC message is received. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: int version API version implemented by this instance. Currently 1. .. c:member:: void* impl Implementation-defined state. .. c:member:: char errmsg[RAFT_ERRMSG_BUF_SIZE] Human-readable message providing diagnostic information about the last error occurred. .. 
c:member:: int (*init)(struct raft_io *io, raft_id id, const char *address) Initialize the backend with operational parameters such as server ID and address. .. c:member:: void (*close)(struct raft_io *io, raft_io_close_cb cb) Release all resources used by the backend. The :code:`tick` and :code:`recv` callbacks must not be invoked anymore, and pending asynchronous requests be completed or canceled as soon as possible. Invoke the close callback once the :c:type:`raft_io` instance can be freed. .. c:member:: int (*load)(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries) Load persisted state from storage. The implementation must synchronously load the current state from its storage backend and return information about it through the given pointers. The implementation can safely assume that this method will be invoked exactly one time, before any call to :c:func:`raft_io.append()` or c:func:`raft_io.truncate()`, and then won't be invoked again. The snapshot object and entries array must be allocated and populated using :c:func:`raft_malloc`. If this function completes successfully, ownership of such memory is transferred to the caller. .. c:member:: int (*start)(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv) Start the backend. From now on the implementation must start accepting RPC requests and must invoke the :code:`tick` callback every :code:`msecs` milliseconds. The :code:`recv` callback must be invoked when receiving a message. .. c:member:: int (*bootstrap)(struct raft_io *io, const struct raft_configuration *conf) Bootstrap a server belonging to a new cluster. The implementation must synchronously persist the given configuration as the first entry of the log. The current persisted term must be set to 1 and the vote to nil. 
If an attempt is made to bootstrap a server that has already some state, then RAFT_CANTBOOTSTRAP must be returned. .. c:member:: int (*recover)(struct raft_io *io, const struct raft_configuration *conf) Force appending a new configuration as last entry of the log. .. c:member:: int (*set_term)(struct raft_io *io, raft_term term) Synchronously persist current term (and nil vote). The implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC). .. c:member:: int (*set_vote)(struct raft_io *io, raft_id server_id) Synchronously persist who we voted for. The implementation MUST ensure that the change is durable before returning (e.g. using :code:`fdatasync()` or O_DSYNC). .. c:member:: int (*send)(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb) Asynchronously send an RPC message. The implementation is guaranteed that the memory referenced in the given message will not be released until the :code:`cb` callback is invoked. .. c:member:: int (*append)(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb) Asynchronously append the given entries to the log. The implementation is guaranteed that the memory holding the given entries will not be released until the :code:`cb` callback is invoked. .. c:member:: int (*truncate)(struct raft_io *io, raft_index index) Asynchronously truncate all log entries from the given index onwards. .. c:member:: int (*snapshot_put)(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb) Asynchronously persist a new snapshot. If the :code:`trailing` parameter is greater than zero, then all entries older that :code:`snapshot->index - trailing` must be deleted. If the :code:`trailing` parameter is :code:`0`, then the snapshot completely replaces all existing entries, which should all be deleted. 
Subsequent calls to append() should append entries starting at index :code:`snapshot->index + 1`. If a request is submitted, the raft engine won't submit any other request until the original one has completed. .. c:member:: int (*snapshot_get)(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb) Asynchronously load the last snapshot. .. c:member:: raft_time (*time)(struct raft_io *io) Return the current time, expressed in milliseconds. .. c:member:: int (*random)(struct raft_io *io, int min, int max) Generate a random integer between :code:`min` and :code:`max`. raft-0.11.3/docs/server.rst000066400000000000000000000027211415614527300155320ustar00rootroot00000000000000.. _server: :c:type:`struct raft` --- Raft server ===================================== The raft server struct is the central part of C-Raft. It holds and drive the state of a single raft server in a cluster. Data types ---------- .. c:type:: struct raft A single raft server in a cluster. .. c:type:: raft_id Hold the value of a raft server ID. Guaranteed to be at least 64-bit long. .. c:type:: void (*raft_close_cb)(struct raft *r) Type definition for callback passed to :c:func:`raft_close`. Public members ^^^^^^^^^^^^^^ .. c:member:: void* data Space for user-defined arbitrary data. C-Raft does not use and does not touch this field. .. c:member:: raft_id id Server ID. Readonly. API --- .. c:function:: int raft_init(struct raft *r, struct raft_io *io, struct raft_fsm *fsm, raft_id id, const char *address) Initialize a raft server object. .. c:function:: int raft_close(struct raft* r, raft_close_cb cb) Close a raft server object, releasing all used resources. The memory of the object itself can be released only once the given close callback has been invoked. .. c:function:: int raft_start(struct raft* r) Start a raft server. The initial term, vote, snapshot and entries will be loaded from disk using the :c:func:`raft_io->load()` method. 
The instance will start as follower, unless it's the only voting server in the cluster, in which case it will automatically elect itself and become leader. raft-0.11.3/example/000077500000000000000000000000001415614527300141735ustar00rootroot00000000000000raft-0.11.3/example/cluster.c000066400000000000000000000055441415614527300160300ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #define N_SERVERS 3 /* Number of servers in the example cluster */ static int ensureDir(const char *dir) { int rv; struct stat sb; rv = stat(dir, &sb); if (rv == -1) { if (errno == ENOENT) { rv = mkdir(dir, 0700); if (rv != 0) { printf("error: create directory '%s': %s", dir, strerror(errno)); return 1; } } else { printf("error: stat directory '%s': %s", dir, strerror(errno)); return 1; } } else { if ((sb.st_mode & S_IFMT) != S_IFDIR) { printf("error: path '%s' is not a directory", dir); return 1; } } return 0; } static void forkServer(const char *topLevelDir, unsigned i, pid_t *pid) { *pid = fork(); if (*pid == 0) { char *dir = malloc(strlen(topLevelDir) + strlen("/D") + 1); char *id = malloc(N_SERVERS / 10 + 2); char *argv[] = {"./example/server", dir, id, NULL}; char *envp[] = {NULL}; int rv; sprintf(dir, "%s/%u", topLevelDir, i + 1); rv = ensureDir(dir); if (rv != 0) { abort(); } sprintf(id, "%u", i + 1); execve("./example/server", argv, envp); } } int main(int argc, char *argv[]) { const char *topLevelDir = "/tmp/raft"; struct timespec now; pid_t pids[N_SERVERS]; unsigned i; int rv; if (argc > 2) { printf("usage: example-cluster []\n"); return 1; } if (argc == 2) { topLevelDir = argv[1]; } /* Make sure the top level directory exists. 
*/ rv = ensureDir(topLevelDir); if (rv != 0) { return rv; } /* Spawn the cluster nodes */ for (i = 0; i < N_SERVERS; i++) { forkServer(topLevelDir, i, &pids[i]); } /* Seed the random generator */ timespec_get(&now, TIME_UTC); srandom((unsigned)(now.tv_nsec ^ now.tv_sec)); while (1) { struct timespec interval; int status; /* Sleep a little bit. */ interval.tv_sec = 1 + random() % 15; interval.tv_nsec = 0; rv = nanosleep(&interval, NULL); if (rv != 0) { printf("error: sleep: %s", strerror(errno)); } /* Kill a random server. */ i = (unsigned)(random() % N_SERVERS); rv = kill(pids[i], SIGINT); if (rv != 0) { printf("error: kill server %d: %s", i, strerror(errno)); } waitpid(pids[i], &status, 0); rv = nanosleep(&interval, NULL); if (rv != 0) { printf("error: sleep: %s", strerror(errno)); } forkServer(topLevelDir, i, &pids[i]); } return 0; } raft-0.11.3/example/server.c000066400000000000000000000270621415614527300156540ustar00rootroot00000000000000#include #include #include #include #include "../include/raft.h" #include "../include/raft/uv.h" #define N_SERVERS 3 /* Number of servers in the example cluster */ #define APPLY_RATE 125 /* Apply a new entry every 125 milliseconds */ #define Log(SERVER_ID, FORMAT) printf("%d: " FORMAT "\n", SERVER_ID) #define Logf(SERVER_ID, FORMAT, ...) \ printf("%d: " FORMAT "\n", SERVER_ID, __VA_ARGS__) /******************************************************************** * * Sample application FSM that just increases a counter. 
* ********************************************************************/ struct Fsm { unsigned long long count; }; static int FsmApply(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { struct Fsm *f = fsm->data; if (buf->len != 8) { return RAFT_MALFORMED; } f->count += *(uint64_t *)buf->base; *result = &f->count; return 0; } static int FsmSnapshot(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs) { struct Fsm *f = fsm->data; *n_bufs = 1; *bufs = raft_malloc(sizeof **bufs); if (*bufs == NULL) { return RAFT_NOMEM; } (*bufs)[0].len = sizeof(uint64_t); (*bufs)[0].base = raft_malloc((*bufs)[0].len); if ((*bufs)[0].base == NULL) { return RAFT_NOMEM; } *(uint64_t *)(*bufs)[0].base = f->count; return 0; } static int FsmRestore(struct raft_fsm *fsm, struct raft_buffer *buf) { struct Fsm *f = fsm->data; if (buf->len != sizeof(uint64_t)) { return RAFT_MALFORMED; } f->count = *(uint64_t *)buf->base; raft_free(buf->base); return 0; } static int FsmInit(struct raft_fsm *fsm) { struct Fsm *f = raft_malloc(sizeof *f); if (f == NULL) { return RAFT_NOMEM; } f->count = 0; fsm->version = 1; fsm->data = f; fsm->apply = FsmApply; fsm->snapshot = FsmSnapshot; fsm->restore = FsmRestore; return 0; } static void FsmClose(struct raft_fsm *f) { if (f->data != NULL) { raft_free(f->data); } } /******************************************************************** * * Example struct holding a single raft server instance and all its * dependencies. * ********************************************************************/ struct Server; typedef void (*ServerCloseCb)(struct Server *server); struct Server { void *data; /* User data context. */ struct uv_loop_s *loop; /* UV loop. */ struct uv_timer_s timer; /* To periodically apply a new entry. */ const char *dir; /* Data dir of UV I/O backend. */ struct raft_uv_transport transport; /* UV I/O backend transport. */ struct raft_io io; /* UV I/O backend. */ struct raft_fsm fsm; /* Sample application FSM. 
*/ unsigned id; /* Raft instance ID. */ char address[64]; /* Raft instance address. */ struct raft raft; /* Raft instance. */ struct raft_transfer transfer; /* Transfer leadership request. */ ServerCloseCb close_cb; /* Optional close callback. */ }; static void serverRaftCloseCb(struct raft *raft) { struct Server *s = raft->data; raft_uv_close(&s->io); raft_uv_tcp_close(&s->transport); FsmClose(&s->fsm); if (s->close_cb != NULL) { s->close_cb(s); } } static void serverTransferCb(struct raft_transfer *req) { struct Server *s = req->data; raft_id id; const char *address; raft_leader(&s->raft, &id, &address); raft_close(&s->raft, serverRaftCloseCb); } /* Final callback in the shutdown sequence, invoked after the timer handle has * been closed. */ static void serverTimerCloseCb(struct uv_handle_s *handle) { struct Server *s = handle->data; if (s->raft.data != NULL) { if (s->raft.state == RAFT_LEADER) { int rv; rv = raft_transfer(&s->raft, &s->transfer, 0, serverTransferCb); if (rv == 0) { return; } } raft_close(&s->raft, serverRaftCloseCb); } } /* Initialize the example server struct, without starting it yet. */ static int ServerInit(struct Server *s, struct uv_loop_s *loop, const char *dir, unsigned id) { struct raft_configuration configuration; struct timespec now; unsigned i; int rv; memset(s, 0, sizeof *s); /* Seed the random generator */ timespec_get(&now, TIME_UTC); srandom((unsigned)(now.tv_nsec ^ now.tv_sec)); s->loop = loop; /* Add a timer to periodically try to propose a new entry. */ rv = uv_timer_init(s->loop, &s->timer); if (rv != 0) { Logf(s->id, "uv_timer_init(): %s", uv_strerror(rv)); goto err; } s->timer.data = s; /* Initialize the TCP-based RPC transport. */ rv = raft_uv_tcp_init(&s->transport, s->loop); if (rv != 0) { goto err; } /* Initialize the libuv-based I/O backend. 
*/ rv = raft_uv_init(&s->io, s->loop, dir, &s->transport); if (rv != 0) { Logf(s->id, "raft_uv_init(): %s", s->io.errmsg); goto err_after_uv_tcp_init; } /* Initialize the finite state machine. */ rv = FsmInit(&s->fsm); if (rv != 0) { Logf(s->id, "FsmInit(): %s", raft_strerror(rv)); goto err_after_uv_init; } /* Save the server ID. */ s->id = id; /* Render the address. */ sprintf(s->address, "127.0.0.1:900%d", id); /* Initialize and start the engine, using the libuv-based I/O backend. */ rv = raft_init(&s->raft, &s->io, &s->fsm, id, s->address); if (rv != 0) { Logf(s->id, "raft_init(): %s", raft_errmsg(&s->raft)); goto err_after_fsm_init; } s->raft.data = s; /* Bootstrap the initial configuration if needed. */ raft_configuration_init(&configuration); for (i = 0; i < N_SERVERS; i++) { char address[64]; unsigned server_id = i + 1; sprintf(address, "127.0.0.1:900%d", server_id); rv = raft_configuration_add(&configuration, server_id, address, RAFT_VOTER); if (rv != 0) { Logf(s->id, "raft_configuration_add(): %s", raft_strerror(rv)); goto err_after_configuration_init; } } rv = raft_bootstrap(&s->raft, &configuration); if (rv != 0 && rv != RAFT_CANTBOOTSTRAP) { goto err_after_configuration_init; } raft_configuration_close(&configuration); raft_set_snapshot_threshold(&s->raft, 64); raft_set_snapshot_trailing(&s->raft, 16); raft_set_pre_vote(&s->raft, true); s->transfer.data = s; return 0; err_after_configuration_init: raft_configuration_close(&configuration); err_after_fsm_init: FsmClose(&s->fsm); err_after_uv_init: raft_uv_close(&s->io); err_after_uv_tcp_init: raft_uv_tcp_close(&s->transport); err: return rv; } /* Called after a request to apply a new command to the FSM has been * completed. 
*/ static void serverApplyCb(struct raft_apply *req, int status, void *result) { struct Server *s = req->data; int count; raft_free(req); if (status != 0) { if (status != RAFT_LEADERSHIPLOST) { Logf(s->id, "raft_apply() callback: %s (%d)", raft_errmsg(&s->raft), status); } return; } count = *(int *)result; if (count % 100 == 0) { Logf(s->id, "count %d", count); } } /* Called periodically every APPLY_RATE milliseconds. */ static void serverTimerCb(uv_timer_t *timer) { struct Server *s = timer->data; struct raft_buffer buf; struct raft_apply *req; int rv; if (s->raft.state != RAFT_LEADER) { return; } buf.len = sizeof(uint64_t); buf.base = raft_malloc(buf.len); if (buf.base == NULL) { Log(s->id, "serverTimerCb(): out of memory"); return; } *(uint64_t *)buf.base = 1; req = raft_malloc(sizeof *req); if (req == NULL) { Log(s->id, "serverTimerCb(): out of memory"); return; } req->data = s; rv = raft_apply(&s->raft, req, &buf, 1, serverApplyCb); if (rv != 0) { Logf(s->id, "raft_apply(): %s", raft_errmsg(&s->raft)); return; } } /* Start the example server. */ static int ServerStart(struct Server *s) { int rv; Log(s->id, "starting"); rv = raft_start(&s->raft); if (rv != 0) { Logf(s->id, "raft_start(): %s", raft_errmsg(&s->raft)); goto err; } rv = uv_timer_start(&s->timer, serverTimerCb, 0, 125); if (rv != 0) { Logf(s->id, "uv_timer_start(): %s", uv_strerror(rv)); goto err; } return 0; err: return rv; } /* Release all resources used by the example server. */ static void ServerClose(struct Server *s, ServerCloseCb cb) { s->close_cb = cb; Log(s->id, "stopping"); /* Close the timer asynchronously if it was successfully * initialized. Otherwise invoke the callback immediately. */ if (s->timer.data != NULL) { uv_close((struct uv_handle_s *)&s->timer, serverTimerCloseCb); } else { s->close_cb(s); } } /******************************************************************** * * Top-level main loop. 
* ********************************************************************/ static void mainServerCloseCb(struct Server *server) { struct uv_signal_s *sigint = server->data; uv_close((struct uv_handle_s *)sigint, NULL); } /* Handler triggered by SIGINT. It will initiate the shutdown sequence. */ static void mainSigintCb(struct uv_signal_s *handle, int signum) { struct Server *server = handle->data; assert(signum == SIGINT); uv_signal_stop(handle); server->data = handle; ServerClose(server, mainServerCloseCb); } int main(int argc, char *argv[]) { struct uv_loop_s loop; struct uv_signal_s sigint; /* To catch SIGINT and exit. */ struct Server server; const char *dir; unsigned id; int rv; if (argc != 3) { printf("usage: example-server \n"); return 1; } dir = argv[1]; id = (unsigned)atoi(argv[2]); /* Ignore SIGPIPE, see https://github.com/joyent/libuv/issues/1254 */ signal(SIGPIPE, SIG_IGN); /* Initialize the libuv loop. */ rv = uv_loop_init(&loop); if (rv != 0) { Logf(id, "uv_loop_init(): %s", uv_strerror(rv)); goto err; } /* Initialize the example server. */ rv = ServerInit(&server, &loop, dir, id); if (rv != 0) { goto err_after_server_init; } /* Add a signal handler to stop the example server upon SIGINT. */ rv = uv_signal_init(&loop, &sigint); if (rv != 0) { Logf(id, "uv_signal_init(): %s", uv_strerror(rv)); goto err_after_server_init; } sigint.data = &server; rv = uv_signal_start(&sigint, mainSigintCb, SIGINT); if (rv != 0) { Logf(id, "uv_signal_start(): %s", uv_strerror(rv)); goto err_after_signal_init; } /* Start the server. */ rv = ServerStart(&server); if (rv != 0) { goto err_after_signal_init; } /* Run the event loop until we receive SIGINT. 
*/ rv = uv_run(&loop, UV_RUN_DEFAULT); if (rv != 0) { Logf(id, "uv_run_start(): %s", uv_strerror(rv)); } uv_loop_close(&loop); return rv; err_after_signal_init: uv_close((struct uv_handle_s *)&sigint, NULL); err_after_server_init: ServerClose(&server, NULL); uv_run(&loop, UV_RUN_DEFAULT); uv_loop_close(&loop); err: return rv; } raft-0.11.3/include/000077500000000000000000000000001415614527300141635ustar00rootroot00000000000000raft-0.11.3/include/raft.h000066400000000000000000001051031415614527300152700ustar00rootroot00000000000000#ifndef RAFT_H #define RAFT_H #include #include #include #include #define RAFT_API __attribute__((visibility("default"))) /** * Error codes. */ #define RAFT_NOMEM 1 /* Out of memory */ #define RAFT_BADID 2 /* Server ID is not valid */ #define RAFT_DUPLICATEID 3 /* Server ID already in use */ #define RAFT_DUPLICATEADDRESS 4 /* Server address already in use */ #define RAFT_BADROLE 5 /* Server role is not valid */ #define RAFT_MALFORMED 6 #define RAFT_NOTLEADER 7 #define RAFT_LEADERSHIPLOST 8 #define RAFT_SHUTDOWN 9 #define RAFT_CANTBOOTSTRAP 10 #define RAFT_CANTCHANGE 11 #define RAFT_CORRUPT 12 #define RAFT_CANCELED 13 #define RAFT_NAMETOOLONG 14 #define RAFT_TOOBIG 15 #define RAFT_NOCONNECTION 16 #define RAFT_BUSY 17 #define RAFT_IOERR 18 /* File system or storage error */ #define RAFT_NOTFOUND 19 /* Resource not found */ #define RAFT_INVALID 20 /* Invalid parameter */ #define RAFT_UNAUTHORIZED 21 /* No access to a resource */ #define RAFT_NOSPACE 22 /* Not enough space on disk */ #define RAFT_TOOMANY 23 /* Some system or raft limit was hit */ /** * Size of human-readable error message buffers. */ #define RAFT_ERRMSG_BUF_SIZE 256 /** * Return the error message describing the given error code. */ RAFT_API const char *raft_strerror(int errnum); typedef unsigned long long raft_id; /** * Hold the value of a raft term. Guaranteed to be at least 64-bit long. */ typedef unsigned long long raft_term; /** * Hold the value of a raft entry index. 
Guaranteed to be at least 64-bit long. */ typedef unsigned long long raft_index; /** * Hold a time value expressed in milliseconds since the epoch. */ typedef unsigned long long raft_time; /** * A data buffer. */ struct raft_buffer { void *base; /* Pointer to the buffer data. */ size_t len; /* Length of the buffer. */ }; /** * A type for storing unknown bools. */ typedef enum { raft_tribool_unknown, raft_tribool_true, raft_tribool_false, } raft_tribool; #define TO_RAFT_TRIBOOL(b) ((b) ? raft_tribool_true : raft_tribool_false) /** * Server role codes. */ #define RAFT_STANDBY 0 /* Replicate log, does not participate in quorum. */ #define RAFT_VOTER 1 /* Replicate log, does participate in quorum. */ #define RAFT_SPARE 2 /* Does not replicate log, or participate in quorum. */ /** * Hold information about a single server in the cluster configuration. */ struct raft_server { raft_id id; /* Server ID, must be greater than zero. */ char *address; /* Server address. User defined. */ int role; /* Server role. */ }; /** * Hold information about all servers currently part of the cluster. */ struct raft_configuration { struct raft_server *servers; /* Array of servers member of the cluster. */ unsigned n; /* Number of servers in the array. */ }; /** * Initialize an empty raft configuration. */ RAFT_API void raft_configuration_init(struct raft_configuration *c); /** * Release all memory used by the given configuration object. */ RAFT_API void raft_configuration_close(struct raft_configuration *c); /** * Add a server to a raft configuration. * * The @id must be greater than zero and @address point to a valid string. * * The @role must be either #RAFT_VOTER, #RAFT_STANDBY, #RAFT_SPARE. * * If @id or @address are already in use by another server in the configuration, * an error is returned. * * The @address string will be copied and can be released after this function * returns. 
*/ RAFT_API int raft_configuration_add(struct raft_configuration *c, raft_id id, const char *address, int role); /** * Encode the given configuration object. * * The memory of the returned buffer is allocated using raft_malloc(), and * client code is responsible for releasing it when no longer needed. */ RAFT_API int raft_configuration_encode(const struct raft_configuration *c, struct raft_buffer *buf); /** * Hash function which outputs a 64-bit value based on a text and a number. * * This can be used to generate a unique ID for a new server being added, for * example based on its address and on the current time in milliseconds since * the Epoch. * * It's internally implemented as a SHA1 where only the last 8 bytes of the hash * value are kept. */ RAFT_API unsigned long long raft_digest(const char *text, unsigned long long n); /** * Log entry types. */ enum { RAFT_COMMAND = 1, /* Command for the application FSM. */ RAFT_BARRIER, /* Wait for all previous commands to be applied. */ RAFT_CHANGE /* Raft configuration change. */ }; /** * A single entry in the raft log. * * An entry that originated from this raft instance while it was the leader * (typically via client calls to raft_apply()) should normally have a @buf * attribute referencing directly the memory that was originally allocated by * the client itself to contain the entry data, and the @batch attribute set to * #NULL. * * An entry that was received from the network as part of an AppendEntries RPC * or that was loaded from disk at startup should normally have a @batch * attribute that points to a contiguous chunk of memory that contains the data * of the entry itself plus possibly the data for other entries that were * received or loaded with it at the same time. In this case the @buf pointer * will be equal to the @batch pointer plus an offset, that locates the position * of the entry's data within the batch. 
* * When the @batch attribute is not #NULL the raft library will take care of * releasing that memory only once there are no more references to the * associated entries. * * This arrangement makes it possible to minimize the amount of memory-copying * when performing I/O. */ struct raft_entry { raft_term term; /* Term in which the entry was created. */ unsigned short type; /* Type (FSM command, barrier, config change). */ struct raft_buffer buf; /* Entry data. */ void *batch; /* Batch that buf's memory points to, if any. */ }; /** * Counter for outstanding references to a log entry. * * When an entry is first appended to the log, its refcount is set to one (the * log itself is the only one referencing the entry). Whenever an entry is * included in an I/O request (to write it to disk or to send it to other * servers) its refcount is increased by one. Whenever an entry gets deleted * from the log its refcount is decreased by one. Likewise, whenever an I/O * request is completed the refcount of the relevant entries is decreased by * one. When the refcount drops to zero the memory that its @buf attribute * points to gets released, or, if the @batch attribute is non-NULL, a check is * made to see if all other entries of the same batch also have a zero refcount, * and the memory that @batch points to gets released if that's the case. */ struct raft_entry_ref { raft_term term; /* Term of the entry being ref-counted. */ raft_index index; /* Index of the entry being ref-counted. */ unsigned short count; /* Number of references. */ struct raft_entry_ref *next; /* Next item in the bucket (for collisions). */ }; /** * In-memory cache of the persistent raft log stored on disk. * * The raft log cache is implemented as a circular buffer of log entries, which * makes some frequent operations very efficient (e.g. deleting the first N * entries when snapshotting). */ struct raft_log { struct raft_entry *entries; /* Circular buffer of log entries. 
*/ size_t size; /* Number of available slots in the buffer. */ size_t front, back; /* Indexes of used slots [front, back). */ raft_index offset; /* Index of first entry is offset+1. */ struct raft_entry_ref *refs; /* Log entries reference counts hash table. */ size_t refs_size; /* Size of the reference counts hash table. */ struct /* Information about last snapshot, or zero. */ { raft_index last_index; /* Snapshot replaces all entries up to here. */ raft_term last_term; /* Term of last index. */ } snapshot; }; /** * Hold the arguments of a RequestVote RPC. * * The RequestVote RPC is invoked by candidates to gather votes. */ struct raft_request_vote { raft_term term; /* Candidate's term. */ raft_id candidate_id; /* ID of the server requesting the vote. */ raft_index last_log_index; /* Index of candidate's last log entry. */ raft_index last_log_term; /* Term of log entry at last_log_index. */ bool disrupt_leader; /* True if current leader should be discarded. */ bool pre_vote; /* True if this is a pre-vote request. */ }; /** * Hold the result of a RequestVote RPC. */ struct raft_request_vote_result { raft_term term; /* Receiver's current term (candidate updates itself). */ bool vote_granted; /* True means candidate received vote. */ raft_tribool pre_vote; /* The response to a pre-vote RequestVote or not. */ }; /** * Hold the arguments of an AppendEntries RPC. * * The AppendEntries RPC is invoked by the leader to replicate log entries. It's * also used as heartbeat (figure 3.1). */ struct raft_append_entries { raft_term term; /* Leader's term. */ raft_index prev_log_index; /* Index of log entry preceeding new ones. */ raft_term prev_log_term; /* Term of entry at prev_log_index. */ raft_index leader_commit; /* Leader's commit index. */ struct raft_entry *entries; /* Log entries to append. */ unsigned n_entries; /* Size of the log entries array. */ }; /** * Hold the result of an AppendEntries RPC (figure 3.1). 
*/ struct raft_append_entries_result { raft_term term; /* Receiver's current_term. */ raft_index rejected; /* If non-zero, the index that was rejected. */ raft_index last_log_index; /* Receiver's last log entry index, as hint. */ }; /** * Hold the arguments of an InstallSnapshot RPC (figure 5.3). */ struct raft_install_snapshot { raft_term term; /* Leader's term. */ raft_index last_index; /* Index of last entry in the snapshot. */ raft_term last_term; /* Term of last_index. */ struct raft_configuration conf; /* Config as of last_index. */ raft_index conf_index; /* Commit index of conf. */ struct raft_buffer data; /* Raw snapshot data. */ }; /** * Hold the arguments of a TimeoutNow RPC. * * The TimeoutNow RPC is invoked by leaders to transfer leadership to a * follower. */ struct raft_timeout_now { raft_term term; /* Leader's term. */ raft_index last_log_index; /* Index of leader's last log entry. */ raft_index last_log_term; /* Term of log entry at last_log_index. */ }; /** * Type codes for RPC messages. */ enum { RAFT_IO_APPEND_ENTRIES = 1, RAFT_IO_APPEND_ENTRIES_RESULT, RAFT_IO_REQUEST_VOTE, RAFT_IO_REQUEST_VOTE_RESULT, RAFT_IO_INSTALL_SNAPSHOT, RAFT_IO_TIMEOUT_NOW }; /** * A single RPC message that can be sent or received over the network. */ struct raft_message { unsigned short type; /* RPC type code. */ raft_id server_id; /* ID of sending or destination server. */ const char *server_address; /* Address of sending or destination server. */ union { /* Type-specific data */ struct raft_request_vote request_vote; struct raft_request_vote_result request_vote_result; struct raft_append_entries append_entries; struct raft_append_entries_result append_entries_result; struct raft_install_snapshot install_snapshot; struct raft_timeout_now timeout_now; }; }; /** * Hold the details of a snapshot. */ struct raft_snapshot { /* Index and term of last entry included in the snapshot. 
*/ raft_index index; raft_term term; /* Last committed configuration included in the snapshot, along with the * index it was committed at. */ struct raft_configuration configuration; raft_index configuration_index; /* Content of the snapshot. When a snapshot is taken, the user FSM can fill * the bufs array with more than one buffer. When a snapshot is restored, * there will always be a single buffer. */ struct raft_buffer *bufs; unsigned n_bufs; }; /** * Asynchronous request to send an RPC message. */ struct raft_io_send; typedef void (*raft_io_send_cb)(struct raft_io_send *req, int status); struct raft_io_send { void *data; /* User data */ raft_io_send_cb cb; /* Request callback */ }; /** * Asynchronous request to store new log entries. */ struct raft_io_append; typedef void (*raft_io_append_cb)(struct raft_io_append *req, int status); struct raft_io_append { void *data; /* User data */ raft_io_append_cb cb; /* Request callback */ }; /** * Asynchronous request to store a new snapshot. */ struct raft_io_snapshot_put; typedef void (*raft_io_snapshot_put_cb)(struct raft_io_snapshot_put *req, int status); struct raft_io_snapshot_put { void *data; /* User data */ raft_io_snapshot_put_cb cb; /* Request callback */ }; /** * Asynchronous request to load the most recent snapshot available. */ struct raft_io_snapshot_get; typedef void (*raft_io_snapshot_get_cb)(struct raft_io_snapshot_get *req, struct raft_snapshot *snapshot, int status); struct raft_io_snapshot_get { void *data; /* User data */ raft_io_snapshot_get_cb cb; /* Request callback */ }; /** * Customizable tracer, for debugging purposes. */ struct raft_tracer { /** * Implementation-defined state object. */ void *impl; /** * Whether this tracer should emit messages. */ bool enabled; /** * Emit the given trace message, possibly decorating it with the provided * metadata. */ void (*emit)(struct raft_tracer *t, const char *file, int line, const char *message); }; struct raft_io; /* Forward declaration. 
*/ /** * Callback invoked by the I/O implementation at regular intervals. */ typedef void (*raft_io_tick_cb)(struct raft_io *io); /** * Callback invoked by the I/O implementation when an RPC message is received. */ typedef void (*raft_io_recv_cb)(struct raft_io *io, struct raft_message *msg); typedef void (*raft_io_close_cb)(struct raft_io *io); struct raft_io { int version; void *data; void *impl; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int (*init)(struct raft_io *io, raft_id id, const char *address); void (*close)(struct raft_io *io, raft_io_close_cb cb); int (*load)(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n_entries); int (*start)(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick, raft_io_recv_cb recv); int (*bootstrap)(struct raft_io *io, const struct raft_configuration *conf); int (*recover)(struct raft_io *io, const struct raft_configuration *conf); int (*set_term)(struct raft_io *io, raft_term term); int (*set_vote)(struct raft_io *io, raft_id server_id); int (*send)(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb); int (*append)(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb); int (*truncate)(struct raft_io *io, raft_index index); int (*snapshot_put)(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb); int (*snapshot_get)(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb); raft_time (*time)(struct raft_io *io); int (*random)(struct raft_io *io, int min, int max); }; struct raft_fsm { int version; void *data; int (*apply)(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result); int (*snapshot)(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs); int (*restore)(struct raft_fsm *fsm, 
struct raft_buffer *buf); }; /** * State codes. */ enum { RAFT_UNAVAILABLE, RAFT_FOLLOWER, RAFT_CANDIDATE, RAFT_LEADER }; /** * Used by leaders to keep track of replication progress for each server. */ struct raft_progress { unsigned short state; /* Probe, pipeline or snapshot. */ raft_index next_index; /* Next entry to send. */ raft_index match_index; /* Highest index reported as replicated. */ raft_index snapshot_index; /* Last index of most recent snapshot sent. */ raft_time last_send; /* Timestamp of last AppendEntries RPC. */ raft_time snapshot_last_send; /* Timestamp of last InstallSnaphot RPC. */ bool recent_recv; /* A msg was received within election timeout. */ }; struct raft; /* Forward declaration. */ /** * Close callback. * * It's safe to release the memory of a raft instance only after this callback * has fired. */ typedef void (*raft_close_cb)(struct raft *raft); struct raft_change; /* Forward declaration */ struct raft_transfer; /* Forward declaration */ /** * Hold and drive the state of a single raft server in a cluster. */ struct raft { void *data; /* Custom user data. */ struct raft_tracer *tracer; /* Tracer implementation. */ struct raft_io *io; /* Disk and network I/O implementation. */ struct raft_fsm *fsm; /* User-defined FSM to apply commands to. */ raft_id id; /* Server ID of this raft instance. */ char *address; /* Server address of this raft instance. */ /* * Cache of the server's persistent state, updated on stable storage before * responding to RPCs (Figure 3.1). */ raft_term current_term; /* Latest term server has seen. */ raft_id voted_for; /* Candidate that received vote in current term. */ struct raft_log log; /* Log entries. */ /* * Current membership configuration (Chapter 4). * * At any given moment the current configuration can be committed or * uncommitted. * * If a server is voting, the log entry with index 1 must always contain the * first committed configuration. * * The possible scenarios are: * * 1. 
#configuration_index and #configuration_uncommitted_index are both * zero. This should only happen when a brand new server starts joining a * cluster and is waiting to receive log entries from the current * leader. In this case #configuration must be empty and have no servers. * * 2. #configuration_index is non-zero while #configuration_uncommitted_index * is zero. In this case the content of #configuration must match the one * of the log entry at #configuration_index. * * 3. #configuration_index and #configuration_uncommitted_index are both * non-zero, with the latter being greater than the former. In this case * the content of #configuration must match the one of the log entry at * #configuration_uncommitted_index. */ struct raft_configuration configuration; raft_index configuration_index; raft_index configuration_uncommitted_index; /* * Election timeout in milliseconds (default 1000). * * From 3.4: * * Raft uses a heartbeat mechanism to trigger leader election. When * servers start up, they begin as followers. A server remains in follower * state as long as it receives valid RPCs from a leader or * candidate. Leaders send periodic heartbeats (AppendEntries RPCs that * carry no log entries) to all followers in order to maintain their * authority. If a follower receives no communication over a period of * time called the election timeout, then it assumes there is no viable * leader and begins an election to choose a new leader. * * This is the baseline value and will be randomized between 1x and 2x. * * See raft_change_election_timeout() to customize the value of this * attribute. */ unsigned election_timeout; /* * Heartbeat timeout in milliseconds (default 100). This is relevant only * for when the raft instance is in leader state: empty AppendEntries RPCs * will be sent if this amount of milliseconds elapses without any * user-triggered AppendEntries RCPs being sent. 
* * From Figure 3.1: * * [Leaders] Send empty AppendEntries RPC during idle periods to prevent * election timeouts. */ unsigned heartbeat_timeout; /* * When the leader sends an InstallSnapshot RPC to a follower it will consider * the RPC as failed after this timeout and retry. */ unsigned install_snapshot_timeout; /* * The fields below hold the part of the server's volatile state which is * always applicable regardless of the whether the server is follower, * candidate or leader (Figure 3.1). This state is rebuilt automatically * after a server restart. */ raft_index commit_index; /* Highest log entry known to be committed */ raft_index last_applied; /* Highest log entry applied to the FSM */ raft_index last_stored; /* Highest log entry persisted on disk */ /* * Current server state of this raft instance, along with a union defining * state-specific values. */ unsigned short state; union { struct /* Follower */ { unsigned randomized_election_timeout; /* Timer expiration. */ struct /* Current leader info. */ { raft_id id; char *address; } current_leader; } follower_state; struct { unsigned randomized_election_timeout; /* Timer expiration. */ bool *votes; /* Vote results. */ bool disrupt_leader; /* For leadership transfer */ bool in_pre_vote; /* True in pre-vote phase. */ } candidate_state; struct { struct raft_progress *progress; /* Per-server replication state. */ struct raft_change *change; /* Pending membership change. */ raft_id promotee_id; /* ID of server being promoted. */ unsigned short round_number; /* Current sync round. */ raft_index round_index; /* Target of the current round. */ raft_time round_start; /* Start of current round. */ void *requests[2]; /* Outstanding client requests. */ } leader_state; }; /* Election timer start. * * This timer has different purposes depending on the state. Followers * convert to candidate after the randomized election timeout has elapsed * without leader contact. 
Candidates start a new election after the * randomized election timeout has elapsed without a winner. Leaders step * down after the election timeout has elapsed without contacting a majority * of voting servers. */ raft_time election_timer_start; /* In-progress leadership transfer request, if any. */ struct raft_transfer *transfer; /* * Information about the last snapshot that was taken (if any). */ struct { unsigned threshold; /* N. of entries before snapshot */ unsigned trailing; /* N. of trailing entries to retain */ struct raft_snapshot pending; /* In progress snapshot */ struct raft_io_snapshot_put put; /* Store snapshot request */ } snapshot; /* * Callback to invoke once a close request has completed. */ raft_close_cb close_cb; /* * Human-readable message providing diagnostic information about the last * error occurred. */ char errmsg[RAFT_ERRMSG_BUF_SIZE]; /* Whether to use pre-vote to avoid disconnected servers disrupting the * current leader, as described in 4.2.3 and 9.6. */ bool pre_vote; /* Limit how long to wait for a stand-by to catch-up with the log when its * being promoted to voter. */ unsigned max_catch_up_rounds; unsigned max_catch_up_round_duration; }; RAFT_API int raft_init(struct raft *r, struct raft_io *io, struct raft_fsm *fsm, raft_id id, const char *address); RAFT_API void raft_close(struct raft *r, raft_close_cb cb); /** * Bootstrap this raft instance using the given configuration. The instance must * not have been started yet and must be completely pristine, otherwise * #RAFT_CANTBOOTSTRAP will be returned. */ RAFT_API int raft_bootstrap(struct raft *r, const struct raft_configuration *conf); /** * Force a new configuration in order to recover from a loss of quorum where the * current configuration cannot be restored, such as when a majority of servers * die at the same time. * * This works by appending the new configuration directly to the log stored on * disk. 
* * In order for this operation to be safe you must follow these steps: * * 1. Make sure that no servers in the cluster are running, either because they * died or because you manually stopped them. * * 2. Run @raft_recover exactly one time, on the non-dead server which has * the highest term and the longest log. * * 3. Copy the data directory of the server you ran @raft_recover on to all * other non-dead servers in the cluster, replacing their current data * directory. * * 4. Restart all servers. */ RAFT_API int raft_recover(struct raft *r, const struct raft_configuration *conf); RAFT_API int raft_start(struct raft *r); /** * Set the election timeout. * * Every raft instance is initialized with a default election timeout of 1000 * milliseconds. If you wish to tweak it, call this function before starting * your event loop. * * From Chapter 9: * * We recommend a range that is 10-20 times the one-way network latency, which * keeps split votes rates under 40% in all cases for reasonably sized * clusters, and typically results in much lower rates. * * Note that the current random election timer will be reset and a new one timer * will be generated. */ RAFT_API void raft_set_election_timeout(struct raft *r, unsigned msecs); /** * Set the heartbeat timeout. */ RAFT_API void raft_set_heartbeat_timeout(struct raft *r, unsigned msecs); /** * Set the snapshot install timeout. */ RAFT_API void raft_set_install_snapshot_timeout(struct raft *r, unsigned msecs); /** * Number of outstanding log entries before starting a new snapshot. The default * is 1024. */ RAFT_API void raft_set_snapshot_threshold(struct raft *r, unsigned n); /** * Enable or disable pre-vote support. Pre-vote is turned off by default. */ RAFT_API void raft_set_pre_vote(struct raft *r, bool enabled); /** * Number of outstanding log entries to keep in the log after a snapshot has * been taken. This avoids sending snapshots when a follower is behind by just a * few entries. The default is 128. 
*/ RAFT_API void raft_set_snapshot_trailing(struct raft *r, unsigned n); /** * Set the maximum number of a catch-up rounds to try when replicating entries * to a stand-by server that is being promoted to voter, before giving up and * failing the configuration change. The default is 10. */ RAFT_API void raft_set_max_catch_up_rounds(struct raft *r, unsigned n); /** * Set the maximum duration of a catch-up round when replicating entries to a * stand-by server that is being promoted to voter. The default is 5 seconds. */ RAFT_API void raft_set_max_catch_up_round_duration(struct raft *r, unsigned msecs); /** * Return a human-readable description of the last error occurred. */ RAFT_API const char *raft_errmsg(struct raft *r); /** * Return the code of the current raft state. */ RAFT_API int raft_state(struct raft *r); /** * Return the ID and address of the current known leader, if any. */ RAFT_API void raft_leader(struct raft *r, raft_id *id, const char **address); /** * Return the index of the last entry that was appended to the local log. */ RAFT_API raft_index raft_last_index(struct raft *r); /** * Return the index of the last entry that was applied to the local FSM. */ RAFT_API raft_index raft_last_applied(struct raft *r); /* Common fields across client request types. */ #define RAFT__REQUEST \ void *data; \ int type; \ raft_index index; \ void *queue[2] /** * Asynchronous request to append a new command entry to the log and apply it to * the FSM when a quorum is reached. */ struct raft_apply; typedef void (*raft_apply_cb)(struct raft_apply *req, int status, void *result); struct raft_apply { RAFT__REQUEST; raft_apply_cb cb; }; /** * Propose to append commands to the log and apply them to the FSM once * committed. * * If this server is the leader, it will create @n new log entries of type * #RAFT_COMMAND using the given buffers as their payloads, append them to its * own log and attempt to replicate them on other servers by sending * AppendEntries RPCs. 
* * The memory pointed at by the @base attribute of each #raft_buffer in the * given array must have been allocated with raft_malloc() or a compatible * allocator. If this function returns 0, the ownership of this memory is * implicitly transferred to the raft library, which will take care of releasing * it when appropriate. Any further client access to such memory leads to * undefined behavior. * * The ownership of the memory of the @bufs array itself is not transferred to * the raft library, and, if allocated dynamically, must be deallocated by the * caller. */ RAFT_API int raft_apply(struct raft *r, struct raft_apply *req, const struct raft_buffer bufs[], const unsigned n, raft_apply_cb cb); /** * Asynchronous request to append a barrier entry. */ struct raft_barrier; typedef void (*raft_barrier_cb)(struct raft_barrier *req, int status); struct raft_barrier { RAFT__REQUEST; raft_barrier_cb cb; }; /** * Propose to append a log entry of type #RAFT_BARRIER. * * This can be used to ensure that there are no unapplied commands. */ RAFT_API int raft_barrier(struct raft *r, struct raft_barrier *req, raft_barrier_cb cb); /** * Asynchronous request to change the raft configuration. */ typedef void (*raft_change_cb)(struct raft_change *req, int status); struct raft_change { void *data; raft_change_cb cb; }; /** * Add a new server to the cluster configuration. Its initial role will be * #RAFT_SPARE. */ RAFT_API int raft_add(struct raft *r, struct raft_change *req, raft_id id, const char *address, raft_change_cb cb); /** * Assign a new role to the given server. * * If the server has already the given role, or if the given role is unknown, * #RAFT_BADROLE is returned. */ RAFT_API int raft_assign(struct raft *r, struct raft_change *req, raft_id id, int role, raft_change_cb cb); /** * Remove the given server from the cluster configuration. 
*/ RAFT_API int raft_remove(struct raft *r, struct raft_change *req, raft_id id, raft_change_cb cb); /** * Asynchronous request to transfer leadership. */ typedef void (*raft_transfer_cb)(struct raft_transfer *req); struct raft_transfer { void *data; /* User data */ raft_id id; /* ID of target server. */ raft_time start; /* Start of leadership transfer. */ struct raft_io_send send; /* For sending TimeoutNow */ raft_transfer_cb cb; /* User callback */ }; /** * Transfer leadership to the server with the given ID. * * If the target server is not part of the configuration, or it's the leader * itself, or it's not a #RAFT_VOTER, then #RAFT_BADID is returned. * * The special value #0 means to automatically select a voting follower to * transfer leadership to. If there are no voting followers, return * #RAFT_NOTFOUND. * * When this server detects that the target server has become the leader, or * when @election_timeout milliseconds have elapsed, the given callback will be * invoked. * * After the callback files, clients can check whether the operation was * successful or not by calling @raft_leader() and checking if it returns the * target server. */ RAFT_API int raft_transfer(struct raft *r, struct raft_transfer *req, raft_id id, raft_transfer_cb cb); /** * User-definable dynamic memory allocation functions. * * The @data field will be passed as first argument to all functions. 
*/ struct raft_heap { void *data; /* User data */ void *(*malloc)(void *data, size_t size); void (*free)(void *data, void *ptr); void *(*calloc)(void *data, size_t nmemb, size_t size); void *(*realloc)(void *data, void *ptr, size_t size); void *(*aligned_alloc)(void *data, size_t alignment, size_t size); void (*aligned_free)(void *data, size_t alignment, void *ptr); }; RAFT_API void *raft_malloc(size_t size); RAFT_API void raft_free(void *ptr); RAFT_API void *raft_calloc(size_t nmemb, size_t size); RAFT_API void *raft_realloc(void *ptr, size_t size); RAFT_API void *raft_aligned_alloc(size_t alignment, size_t size); RAFT_API void raft_aligned_free(size_t alignment, void *ptr); /** * Use a custom dynamic memory allocator. */ RAFT_API void raft_heap_set(struct raft_heap *heap); /** * Use the default dynamic memory allocator (from the stdlib). This clears any * custom allocator specified with @raft_heap_set. */ RAFT_API void raft_heap_set_default(void); #undef RAFT__REQUEST #endif /* RAFT_H */ raft-0.11.3/include/raft/000077500000000000000000000000001415614527300151175ustar00rootroot00000000000000raft-0.11.3/include/raft/fixture.h000066400000000000000000000411601415614527300167600ustar00rootroot00000000000000/** * Raft cluster test fixture, using an in-memory @raft_io implementation. This * is meant to be used in unit tests. */ #ifndef RAFT_FIXTURE_H #define RAFT_FIXTURE_H #include "../raft.h" #define RAFT_FIXTURE_MAX_SERVERS 8 /** * Fixture step event types. */ enum { RAFT_FIXTURE_TICK = 1, /* The tick callback has been invoked */ RAFT_FIXTURE_NETWORK, /* A network request has been sent or received */ RAFT_FIXTURE_DISK /* An I/O request has been submitted */ }; /** * State of a single server in a cluster fixture. */ struct raft_fixture_server { bool alive; /* If false, the server is down. */ raft_id id; /* Server ID. */ char address[16]; /* Server address (stringified ID). */ struct raft_tracer tracer; /* Tracer. 
*/ struct raft_io io; /* In-memory raft_io implementation. */ struct raft raft; /* Raft instance. */ }; /** * Information about a test cluster event triggered by the fixture. */ struct raft_fixture_event { unsigned server_index; /* Index of the server the event occurred on. */ int type; /* Type of the event. */ }; /** * Event callback. See raft_fixture_hook(). */ struct raft_fixture; typedef void (*raft_fixture_event_cb)(struct raft_fixture *f, struct raft_fixture_event *event); /** * Test implementation of a cluster of @n servers, each having a user-provided * FSM. * * The cluster can simulate network latency and time elapsed on individual * servers. * * Servers can be alive or dead. Network messages sent to dead servers are * dropped. Dead servers do not have their @raft_io_tick_cb callback invoked. * * Any two servers can be connected or disconnected. Network messages sent * between disconnected servers are dropped. */ struct raft_fixture { raft_time time; /* Global time, common to all servers. */ unsigned n; /* Number of servers. */ raft_id leader_id; /* ID of current leader, or 0 if none. */ struct raft_log log; /* Copy of current leader's log. */ raft_index commit_index; /* Current commit index on leader. */ struct raft_fixture_event event; /* Last event occurred. */ raft_fixture_event_cb hook; /* Event callback. */ struct raft_fixture_server servers[RAFT_FIXTURE_MAX_SERVERS]; }; /** * Initialize a raft cluster fixture with @n servers. Each server will use an * in-memory @raft_io implementation and one of the given @fsms. All servers * will be initially connected to one another, but they won't be bootstrapped or * started. */ RAFT_API int raft_fixture_init(struct raft_fixture *f, unsigned n, struct raft_fsm *fsms); /** * Release all memory used by the fixture. */ RAFT_API void raft_fixture_close(struct raft_fixture *f); /** * Convenience to generate a configuration object containing all servers in the * cluster. 
The first @n_voting servers will be voting ones. */ RAFT_API int raft_fixture_configuration(struct raft_fixture *f, unsigned n_voting, struct raft_configuration *conf); /** * Convenience to bootstrap all servers in the cluster using the given * configuration. */ RAFT_API int raft_fixture_bootstrap(struct raft_fixture *f, struct raft_configuration *conf); /** * Convenience to start all servers in the fixture. */ RAFT_API int raft_fixture_start(struct raft_fixture *f); /** * Return the number of servers in the fixture. */ RAFT_API unsigned raft_fixture_n(struct raft_fixture *f); /** * Return the current cluster global time. All raft instances see the same time. */ RAFT_API raft_time raft_fixture_time(struct raft_fixture *f); /** * Return the raft instance associated with the @i'th server of the fixture. */ RAFT_API struct raft *raft_fixture_get(struct raft_fixture *f, unsigned i); /** * Return @true if the @i'th server hasn't been killed. */ RAFT_API bool raft_fixture_alive(struct raft_fixture *f, unsigned i); /** * Return the index of the current leader, or the current number of servers if * there's no leader. */ RAFT_API unsigned raft_fixture_leader_index(struct raft_fixture *f); /** * Return the ID of the server the @i'th server has voted for, or zero . */ RAFT_API raft_id raft_fixture_voted_for(struct raft_fixture *f, unsigned i); /** * Drive the cluster so the @i'th server gets elected as leader. * * This is achieved by bumping the randomized election timeout of all other * servers to a very high value, letting the one of the @i'th server expire and * then stepping the cluster until the election is won. * * There must currently be no leader and no candidate and the given server must * be a voting one. Also, the @i'th server must be connected to a majority of * voting servers. */ RAFT_API void raft_fixture_elect(struct raft_fixture *f, unsigned i); /** * Drive the cluster so the current leader gets deposed. 
* * This is achieved by dropping all AppendEntries result messages sent by * followers to the leader, until the leader decides to step down because it has * lost connectivity to a majority of followers. */ RAFT_API void raft_fixture_depose(struct raft_fixture *f); /** * Step through the cluster state advancing the time to the minimum value needed * for it to make progress (i.e. for a message to be delivered, for an I/O * operation to complete or for a single time tick to occur). * * In particular, the following happens: * * 1. If there are pending #raft_io_send requests, that have been submitted * using #raft_io->send() and not yet sent, the oldest one is picked and the * relevant callback fired. This simulates completion of a socket write, * which means that the send request has been completed. The receiver does * not immediately receives the message, as the message is propagating * through the network. However any memory associated with the #raft_io_send * request can be released (e.g. log entries). The in-memory I/O * implementation assigns a latency to each RPC message, which will get * delivered to the receiver only after that amount of time elapses. If the * sender and the receiver are currently disconnected, the RPC message is * simply dropped. If a callback was fired, jump directly to 3. and skip 2. * * 2. All pending #raft_io_append disk writes across all servers, that have been * submitted using #raft_io->append() but not yet completed, are scanned and * the one with the lowest completion time is picked. All in-flight network * messages waiting to be delivered are scanned and the one with the lowest * delivery time is picked. All servers are scanned, and the one with the * lowest tick expiration time is picked. The three times are compared and * the lowest one is picked. 
If a #raft_io_append disk write has completed, * the relevant callback will be invoked, if there's a network message to be * delivered, the receiver's @raft_io_recv_cb callback gets fired, if a tick * timer has expired the relevant #raft_io->tick() callback will be * invoked. Only one event will be fired. If there is more than one event to * fire, one of them is picked according to the following rules: events for * servers with lower index are fired first, tick events take precedence over * disk events, and disk events take precedence over network events. * * 3. The current cluster leader is detected (if any). When detecting the leader * the Election Safety property is checked: no servers can be in leader state * for the same term. The server in leader state with the highest term is * considered the current cluster leader, as long as it's "stable", i.e. it * has been acknowledged by all servers connected to it, and those servers * form a majority (this means that no further leader change can happen, * unless the network gets disrupted). If there is a stable leader and it has * not changed with respect to the previous call to @raft_fixture_step(), * then the Leader Append-Only property is checked, by comparing its log with * a copy of it that was taken during the previous iteration. * * 4. If there is a stable leader, its current log is copied, in order to be * able to check the Leader Append-Only property at the next call. * * 5. If there is a stable leader, its commit index gets copied. * * The function returns information about which particular event occurred * (either in step 1 or 2). */ RAFT_API struct raft_fixture_event *raft_fixture_step(struct raft_fixture *f); /** * Call raft_fixture_step() exactly @n times, and return the last event fired. */ RAFT_API struct raft_fixture_event *raft_fixture_step_n(struct raft_fixture *f, unsigned n); /** * Step the cluster until the given @stop function returns #true, or @max_msecs * have elapsed. 
* * Return #true if the @stop function has returned #true within @max_msecs. */ RAFT_API bool raft_fixture_step_until(struct raft_fixture *f, bool (*stop)(struct raft_fixture *f, void *arg), void *arg, unsigned max_msecs); /** * Step the cluster until @msecs have elapsed. */ RAFT_API void raft_fixture_step_until_elapsed(struct raft_fixture *f, unsigned msecs); /** * Step the cluster until a leader is elected, or @max_msecs have elapsed. */ RAFT_API bool raft_fixture_step_until_has_leader(struct raft_fixture *f, unsigned max_msecs); /** * Step the cluster until the current leader gets deposed, or @max_msecs have * elapsed. */ RAFT_API bool raft_fixture_step_until_has_no_leader(struct raft_fixture *f, unsigned max_msecs); /** * Step the cluster until the @i'th server has applied the entry at the given * index, or @max_msecs have elapsed. If @i equals the number of servers, then * step until all servers have applied the given entry. */ RAFT_API bool raft_fixture_step_until_applied(struct raft_fixture *f, unsigned i, raft_index index, unsigned max_msecs); /** * Step the cluster until the state of the @i'th server matches the given one, * or @max_msecs have elapsed. */ RAFT_API bool raft_fixture_step_until_state_is(struct raft_fixture *f, unsigned i, int state, unsigned max_msecs); /** * Step the cluster until the term of the @i'th server matches the given one, * or @max_msecs have elapsed. */ RAFT_API bool raft_fixture_step_until_term_is(struct raft_fixture *f, unsigned i, raft_term term, unsigned max_msecs); /** * Step the cluster until the @i'th server has voted for the @j'th one, or * @max_msecs have elapsed. */ RAFT_API bool raft_fixture_step_until_voted_for(struct raft_fixture *f, unsigned i, unsigned j, unsigned max_msecs); /** * Step the cluster until all pending network messages from the @i'th server to * the @j'th server have been delivered, or @max_msecs have elapsed. 
*/ RAFT_API bool raft_fixture_step_until_delivered(struct raft_fixture *f, unsigned i, unsigned j, unsigned max_msecs); /** * Set a function to be called after every time a fixture event occurs as * consequence of a step. */ RAFT_API void raft_fixture_hook(struct raft_fixture *f, raft_fixture_event_cb hook); /** * Disconnect the @i'th and the @j'th servers, so attempts to send a message * from @i to @j will fail with #RAFT_NOCONNECTION. */ RAFT_API void raft_fixture_disconnect(struct raft_fixture *f, unsigned i, unsigned j); /** * Reconnect the @i'th and the @j'th servers, so attempts to send a message * from @i to @j will succeed again. */ RAFT_API void raft_fixture_reconnect(struct raft_fixture *f, unsigned i, unsigned j); /** * Saturate the connection between the @i'th and the @j'th servers, so messages * sent by @i to @j will be silently dropped. */ RAFT_API void raft_fixture_saturate(struct raft_fixture *f, unsigned i, unsigned j); /** * Return true if the connection from the @i'th to the @j'th server has been set * as saturated. */ RAFT_API bool raft_fixture_saturated(struct raft_fixture *f, unsigned i, unsigned j); /** * Desaturate the connection between the @i'th and the @j'th servers, so * messages sent by @i to @j will start being delivered again. */ RAFT_API void raft_fixture_desaturate(struct raft_fixture *f, unsigned i, unsigned j); /** * Kill the server with the given index. The server won't receive any message * and its tick callback won't be invoked. */ RAFT_API void raft_fixture_kill(struct raft_fixture *f, unsigned i); /** * Add a new empty server to the cluster and connect it to all others. */ RAFT_API int raft_fixture_grow(struct raft_fixture *f, struct raft_fsm *fsm); /** * Set the value that will be returned to the @i'th raft instance when it asks * the underlying #raft_io implementation for a randomized election timeout * value. The default value is 1000 + @i * 100, meaning that the election timer * of server 0 will expire first. 
*/ RAFT_API void raft_fixture_set_randomized_election_timeout( struct raft_fixture *f, unsigned i, unsigned msecs); /** * Set the network latency in milliseconds. Each RPC message sent by the @i'th * server from now on will take @msecs milliseconds to be delivered. The default * value is 15. */ RAFT_API void raft_fixture_set_network_latency(struct raft_fixture *f, unsigned i, unsigned msecs); /** * Set the disk I/O latency in milliseconds. Each append request will take this * amount of milliseconds to complete. The default value is 10. */ RAFT_API void raft_fixture_set_disk_latency(struct raft_fixture *f, unsigned i, unsigned msecs); /** * Set the persisted term of the @i'th server. */ RAFT_API void raft_fixture_set_term(struct raft_fixture *f, unsigned i, raft_term term); /** * Set the most recent persisted snapshot on the @i'th server. */ RAFT_API void raft_fixture_set_snapshot(struct raft_fixture *f, unsigned i, struct raft_snapshot *snapshot); /** * Add an entry to the persisted entries of the @i'th server. */ RAFT_API void raft_fixture_add_entry(struct raft_fixture *f, unsigned i, struct raft_entry *entry); /** * Inject an I/O failure that will be triggered on the @i'th server after @delay * I/O requests and occur @repeat times. */ RAFT_API void raft_fixture_io_fault(struct raft_fixture *f, unsigned i, int delay, int repeat); /** * Return the number of messages of the given type that the @i'th server has * successfully sent so far. */ RAFT_API unsigned raft_fixture_n_send(struct raft_fixture *f, unsigned i, int type); /** * Return the number of messages of the given type that the @i'th server has * received so far. 
*/ RAFT_API unsigned raft_fixture_n_recv(struct raft_fixture *f, unsigned i, int type); #endif /* RAFT_FIXTURE_H */ raft-0.11.3/include/raft/uv.h000066400000000000000000000211121415614527300157170ustar00rootroot00000000000000#ifndef RAFT_UV_H #define RAFT_UV_H #include #include "../raft.h" struct raft_uv_transport; /** * Configure the given @raft_io instance to use a libuv-based I/O * implementation. * * The @dir path will be copied, and its memory can possibly be released once * this function returns. * * Return #RAFT_NAMETOOLONG if @dir exceeds the size of the internal buffer * that should hold it * * Return #RAFT_NOTFOUND if @dir does not exist. * * Return #RAFT_INVALID if @dir exists but it's not a directory. * * The implementation of metadata and log persistency is virtually the same as * the one found in LogCabin [0]. * * The disk files consist of metadata files, closed segments, and open * segments. Metadata files are used to track Raft metadata, such as the * server's current term, vote, and log's start index. Segments contain * contiguous entries that are part of the log. Closed segments are never * written to again (but may be renamed and truncated if a suffix of the log is * truncated). Open segments are where newly appended entries go. Once an open * segment reaches the maximum allowed size, it is closed and a new one is used. * * Metadata files are named "metadata1" and "metadata2". The code alternates * between these so that there is always at least one readable metadata file. * On boot, the readable metadata file with the higher version number is used. * * The format of a metadata file is: * * [8 bytes] Format (currently 1). * [8 bytes] Incremental version number. * [8 bytes] Current term. * [8 bytes] ID of server we voted for. * * Closed segments are named by the format string "%lu-%lu" with their * start and end indexes, both inclusive. 
Closed segments always contain at * least one entry; the end index is always at least as large as the start * index. Closed segment files may occasionally include data past their * filename's end index (these are ignored but a warning is logged). This can * happen if the suffix of the segment is truncated and a crash occurs at an * inopportune time (the segment file is first renamed, then truncated, and a * crash occurs in between). * * Open segments are named by the format string "open-%lu" with a unique * number. These should not exist when the server shuts down cleanly, but they * exist while the server is running and may be left around during a crash. * Open segments either contain entries which come after the last closed * segment or are full of zeros. When the server crashes while appending to an * open segment, the end of that file may be corrupt. We can't distinguish * between a corrupt file and a partially written entry. The code assumes it's * a partially written entry, logs a warning, and ignores it. * * Truncating a suffix of the log will remove all entries that are no longer * part of the log. Truncating a prefix of the log will only remove complete * segments that are before the new log start index. For example, if a * segment has entries 10 through 20 and the prefix of the log is truncated to * start at entry 15, that entire segment will be retained. * * Each segment file starts with a segment header, which currently contains * just an 8-byte version number for the format of that segment. The current * format (version 1) is just a concatenation of serialized entry batches. * * Each batch has the following format: * * [4 bytes] CRC32 checksum of the batch header, little endian. * [4 bytes] CRC32 checksum of the batch data, little endian. * [ ... ] Batch (as described in @raft_decode_entries_batch). 
* * [0] https://github.com/logcabin/logcabin/blob/master/Storage/SegmentedLog.h */ RAFT_API int raft_uv_init(struct raft_io *io, struct uv_loop_s *loop, const char *dir, struct raft_uv_transport *transport); /** * Release any memory allocated internally. */ RAFT_API void raft_uv_close(struct raft_io *io); /** * Set the block size that will be used for direct I/O. * * The default is to automatically detect the appropriate block size. */ RAFT_API void raft_uv_set_block_size(struct raft_io *io, size_t size); /** * Set the maximum initial size of newly created open segments. * * If the given size is not a multiple of the block size, the actual size will * be reduced to the closest multiple. * * The default is 8 megabytes. */ RAFT_API void raft_uv_set_segment_size(struct raft_io *io, size_t size); /** * Turn snapshot compression on or off. * Returns non-0 on failure, this can e.g. happen when compression is requested * while no suitable compression library is found. * * By default snapshots are compressed if the appropriate libraries are found. */ RAFT_API int raft_uv_set_snapshot_compression(struct raft_io *io, bool compressed); /** * Set how many milliseconds to wait between subsequent retries when * establishing a connection with another server. The default is 1000 * milliseconds. */ RAFT_API void raft_uv_set_connect_retry_delay(struct raft_io *io, unsigned msecs); /** * Emit low-level debug messages using the given tracer. */ RAFT_API void raft_uv_set_tracer(struct raft_io *io, struct raft_tracer *tracer); /** * Callback invoked by the transport implementation when a new incoming * connection has been established. * * No references to @address must be kept after this function returns. * * Ownership of @stream is transferred to user code, which is responsible of * uv_close()'ing it and then releasing its memory. 
*/ typedef void (*raft_uv_accept_cb)(struct raft_uv_transport *t, raft_id id, const char *address, struct uv_stream_s *stream); /** * Callback invoked by the transport implementation after a connect request has * completed. If status is #0, then @stream will point to a valid handle, which * user code is then responsible to uv_close() and then release. */ struct raft_uv_connect; typedef void (*raft_uv_connect_cb)(struct raft_uv_connect *req, struct uv_stream_s *stream, int status); /** * Handle to a connect request. */ struct raft_uv_connect { void *data; /* User data */ raft_uv_connect_cb cb; /* Callback */ }; /** * Callback invoked by the transport implementation after a close request is * completed. */ typedef void (*raft_uv_transport_close_cb)(struct raft_uv_transport *t); /** * Interface to establish outgoing connections to other Raft servers and to * accept incoming connections from them. */ struct raft_uv_transport { /** * User defined data. */ void *data; /** * Implementation-defined state. */ void *impl; /** * Human-readable message providing diagnostic information about the last * error occurred. */ char errmsg[RAFT_ERRMSG_BUF_SIZE]; /** * Initialize the transport with the given server's identity. */ int (*init)(struct raft_uv_transport *t, raft_id id, const char *address); /** * Start listening for incoming connections. * * Once a new connection is accepted, the @cb callback passed in the * initializer must be invoked with the relevant details of the connecting * Raft server. */ int (*listen)(struct raft_uv_transport *t, raft_uv_accept_cb cb); /** * Connect to the server with the given ID and address. * * The @cb callback must be invoked when the connection has been established * or the connection attempt has failed. The memory pointed by @req can be * released only after @cb has fired. */ int (*connect)(struct raft_uv_transport *t, struct raft_uv_connect *req, raft_id id, const char *address, raft_uv_connect_cb cb); /** * Close the transport. 
* * The implementation must: * * - Stop accepting incoming connections. The @cb callback passed to @listen * must not be invoked anymore. * * - Cancel all pending @connect requests. * * - Invoke the @cb callback passed to this method once it's safe to release * the memory of the transport object. */ void (*close)(struct raft_uv_transport *t, raft_uv_transport_close_cb cb); }; /** * Init a transport interface that uses TCP sockets. */ RAFT_API int raft_uv_tcp_init(struct raft_uv_transport *t, struct uv_loop_s *loop); /** * Release any memory allocated internally. */ RAFT_API void raft_uv_tcp_close(struct raft_uv_transport *t); #endif /* RAFT_UV_H */ raft-0.11.3/m4/000077500000000000000000000000001415614527300130605ustar00rootroot00000000000000raft-0.11.3/m4/.gitignore000066400000000000000000000003261415614527300150510ustar00rootroot00000000000000*.m4 !attributes.m4 !ax_ac_append_to_file.m4 !ax_ac_print_to_file.m4 !ax_add_am_macro_static.m4 !ax_am_macros_static.m4 !ax_check_gnu_make.m4 !ax_code_coverage.m4 !ax_compare_version.m4 !ax_file_escapes.m4 !pkg.m4 raft-0.11.3/m4/attributes.m4000066400000000000000000000240211415614527300155070ustar00rootroot00000000000000dnl Macros to check the presence of generic (non-typed) symbols. dnl Copyright (c) 2006-2008 Diego Pettenò dnl Copyright (c) 2006-2008 xine project dnl Copyright (c) 2012 Lucas De Marchi dnl dnl This program is free software; you can redistribute it and/or modify dnl it under the terms of the GNU General Public License as published by dnl the Free Software Foundation; either version 2, or (at your option) dnl any later version. dnl dnl This program is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the dnl GNU General Public License for more details. 
dnl dnl You should have received a copy of the GNU General Public License dnl along with this program; if not, write to the Free Software dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA dnl 02110-1301, USA. dnl dnl As a special exception, the copyright owners of the dnl macro gives unlimited permission to copy, distribute and modify the dnl configure scripts that are the output of Autoconf when processing the dnl Macro. You need not follow the terms of the GNU General Public dnl License when using or distributing such scripts, even though portions dnl of the text of the Macro appear in them. The GNU General Public dnl License (GPL) does govern all other use of the material that dnl constitutes the Autoconf Macro. dnl dnl This special exception to the GPL applies to versions of the dnl Autoconf Macro released by this project. When you make and dnl distribute a modified version of the Autoconf Macro, you may extend dnl this special exception to the GPL to apply to your modified version as dnl well. dnl Check if FLAG in ENV-VAR is supported by compiler and append it dnl to WHERE-TO-APPEND variable. Note that we invert -Wno-* checks to dnl -W* as gcc cannot test for negated warnings. If a C snippet is passed, dnl use it, otherwise use a simple main() definition that just returns 0. 
dnl CC_CHECK_FLAG_APPEND([WHERE-TO-APPEND], [ENV-VAR], [FLAG], [C-SNIPPET]) AC_DEFUN([CC_CHECK_FLAG_APPEND], [ AC_CACHE_CHECK([if $CC supports flag $3 in envvar $2], AS_TR_SH([cc_cv_$2_$3]), [eval "AS_TR_SH([cc_save_$2])='${$2}'" eval "AS_TR_SH([$2])='${cc_save_$2} -Werror `echo "$3" | sed 's/^-Wno-/-W/'`'" AC_LINK_IFELSE([AC_LANG_SOURCE(ifelse([$4], [], [int main(void) { return 0; } ], [$4]))], [eval "AS_TR_SH([cc_cv_$2_$3])='yes'"], [eval "AS_TR_SH([cc_cv_$2_$3])='no'"]) eval "AS_TR_SH([$2])='$cc_save_$2'"]) AS_IF([eval test x$]AS_TR_SH([cc_cv_$2_$3])[ = xyes], [eval "$1='${$1} $3'"]) ]) dnl CC_CHECK_FLAGS_APPEND([WHERE-TO-APPEND], [ENV-VAR], [FLAG1 FLAG2], [C-SNIPPET]) AC_DEFUN([CC_CHECK_FLAGS_APPEND], [ for flag in [$3]; do CC_CHECK_FLAG_APPEND([$1], [$2], $flag, [$4]) done ]) dnl Check if the flag is supported by linker (cacheable) dnl CC_CHECK_LDFLAGS([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) AC_DEFUN([CC_CHECK_LDFLAGS], [ AC_CACHE_CHECK([if $CC supports $1 flag], AS_TR_SH([cc_cv_ldflags_$1]), [ac_save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $1" AC_LINK_IFELSE([int main() { return 1; }], [eval "AS_TR_SH([cc_cv_ldflags_$1])='yes'"], [eval "AS_TR_SH([cc_cv_ldflags_$1])="]) LDFLAGS="$ac_save_LDFLAGS" ]) AS_IF([eval test x$]AS_TR_SH([cc_cv_ldflags_$1])[ = xyes], [$2], [$3]) ]) dnl define the LDFLAGS_NOUNDEFINED variable with the correct value for dnl the current linker to avoid undefined references in a shared object. AC_DEFUN([CC_NOUNDEFINED], [ dnl We check $host for which systems to enable this for. AC_REQUIRE([AC_CANONICAL_HOST]) case $host in dnl FreeBSD (et al.) does not complete linking for shared objects when pthreads dnl are requested, as different implementations are present; to avoid problems dnl use -Wl,-z,defs only for those platform not behaving this way. *-freebsd* | *-openbsd*) ;; *) dnl First of all check for the --no-undefined variant of GNU ld. 
This allows dnl for a much more readable command line, so that people can understand what dnl it does without going to look for what the heck -z defs does. for possible_flags in "-Wl,--no-undefined" "-Wl,-z,defs"; do CC_CHECK_LDFLAGS([$possible_flags], [LDFLAGS_NOUNDEFINED="$possible_flags"]) break done ;; esac AC_SUBST([LDFLAGS_NOUNDEFINED]) ]) dnl Check for a -Werror flag or equivalent. -Werror is the GCC dnl and ICC flag that tells the compiler to treat all the warnings dnl as fatal. We usually need this option to make sure that some dnl constructs (like attributes) are not simply ignored. dnl dnl Other compilers don't support -Werror per se, but they support dnl an equivalent flag: dnl - Sun Studio compiler supports -errwarn=%all AC_DEFUN([CC_CHECK_WERROR], [ AC_CACHE_CHECK( [for $CC way to treat warnings as errors], [cc_cv_werror], [CC_CHECK_CFLAGS_SILENT([-Werror], [cc_cv_werror=-Werror], [CC_CHECK_CFLAGS_SILENT([-errwarn=%all], [cc_cv_werror=-errwarn=%all])]) ]) ]) AC_DEFUN([CC_CHECK_ATTRIBUTE], [ AC_REQUIRE([CC_CHECK_WERROR]) AC_CACHE_CHECK([if $CC supports __attribute__(( ifelse([$2], , [$1], [$2]) ))], AS_TR_SH([cc_cv_attribute_$1]), [ac_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $cc_cv_werror" AC_COMPILE_IFELSE([AC_LANG_SOURCE([$3])], [eval "AS_TR_SH([cc_cv_attribute_$1])='yes'"], [eval "AS_TR_SH([cc_cv_attribute_$1])='no'"]) CFLAGS="$ac_save_CFLAGS" ]) AS_IF([eval test x$]AS_TR_SH([cc_cv_attribute_$1])[ = xyes], [AC_DEFINE( AS_TR_CPP([SUPPORT_ATTRIBUTE_$1]), 1, [Define this if the compiler supports __attribute__(( ifelse([$2], , [$1], [$2]) ))] ) $4], [$5]) ]) AC_DEFUN([CC_ATTRIBUTE_CONSTRUCTOR], [ CC_CHECK_ATTRIBUTE( [constructor],, [void __attribute__((constructor)) ctor() { int a; }], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_FORMAT], [ CC_CHECK_ATTRIBUTE( [format], [format(printf, n, n)], [void __attribute__((format(printf, 1, 2))) printflike(const char *fmt, ...) 
{ fmt = (void *)0; }], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_FORMAT_ARG], [ CC_CHECK_ATTRIBUTE( [format_arg], [format_arg(printf)], [char *__attribute__((format_arg(1))) gettextlike(const char *fmt) { fmt = (void *)0; }], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_VISIBILITY], [ CC_CHECK_ATTRIBUTE( [visibility_$1], [visibility("$1")], [void __attribute__((visibility("$1"))) $1_function() { }], [$2], [$3]) ]) AC_DEFUN([CC_ATTRIBUTE_NONNULL], [ CC_CHECK_ATTRIBUTE( [nonnull], [nonnull()], [void __attribute__((nonnull())) some_function(void *foo, void *bar) { foo = (void*)0; bar = (void*)0; }], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_UNUSED], [ CC_CHECK_ATTRIBUTE( [unused], , [void some_function(void *foo, __attribute__((unused)) void *bar);], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_SENTINEL], [ CC_CHECK_ATTRIBUTE( [sentinel], , [void some_function(void *foo, ...) __attribute__((sentinel));], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_DEPRECATED], [ CC_CHECK_ATTRIBUTE( [deprecated], , [void some_function(void *foo, ...) 
__attribute__((deprecated));], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_ALIAS], [ CC_CHECK_ATTRIBUTE( [alias], [weak, alias], [void other_function(void *foo) { } void some_function(void *foo) __attribute__((weak, alias("other_function")));], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_MALLOC], [ CC_CHECK_ATTRIBUTE( [malloc], , [void * __attribute__((malloc)) my_alloc(int n);], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_PACKED], [ CC_CHECK_ATTRIBUTE( [packed], , [struct astructure { char a; int b; long c; void *d; } __attribute__((packed));], [$1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_CONST], [ CC_CHECK_ATTRIBUTE( [const], , [int __attribute__((const)) twopow(int n) { return 1 << n; } ], [$1], [$2]) ]) AC_DEFUN([CC_FLAG_VISIBILITY], [ AC_REQUIRE([CC_CHECK_WERROR]) AC_CACHE_CHECK([if $CC supports -fvisibility=hidden], [cc_cv_flag_visibility], [cc_flag_visibility_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $cc_cv_werror" CC_CHECK_CFLAGS_SILENT([-fvisibility=hidden], cc_cv_flag_visibility='yes', cc_cv_flag_visibility='no') CFLAGS="$cc_flag_visibility_save_CFLAGS"]) AS_IF([test "x$cc_cv_flag_visibility" = "xyes"], [AC_DEFINE([SUPPORT_FLAG_VISIBILITY], 1, [Define this if the compiler supports the -fvisibility flag]) $1], [$2]) ]) AC_DEFUN([CC_FUNC_EXPECT], [ AC_REQUIRE([CC_CHECK_WERROR]) AC_CACHE_CHECK([if compiler has __builtin_expect function], [cc_cv_func_expect], [ac_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $cc_cv_werror" AC_COMPILE_IFELSE([AC_LANG_SOURCE( [int some_function() { int a = 3; return (int)__builtin_expect(a, 3); }])], [cc_cv_func_expect=yes], [cc_cv_func_expect=no]) CFLAGS="$ac_save_CFLAGS" ]) AS_IF([test "x$cc_cv_func_expect" = "xyes"], [AC_DEFINE([SUPPORT__BUILTIN_EXPECT], 1, [Define this if the compiler supports __builtin_expect() function]) $1], [$2]) ]) AC_DEFUN([CC_ATTRIBUTE_ALIGNED], [ AC_REQUIRE([CC_CHECK_WERROR]) AC_CACHE_CHECK([highest __attribute__ ((aligned ())) supported], [cc_cv_attribute_aligned], [ac_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $cc_cv_werror" for 
cc_attribute_align_try in 64 32 16 8 4 2; do AC_COMPILE_IFELSE([AC_LANG_SOURCE([ int main() { static char c __attribute__ ((aligned($cc_attribute_align_try))) = 0; return c; }])], [cc_cv_attribute_aligned=$cc_attribute_align_try; break]) done CFLAGS="$ac_save_CFLAGS" ]) if test "x$cc_cv_attribute_aligned" != "x"; then AC_DEFINE_UNQUOTED([ATTRIBUTE_ALIGNED_MAX], [$cc_cv_attribute_aligned], [Define the highest alignment supported]) fi ]) raft-0.11.3/m4/ax_ac_append_to_file.m4000066400000000000000000000016221415614527300174260ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_ac_append_to_file.html # =========================================================================== # # SYNOPSIS # # AX_AC_APPEND_TO_FILE([FILE],[DATA]) # # DESCRIPTION # # Appends the specified data to the specified Autoconf is run. If you want # to append to a file when configure is run use AX_APPEND_TO_FILE instead. # # LICENSE # # Copyright (c) 2009 Allan Caffee # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 10 AC_DEFUN([AX_AC_APPEND_TO_FILE],[ AC_REQUIRE([AX_FILE_ESCAPES]) m4_esyscmd( AX_FILE_ESCAPES [ printf "%s" "$2" >> "$1" ]) ]) raft-0.11.3/m4/ax_ac_print_to_file.m4000066400000000000000000000016111415614527300173110ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_ac_print_to_file.html # =========================================================================== # # SYNOPSIS # # AX_AC_PRINT_TO_FILE([FILE],[DATA]) # # DESCRIPTION # # Writes the specified data to the specified file when Autoconf is run. 
If # you want to print to a file when configure is run use AX_PRINT_TO_FILE # instead. # # LICENSE # # Copyright (c) 2009 Allan Caffee # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 10 AC_DEFUN([AX_AC_PRINT_TO_FILE],[ m4_esyscmd( AC_REQUIRE([AX_FILE_ESCAPES]) [ printf "%s" "$2" > "$1" ]) ]) raft-0.11.3/m4/ax_add_am_macro_static.m4000066400000000000000000000015251415614527300177520ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_add_am_macro_static.html # =========================================================================== # # SYNOPSIS # # AX_ADD_AM_MACRO_STATIC([RULE]) # # DESCRIPTION # # Adds the specified rule to $AMINCLUDE. # # LICENSE # # Copyright (c) 2009 Tom Howard # Copyright (c) 2009 Allan Caffee # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 8 AC_DEFUN([AX_ADD_AM_MACRO_STATIC],[ AC_REQUIRE([AX_AM_MACROS_STATIC]) AX_AC_APPEND_TO_FILE(AMINCLUDE_STATIC,[$1]) ]) raft-0.11.3/m4/ax_am_macros_static.m4000066400000000000000000000021251415614527300173220ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_am_macros_static.html # =========================================================================== # # SYNOPSIS # # AX_AM_MACROS_STATIC # # DESCRIPTION # # Adds support for macros that create Automake rules. You must manually # add the following line # # include $(top_srcdir)/aminclude_static.am # # to your Makefile.am files. 
# # LICENSE # # Copyright (c) 2009 Tom Howard # Copyright (c) 2009 Allan Caffee # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 11 AC_DEFUN([AMINCLUDE_STATIC],[aminclude_static.am]) AC_DEFUN([AX_AM_MACROS_STATIC], [ AX_AC_PRINT_TO_FILE(AMINCLUDE_STATIC,[ # ]AMINCLUDE_STATIC[ generated automatically by Autoconf # from AX_AM_MACROS_STATIC on ]m4_esyscmd([LC_ALL=C date])[ ]) ]) raft-0.11.3/m4/ax_check_gnu_make.m4000066400000000000000000000077261415614527300167510ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_check_gnu_make.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_GNU_MAKE([run-if-true],[run-if-false]) # # DESCRIPTION # # This macro searches for a GNU version of make. If a match is found: # # * The makefile variable `ifGNUmake' is set to the empty string, otherwise # it is set to "#". This is useful for including a special features in a # Makefile, which cannot be handled by other versions of make. # * The makefile variable `ifnGNUmake' is set to #, otherwise # it is set to the empty string. This is useful for including a special # features in a Makefile, which can be handled # by other versions of make or to specify else like clause. # * The variable `_cv_gnu_make_command` is set to the command to invoke # GNU make if it exists, the empty string otherwise. # * The variable `ax_cv_gnu_make_command` is set to the command to invoke # GNU make by copying `_cv_gnu_make_command`, otherwise it is unset. # * If GNU Make is found, its version is extracted from the output of # `make --version` as the last field of a record of space-separated # columns and saved into the variable `ax_check_gnu_make_version`. 
# * Additionally if GNU Make is found, run shell code run-if-true # else run shell code run-if-false. # # Here is an example of its use: # # Makefile.in might contain: # # # A failsafe way of putting a dependency rule into a makefile # $(DEPEND): # $(CC) -MM $(srcdir)/*.c > $(DEPEND) # # @ifGNUmake@ ifeq ($(DEPEND),$(wildcard $(DEPEND))) # @ifGNUmake@ include $(DEPEND) # @ifGNUmake@ else # fallback code # @ifGNUmake@ endif # # Then configure.in would normally contain: # # AX_CHECK_GNU_MAKE() # AC_OUTPUT(Makefile) # # Then perhaps to cause gnu make to override any other make, we could do # something like this (note that GNU make always looks for GNUmakefile # first): # # if ! test x$_cv_gnu_make_command = x ; then # mv Makefile GNUmakefile # echo .DEFAULT: > Makefile ; # echo \ $_cv_gnu_make_command \$@ >> Makefile; # fi # # Then, if any (well almost any) other make is called, and GNU make also # exists, then the other make wraps the GNU make. # # LICENSE # # Copyright (c) 2008 John Darrington # Copyright (c) 2015 Enrico M. Crisostomo # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. 
#serial 11 AC_DEFUN([AX_CHECK_GNU_MAKE],dnl [AC_PROG_AWK AC_CACHE_CHECK([for GNU make],[_cv_gnu_make_command],[dnl _cv_gnu_make_command="" ; dnl Search all the common names for GNU make for a in "$MAKE" make gmake gnumake ; do if test -z "$a" ; then continue ; fi ; if "$a" --version 2> /dev/null | grep GNU 2>&1 > /dev/null ; then _cv_gnu_make_command=$a ; AX_CHECK_GNU_MAKE_HEADLINE=$("$a" --version 2> /dev/null | grep "GNU Make") ax_check_gnu_make_version=$(echo ${AX_CHECK_GNU_MAKE_HEADLINE} | ${AWK} -F " " '{ print $(NF); }') break ; fi done ;]) dnl If there was a GNU version, then set @ifGNUmake@ to the empty string, '#' otherwise AS_VAR_IF([_cv_gnu_make_command], [""], [AS_VAR_SET([ifGNUmake], ["#"])], [AS_VAR_SET([ifGNUmake], [""])]) AS_VAR_IF([_cv_gnu_make_command], [""], [AS_VAR_SET([ifnGNUmake], [""])], [AS_VAR_SET([ifGNUmake], ["#"])]) AS_VAR_IF([_cv_gnu_make_command], [""], [AS_UNSET(ax_cv_gnu_make_command)], [AS_VAR_SET([ax_cv_gnu_make_command], [${_cv_gnu_make_command}])]) AS_VAR_IF([_cv_gnu_make_command], [""],[$2],[$1]) AC_SUBST([ifGNUmake]) AC_SUBST([ifnGNUmake]) ]) raft-0.11.3/m4/ax_code_coverage.m4000066400000000000000000000276141415614527300166110ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_code_coverage.html # =========================================================================== # # SYNOPSIS # # AX_CODE_COVERAGE() # # DESCRIPTION # # Defines CODE_COVERAGE_CPPFLAGS, CODE_COVERAGE_CFLAGS, # CODE_COVERAGE_CXXFLAGS and CODE_COVERAGE_LIBS which should be included # in the CPPFLAGS, CFLAGS CXXFLAGS and LIBS/LIBADD variables of every # build target (program or library) which should be built with code # coverage support. Also add rules using AX_ADD_AM_MACRO_STATIC; and # $enable_code_coverage which can be used in subsequent configure output. 
# CODE_COVERAGE_ENABLED is defined and substituted, and corresponds to the # value of the --enable-code-coverage option, which defaults to being # disabled. # # Test also for gcov program and create GCOV variable that could be # substituted. # # Note that all optimization flags in CFLAGS must be disabled when code # coverage is enabled. # # Usage example: # # configure.ac: # # AX_CODE_COVERAGE # # Makefile.am: # # include $(top_srcdir)/aminclude_static.am # # my_program_LIBS = ... $(CODE_COVERAGE_LIBS) ... # my_program_CPPFLAGS = ... $(CODE_COVERAGE_CPPFLAGS) ... # my_program_CFLAGS = ... $(CODE_COVERAGE_CFLAGS) ... # my_program_CXXFLAGS = ... $(CODE_COVERAGE_CXXFLAGS) ... # # clean-local: code-coverage-clean # distclean-local: code-coverage-dist-clean # # This results in a "check-code-coverage" rule being added to any # Makefile.am which do "include $(top_srcdir)/aminclude_static.am" # (assuming the module has been configured with --enable-code-coverage). # Running `make check-code-coverage` in that directory will run the # module's test suite (`make check`) and build a code coverage report # detailing the code which was touched, then print the URI for the report. # # This code was derived from Makefile.decl in GLib, originally licensed # under LGPLv2.1+. # # LICENSE # # Copyright (c) 2012, 2016 Philip Withnall # Copyright (c) 2012 Xan Lopez # Copyright (c) 2012 Christian Persch # Copyright (c) 2012 Paolo Borelli # Copyright (c) 2012 Dan Winship # Copyright (c) 2015,2018 Bastien ROUCARIES # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # This library is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser # General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . #serial 32 m4_define(_AX_CODE_COVERAGE_RULES,[ AX_ADD_AM_MACRO_STATIC([ # Code coverage # # Optional: # - CODE_COVERAGE_DIRECTORY: Top-level directory for code coverage reporting. # Multiple directories may be specified, separated by whitespace. # (Default: \$(top_builddir)) # - CODE_COVERAGE_OUTPUT_FILE: Filename and path for the .info file generated # by lcov for code coverage. (Default: # \$(PACKAGE_NAME)-\$(PACKAGE_VERSION)-coverage.info) # - CODE_COVERAGE_OUTPUT_DIRECTORY: Directory for generated code coverage # reports to be created. (Default: # \$(PACKAGE_NAME)-\$(PACKAGE_VERSION)-coverage) # - CODE_COVERAGE_BRANCH_COVERAGE: Set to 1 to enforce branch coverage, # set to 0 to disable it and leave empty to stay with the default. # (Default: empty) # - CODE_COVERAGE_LCOV_SHOPTS_DEFAULT: Extra options shared between both lcov # instances. (Default: based on $CODE_COVERAGE_BRANCH_COVERAGE) # - CODE_COVERAGE_LCOV_SHOPTS: Extra options to shared between both lcov # instances. (Default: $CODE_COVERAGE_LCOV_SHOPTS_DEFAULT) # - CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH: --gcov-tool pathtogcov # - CODE_COVERAGE_LCOV_OPTIONS_DEFAULT: Extra options to pass to the # collecting lcov instance. (Default: $CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH) # - CODE_COVERAGE_LCOV_OPTIONS: Extra options to pass to the collecting lcov # instance. (Default: $CODE_COVERAGE_LCOV_OPTIONS_DEFAULT) # - CODE_COVERAGE_LCOV_RMOPTS_DEFAULT: Extra options to pass to the filtering # lcov instance. (Default: empty) # - CODE_COVERAGE_LCOV_RMOPTS: Extra options to pass to the filtering lcov # instance. (Default: $CODE_COVERAGE_LCOV_RMOPTS_DEFAULT) # - CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT: Extra options to pass to the # genhtml instance. 
(Default: based on $CODE_COVERAGE_BRANCH_COVERAGE) # - CODE_COVERAGE_GENHTML_OPTIONS: Extra options to pass to the genhtml # instance. (Default: $CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT) # - CODE_COVERAGE_IGNORE_PATTERN: Extra glob pattern of files to ignore # # The generated report will be titled using the \$(PACKAGE_NAME) and # \$(PACKAGE_VERSION). In order to add the current git hash to the title, # use the git-version-gen script, available online. # Optional variables # run only on top dir if CODE_COVERAGE_ENABLED ifeq (\$(abs_builddir), \$(abs_top_builddir)) CODE_COVERAGE_DIRECTORY ?= \$(top_builddir) CODE_COVERAGE_OUTPUT_FILE ?= \$(PACKAGE_NAME)-\$(PACKAGE_VERSION)-coverage.info CODE_COVERAGE_OUTPUT_DIRECTORY ?= \$(PACKAGE_NAME)-\$(PACKAGE_VERSION)-coverage CODE_COVERAGE_BRANCH_COVERAGE ?= CODE_COVERAGE_LCOV_SHOPTS_DEFAULT ?= \$(if \$(CODE_COVERAGE_BRANCH_COVERAGE),\ --rc lcov_branch_coverage=\$(CODE_COVERAGE_BRANCH_COVERAGE)) CODE_COVERAGE_LCOV_SHOPTS ?= \$(CODE_COVERAGE_LCOV_SHOPTS_DEFAULT) CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH ?= --gcov-tool \"\$(GCOV)\" CODE_COVERAGE_LCOV_OPTIONS_DEFAULT ?= \$(CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH) CODE_COVERAGE_LCOV_OPTIONS ?= \$(CODE_COVERAGE_LCOV_OPTIONS_DEFAULT) CODE_COVERAGE_LCOV_RMOPTS_DEFAULT ?= CODE_COVERAGE_LCOV_RMOPTS ?= \$(CODE_COVERAGE_LCOV_RMOPTS_DEFAULT) CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT ?=\ \$(if \$(CODE_COVERAGE_BRANCH_COVERAGE),\ --rc genhtml_branch_coverage=\$(CODE_COVERAGE_BRANCH_COVERAGE)) CODE_COVERAGE_GENHTML_OPTIONS ?= \$(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT) CODE_COVERAGE_IGNORE_PATTERN ?= GITIGNOREFILES = \$(GITIGNOREFILES) \$(CODE_COVERAGE_OUTPUT_FILE) \$(CODE_COVERAGE_OUTPUT_DIRECTORY) code_coverage_v_lcov_cap = \$(code_coverage_v_lcov_cap_\$(V)) code_coverage_v_lcov_cap_ = \$(code_coverage_v_lcov_cap_\$(AM_DEFAULT_VERBOSITY)) code_coverage_v_lcov_cap_0 = @echo \" LCOV --capture\" \$(CODE_COVERAGE_OUTPUT_FILE); code_coverage_v_lcov_ign = \$(code_coverage_v_lcov_ign_\$(V)) 
code_coverage_v_lcov_ign_ = \$(code_coverage_v_lcov_ign_\$(AM_DEFAULT_VERBOSITY)) code_coverage_v_lcov_ign_0 = @echo \" LCOV --remove /tmp/*\" \$(CODE_COVERAGE_IGNORE_PATTERN); code_coverage_v_genhtml = \$(code_coverage_v_genhtml_\$(V)) code_coverage_v_genhtml_ = \$(code_coverage_v_genhtml_\$(AM_DEFAULT_VERBOSITY)) code_coverage_v_genhtml_0 = @echo \" GEN \" \"\$(CODE_COVERAGE_OUTPUT_DIRECTORY)\"; code_coverage_quiet = \$(code_coverage_quiet_\$(V)) code_coverage_quiet_ = \$(code_coverage_quiet_\$(AM_DEFAULT_VERBOSITY)) code_coverage_quiet_0 = --quiet # sanitizes the test-name: replaces with underscores: dashes and dots code_coverage_sanitize = \$(subst -,_,\$(subst .,_,\$(1))) # Use recursive makes in order to ignore errors during check check-code-coverage: -\$(AM_V_at)\$(MAKE) \$(AM_MAKEFLAGS) -k check \$(AM_V_at)\$(MAKE) \$(AM_MAKEFLAGS) code-coverage-capture # Capture code coverage data code-coverage-capture: code-coverage-capture-hook \$(code_coverage_v_lcov_cap)\$(LCOV) \$(code_coverage_quiet) \$(addprefix --directory ,\$(CODE_COVERAGE_DIRECTORY)) --capture --output-file \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" --test-name \"\$(call code_coverage_sanitize,\$(PACKAGE_NAME)-\$(PACKAGE_VERSION))\" --no-checksum --compat-libtool \$(CODE_COVERAGE_LCOV_SHOPTS) \$(CODE_COVERAGE_LCOV_OPTIONS) \$(code_coverage_v_lcov_ign)\$(LCOV) \$(code_coverage_quiet) \$(addprefix --directory ,\$(CODE_COVERAGE_DIRECTORY)) --remove \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \"/tmp/*\" \$(CODE_COVERAGE_IGNORE_PATTERN) --output-file \"\$(CODE_COVERAGE_OUTPUT_FILE)\" \$(CODE_COVERAGE_LCOV_SHOPTS) \$(CODE_COVERAGE_LCOV_RMOPTS) -@rm -f \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \$(code_coverage_v_genhtml)LANG=C \$(GENHTML) \$(code_coverage_quiet) \$(addprefix --prefix ,\$(CODE_COVERAGE_DIRECTORY)) --output-directory \"\$(CODE_COVERAGE_OUTPUT_DIRECTORY)\" --title \"\$(PACKAGE_NAME)-\$(PACKAGE_VERSION) Code Coverage\" --legend --show-details \"\$(CODE_COVERAGE_OUTPUT_FILE)\" 
\$(CODE_COVERAGE_GENHTML_OPTIONS) @echo \"file://\$(abs_builddir)/\$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html\" code-coverage-clean: -\$(LCOV) --directory \$(top_builddir) -z -rm -rf \"\$(CODE_COVERAGE_OUTPUT_FILE)\" \"\$(CODE_COVERAGE_OUTPUT_FILE).tmp\" \"\$(CODE_COVERAGE_OUTPUT_DIRECTORY)\" -find . \\( -name \"*.gcda\" -o -name \"*.gcno\" -o -name \"*.gcov\" \\) -delete code-coverage-dist-clean: A][M_DISTCHECK_CONFIGURE_FLAGS = \$(A][M_DISTCHECK_CONFIGURE_FLAGS) --disable-code-coverage else # ifneq (\$(abs_builddir), \$(abs_top_builddir)) check-code-coverage: code-coverage-capture: code-coverage-capture-hook code-coverage-clean: code-coverage-dist-clean: endif # ifeq (\$(abs_builddir), \$(abs_top_builddir)) else #! CODE_COVERAGE_ENABLED # Use recursive makes in order to ignore errors during check check-code-coverage: @echo \"Need to reconfigure with --enable-code-coverage\" # Capture code coverage data code-coverage-capture: code-coverage-capture-hook @echo \"Need to reconfigure with --enable-code-coverage\" code-coverage-clean: code-coverage-dist-clean: endif #CODE_COVERAGE_ENABLED # Hook rule executed before code-coverage-capture, overridable by the user code-coverage-capture-hook: .PHONY: check-code-coverage code-coverage-capture code-coverage-dist-clean code-coverage-clean code-coverage-capture-hook ]) ]) AC_DEFUN([_AX_CODE_COVERAGE_ENABLED],[ AX_CHECK_GNU_MAKE([],[AC_MSG_ERROR([not using GNU make that is needed for coverage])]) AC_REQUIRE([AX_ADD_AM_MACRO_STATIC]) # check for gcov AC_CHECK_TOOL([GCOV], [$_AX_CODE_COVERAGE_GCOV_PROG_WITH], [:]) AS_IF([test "X$GCOV" = "X:"], [AC_MSG_ERROR([gcov is needed to do coverage])]) AC_SUBST([GCOV]) dnl Check if gcc is being used AS_IF([ test "$GCC" = "no" ], [ AC_MSG_ERROR([not compiling with gcc, which is required for gcov code coverage]) ]) AC_CHECK_PROG([LCOV], [lcov], [lcov]) AC_CHECK_PROG([GENHTML], [genhtml], [genhtml]) AS_IF([ test x"$LCOV" = x ], [ AC_MSG_ERROR([To enable code coverage reporting you must have 
lcov installed]) ]) AS_IF([ test x"$GENHTML" = x ], [ AC_MSG_ERROR([Could not find genhtml from the lcov package]) ]) dnl Build the code coverage flags dnl Define CODE_COVERAGE_LDFLAGS for backwards compatibility CODE_COVERAGE_CPPFLAGS="-DNDEBUG" CODE_COVERAGE_CFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" CODE_COVERAGE_CXXFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" CODE_COVERAGE_LIBS="-lgcov" AC_SUBST([CODE_COVERAGE_CPPFLAGS]) AC_SUBST([CODE_COVERAGE_CFLAGS]) AC_SUBST([CODE_COVERAGE_CXXFLAGS]) AC_SUBST([CODE_COVERAGE_LIBS]) ]) AC_DEFUN([AX_CODE_COVERAGE],[ dnl Check for --enable-code-coverage # allow to override gcov location AC_ARG_WITH([gcov], [AS_HELP_STRING([--with-gcov[=GCOV]], [use given GCOV for coverage (GCOV=gcov).])], [_AX_CODE_COVERAGE_GCOV_PROG_WITH=$with_gcov], [_AX_CODE_COVERAGE_GCOV_PROG_WITH=gcov]) AC_MSG_CHECKING([whether to build with code coverage support]) AC_ARG_ENABLE([code-coverage], AS_HELP_STRING([--enable-code-coverage], [Whether to enable code coverage support]),, enable_code_coverage=no) AM_CONDITIONAL([CODE_COVERAGE_ENABLED], [test "x$enable_code_coverage" = xyes]) AC_SUBST([CODE_COVERAGE_ENABLED], [$enable_code_coverage]) AC_MSG_RESULT($enable_code_coverage) AS_IF([ test "x$enable_code_coverage" = xyes ], [ _AX_CODE_COVERAGE_ENABLED ]) _AX_CODE_COVERAGE_RULES ]) raft-0.11.3/m4/ax_compare_version.m4000066400000000000000000000146531415614527300172160ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_compare_version.html # =========================================================================== # # SYNOPSIS # # AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) # # DESCRIPTION # # This macro compares two version strings. 
Due to the various number of # minor-version numbers that can exist, and the fact that string # comparisons are not compatible with numeric comparisons, this is not # necessarily trivial to do in a autoconf script. This macro makes doing # these comparisons easy. # # The six basic comparisons are available, as well as checking equality # limited to a certain number of minor-version levels. # # The operator OP determines what type of comparison to do, and can be one # of: # # eq - equal (test A == B) # ne - not equal (test A != B) # le - less than or equal (test A <= B) # ge - greater than or equal (test A >= B) # lt - less than (test A < B) # gt - greater than (test A > B) # # Additionally, the eq and ne operator can have a number after it to limit # the test to that number of minor versions. # # eq0 - equal up to the length of the shorter version # ne0 - not equal up to the length of the shorter version # eqN - equal up to N sub-version levels # neN - not equal up to N sub-version levels # # When the condition is true, shell commands ACTION-IF-TRUE are run, # otherwise shell commands ACTION-IF-FALSE are run. The environment # variable 'ax_compare_version' is always set to either 'true' or 'false' # as well. # # Examples: # # AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8]) # AX_COMPARE_VERSION([3.15],[lt],[3.15.8]) # # would both be true. # # AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8]) # AX_COMPARE_VERSION([3.15],[gt],[3.15.8]) # # would both be false. # # AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8]) # # would be true because it is only comparing two minor versions. # # AX_COMPARE_VERSION([3.15.7],[eq0],[3.15]) # # would be true because it is only comparing the lesser number of minor # versions of the two values. # # Note: The characters that separate the version numbers do not matter. An # empty string is the same as version 0. OP is evaluated by autoconf, not # configure, so must be a string, not a variable. 
# # The author would like to acknowledge Guido Draheim whose advice about # the m4_case and m4_ifvaln functions make this macro only include the # portions necessary to perform the specific comparison specified by the # OP argument in the final configure script. # # LICENSE # # Copyright (c) 2008 Tim Toolan # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 13 dnl ######################################################################### AC_DEFUN([AX_COMPARE_VERSION], [ AC_REQUIRE([AC_PROG_AWK]) # Used to indicate true or false condition ax_compare_version=false # Convert the two version strings to be compared into a format that # allows a simple string comparison. The end result is that a version # string of the form 1.12.5-r617 will be converted to the form # 0001001200050617. In other words, each number is zero padded to four # digits, and non digits are removed. AS_VAR_PUSHDEF([A],[ax_compare_version_A]) A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ -e 's/[[^0-9]]//g'` AS_VAR_PUSHDEF([B],[ax_compare_version_B]) B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \ -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \ -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \ -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \ -e 's/[[^0-9]]//g'` dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary dnl # then the first line is used to determine if the condition is true. dnl # The sed right after the echo is to remove any indented white space. 
m4_case(m4_tolower($2), [lt],[ ax_compare_version=`echo "x$A x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"` ], [gt],[ ax_compare_version=`echo "x$A x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"` ], [le],[ ax_compare_version=`echo "x$A x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"` ], [ge],[ ax_compare_version=`echo "x$A x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"` ],[ dnl Split the operator from the subversion count if present. m4_bmatch(m4_substr($2,2), [0],[ # A count of zero means use the length of the shorter version. # Determine the number of characters in A and B. ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'` ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'` # Set A to no more than B's length and B to no more than A's length. A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"` B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"` ], [[0-9]+],[ # A count greater than zero means use only that many subversions A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"` ], [.+],[ AC_WARNING( [invalid OP numeric parameter: $2]) ],[]) # Pad zeros at end of numbers to make same length. ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`" B="$B`echo $A | sed 's/./0/g'`" A="$ax_compare_version_tmp_A" # Check for equality or inequality as necessary. m4_case(m4_tolower(m4_substr($2,0,2)), [eq],[ test "x$A" = "x$B" && ax_compare_version=true ], [ne],[ test "x$A" != "x$B" && ax_compare_version=true ],[ AC_WARNING([invalid OP parameter: $2]) ]) ]) AS_VAR_POPDEF([A])dnl AS_VAR_POPDEF([B])dnl dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE. 
if test "$ax_compare_version" = "true" ; then m4_ifvaln([$4],[$4],[:])dnl m4_ifvaln([$5],[else $5])dnl fi ]) dnl AX_COMPARE_VERSION raft-0.11.3/m4/ax_file_escapes.m4000066400000000000000000000013731415614527300164400ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_file_escapes.html # =========================================================================== # # SYNOPSIS # # AX_FILE_ESCAPES # # DESCRIPTION # # Writes the specified data to the specified file. # # LICENSE # # Copyright (c) 2008 Tom Howard # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 8 AC_DEFUN([AX_FILE_ESCAPES],[ AX_DOLLAR="\$" AX_SRB="\\135" AX_SLB="\\133" AX_BS="\\\\" AX_DQ="\"" ]) raft-0.11.3/m4/pkg.m4000066400000000000000000000240111415614527300141010ustar00rootroot00000000000000dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- dnl serial 11 (pkg-config-0.29.1) dnl dnl Copyright © 2004 Scott James Remnant . dnl Copyright © 2012-2015 Dan Nicholson dnl dnl This program is free software; you can redistribute it and/or modify dnl it under the terms of the GNU General Public License as published by dnl the Free Software Foundation; either version 2 of the License, or dnl (at your option) any later version. dnl dnl This program is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl General Public License for more details. dnl dnl You should have received a copy of the GNU General Public License dnl along with this program; if not, write to the Free Software dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA dnl 02111-1307, USA. 
dnl dnl As a special exception to the GNU General Public License, if you dnl distribute this file as part of a program that contains a dnl configuration script generated by Autoconf, you may include it under dnl the same distribution terms that you use for the rest of that dnl program. dnl PKG_PREREQ(MIN-VERSION) dnl ----------------------- dnl Since: 0.29 dnl dnl Verify that the version of the pkg-config macros are at least dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's dnl installed version of pkg-config, this checks the developer's version dnl of pkg.m4 when generating configure. dnl dnl To ensure that this macro is defined, also add: dnl m4_ifndef([PKG_PREREQ], dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) dnl dnl See the "Since" comment for each macro you use to see what version dnl of the macros you require. m4_defun([PKG_PREREQ], [m4_define([PKG_MACROS_VERSION], [0.29.1]) m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) ])dnl PKG_PREREQ dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) dnl ---------------------------------- dnl Since: 0.16 dnl dnl Search for the pkg-config tool and set the PKG_CONFIG variable to dnl first found in the path. Checks that the version of pkg-config found dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is dnl used since that's the first version where most current features of dnl pkg-config existed. 
AC_DEFUN([PKG_PROG_PKG_CONFIG], [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) fi if test -n "$PKG_CONFIG"; then _pkg_min_version=m4_default([$1], [0.9.0]) AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) PKG_CONFIG="" fi fi[]dnl ])dnl PKG_PROG_PKG_CONFIG dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) dnl ------------------------------------------------------------------- dnl Since: 0.18 dnl dnl Check to see whether a particular set of modules exists. Similar to dnl PKG_CHECK_MODULES(), but does not set variables or print errors. dnl dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) dnl only at the first occurence in configure.ac, so if the first place dnl it's called might be skipped (such as if it is within an "if", you dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl if test -n "$PKG_CONFIG" && \ AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then m4_default([$2], [:]) m4_ifvaln([$3], [else $3])dnl fi]) dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) dnl --------------------------------------------- dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting dnl pkg_failed based on the result. 
m4_define([_PKG_CONFIG], [if test -n "$$1"; then pkg_cv_[]$1="$$1" elif test -n "$PKG_CONFIG"; then PKG_CHECK_EXISTS([$3], [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` test "x$?" != "x0" && pkg_failed=yes ], [pkg_failed=yes]) else pkg_failed=untried fi[]dnl ])dnl _PKG_CONFIG dnl _PKG_SHORT_ERRORS_SUPPORTED dnl --------------------------- dnl Internal check to see if pkg-config supports short errors. AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], [AC_REQUIRE([PKG_PROG_PKG_CONFIG]) if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi[]dnl ])dnl _PKG_SHORT_ERRORS_SUPPORTED dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], dnl [ACTION-IF-NOT-FOUND]) dnl -------------------------------------------------------------- dnl Since: 0.4.0 dnl dnl Note that if there is a possibility the first call to dnl PKG_CHECK_MODULES might not happen, you should be sure to include an dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac AC_DEFUN([PKG_CHECK_MODULES], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl pkg_failed=no AC_MSG_CHECKING([for $1]) _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) _PKG_CONFIG([$1][_LIBS], [libs], [$2]) m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS and $1[]_LIBS to avoid the need to call pkg-config. 
See the pkg-config man page for more details.]) if test $pkg_failed = yes; then AC_MSG_RESULT([no]) _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` else $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD m4_default([$4], [AC_MSG_ERROR( [Package requirements ($2) were not met: $$1_PKG_ERRORS Consider adjusting the PKG_CONFIG_PATH environment variable if you installed software in a non-standard prefix. _PKG_TEXT])[]dnl ]) elif test $pkg_failed = untried; then AC_MSG_RESULT([no]) m4_default([$4], [AC_MSG_FAILURE( [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. _PKG_TEXT To get pkg-config, see .])[]dnl ]) else $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS $1[]_LIBS=$pkg_cv_[]$1[]_LIBS AC_MSG_RESULT([yes]) $3 fi[]dnl ])dnl PKG_CHECK_MODULES dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], dnl [ACTION-IF-NOT-FOUND]) dnl --------------------------------------------------------------------- dnl Since: 0.29 dnl dnl Checks for existence of MODULES and gathers its build flags with dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags dnl and VARIABLE-PREFIX_LIBS from --libs. dnl dnl Note that if there is a possibility the first call to dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to dnl include an explicit call to PKG_PROG_PKG_CONFIG in your dnl configure.ac. 
AC_DEFUN([PKG_CHECK_MODULES_STATIC], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl _save_PKG_CONFIG=$PKG_CONFIG PKG_CONFIG="$PKG_CONFIG --static" PKG_CHECK_MODULES($@) PKG_CONFIG=$_save_PKG_CONFIG[]dnl ])dnl PKG_CHECK_MODULES_STATIC dnl PKG_INSTALLDIR([DIRECTORY]) dnl ------------------------- dnl Since: 0.27 dnl dnl Substitutes the variable pkgconfigdir as the location where a module dnl should install pkg-config .pc files. By default the directory is dnl $libdir/pkgconfig, but the default can be changed by passing dnl DIRECTORY. The user can override through the --with-pkgconfigdir dnl parameter. AC_DEFUN([PKG_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) m4_pushdef([pkg_description], [pkg-config installation directory @<:@]pkg_default[@:>@]) AC_ARG_WITH([pkgconfigdir], [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, [with_pkgconfigdir=]pkg_default) AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) ])dnl PKG_INSTALLDIR dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) dnl -------------------------------- dnl Since: 0.27 dnl dnl Substitutes the variable noarch_pkgconfigdir as the location where a dnl module should install arch-independent pkg-config .pc files. By dnl default the directory is $datadir/pkgconfig, but the default can be dnl changed by passing DIRECTORY. The user can override through the dnl --with-noarch-pkgconfigdir parameter. 
AC_DEFUN([PKG_NOARCH_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) m4_pushdef([pkg_description], [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) AC_ARG_WITH([noarch-pkgconfigdir], [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, [with_noarch_pkgconfigdir=]pkg_default) AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) ])dnl PKG_NOARCH_INSTALLDIR dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) dnl ------------------------------------------- dnl Since: 0.28 dnl dnl Retrieves the value of the pkg-config variable for the given module. AC_DEFUN([PKG_CHECK_VAR], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl _PKG_CONFIG([$1], [variable="][$3]["], [$2]) AS_VAR_COPY([$1], [pkg_cv_][$1]) AS_VAR_IF([$1], [""], [$5], [$4])dnl ])dnl PKG_CHECK_VAR raft-0.11.3/raft.pc.in000066400000000000000000000003501415614527300144230ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: raft Description: C implementation of the Raft Consensus protocol Version: @PACKAGE_VERSION@ Libs: -L${libdir} -lraft Cflags: -I${includedir} raft-0.11.3/src/000077500000000000000000000000001415614527300133275ustar00rootroot00000000000000raft-0.11.3/src/array.h000066400000000000000000000017751415614527300146300ustar00rootroot00000000000000/* Macros to manipulate contiguous arrays. */ #ifndef ARRAY_H_ #define ARRAY_H_ #include "../include/raft.h" /* Append item I of type T to array A which currently has N items. * * A and N must both by pointers. Set RV to -1 in case of failure. 
*/ #define ARRAY__APPEND(T, I, A, N, RV) \ { \ T *tmp_array; \ tmp_array = raft_realloc(*A, (*N + 1) * sizeof **A); \ if (tmp_array != NULL) { \ (*N)++; \ *A = tmp_array; \ (*A)[(*N) - 1] = I; \ RV = 0; \ } else { \ RV = -1; \ } \ } #endif /* ARRAY_H_ */ raft-0.11.3/src/assert.h000066400000000000000000000016041415614527300150020ustar00rootroot00000000000000/* Define the assert() macro, either as the standard one or the test one. */ #ifndef ASSERT_H_ #define ASSERT_H_ #if defined(RAFT_TEST) extern void munit_errorf_ex(const char *filename, int line, const char *format, ...); #define assert(expr) \ do { \ if (!expr) { \ munit_errorf_ex(__FILE__, __LINE__, "assertion failed: ", #expr); \ } \ } while (0) #elif defined(NDEBUG) #define assert(x) \ do { \ (void)sizeof(x); \ } while (0) #else #include #endif #endif /* ASSERT_H_ */ raft-0.11.3/src/byte.c000066400000000000000000000315461415614527300144470ustar00rootroot00000000000000#include "byte.h" /* Taken from https://github.com/gcc-mirror/gcc/blob/master/libiberty/crc32.c */ static const unsigned byteCrcTable[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 
0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 
0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4}; unsigned byteCrc32(const void *buf, const size_t size, const unsigned init) { unsigned crc = init; uint8_t *cursor = (uint8_t *)buf; size_t count = size; while (count--) { crc = (crc << 8) ^ byteCrcTable[((crc >> 24) ^ *cursor) & 255]; cursor++; } return crc; } /* ================ sha1.c ================ */ /* SHA-1 in C By Steve Reid 100% Public Domain Test Vectors (from FIPS PUB 180-1) "abc" A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 A million repetitions of "a" 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F */ /* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ /* #define SHA1HANDSOFF * Copies data before messing with it. */ #define SHA1HANDSOFF #include #include #include /* for u_int*_t */ #if defined(__sun) #include "solarisfixes.h" #endif #ifndef BYTE_ORDER #if (BSD >= 199103) #include #else #if defined(linux) || defined(__linux__) #include #else #define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */ #define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */ #define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp)*/ #if defined(vax) || defined(ns32000) || defined(sun386) || \ defined(__i386__) || defined(MIPSEL) || defined(_MIPSEL) || \ defined(BIT_ZERO_ON_RIGHT) || defined(__alpha__) || defined(__alpha) #define BYTE_ORDER LITTLE_ENDIAN #endif #if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \ defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \ defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) || \ defined(apollo) || defined(__convex__) || defined(_CRAY) || \ defined(__hppa) || defined(__hp9000) || defined(__hp9000s300) || \ defined(__hp9000s700) || 
defined(BIT_ZERO_ON_LEFT) || defined(m68k) || \ defined(__sparc) #define BYTE_ORDER BIG_ENDIAN #endif #endif /* linux */ #endif /* BSD */ #endif /* BYTE_ORDER */ #if defined(__BYTE_ORDER) && !defined(BYTE_ORDER) #if (__BYTE_ORDER == __LITTLE_ENDIAN) #define BYTE_ORDER LITTLE_ENDIAN #else #define BYTE_ORDER BIG_ENDIAN #endif #endif #if !defined(BYTE_ORDER) || \ (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN && \ BYTE_ORDER != PDP_ENDIAN) /* you must determine what the correct bit order is for * your compiler - the next line is an intentional error * which will force your compiles to bomb until you fix * the above macros. */ #error "Undefined or invalid BYTE_ORDER" #endif #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) /* blk0() and blk() perform the initial expand. */ /* I got the idea of expanding during the round function from SSLeay */ #if BYTE_ORDER == LITTLE_ENDIAN #define blk0(i) \ (block->l[i] = (rol(block->l[i], 24) & 0xFF00FF00) | \ (rol(block->l[i], 8) & 0x00FF00FF)) #elif BYTE_ORDER == BIG_ENDIAN #define blk0(i) block->l[i] #else #error "Endianness not defined!" 
#endif #define blk(i) \ (block->l[i & 15] = rol(block->l[(i + 13) & 15] ^ block->l[(i + 8) & 15] ^ \ block->l[(i + 2) & 15] ^ block->l[i & 15], \ 1)) /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ #define R0(v, w, x, y, z, i) \ z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \ w = rol(w, 30); #define R1(v, w, x, y, z, i) \ z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \ w = rol(w, 30); #define R2(v, w, x, y, z, i) \ z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \ w = rol(w, 30); #define R3(v, w, x, y, z, i) \ z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \ w = rol(w, 30); #define R4(v, w, x, y, z, i) \ z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \ w = rol(w, 30); static void byteSha1Transform(uint32_t state[5], const uint8_t buffer[64]) { uint32_t a, b, c, d, e; typedef union { uint8_t c[64]; uint32_t l[16]; } CHAR64LONG16; #ifdef SHA1HANDSOFF CHAR64LONG16 block[1]; /* use array to appear as a pointer */ memcpy(block, buffer, 64); #else /* The following had better never be used because it causes the * pointer-to-const buffer to be cast into a pointer to non-const. * And the result is written through. I threw a "const" in, hoping * this will cause a diagnostic. */ CHAR64LONG16 *block = (const CHAR64LONG16 *)buffer; #endif /* Copy context->state[] to working vars */ a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; /* 4 rounds of 20 operations each. Loop unrolled. 
*/ R0(a, b, c, d, e, 0); R0(e, a, b, c, d, 1); R0(d, e, a, b, c, 2); R0(c, d, e, a, b, 3); R0(b, c, d, e, a, 4); R0(a, b, c, d, e, 5); R0(e, a, b, c, d, 6); R0(d, e, a, b, c, 7); R0(c, d, e, a, b, 8); R0(b, c, d, e, a, 9); R0(a, b, c, d, e, 10); R0(e, a, b, c, d, 11); R0(d, e, a, b, c, 12); R0(c, d, e, a, b, 13); R0(b, c, d, e, a, 14); R0(a, b, c, d, e, 15); R1(e, a, b, c, d, 16); R1(d, e, a, b, c, 17); R1(c, d, e, a, b, 18); R1(b, c, d, e, a, 19); R2(a, b, c, d, e, 20); R2(e, a, b, c, d, 21); R2(d, e, a, b, c, 22); R2(c, d, e, a, b, 23); R2(b, c, d, e, a, 24); R2(a, b, c, d, e, 25); R2(e, a, b, c, d, 26); R2(d, e, a, b, c, 27); R2(c, d, e, a, b, 28); R2(b, c, d, e, a, 29); R2(a, b, c, d, e, 30); R2(e, a, b, c, d, 31); R2(d, e, a, b, c, 32); R2(c, d, e, a, b, 33); R2(b, c, d, e, a, 34); R2(a, b, c, d, e, 35); R2(e, a, b, c, d, 36); R2(d, e, a, b, c, 37); R2(c, d, e, a, b, 38); R2(b, c, d, e, a, 39); R3(a, b, c, d, e, 40); R3(e, a, b, c, d, 41); R3(d, e, a, b, c, 42); R3(c, d, e, a, b, 43); R3(b, c, d, e, a, 44); R3(a, b, c, d, e, 45); R3(e, a, b, c, d, 46); R3(d, e, a, b, c, 47); R3(c, d, e, a, b, 48); R3(b, c, d, e, a, 49); R3(a, b, c, d, e, 50); R3(e, a, b, c, d, 51); R3(d, e, a, b, c, 52); R3(c, d, e, a, b, 53); R3(b, c, d, e, a, 54); R3(a, b, c, d, e, 55); R3(e, a, b, c, d, 56); R3(d, e, a, b, c, 57); R3(c, d, e, a, b, 58); R3(b, c, d, e, a, 59); R4(a, b, c, d, e, 60); R4(e, a, b, c, d, 61); R4(d, e, a, b, c, 62); R4(c, d, e, a, b, 63); R4(b, c, d, e, a, 64); R4(a, b, c, d, e, 65); R4(e, a, b, c, d, 66); R4(d, e, a, b, c, 67); R4(c, d, e, a, b, 68); R4(b, c, d, e, a, 69); R4(a, b, c, d, e, 70); R4(e, a, b, c, d, 71); R4(d, e, a, b, c, 72); R4(c, d, e, a, b, 73); R4(b, c, d, e, a, 74); R4(a, b, c, d, e, 75); R4(e, a, b, c, d, 76); R4(d, e, a, b, c, 77); R4(c, d, e, a, b, 78); R4(b, c, d, e, a, 79); /* Add the working vars back into context.state[] */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; /* Wipe variables */ a = b = c = d = e 
= 0; #ifdef SHA1HANDSOFF memset(block, '\0', sizeof(block)); #endif } void byteSha1Init(struct byteSha1 *s) { /* SHA1 initialization constants */ s->state[0] = 0x67452301; s->state[1] = 0xEFCDAB89; s->state[2] = 0x98BADCFE; s->state[3] = 0x10325476; s->state[4] = 0xC3D2E1F0; s->count[0] = s->count[1] = 0; } /* Run your data through this. */ void byteSha1Update(struct byteSha1 *s, const uint8_t *data, uint32_t len) { uint32_t i; uint32_t j; j = s->count[0]; if ((s->count[0] += len << 3) < j) s->count[1]++; s->count[1] += (len >> 29); j = (j >> 3) & 63; if ((j + len) > 63) { memcpy(&s->buffer[j], data, (i = 64 - j)); byteSha1Transform(s->state, s->buffer); for (; i + 63 < len; i += 64) { byteSha1Transform(s->state, &data[i]); } j = 0; } else i = 0; memcpy(&s->buffer[j], &data[i], len - i); } /* Add padding and return the message digest. */ void byteSha1Digest(struct byteSha1 *s, uint8_t value[20]) { unsigned i; uint8_t finalcount[8]; uint8_t c; #if 0 /* untested "improvement" by DHR */ /* Convert context->count to a sequence of bytes * in finalcount. Second element first, but * big-endian order within element. * But we do it all backwards. */ uint8_t *fcp = &finalcount[8]; for (i = 0; i < 2; i++) { u_int32_t t = context->count[i]; int j; for (j = 0; j < 4; t >>= 8, j++) *--fcp = (uint8_t) t } #else for (i = 0; i < 8; i++) { finalcount[i] = (uint8_t)((s->count[(i >= 4 ? 
0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */ } #endif c = 0200; byteSha1Update(s, &c, 1); while ((s->count[0] & 504) != 448) { c = 0000; byteSha1Update(s, &c, 1); } byteSha1Update(s, finalcount, 8); /* Should cause a SHA1Transform() */ for (i = 0; i < 20; i++) { value[i] = (uint8_t)((s->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255); } /* Wipe variables */ memset(s, '\0', sizeof(*s)); memset(&finalcount, '\0', sizeof(finalcount)); } /* ================ end of sha1.c ================ */ raft-0.11.3/src/byte.h000066400000000000000000000103661415614527300144510ustar00rootroot00000000000000/* Byte-level utilities. */ #ifndef BYTE_H_ #define BYTE_H_ #include #include #include #if defined(__cplusplus) #define BYTE__INLINE inline #else #if defined(__clang__) #define BYTE__INLINE static inline __attribute__((unused)) #else #define BYTE__INLINE static inline #endif #endif /* Compile-time endianess detection (best effort). */ #if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ (defined(__ARMEL__) && (__ARMEL__ == 1)) #define BYTE__LITTLE_ENDIAN #elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) && \ defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 #define RAFT__BIG_ENDIAN #endif /* Flip a 32-bit number to network byte order (little endian) */ BYTE__INLINE uint32_t byteFlip32(uint32_t v) { #if defined(BYTE__LITTLE_ENDIAN) return v; #elif defined(RAFT__BIG_ENDIAN) return __builtin_bswap32(v); #else /* Unknown endianess */ union { uint32_t u; uint8_t v[4]; } s; s.v[0] = (uint8_t)v; s.v[1] = (uint8_t)(v >> 8); s.v[2] = (uint8_t)(v >> 16); s.v[3] = (uint8_t)(v >> 24); return s.u; #endif } /* Flip a 64-bit number to network byte order (little endian) */ BYTE__INLINE uint64_t byteFlip64(uint64_t v) { #if defined(BYTE__LITTLE_ENDIAN) return v; #elif defined(RAFT__BIG_ENDIAN) return __builtin_bswap64(v); #else union { uint64_t u; uint8_t v[8]; } s; s.v[0] = (uint8_t)v; s.v[1] = (uint8_t)(v >> 8); s.v[2] = (uint8_t)(v >> 16); 
s.v[3] = (uint8_t)(v >> 24); s.v[4] = (uint8_t)(v >> 32); s.v[5] = (uint8_t)(v >> 40); s.v[6] = (uint8_t)(v >> 48); s.v[7] = (uint8_t)(v >> 56); return s.u; #endif } BYTE__INLINE void bytePut8(void **cursor, uint8_t value) { uint8_t **p = (uint8_t **)cursor; **p = value; *p += 1; } BYTE__INLINE void bytePut32(void **cursor, uint32_t value) { uint32_t **p = (uint32_t **)cursor; **p = byteFlip32(value); *p += 1; } BYTE__INLINE void bytePut64(void **cursor, uint64_t value) { uint64_t **p = (uint64_t **)cursor; **p = byteFlip64(value); *p += 1; } BYTE__INLINE void bytePut64Unaligned(void **cursor, uint64_t value) { unsigned i; uint64_t flipped = byteFlip64(value); for (i = 0; i < sizeof(uint64_t); i++) { bytePut8(cursor, ((uint8_t *)(&flipped))[i]); } } BYTE__INLINE void bytePutString(void **cursor, const char *value) { char **p = (char **)cursor; strcpy(*p, value); *p += strlen(value) + 1; } BYTE__INLINE uint8_t byteGet8(const void **cursor) { const uint8_t **p = (const uint8_t **)cursor; uint8_t value = **p; *p += 1; return value; } BYTE__INLINE uint32_t byteGet32(const void **cursor) { const uint32_t **p = (const uint32_t **)cursor; uint32_t value = byteFlip32(**p); *p += 1; return value; } BYTE__INLINE uint64_t byteGet64(const void **cursor) { const uint64_t **p = (const uint64_t **)cursor; uint64_t value = byteFlip64(**p); *p += 1; return value; } BYTE__INLINE uint64_t byteGet64Unaligned(const void **cursor) { uint64_t value = 0; unsigned i; for (i = 0; i < sizeof(uint64_t); i++) { ((uint8_t *)(&value))[i] = byteGet8(cursor); } return byteFlip64(value); } BYTE__INLINE const char *byteGetString(const void **cursor, size_t max_len) { const char **p = (const char **)cursor; const char *value = *p; size_t len = 0; while (len < max_len) { if (*(*p + len) == 0) { break; } len++; } if (len == max_len) { return NULL; } *p += len + 1; return value; } /* Add padding to size if it's not a multiple of 8. 
*/ BYTE__INLINE size_t bytePad64(size_t size) { size_t rest = size % sizeof(uint64_t); if (rest != 0) { size += sizeof(uint64_t) - rest; } return size; } /* Calculate the CRC32 checksum of the given data buffer. */ unsigned byteCrc32(const void *buf, size_t size, unsigned init); struct byteSha1 { uint32_t state[5]; uint32_t count[2]; uint8_t buffer[64]; uint8_t value[20]; }; void byteSha1Init(struct byteSha1 *s); void byteSha1Update(struct byteSha1 *s, const uint8_t *data, uint32_t len); void byteSha1Digest(struct byteSha1 *s, uint8_t value[20]); #endif /* BYTE_H_ */ raft-0.11.3/src/client.c000066400000000000000000000254451415614527300147630ustar00rootroot00000000000000#include "../include/raft.h" #include "assert.h" #include "configuration.h" #include "err.h" #include "log.h" #include "membership.h" #include "progress.h" #include "queue.h" #include "replication.h" #include "request.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) int raft_apply(struct raft *r, struct raft_apply *req, const struct raft_buffer bufs[], const unsigned n, raft_apply_cb cb) { raft_index index; int rv; tracef("raft_apply n %d", n); assert(r != NULL); assert(bufs != NULL); assert(n > 0); if (r->state != RAFT_LEADER || r->transfer != NULL) { rv = RAFT_NOTLEADER; ErrMsgFromCode(r->errmsg, rv); tracef("raft_apply not leader"); goto err; } /* Index of the first entry being appended. */ index = logLastIndex(&r->log) + 1; tracef("%u commands starting at %lld", n, index); req->type = RAFT_COMMAND; req->index = index; req->cb = cb; /* Append the new entries to the log. 
*/ rv = logAppendCommands(&r->log, r->current_term, bufs, n); if (rv != 0) { goto err; } QUEUE_PUSH(&r->leader_state.requests, &req->queue); rv = replicationTrigger(r, index); if (rv != 0) { goto err_after_log_append; } return 0; err_after_log_append: logDiscard(&r->log, index); QUEUE_REMOVE(&req->queue); err: assert(rv != 0); return rv; } int raft_barrier(struct raft *r, struct raft_barrier *req, raft_barrier_cb cb) { raft_index index; struct raft_buffer buf; int rv; if (r->state != RAFT_LEADER || r->transfer != NULL) { rv = RAFT_NOTLEADER; goto err; } /* TODO: use a completely empty buffer */ buf.len = 8; buf.base = raft_malloc(buf.len); if (buf.base == NULL) { rv = RAFT_NOMEM; goto err; } /* Index of the barrier entry being appended. */ index = logLastIndex(&r->log) + 1; tracef("barrier starting at %lld", index); req->type = RAFT_BARRIER; req->index = index; req->cb = cb; rv = logAppend(&r->log, r->current_term, RAFT_BARRIER, &buf, NULL); if (rv != 0) { goto err_after_buf_alloc; } QUEUE_PUSH(&r->leader_state.requests, &req->queue); rv = replicationTrigger(r, index); if (rv != 0) { goto err_after_log_append; } return 0; err_after_log_append: logDiscard(&r->log, index); QUEUE_REMOVE(&req->queue); err_after_buf_alloc: raft_free(buf.base); err: return rv; } static int clientChangeConfiguration( struct raft *r, struct raft_change *req, const struct raft_configuration *configuration) { raft_index index; raft_term term = r->current_term; int rv; (void)req; /* Index of the entry being appended. */ index = logLastIndex(&r->log) + 1; /* Encode the new configuration and append it to the log. */ rv = logAppendConfiguration(&r->log, term, configuration); if (rv != 0) { goto err; } if (configuration->n != r->configuration.n) { rv = progressRebuildArray(r, configuration); if (rv != 0) { goto err; } } /* Update the current configuration if we've created a new object. 
*/ if (configuration != &r->configuration) { raft_configuration_close(&r->configuration); r->configuration = *configuration; } /* Start writing the new log entry to disk and send it to the followers. */ rv = replicationTrigger(r, index); if (rv != 0) { /* TODO: restore the old next/match indexes and configuration. */ goto err_after_log_append; } r->configuration_uncommitted_index = index; return 0; err_after_log_append: logTruncate(&r->log, index); err: assert(rv != 0); return rv; } int raft_add(struct raft *r, struct raft_change *req, raft_id id, const char *address, raft_change_cb cb) { struct raft_configuration configuration; int rv; rv = membershipCanChangeConfiguration(r); if (rv != 0) { return rv; } tracef("add server: id %llu, address %s", id, address); /* Make a copy of the current configuration, and add the new server to * it. */ rv = configurationCopy(&r->configuration, &configuration); if (rv != 0) { goto err; } rv = raft_configuration_add(&configuration, id, address, RAFT_SPARE); if (rv != 0) { goto err_after_configuration_copy; } req->cb = cb; rv = clientChangeConfiguration(r, req, &configuration); if (rv != 0) { goto err_after_configuration_copy; } assert(r->leader_state.change == NULL); r->leader_state.change = req; return 0; err_after_configuration_copy: raft_configuration_close(&configuration); err: assert(rv != 0); return rv; } int raft_assign(struct raft *r, struct raft_change *req, raft_id id, int role, raft_change_cb cb) { const struct raft_server *server; unsigned server_index; raft_index last_index; int rv; tracef("raft_assign to id:%llu the role:%d", id, role); if (role != RAFT_STANDBY && role != RAFT_VOTER && role != RAFT_SPARE) { rv = RAFT_BADROLE; ErrMsgFromCode(r->errmsg, rv); return rv; } rv = membershipCanChangeConfiguration(r); if (rv != 0) { return rv; } server = configurationGet(&r->configuration, id); if (server == NULL) { rv = RAFT_NOTFOUND; ErrMsgPrintf(r->errmsg, "no server has ID %llu", id); goto err; } /* Check if we have 
already the desired role. */ if (server->role == role) { const char *name; rv = RAFT_BADROLE; switch (role) { case RAFT_VOTER: name = "voter"; break; case RAFT_STANDBY: name = "stand-by"; break; case RAFT_SPARE: name = "spare"; break; default: name = NULL; assert(0); break; } ErrMsgPrintf(r->errmsg, "server is already %s", name); goto err; } server_index = configurationIndexOf(&r->configuration, id); assert(server_index < r->configuration.n); last_index = logLastIndex(&r->log); req->cb = cb; assert(r->leader_state.change == NULL); r->leader_state.change = req; /* If we are not promoting to the voter role or if the log of this server is * already up-to-date, we can submit the configuration change * immediately. */ if (role != RAFT_VOTER || progressMatchIndex(r, server_index) == last_index) { int old_role = r->configuration.servers[server_index].role; r->configuration.servers[server_index].role = role; rv = clientChangeConfiguration(r, req, &r->configuration); if (rv != 0) { tracef("clientChangeConfiguration failed %d", rv); r->configuration.servers[server_index].role = old_role; return rv; } return 0; } r->leader_state.promotee_id = server->id; /* Initialize the first catch-up round. */ r->leader_state.round_number = 1; r->leader_state.round_index = last_index; r->leader_state.round_start = r->io->time(r->io); /* Immediately initiate an AppendEntries request. */ rv = replicationProgress(r, server_index); if (rv != 0 && rv != RAFT_NOCONNECTION) { /* This error is not fatal. 
*/ tracef("failed to send append entries to server %llu: %s (%d)", server->id, raft_strerror(rv), rv); } return 0; err: assert(rv != 0); return rv; } int raft_remove(struct raft *r, struct raft_change *req, raft_id id, raft_change_cb cb) { const struct raft_server *server; struct raft_configuration configuration; int rv; rv = membershipCanChangeConfiguration(r); if (rv != 0) { return rv; } server = configurationGet(&r->configuration, id); if (server == NULL) { rv = RAFT_BADID; goto err; } tracef("remove server: id %llu", id); /* Make a copy of the current configuration, and remove the given server * from it. */ rv = configurationCopy(&r->configuration, &configuration); if (rv != 0) { goto err; } rv = configurationRemove(&configuration, id); if (rv != 0) { goto err_after_configuration_copy; } req->cb = cb; rv = clientChangeConfiguration(r, req, &configuration); if (rv != 0) { goto err_after_configuration_copy; } assert(r->leader_state.change == NULL); r->leader_state.change = req; return 0; err_after_configuration_copy: raft_configuration_close(&configuration); err: assert(rv != 0); return rv; } /* Find a suitable voting follower. 
*/ static raft_id clientSelectTransferee(struct raft *r) { const struct raft_server *transferee = NULL; unsigned i; for (i = 0; i < r->configuration.n; i++) { const struct raft_server *server = &r->configuration.servers[i]; if (server->id == r->id || server->role != RAFT_VOTER) { continue; } transferee = server; if (progressIsUpToDate(r, i)) { break; } } if (transferee != NULL) { return transferee->id; } return 0; } int raft_transfer(struct raft *r, struct raft_transfer *req, raft_id id, raft_transfer_cb cb) { const struct raft_server *server; unsigned i; int rv; tracef("transfer to %llu", id); if (r->state != RAFT_LEADER || r->transfer != NULL) { tracef("transfer error - state:%d", r->state); rv = RAFT_NOTLEADER; ErrMsgFromCode(r->errmsg, rv); goto err; } if (id == 0) { id = clientSelectTransferee(r); if (id == 0) { rv = RAFT_NOTFOUND; ErrMsgPrintf(r->errmsg, "there's no other voting server"); goto err; } } server = configurationGet(&r->configuration, id); if (server == NULL || server->id == r->id || server->role != RAFT_VOTER) { rv = RAFT_BADID; ErrMsgFromCode(r->errmsg, rv); goto err; } /* If this follower is up-to-date, we can send it the TimeoutNow message * right away. */ i = configurationIndexOf(&r->configuration, server->id); assert(i < r->configuration.n); membershipLeadershipTransferInit(r, req, id, cb); if (progressIsUpToDate(r, i)) { rv = membershipLeadershipTransferStart(r); if (rv != 0) { r->transfer = NULL; goto err; } } return 0; err: assert(rv != 0); return rv; } #undef tracef raft-0.11.3/src/compress.c000066400000000000000000000200241415614527300153240ustar00rootroot00000000000000#include "compress.h" #ifdef LZ4_AVAILABLE #include #endif #include #include #include "assert.h" #include "byte.h" #include "err.h" #define min(a,b) ((a) < (b) ? (a) : (b)) #define max(a,b) ((a) > (b) ? 
(a) : (b)) #define MEGABYTE 1048576 int Compress(struct raft_buffer bufs[], unsigned n_bufs, struct raft_buffer *compressed, char *errmsg) { #ifndef LZ4_AVAILABLE (void) bufs; (void) n_bufs; (void) compressed; ErrMsgPrintf(errmsg, "LZ4 not available"); return RAFT_INVALID; #else assert(bufs != NULL); assert(n_bufs > 0); assert(compressed != NULL); assert(errmsg != NULL); int rv = RAFT_IOERR; size_t src_size = 0; size_t dst_size = 0; size_t src_offset = 0; size_t dst_offset = 0; size_t dst_size_needed = 0; /* Store minimal dst_size */ size_t ret = 0; /* Return value of LZ4F_XXX functions */ compressed->base = NULL; compressed->len = 0; /* Determine total uncompressed size */ for (unsigned i = 0; i < n_bufs; ++i) { src_size += bufs[i].len; } /* Set LZ4 preferences */ LZ4F_preferences_t lz4_pref; memset(&lz4_pref, 0, sizeof(lz4_pref)); /* Detect data corruption when decompressing */ lz4_pref.frameInfo.contentChecksumFlag = 1; /* For allocating a suitable buffer when decompressing */ lz4_pref.frameInfo.contentSize = src_size; /* Context to track compression progress */ LZ4F_compressionContext_t ctx; ret = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); if (LZ4F_isError(ret)) { ErrMsgPrintf(errmsg, "LZ4F_createDecompressionContext %s", LZ4F_getErrorName(ret)); rv = RAFT_NOMEM; goto err; } /* Guestimate of eventual compressed size, mainly not to allocate a huge * buffer as `LZ4F_compressBound` calculates the worst case scenario. 
*/ dst_size = LZ4F_compressBound( max(MEGABYTE, (size_t)lz4_pref.frameInfo.contentSize / 10), &lz4_pref); dst_size += LZ4F_HEADER_SIZE_MAX_RAFT; compressed->base = raft_malloc(dst_size); if (compressed->base == NULL) { rv = RAFT_NOMEM; goto err_after_ctx_alloc; } /* Returns the size of the lz4 header, data should be written after the * header */ dst_offset = LZ4F_compressBegin(ctx, compressed->base, dst_size, &lz4_pref); if (LZ4F_isError(dst_offset)) { ErrMsgPrintf(errmsg, "LZ4F_compressBegin %s", LZ4F_getErrorName(dst_offset)); rv = RAFT_IOERR; goto err_after_buff_alloc; } /* Compress all buffers */ for (unsigned i = 0; i < n_bufs; ++i) { src_offset = 0; while (src_offset < bufs[i].len) { /* Compress in chunks of maximum 1MB and check if there is enough * room in the dst buffer, if not realloc */ src_size = min(bufs[i].len - src_offset, (size_t)MEGABYTE); dst_size_needed = LZ4F_compressBound(src_size, &lz4_pref); if (dst_size - dst_offset < dst_size_needed) { dst_size += max(dst_size_needed, (size_t)lz4_pref.frameInfo.contentSize / 10); compressed->base = raft_realloc(compressed->base, dst_size); if (compressed->base == NULL) { rv = RAFT_NOMEM; goto err_after_ctx_alloc; } } /* There is guaranteed enough room in `dst` to perform the * compression */ ret = LZ4F_compressUpdate(ctx, (char*)compressed->base + dst_offset, dst_size - dst_offset, (char*)bufs[i].base + src_offset, src_size, NULL); if (LZ4F_isError(ret)) { ErrMsgPrintf(errmsg, "LZ4F_compressUpdate %s", LZ4F_getErrorName(ret)); rv = RAFT_IOERR; goto err_after_buff_alloc; } dst_offset += ret; src_offset += src_size; } } /* Make sure LZ4F_compressEnd has enough room to succeed */ dst_size_needed = LZ4F_compressBound(0, &lz4_pref); if ((dst_size - dst_offset) < dst_size_needed) { dst_size += dst_size_needed; compressed->base = raft_realloc(compressed->base, dst_size); if (compressed->base == NULL) { rv = RAFT_NOMEM; goto err_after_ctx_alloc; } } /* Finalize compression */ ret = LZ4F_compressEnd(ctx, 
(char*)compressed->base + dst_offset, dst_size - dst_offset, NULL); if (LZ4F_isError(ret)) { ErrMsgPrintf(errmsg, "LZ4F_compressEnd %s", LZ4F_getErrorName(ret)); rv = RAFT_IOERR; goto err_after_buff_alloc; } dst_offset += ret; compressed->len = dst_offset; LZ4F_freeCompressionContext(ctx); return 0; err_after_buff_alloc: raft_free(compressed->base); compressed->base = NULL; err_after_ctx_alloc: LZ4F_freeCompressionContext(ctx); err: return rv; #endif /* LZ4_AVAILABLE */ } int Decompress(struct raft_buffer buf, struct raft_buffer *decompressed, char *errmsg) { #ifndef LZ4_AVAILABLE (void) buf; (void) decompressed; ErrMsgPrintf(errmsg, "LZ4 not available"); return RAFT_INVALID; #else assert(decompressed != NULL); int rv = RAFT_IOERR; size_t src_offset = 0; size_t dst_offset = 0; size_t src_size = 0; size_t dst_size = 0; size_t ret = 0; LZ4F_decompressionContext_t ctx; if (LZ4F_isError(LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION))) { ErrMsgPrintf(errmsg, "LZ4F_createDecompressionContext"); rv = RAFT_NOMEM; goto err; } src_size = buf.len; LZ4F_frameInfo_t frameInfo = {0}; /* `src_size` will contain the size of the LZ4 Frame Header after the call, * decompression must resume at that offset. */ ret = LZ4F_getFrameInfo(ctx, &frameInfo, buf.base, &src_size); if (LZ4F_isError(ret)) { ErrMsgPrintf(errmsg, "LZ4F_getFrameInfo %s", LZ4F_getErrorName(ret)); rv = RAFT_IOERR; goto err_after_ctx_alloc; } src_offset = src_size; decompressed->base = raft_malloc((size_t)frameInfo.contentSize); decompressed->len = (size_t)frameInfo.contentSize; if (decompressed->base == NULL) { rv = RAFT_NOMEM; goto err_after_ctx_alloc; } ret = 1; while (ret != 0) { src_size = buf.len - src_offset; /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! * The next line works around a bug in an older lz4 lib where the * `size_t` dst_size parameter would overflow an `int`. * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ dst_size = min(decompressed->len - dst_offset, (size_t)INT_MAX); /* `dst_size` will contain the number of bytes written to decompressed->base, * while `src_size` will contain the number of bytes consumed from * buf.base */ ret = LZ4F_decompress(ctx, (char*)decompressed->base + dst_offset, &dst_size, (char*)buf.base + src_offset, &src_size, NULL); if (LZ4F_isError(ret)) { ErrMsgPrintf(errmsg, "LZ4F_decompress %s", LZ4F_getErrorName(ret)); rv = RAFT_IOERR; goto err_after_buff_alloc; } src_offset += src_size; dst_offset += dst_size; } if (LZ4F_freeDecompressionContext(ctx) != 0) { raft_free(decompressed->base); decompressed->base = NULL; return RAFT_IOERR; } return 0; err_after_buff_alloc: raft_free(decompressed->base); decompressed->base = NULL; err_after_ctx_alloc: LZ4F_freeDecompressionContext(ctx); err: return rv; #endif /* LZ4_AVAILABLE */ } bool IsCompressed(const void *data, size_t sz) { if (data == NULL || sz < 4) { return false; } const void *cursor = data; #ifdef LZ4F_MAGICNUMBER #define RAFT_LZ4F_MAGICNUMBER LZ4F_MAGICNUMBER #else #define RAFT_LZ4F_MAGICNUMBER 0x184D2204U #endif return byteGet32(&cursor) == RAFT_LZ4F_MAGICNUMBER; } raft-0.11.3/src/compress.h000066400000000000000000000016131415614527300153340ustar00rootroot00000000000000#ifndef COMPRESS_H_ #define COMPRESS_H_ #include "../include/raft.h" #ifdef LZ4F_HEADER_SIZE_MAX #define LZ4F_HEADER_SIZE_MAX_RAFT LZ4F_HEADER_SIZE_MAX #else #define LZ4F_HEADER_SIZE_MAX_RAFT 19UL #endif /* * Compresses the content of `bufs` into a newly allocated buffer that is * returned to the caller through `compressed`. Returns a non-0 value upon * failure. */ int Compress(struct raft_buffer bufs[], unsigned n_bufs, struct raft_buffer *compressed, char *errmsg); /* * Decompresses the content of `buf` into a newly allocated buffer that is * returned to the caller through `decompressed`. Returns a non-0 value upon * failure. 
*/ int Decompress(struct raft_buffer buf, struct raft_buffer *decompressed, char *errmsg); /* Returns `true` if `data` is compressed, `false` otherwise. */ bool IsCompressed(const void *data, size_t sz); #endif /* COMPRESS_H_ */ raft-0.11.3/src/configuration.c000066400000000000000000000204541415614527300163470ustar00rootroot00000000000000#include "configuration.h" #include "assert.h" #include "byte.h" #include "tracing.h" /* Current encoding format version. */ #define ENCODING_FORMAT 1 void configurationInit(struct raft_configuration *c) { c->servers = NULL; c->n = 0; } void configurationClose(struct raft_configuration *c) { size_t i; assert(c != NULL); assert(c->n == 0 || c->servers != NULL); for (i = 0; i < c->n; i++) { raft_free(c->servers[i].address); } if (c->servers != NULL) { raft_free(c->servers); } } unsigned configurationIndexOf(const struct raft_configuration *c, const raft_id id) { unsigned i; assert(c != NULL); for (i = 0; i < c->n; i++) { if (c->servers[i].id == id) { return i; } } return c->n; } unsigned configurationIndexOfVoter(const struct raft_configuration *c, const raft_id id) { unsigned i; unsigned j = 0; assert(c != NULL); assert(id > 0); for (i = 0; i < c->n; i++) { if (c->servers[i].id == id) { if (c->servers[i].role == RAFT_VOTER) { return j; } return c->n; } if (c->servers[i].role == RAFT_VOTER) { j++; } } return c->n; } const struct raft_server *configurationGet(const struct raft_configuration *c, const raft_id id) { size_t i; assert(c != NULL); assert(id > 0); /* Grab the index of the server with the given ID */ i = configurationIndexOf(c, id); if (i == c->n) { /* No server with matching ID. 
*/ return NULL; } assert(i < c->n); return &c->servers[i]; } unsigned configurationVoterCount(const struct raft_configuration *c) { unsigned i; unsigned n = 0; assert(c != NULL); for (i = 0; i < c->n; i++) { if (c->servers[i].role == RAFT_VOTER) { n++; } } return n; } int configurationCopy(const struct raft_configuration *src, struct raft_configuration *dst) { size_t i; int rv; configurationInit(dst); for (i = 0; i < src->n; i++) { struct raft_server *server = &src->servers[i]; rv = configurationAdd(dst, server->id, server->address, server->role); if (rv != 0) { return rv; } } return 0; } int configurationAdd(struct raft_configuration *c, raft_id id, const char *address, int role) { struct raft_server *servers; struct raft_server *server; size_t i; assert(c != NULL); assert(id != 0); if (role != RAFT_STANDBY && role != RAFT_VOTER && role != RAFT_SPARE) { return RAFT_BADROLE; } /* Check that neither the given id or address is already in use */ for (i = 0; i < c->n; i++) { server = &c->servers[i]; if (server->id == id) { return RAFT_DUPLICATEID; } if (strcmp(server->address, address) == 0) { return RAFT_DUPLICATEADDRESS; } } /* Grow the servers array.. */ servers = raft_realloc(c->servers, (c->n + 1) * sizeof *server); if (servers == NULL) { return RAFT_NOMEM; } c->servers = servers; /* Fill the newly allocated slot (the last one) with the given details. */ server = &servers[c->n]; server->id = id; server->address = raft_malloc(strlen(address) + 1); if (server->address == NULL) { return RAFT_NOMEM; } strcpy(server->address, address); server->role = role; c->n++; return 0; } int configurationRemove(struct raft_configuration *c, const raft_id id) { unsigned i; unsigned j; struct raft_server *servers; assert(c != NULL); i = configurationIndexOf(c, id); if (i == c->n) { return RAFT_BADID; } assert(i < c->n); /* If this is the last server in the configuration, reset everything. 
*/ if (c->n - 1 == 0) { raft_free(c->servers[0].address); raft_free(c->servers); c->n = 0; c->servers = NULL; return 0; } /* Create a new servers array. */ servers = raft_calloc(c->n - 1, sizeof *servers); if (servers == NULL) { return RAFT_NOMEM; } /* Copy the first part of the servers array into a new array, excluding the * i'th server. */ for (j = 0; j < i; j++) { servers[j] = c->servers[j]; } /* Copy the second part of the servers array into a new array. */ for (j = i + 1; j < c->n; j++) { servers[j - 1] = c->servers[j]; } /* Release the address of the server that was deleted. */ raft_free(c->servers[i].address); /* Release the old servers array */ raft_free(c->servers); c->servers = servers; c->n--; return 0; } size_t configurationEncodedSize(const struct raft_configuration *c) { size_t n = 0; unsigned i; /* We need one byte for the encoding format version */ n++; /* Then 8 bytes for number of servers. */ n += sizeof(uint64_t); /* Then some space for each server. */ for (i = 0; i < c->n; i++) { struct raft_server *server = &c->servers[i]; assert(server->address != NULL); n += sizeof(uint64_t); /* Server ID */ n += strlen(server->address) + 1; /* Address */ n++; /* Voting flag */ }; return bytePad64(n); } void configurationEncodeToBuf(const struct raft_configuration *c, void *buf) { void *cursor = buf; unsigned i; /* Encoding format version */ bytePut8(&cursor, ENCODING_FORMAT); /* Number of servers. */ bytePut64Unaligned(&cursor, c->n); /* cursor might not be 8-byte aligned */ for (i = 0; i < c->n; i++) { struct raft_server *server = &c->servers[i]; assert(server->address != NULL); bytePut64Unaligned(&cursor, server->id); /* might not be aligned */ bytePutString(&cursor, server->address); assert(server->role < 255); bytePut8(&cursor, (uint8_t)server->role); }; } int configurationEncode(const struct raft_configuration *c, struct raft_buffer *buf) { assert(c != NULL); assert(buf != NULL); /* The configuration can't be empty. 
*/ assert(c->n > 0); buf->len = configurationEncodedSize(c); buf->base = raft_malloc(buf->len); if (buf->base == NULL) { return RAFT_NOMEM; } configurationEncodeToBuf(c, buf->base); return 0; } int configurationDecode(const struct raft_buffer *buf, struct raft_configuration *c) { const void *cursor; size_t i; size_t n; assert(c != NULL); assert(buf != NULL); /* TODO: use 'if' instead of assert for checking buffer boundaries */ assert(buf->len > 0); /* Check that the target configuration is empty. */ assert(c->n == 0); assert(c->servers == NULL); cursor = buf->base; /* Check the encoding format version */ if (byteGet8(&cursor) != ENCODING_FORMAT) { return RAFT_MALFORMED; } /* Read the number of servers. */ n = (size_t)byteGet64Unaligned(&cursor); /* Decode the individual servers. */ for (i = 0; i < n; i++) { raft_id id; const char *address; int role; int rv; /* Server ID. */ id = byteGet64Unaligned(&cursor); /* Server Address. */ address = byteGetString( &cursor, buf->len - (size_t)((uint8_t *)cursor - (uint8_t *)buf->base)); if (address == NULL) { return RAFT_MALFORMED; } /* Role code. */ role = byteGet8(&cursor); rv = configurationAdd(c, id, address, role); if (rv != 0) { return rv; } } return 0; } #define tracef(...) Tracef(r->tracer, __VA_ARGS__) void configurationTrace(const struct raft *r, struct raft_configuration *c, const char *msg) { if (r == NULL || c == NULL || !r->tracer->enabled ) { return; } tracef("%s", msg); tracef("=== CONFIG START ==="); unsigned i; struct raft_server *s; for (i = 0; i < c->n; i++) { s = &c->servers[i]; tracef("id:%llu address:%s role:%d", s->id, s->address, s->role); } tracef("=== CONFIG END ==="); } #undef tracef raft-0.11.3/src/configuration.h000066400000000000000000000055531415614527300163570ustar00rootroot00000000000000/* Modify and inspect @raft_configuration objects. */ #ifndef CONFIGURATION_H_ #define CONFIGURATION_H_ #include "../include/raft.h" /* Initialize an empty configuration. 
*/ void configurationInit(struct raft_configuration *c); /* Release all memory used by the given configuration. */ void configurationClose(struct raft_configuration *c); /* Add a server to the given configuration. */ int configurationAdd(struct raft_configuration *c, raft_id id, const char *address, int role); /* Return the number of servers with the RAFT_VOTER role. */ unsigned configurationVoterCount(const struct raft_configuration *c); /* Return the index of the server with the given ID (relative to the c->servers * array). If there's no server with the given ID, return the number of * servers. */ unsigned configurationIndexOf(const struct raft_configuration *c, raft_id id); /* Return the index of the RAFT_VOTER server with the given ID (relative to the * sub array of c->servers that has only voting servers). If there's no server * with the given ID, or if it's not flagged as voting, return the number of * servers. */ unsigned configurationIndexOfVoter(const struct raft_configuration *c, raft_id id); /* Get the server with the given ID, or #NULL if no matching server is found. */ const struct raft_server *configurationGet(const struct raft_configuration *c, raft_id id); /* Remove a server from a raft configuration. The given ID must match the one of * an existing server in the configuration. */ int configurationRemove(struct raft_configuration *c, raft_id id); /* Add all servers in c1 to c2 (which must be empty). */ int configurationCopy(const struct raft_configuration *src, struct raft_configuration *dst); /* Number of bytes needed to encode the given configuration object. */ size_t configurationEncodedSize(const struct raft_configuration *c); /* Encode the given configuration object to the given pre-allocated buffer, * which is assumed to be at least configurationEncodedSize(c) bytes. */ void configurationEncodeToBuf(const struct raft_configuration *c, void *buf); /* Encode the given configuration object. 
The memory of the returned buffer is * allocated using raft_malloc(), and client code is responsible for releasing * it when no longer needed. */ int configurationEncode(const struct raft_configuration *c, struct raft_buffer *buf); /* Populate a configuration object by decoding the given serialized payload. */ int configurationDecode(const struct raft_buffer *buf, struct raft_configuration *c); /* Output the configuration to the raft tracer */ void configurationTrace(const struct raft *r, struct raft_configuration *c, const char *msg); #endif /* CONFIGURATION_H_ */ raft-0.11.3/src/convert.c000066400000000000000000000152011415614527300151520ustar00rootroot00000000000000#include "convert.h" #include "assert.h" #include "configuration.h" #include "election.h" #include "log.h" #include "membership.h" #include "progress.h" #include "queue.h" #include "replication.h" #include "request.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Convenience for setting a new state value and asserting that the transition * is valid. */ static void convertSetState(struct raft *r, unsigned short new_state) { /* Check that the transition is legal, see Figure 3.3. Note that with * respect to the paper we have an additional "unavailable" state, which is * the initial or final state. */ tracef("old_state:%u new_state:%u", r->state, new_state); assert((r->state == RAFT_UNAVAILABLE && new_state == RAFT_FOLLOWER) || (r->state == RAFT_FOLLOWER && new_state == RAFT_CANDIDATE) || (r->state == RAFT_CANDIDATE && new_state == RAFT_FOLLOWER) || (r->state == RAFT_CANDIDATE && new_state == RAFT_LEADER) || (r->state == RAFT_LEADER && new_state == RAFT_FOLLOWER) || (r->state == RAFT_FOLLOWER && new_state == RAFT_UNAVAILABLE) || (r->state == RAFT_CANDIDATE && new_state == RAFT_UNAVAILABLE) || (r->state == RAFT_LEADER && new_state == RAFT_UNAVAILABLE)); r->state = new_state; } /* Clear follower state. 
*/ static void convertClearFollower(struct raft *r) { tracef("clear follower state"); r->follower_state.current_leader.id = 0; if (r->follower_state.current_leader.address != NULL) { raft_free(r->follower_state.current_leader.address); } r->follower_state.current_leader.address = NULL; } /* Clear candidate state. */ static void convertClearCandidate(struct raft *r) { tracef("clear candidate state"); if (r->candidate_state.votes != NULL) { raft_free(r->candidate_state.votes); r->candidate_state.votes = NULL; } } static void convertFailApply(struct raft_apply *req) { if (req != NULL && req->cb != NULL) { req->cb(req, RAFT_LEADERSHIPLOST, NULL); } } static void convertFailBarrier(struct raft_barrier *req) { if (req != NULL && req->cb != NULL) { req->cb(req, RAFT_LEADERSHIPLOST); } } static void convertFailChange(struct raft_change *req) { if (req != NULL && req->cb != NULL) { req->cb(req, RAFT_LEADERSHIPLOST); } } /* Clear leader state. */ static void convertClearLeader(struct raft *r) { tracef("clear leader state"); if (r->leader_state.progress != NULL) { raft_free(r->leader_state.progress); r->leader_state.progress = NULL; } /* Fail all outstanding requests */ while (!QUEUE_IS_EMPTY(&r->leader_state.requests)) { struct request *req; queue *head; head = QUEUE_HEAD(&r->leader_state.requests); QUEUE_REMOVE(head); req = QUEUE_DATA(head, struct request, queue); assert(req->type == RAFT_COMMAND || req->type == RAFT_BARRIER); switch (req->type) { case RAFT_COMMAND: convertFailApply((struct raft_apply *)req); break; case RAFT_BARRIER: convertFailBarrier((struct raft_barrier *)req); break; }; } /* Fail any promote request that is still outstanding because the server is * still catching up and no entry was submitted. 
*/ if (r->leader_state.change != NULL) { convertFailChange(r->leader_state.change); r->leader_state.change = NULL; } } /* Clear the current state */ static void convertClear(struct raft *r) { assert(r->state == RAFT_UNAVAILABLE || r->state == RAFT_FOLLOWER || r->state == RAFT_CANDIDATE || r->state == RAFT_LEADER); switch (r->state) { case RAFT_FOLLOWER: convertClearFollower(r); break; case RAFT_CANDIDATE: convertClearCandidate(r); break; case RAFT_LEADER: convertClearLeader(r); break; } } void convertToFollower(struct raft *r) { convertClear(r); convertSetState(r, RAFT_FOLLOWER); /* Reset election timer. */ electionResetTimer(r); r->follower_state.current_leader.id = 0; r->follower_state.current_leader.address = NULL; } int convertToCandidate(struct raft *r, bool disrupt_leader) { const struct raft_server *server; size_t n_voters = configurationVoterCount(&r->configuration); int rv; (void)server; /* Only used for assertions. */ convertClear(r); convertSetState(r, RAFT_CANDIDATE); /* Allocate the votes array. */ r->candidate_state.votes = raft_malloc(n_voters * sizeof(bool)); if (r->candidate_state.votes == NULL) { return RAFT_NOMEM; } r->candidate_state.disrupt_leader = disrupt_leader; r->candidate_state.in_pre_vote = disrupt_leader ? false : r->pre_vote; /* Fast-forward to leader if we're the only voting server in the * configuration. 
*/ server = configurationGet(&r->configuration, r->id); assert(server != NULL); assert(server->role == RAFT_VOTER); if (n_voters == 1) { tracef("self elect and convert to leader"); return convertToLeader(r); } /* Start a new election round */ rv = electionStart(r); if (rv != 0) { r->state = RAFT_FOLLOWER; raft_free(r->candidate_state.votes); return rv; } return 0; } int convertToLeader(struct raft *r) { int rv; convertClear(r); convertSetState(r, RAFT_LEADER); /* Reset timers */ r->election_timer_start = r->io->time(r->io); /* Reset apply requests queue */ QUEUE_INIT(&r->leader_state.requests); /* Allocate and initialize the progress array. */ rv = progressBuildArray(r); if (rv != 0) { return rv; } r->leader_state.change = NULL; /* Reset promotion state. */ r->leader_state.promotee_id = 0; r->leader_state.round_number = 0; r->leader_state.round_index = 0; r->leader_state.round_start = 0; /* By definition, all entries until the last_stored entry will be committed if * we are the only voter around. */ size_t n_voters = configurationVoterCount(&r->configuration); if (n_voters == 1 && (r->last_stored > r->commit_index)) { tracef("Apply log entries after self election %llu %llu", r->last_stored, r->commit_index); r->commit_index = r->last_stored; rv = replicationApply(r); } return rv; } void convertToUnavailable(struct raft *r) { /* Abort any pending leadership transfer request. */ if (r->transfer != NULL) { membershipLeadershipTransferClose(r); } convertClear(r); convertSetState(r, RAFT_UNAVAILABLE); } #undef tracef raft-0.11.3/src/convert.h000066400000000000000000000030721415614527300151620ustar00rootroot00000000000000/* Convert from one state to another. */ #ifndef CONVERT_H_ #define CONVERT_H_ #include "../include/raft.h" /* Convert from unavailable, or candidate or leader to follower. * * From Figure 3.1: * * If election timeout elapses without receiving AppendEntries RPC from * current leader or granting vote to candidate: convert to candidate. 
* * The above implies that we need to reset the election timer when converting to * follower. */ void convertToFollower(struct raft *r); /* Convert from follower to candidate, starting a new election. * * From Figure 3.1: * * On conversion to candidate, start election * * If the disrupt_leader flag is true, the server will set the disrupt leader * flag of the RequestVote messages it sends. */ int convertToCandidate(struct raft *r, bool disrupt_leader); /* Convert from candidate to leader. * * From Figure 3.1: * * Upon election: send initial empty AppendEntries RPC (heartbeat) to each * server. * * From Section 3.4: * * Once a candidate wins an election, it becomes leader. It then sends * heartbeat messages to all of the other servers to establish its authority * and prevent new elections. * * From Section 3.3: * * The leader maintains a nextIndex for each follower, which is the index * of the next log entry the leader will send to that follower. When a * leader first comes to power, it initializes all nextIndex values to the * index just after the last one in its log. */ int convertToLeader(struct raft *r); void convertToUnavailable(struct raft *r); #endif /* CONVERT_H_ */ raft-0.11.3/src/election.c000066400000000000000000000225441415614527300153040ustar00rootroot00000000000000#include "election.h" #include "assert.h" #include "configuration.h" #include "heap.h" #include "log.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Common fields between follower and candidate state. * * The follower_state and candidate_state structs in raft.h must be kept * consistent with this definition. */ struct followerOrCandidateState { unsigned randomized_election_timeout; }; /* Return a pointer to either the follower or candidate state. 
*/ struct followerOrCandidateState *getFollowerOrCandidateState(struct raft *r) { struct followerOrCandidateState *state; assert(r->state == RAFT_FOLLOWER || r->state == RAFT_CANDIDATE); if (r->state == RAFT_FOLLOWER) { state = (struct followerOrCandidateState *)&r->follower_state; } else { state = (struct followerOrCandidateState *)&r->candidate_state; } return state; } void electionResetTimer(struct raft *r) { struct followerOrCandidateState *state = getFollowerOrCandidateState(r); unsigned timeout = (unsigned)r->io->random(r->io, (int)r->election_timeout, 2 * (int)r->election_timeout); assert(timeout >= r->election_timeout); assert(timeout <= r->election_timeout * 2); state->randomized_election_timeout = timeout; r->election_timer_start = r->io->time(r->io); } bool electionTimerExpired(struct raft *r) { struct followerOrCandidateState *state = getFollowerOrCandidateState(r); raft_time now = r->io->time(r->io); return now - r->election_timer_start >= state->randomized_election_timeout; } static void sendRequestVoteCb(struct raft_io_send *send, int status) { (void)status; HeapFree(send); } /* Send a RequestVote RPC to the given server. */ static int electionSend(struct raft *r, const struct raft_server *server) { struct raft_message message; struct raft_io_send *send; raft_term term; int rv; assert(server->id != r->id); assert(server->id != 0); /* If we are in the pre-vote phase, we indicate our future term in the * request. 
*/ term = r->current_term; if (r->candidate_state.in_pre_vote) { term++; } message.type = RAFT_IO_REQUEST_VOTE; message.request_vote.term = term; message.request_vote.candidate_id = r->id; message.request_vote.last_log_index = logLastIndex(&r->log); message.request_vote.last_log_term = logLastTerm(&r->log); message.request_vote.disrupt_leader = r->candidate_state.disrupt_leader; message.request_vote.pre_vote = r->candidate_state.in_pre_vote; message.server_id = server->id; message.server_address = server->address; send = HeapMalloc(sizeof *send); if (send == NULL) { return RAFT_NOMEM; } send->data = r; rv = r->io->send(r->io, send, &message, sendRequestVoteCb); if (rv != 0) { HeapFree(send); return rv; } return 0; } int electionStart(struct raft *r) { raft_term term; size_t n_voters; size_t voting_index; size_t i; int rv; assert(r->state == RAFT_CANDIDATE); n_voters = configurationVoterCount(&r->configuration); voting_index = configurationIndexOfVoter(&r->configuration, r->id); /* This function should not be invoked if we are not a voting server, hence * voting_index must be lower than the number of servers in the * configuration (meaning that we are a voting server). */ assert(voting_index < r->configuration.n); /* Coherence check that configurationVoterCount and configurationIndexOfVoter * have returned something that makes sense. */ assert(n_voters <= r->configuration.n); assert(voting_index < n_voters); /* During pre-vote we don't increment our term, or reset our vote. Resetting * our vote could lead to double-voting if we were to receive a RequestVote * RPC during our Candidate state while we already voted for a server during * the term. 
*/ if (!r->candidate_state.in_pre_vote) { /* Increment current term */ term = r->current_term + 1; rv = r->io->set_term(r->io, term); if (rv != 0) { tracef("set_term failed %d", rv); goto err; } /* Vote for self */ rv = r->io->set_vote(r->io, r->id); if (rv != 0) { tracef("set_vote self failed %d", rv); goto err; } /* Update our cache too. */ r->current_term = term; r->voted_for = r->id; } /* Reset election timer. */ electionResetTimer(r); assert(r->candidate_state.votes != NULL); /* Initialize the votes array and send vote requests. */ for (i = 0; i < n_voters; i++) { if (i == voting_index) { r->candidate_state.votes[i] = true; /* We vote for ourselves */ } else { r->candidate_state.votes[i] = false; } } for (i = 0; i < r->configuration.n; i++) { const struct raft_server *server = &r->configuration.servers[i]; if (server->id == r->id || server->role != RAFT_VOTER) { continue; } rv = electionSend(r, server); if (rv != 0) { /* This is not a critical failure, let's just log it. */ tracef("failed to send vote request to server %llu: %s", server->id, raft_strerror(rv)); } } return 0; err: assert(rv != 0); return rv; } int electionVote(struct raft *r, const struct raft_request_vote *args, bool *granted) { const struct raft_server *local_server; raft_index local_last_index; raft_term local_last_term; bool is_transferee; /* Requester is the target of a leadership transfer */ int rv; assert(r != NULL); assert(args != NULL); assert(granted != NULL); local_server = configurationGet(&r->configuration, r->id); *granted = false; if (local_server == NULL || local_server->role != RAFT_VOTER) { tracef("local server is not voting -> not granting vote"); return 0; } is_transferee = r->transfer != NULL && r->transfer->id == args->candidate_id; if (!args->pre_vote && r->voted_for != 0 && r->voted_for != args->candidate_id && !is_transferee) { tracef("local server already voted -> not granting vote"); return 0; } /* Raft Dissertation 9.6: * > In the Pre-Vote algorithm, a candidate * > 
only increments its term if it first learns from a majority of the * > cluster that they would be willing * > to grant the candidate their votes (if the candidate's log is * > sufficiently up-to-date, and the voters * > have not received heartbeats from a valid leader for at least a baseline * > election timeout) * Arriving here means that in a pre-vote phase, we will cast our vote * if the candidate's log is sufficiently up-to-date, no matter what the * candidate's term is. We have already checked if we currently have a leader * upon reception of the RequestVote RPC, meaning the 2 conditions will be * satisfied if the candidate's log is up-to-date. * */ local_last_index = logLastIndex(&r->log); /* Our log is definitely not more up-to-date if it's empty! */ if (local_last_index == 0) { tracef("local log is empty -> granting vote"); goto grant_vote; } local_last_term = logLastTerm(&r->log); if (args->last_log_term < local_last_term) { /* The requesting server has last entry's log term lower than ours. */ tracef( "local last entry %llu has term %llu higher than %llu -> not " "granting", local_last_index, local_last_term, args->last_log_term); return 0; } if (args->last_log_term > local_last_term) { /* The requesting server has a more up-to-date log. */ tracef( "remote last entry %llu has term %llu higher than %llu -> " "granting vote", args->last_log_index, args->last_log_term, local_last_term); goto grant_vote; } /* The term of the last log entry is the same, so let's compare the length * of the log. */ assert(args->last_log_term == local_last_term); if (local_last_index <= args->last_log_index) { /* Our log is shorter or equal to the one of the requester. 
*/ tracef("remote log equal or longer than local -> granting vote"); goto grant_vote; } tracef("remote log shorter than local -> not granting vote"); return 0; grant_vote: if (!args->pre_vote) { rv = r->io->set_vote(r->io, args->candidate_id); if (rv != 0) { tracef("set_vote failed %d", rv); return rv; } r->voted_for = args->candidate_id; /* Reset the election timer. */ r->election_timer_start = r->io->time(r->io); } tracef("vote granted to %llu", args->candidate_id); *granted = true; return 0; } bool electionTally(struct raft *r, size_t voter_index) { size_t n_voters = configurationVoterCount(&r->configuration); size_t votes = 0; size_t i; size_t half = n_voters / 2; assert(r->state == RAFT_CANDIDATE); assert(r->candidate_state.votes != NULL); r->candidate_state.votes[voter_index] = true; for (i = 0; i < n_voters; i++) { if (r->candidate_state.votes[i]) { votes++; } } return votes >= half + 1; } #undef tracef raft-0.11.3/src/election.h000066400000000000000000000056401415614527300153070ustar00rootroot00000000000000/* Election-related logic and helpers. */ #ifndef ELECTION_H_ #define ELECTION_H_ #include "../include/raft.h" /* Reset the election_timer clock and set randomized_election_timeout to a * random value between election_timeout and 2 * election_timeout. * * From Section 3.4: * * Raft uses randomized election timeouts to ensure that split votes are rare * and that they are resolved quickly. To prevent split votes in the first * place, election timeouts are chosen randomly from a fixed interval (e.g., * 150-300 ms). This spreads out the servers so that in most cases only a * single server will time out. * * From Section 9.4: * * We used AvailSim to approximate a WAN spanning the continental US. Each * message was assigned a latency chosen randomly from the uniform range of * 30-40 ms, and the servers' election timeout range was set accordingly to * 300-600 ms (about 10-20 times the one-way network latency). 
When only one * of the five servers has failed, the average election completes within about * 475 ms, and 99.9% of elections complete within 1.5 s. Even when two of the * five servers have failed, the average election takes about 650 ms (about 20 * times the one-way network latency), and 99.9% of elections complete in 3 * s. We believe these election times are more than adequate for most WAN * deployments. * * Must be called in follower or candidate state. */ void electionResetTimer(struct raft *r); /* Return true if the election timer has expired. * * Must be called in follower or candidate state. */ bool electionTimerExpired(struct raft *r); /* Start a new election round. * * From Figure 3.1: * * [Rules for Servers] Candidates: On conversion to candidates, start * election: * * - Increment current term * - Vote for self * - Reset election timer * - Send RequestVote RPCs to all other servers * * From Section 3.4: * * To begin an election, a follower increments its current term and * transitions to candidate state. It then votes for itself and issues * RequestVote RPCs in parallel to each of the other servers in the * cluster. */ int electionStart(struct raft *r); /* Decide whether our vote should be granted to the requesting server and update * our state accordingly. * * From Figure 3.1: * * RequestVote RPC: Receiver Implementation: * * - If votedFor is null or candidateId, and candidate's log is at least as * up-to-date as receiver's log, grant vote. * * The outcome of the decision is stored through the @granted pointer. */ int electionVote(struct raft *r, const struct raft_request_vote *args, bool *granted); /* Update the votes array by adding the vote from the server at the given * index. Return true if with this vote the server has reached the majority of * votes and won elections. 
*/ bool electionTally(struct raft *r, size_t voter_index); #endif /* ELECTION_H_ */ raft-0.11.3/src/entry.c000066400000000000000000000036121415614527300146360ustar00rootroot00000000000000#include #include #include "assert.h" #include "entry.h" void entryBatchesDestroy(struct raft_entry *entries, const size_t n) { void *batch = NULL; size_t i; if (entries == NULL) { assert(n == 0); return; } assert(n > 0); for (i = 0; i < n; i++) { assert(entries[i].batch != NULL); if (entries[i].batch != batch) { batch = entries[i].batch; raft_free(batch); } } raft_free(entries); } int entryCopy(const struct raft_entry *src, struct raft_entry *dst) { dst->term = src->term; dst->type = src->type; dst->buf.len = src->buf.len; dst->buf.base = raft_malloc(dst->buf.len); if (dst->buf.len > 0 && dst->buf.base == NULL) { return RAFT_NOMEM; } memcpy(dst->buf.base, src->buf.base, dst->buf.len); dst->batch = NULL; return 0; } int entryBatchCopy(const struct raft_entry *src, struct raft_entry **dst, const size_t n) { size_t size = 0; void *batch; uint8_t *cursor; unsigned i; if (n == 0) { *dst = NULL; return 0; } /* Calculate the total size of the entries content and allocate the * batch. */ for (i = 0; i < n; i++) { size += src[i].buf.len; } batch = raft_malloc(size); if (batch == NULL) { return RAFT_NOMEM; } /* Copy the entries. */ *dst = raft_malloc(n * sizeof **dst); if (*dst == NULL) { raft_free(batch); return RAFT_NOMEM; } cursor = batch; for (i = 0; i < n; i++) { (*dst)[i].term = src[i].term; (*dst)[i].type = src[i].type; (*dst)[i].buf.base = cursor; (*dst)[i].buf.len = src[i].buf.len; (*dst)[i].batch = batch; memcpy((*dst)[i].buf.base, src[i].buf.base, src[i].buf.len); cursor += src[i].buf.len; } return 0; } raft-0.11.3/src/entry.h000066400000000000000000000012171415614527300146420ustar00rootroot00000000000000#ifndef ENTRY_H_ #define ENTRY_H_ #include "../include/raft.h" /* Release all memory associated with the given entries, including the array * itself. 
The entries are supposed to belong to one or more batches. */ void entryBatchesDestroy(struct raft_entry *entries, size_t n); /* Create a copy of a log entry, including its data. */ int entryCopy(const struct raft_entry *src, struct raft_entry *dst); /* Create a single batch of entries containing a copy of the given entries, * including their data. */ int entryBatchCopy(const struct raft_entry *src, struct raft_entry **dst, size_t n); #endif /* ENTRY_H */ raft-0.11.3/src/err.c000066400000000000000000000034431415614527300142670ustar00rootroot00000000000000#include "err.h" #include #include "../include/raft.h" #include "assert.h" #define WRAP_SEP ": " #define WRAP_SEP_LEN (size_t)strlen(WRAP_SEP) void errMsgWrap(char *e, const char *format) { size_t n = RAFT_ERRMSG_BUF_SIZE; size_t prefix_n; size_t prefix_and_sep_n; size_t trail_n; size_t i; /* Calculate the length of the prefix. */ prefix_n = strlen(format); /* If there isn't enough space for the ": " separator and at least one * character of the wrapped error message, then just print the prefix. */ if (prefix_n >= n - (WRAP_SEP_LEN + 1)) { /* We explicitly allow truncation here + silence clang about unknown * warning-group "-Wformat-truncation" */ #ifdef __GNUC__ #ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-truncation" #endif #endif ErrMsgPrintf(e, "%s", format); #ifdef __GNUC__ #ifndef __clang__ #pragma GCC diagnostic pop #endif #endif return; } /* Right-shift the wrapped message, to make room for the prefix. */ prefix_and_sep_n = prefix_n + WRAP_SEP_LEN; trail_n = strnlen(e, n - prefix_and_sep_n - 1); memmove(e + prefix_and_sep_n, e, trail_n); e[prefix_and_sep_n + trail_n] = 0; /* Print the prefix. */ ErrMsgPrintf(e, "%s", format); /* Print the separator. * * Avoid using strncpy(e->msg + prefix_n, WRAP_SEP, WRAP_SEP_LEN) since it * generates a warning. 
*/ for (i = 0; i < WRAP_SEP_LEN; i++) { e[prefix_n + i] = WRAP_SEP[i]; } } #define ERR_CODE_TO_STRING_CASE(CODE, MSG) \ case CODE: \ return MSG; const char *errCodeToString(int code) { switch (code) { ERR_CODE_TO_STRING_MAP(ERR_CODE_TO_STRING_CASE); default: return "unknown error"; } } raft-0.11.3/src/err.h000066400000000000000000000053011415614527300142670ustar00rootroot00000000000000/* Utilities around error handling. */ #ifndef ERROR_H_ #define ERROR_H_ #include #include #define ERR_CODE_TO_STRING_MAP(X) \ X(RAFT_NOMEM, "out of memory") \ X(RAFT_BADID, "server ID is not valid") \ X(RAFT_DUPLICATEID, "server ID already in use") \ X(RAFT_DUPLICATEADDRESS, "server address already in use") \ X(RAFT_BADROLE, "server role is not valid") \ X(RAFT_MALFORMED, "encoded data is malformed") \ X(RAFT_NOTLEADER, "server is not the leader") \ X(RAFT_LEADERSHIPLOST, "server has lost leadership") \ X(RAFT_SHUTDOWN, "server is shutting down") \ X(RAFT_CANTBOOTSTRAP, "bootstrap only works on new clusters") \ X(RAFT_CANTCHANGE, "a configuration change is already in progress") \ X(RAFT_CORRUPT, "persisted data is corrupted") \ X(RAFT_CANCELED, "operation canceled") \ X(RAFT_NAMETOOLONG, "resource name too long") \ X(RAFT_TOOBIG, "data is too big") \ X(RAFT_NOCONNECTION, "no connection to remote server available") \ X(RAFT_BUSY, "operation can't be performed at this time") \ X(RAFT_IOERR, "I/O error") /* Format an error message. */ #define ErrMsgPrintf(ERRMSG, ...) \ snprintf(ERRMSG, RAFT_ERRMSG_BUF_SIZE, __VA_ARGS__) /* Wrap the given error message with an additional prefix message.. */ #define ErrMsgWrapf(ERRMSG, ...) \ do { \ char _errmsg[RAFT_ERRMSG_BUF_SIZE]; \ ErrMsgPrintf(_errmsg, __VA_ARGS__); \ errMsgWrap(ERRMSG, _errmsg); \ } while (0) void errMsgWrap(char *e, const char *format); /* Transfer an error message from an object to another, wrapping it. 
*/ #define ErrMsgTransfer(ERRMSG1, ERRMSG2, FORMAT) \ memcpy(ERRMSG2, ERRMSG1, RAFT_ERRMSG_BUF_SIZE); \ ErrMsgWrapf(ERRMSG2, FORMAT) #define ErrMsgTransferf(ERRMSG1, ERRMSG2, FORMAT, ...) \ memcpy(ERRMSG2, ERRMSG1, RAFT_ERRMSG_BUF_SIZE); \ ErrMsgWrapf(ERRMSG2, FORMAT, __VA_ARGS__) /* Use the static error message for the error with the given code. */ #define ErrMsgFromCode(ERRMSG, CODE) \ ErrMsgPrintf(ERRMSG, "%s", errCodeToString(CODE)) /* Format the out of memory error message. */ #define ErrMsgOom(ERRMSG) ErrMsgFromCode(ERRMSG, RAFT_NOMEM) /* Convert a numeric raft error code to a human-readable error message. */ const char *errCodeToString(int code); #endif /* ERROR_H_ */ raft-0.11.3/src/fixture.c000066400000000000000000001442351415614527300151720ustar00rootroot00000000000000#include "../include/raft/fixture.h" #include #include #include #include #include "assert.h" #include "configuration.h" #include "entry.h" #include "log.h" #include "queue.h" #include "snapshot.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Defaults */ #define HEARTBEAT_TIMEOUT 100 #define INSTALL_SNAPSHOT_TIMEOUT 30000 #define ELECTION_TIMEOUT 1000 #define NETWORK_LATENCY 15 #define DISK_LATENCY 10 /* To keep in sync with raft.h */ #define N_MESSAGE_TYPES 6 /* Maximum number of peer stub instances connected to a certain stub * instance. This should be enough for testing purposes. */ #define MAX_PEERS 8 /* Fields common across all request types. */ #define REQUEST \ int type; /* Request code type. */ \ raft_time completion_time; /* When the request should be fulfilled. */ \ queue queue /* Link the I/O pending requests queue. */ /* Request type codes. */ enum { APPEND = 1, SEND, TRANSMIT, SNAPSHOT_PUT, SNAPSHOT_GET }; /* Abstract base type for an asynchronous request submitted to the stub I/o * implementation. */ struct ioRequest { REQUEST; }; /* Pending request to append entries to the log. 
*/ struct append { REQUEST; struct raft_io_append *req; const struct raft_entry *entries; unsigned n; unsigned start; /* Request timestamp. */ }; /* Pending request to send a message. */ struct send { REQUEST; struct raft_io_send *req; struct raft_message message; }; /* Pending request to store a snapshot. */ struct snapshot_put { REQUEST; unsigned trailing; struct raft_io_snapshot_put *req; const struct raft_snapshot *snapshot; }; /* Pending request to load a snapshot. */ struct snapshot_get { REQUEST; struct raft_io_snapshot_get *req; }; /* Message that has been written to the network and is waiting to be delivered * (or discarded). */ struct transmit { REQUEST; struct raft_message message; /* Message to deliver */ int timer; /* Deliver after this n of msecs. */ }; /* Information about a peer server. */ struct peer { struct io *io; /* The peer's I/O backend. */ bool connected; /* Whether a connection is established. */ bool saturated; /* Whether the established connection is saturated. */ }; /* Stub I/O implementation implementing all operations in-memory. */ struct io { struct raft_io *io; /* I/O object we're implementing. */ unsigned index; /* Fixture server index. */ raft_time *time; /* Global cluster time. */ raft_time next_tick; /* Time the next tick should occurs. */ /* Term and vote */ raft_term term; raft_id voted_for; /* Log */ struct raft_snapshot *snapshot; /* Latest snapshot */ struct raft_entry *entries; /* Array or persisted entries */ size_t n; /* Size of the persisted entries array */ /* Parameters passed via raft_io->init and raft_io->start */ raft_id id; const char *address; unsigned tick_interval; raft_io_tick_cb tick_cb; raft_io_recv_cb recv_cb; /* Queue of pending asynchronous requests, whose callbacks still haven't * been fired. */ queue requests; /* Peers connected to us. 
*/ struct peer peers[MAX_PEERS]; unsigned n_peers; unsigned randomized_election_timeout; /* Value returned by io->random() */ unsigned network_latency; /* Milliseconds to deliver RPCs */ unsigned disk_latency; /* Milliseconds to perform disk I/O */ struct { int countdown; /* Trigger the fault when this counter gets to zero. */ int n; /* Repeat the fault this many times. Default is -1. */ } fault; /* If flag i is true, messages of type i will be silently dropped. */ bool drop[N_MESSAGE_TYPES]; /* Counters of events that happened so far. */ unsigned n_send[N_MESSAGE_TYPES]; unsigned n_recv[N_MESSAGE_TYPES]; unsigned n_append; }; /* Advance the fault counters and return @true if an error should occur. */ static bool ioFaultTick(struct io *io) { /* If the countdown is negative, faults are disabled. */ if (io->fault.countdown < 0) { return false; } /* If the countdown didn't reach zero, it's still not come the time to * trigger faults. */ if (io->fault.countdown > 0) { io->fault.countdown--; return false; } assert(io->fault.countdown == 0); /* If n is negative we keep triggering the fault forever. */ if (io->fault.n < 0) { return true; } /* If n is positive we need to trigger the fault at least this time. */ if (io->fault.n > 0) { io->fault.n--; return true; } assert(io->fault.n == 0); /* We reached 'n', let's disable faults. */ io->fault.countdown--; return false; } static int ioMethodInit(struct raft_io *raft_io, raft_id id, const char *address) { struct io *io = raft_io->impl; io->id = id; io->address = address; return 0; } static int ioMethodStart(struct raft_io *raft_io, unsigned msecs, raft_io_tick_cb tick_cb, raft_io_recv_cb recv_cb) { struct io *io = raft_io->impl; if (ioFaultTick(io)) { return RAFT_IOERR; } io->tick_interval = msecs; io->tick_cb = tick_cb; io->recv_cb = recv_cb; io->next_tick = *io->time + io->tick_interval; return 0; } /* Flush an append entries request, appending its entries to the local in-memory * log. 
*/
static void ioFlushAppend(struct io *s, struct append *append)
{
    struct raft_entry *entries;
    unsigned i;

    /* Allocate an array for the old entries plus the new ones. */
    entries = raft_realloc(s->entries, (s->n + append->n) * sizeof *s->entries);
    assert(entries != NULL);

    /* Copy new entries into the new array. */
    for (i = 0; i < append->n; i++) {
        const struct raft_entry *src = &append->entries[i];
        struct raft_entry *dst = &entries[s->n + i];
        int rv = entryCopy(src, dst);
        assert(rv == 0);
    }

    s->entries = entries;
    s->n += append->n;

    /* Fire the user callback (if any) and release the request. */
    if (append->req->cb != NULL) {
        append->req->cb(append->req, 0);
    }
    raft_free(append);
}

/* Flush a snapshot put request, copying the snapshot data. */
static void ioFlushSnapshotPut(struct io *s, struct snapshot_put *r)
{
    int rv;

    /* Reuse the existing snapshot slot if there is one, otherwise allocate. */
    if (s->snapshot == NULL) {
        s->snapshot = raft_malloc(sizeof *s->snapshot);
        assert(s->snapshot != NULL);
    } else {
        snapshotClose(s->snapshot);
    }

    rv = snapshotCopy(r->snapshot, s->snapshot);
    assert(rv == 0);

    /* A zero trailing amount means no entries are retained: wipe the log. */
    if (r->trailing == 0) {
        rv = s->io->truncate(s->io, 1);
        assert(rv == 0);
    }

    if (r->req->cb != NULL) {
        r->req->cb(r->req, 0);
    }
    raft_free(r);
}

/* Flush a snapshot get request, returning to the client a copy of the local
 * snapshot (if any). */
static void ioFlushSnapshotGet(struct io *s, struct snapshot_get *r)
{
    struct raft_snapshot *snapshot;
    int rv;
    snapshot = raft_malloc(sizeof *snapshot);
    assert(snapshot != NULL);
    /* NOTE(review): s->snapshot is assumed non-NULL here — presumably callers
     * only issue a get when a snapshot exists; confirm against snapshotCopy. */
    rv = snapshotCopy(s->snapshot, snapshot);
    assert(rv == 0);
    r->req->cb(r->req, snapshot, 0);
    raft_free(r);
}

/* Search for the peer with the given ID. Return NULL if not found. */
static struct peer *ioGetPeer(struct io *io, raft_id id)
{
    unsigned i;
    for (i = 0; i < io->n_peers; i++) {
        struct peer *peer = &io->peers[i];
        if (peer->io->id == id) {
            return peer;
        }
    }
    return NULL;
}

/* Copy the dynamically allocated memory of an AppendEntries message.
 */
static void copyAppendEntries(const struct raft_append_entries *src,
                              struct raft_append_entries *dst)
{
    int rv;
    rv = entryBatchCopy(src->entries, &dst->entries, src->n_entries);
    assert(rv == 0);
    dst->n_entries = src->n_entries;
}

/* Copy the dynamically allocated memory of an InstallSnapshot message. */
static void copyInstallSnapshot(const struct raft_install_snapshot *src,
                                struct raft_install_snapshot *dst)
{
    int rv;
    rv = configurationCopy(&src->conf, &dst->conf);
    assert(rv == 0);
    /* dst->data.len was already set by the caller's shallow struct copy. */
    dst->data.base = raft_malloc(dst->data.len);
    assert(dst->data.base != NULL);
    memcpy(dst->data.base, src->data.base, src->data.len);
}

/* Flush a raft_io_send request, copying the message content into a new struct
 * transmit object and invoking the user callback. */
static void ioFlushSend(struct io *io, struct send *send)
{
    struct peer *peer;
    struct transmit *transmit;
    struct raft_message *src;
    struct raft_message *dst;
    int status;

    /* If the peer doesn't exist or was disconnected, fail the request. */
    peer = ioGetPeer(io, send->message.server_id);
    if (peer == NULL || !peer->connected) {
        status = RAFT_NOCONNECTION;
        goto out;
    }

    transmit = raft_malloc(sizeof *transmit);
    assert(transmit != NULL);

    transmit->type = TRANSMIT;
    transmit->completion_time = *io->time + io->network_latency;

    src = &send->message;
    dst = &transmit->message;

    QUEUE_PUSH(&io->requests, &transmit->queue);

    /* Shallow copy first, then deep-copy the dynamically allocated parts of
     * message types that carry them. */
    *dst = *src;
    switch (dst->type) {
        case RAFT_IO_APPEND_ENTRIES:
            /* Make a copy of the entries being sent */
            copyAppendEntries(&src->append_entries, &dst->append_entries);
            break;
        case RAFT_IO_INSTALL_SNAPSHOT:
            copyInstallSnapshot(&src->install_snapshot, &dst->install_snapshot);
            break;
    }

    /* tracef("io: flush: %s", describeMessage(&send->message)); */
    io->n_send[send->message.type]++;
    status = 0;

out:
    if (send->req->cb != NULL) {
        send->req->cb(send->req, status);
    }
    raft_free(send);
}

/* Release the memory used by the given message transmit object.
 */
static void ioDestroyTransmit(struct transmit *transmit)
{
    struct raft_message *message;
    message = &transmit->message;
    switch (message->type) {
        case RAFT_IO_APPEND_ENTRIES:
            /* All entries share one batch buffer; freeing the first entry's
             * batch releases it. */
            if (message->append_entries.entries != NULL) {
                raft_free(message->append_entries.entries[0].batch);
                raft_free(message->append_entries.entries);
            }
            break;
        case RAFT_IO_INSTALL_SNAPSHOT:
            raft_configuration_close(&message->install_snapshot.conf);
            raft_free(message->install_snapshot.data.base);
            break;
    }
    raft_free(transmit);
}

/* Flush all requests in the queue. */
static void ioFlushAll(struct io *io)
{
    while (!QUEUE_IS_EMPTY(&io->requests)) {
        queue *head;
        struct ioRequest *r;
        head = QUEUE_HEAD(&io->requests);
        QUEUE_REMOVE(head);
        r = QUEUE_DATA(head, struct ioRequest, queue);
        switch (r->type) {
            case APPEND:
                ioFlushAppend(io, (struct append *)r);
                break;
            case SEND:
                ioFlushSend(io, (struct send *)r);
                break;
            case TRANSMIT:
                ioDestroyTransmit((struct transmit *)r);
                break;
            case SNAPSHOT_PUT:
                ioFlushSnapshotPut(io, (struct snapshot_put *)r);
                break;
            case SNAPSHOT_GET:
                ioFlushSnapshotGet(io, (struct snapshot_get *)r);
                break;
            default:
                assert(0);
        }
    }
}

/* raft_io->close implementation: nothing to tear down, just fire the cb. */
static void ioMethodClose(struct raft_io *raft_io, raft_io_close_cb cb)
{
    if (cb != NULL) {
        cb(raft_io);
    }
}

/* raft_io->load implementation: return copies of the in-memory state. */
static int ioMethodLoad(struct raft_io *io,
                        raft_term *term,
                        raft_id *voted_for,
                        struct raft_snapshot **snapshot,
                        raft_index *start_index,
                        struct raft_entry **entries,
                        size_t *n_entries)
{
    struct io *s;
    int rv;
    s = io->impl;
    if (ioFaultTick(s)) {
        return RAFT_IOERR;
    }
    *term = s->term;
    *voted_for = s->voted_for;
    *start_index = 1;
    *n_entries = s->n;

    /* Make a copy of the persisted entries, storing their data into a single
     * batch. */
    rv = entryBatchCopy(s->entries, entries, s->n);
    assert(rv == 0);

    if (s->snapshot != NULL) {
        *snapshot = raft_malloc(sizeof **snapshot);
        assert(*snapshot != NULL);
        rv = snapshotCopy(s->snapshot, *snapshot);
        assert(rv == 0);
        *start_index = (*snapshot)->index + 1;
    } else {
        *snapshot = NULL;
    }

    return 0;
}

/* raft_io->bootstrap implementation: store an initial term-1 configuration
 * entry; fails with RAFT_CANTBOOTSTRAP if any state already exists. */
static int ioMethodBootstrap(struct raft_io *raft_io,
                             const struct raft_configuration *conf)
{
    struct io *io = raft_io->impl;
    struct raft_buffer buf;
    struct raft_entry *entries;
    int rv;

    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }

    if (io->term != 0) {
        return RAFT_CANTBOOTSTRAP;
    }

    assert(io->voted_for == 0);
    assert(io->snapshot == NULL);
    assert(io->entries == NULL);
    assert(io->n == 0);

    /* Encode the given configuration. */
    rv = configurationEncode(conf, &buf);
    if (rv != 0) {
        return rv;
    }

    entries = raft_calloc(1, sizeof *io->entries);
    if (entries == NULL) {
        return RAFT_NOMEM;
    }
    entries[0].term = 1;
    entries[0].type = RAFT_CHANGE;
    entries[0].buf = buf;

    io->term = 1;
    io->voted_for = 0;
    io->snapshot = NULL;
    io->entries = entries;
    io->n = 1;

    return 0;
}

static int ioMethodRecover(struct raft_io *io,
                           const struct raft_configuration *conf)
{
    /* TODO: implement this API */
    (void)io;
    (void)conf;
    return RAFT_IOERR;
}

/* raft_io->set_term implementation: also resets the vote, as per Raft. */
static int ioMethodSetTerm(struct raft_io *raft_io, const raft_term term)
{
    struct io *io = raft_io->impl;
    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }
    io->term = term;
    io->voted_for = 0;
    return 0;
}

static int ioMethodSetVote(struct raft_io *raft_io, const raft_id server_id)
{
    struct io *io = raft_io->impl;
    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }
    /* tracef("io: set vote: %d %d", server_id, io->index); */
    io->voted_for = server_id;
    return 0;
}

/* raft_io->append implementation: queue the request, to be completed after
 * the configured disk latency has elapsed. */
static int ioMethodAppend(struct raft_io *raft_io,
                          struct raft_io_append *req,
                          const struct raft_entry entries[],
                          unsigned n,
                          raft_io_append_cb cb)
{
    struct io *io = raft_io->impl;
    struct append *r;
    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }
    r = raft_malloc(sizeof *r);
    assert(r != NULL);
    r->type = APPEND;
    r->completion_time = *io->time + io->disk_latency;
    r->req = req;
    r->entries = entries;
    r->n = n;
    req->cb = cb;
    QUEUE_PUSH(&io->requests, &r->queue);
    return 0;
}

/* raft_io->truncate implementation: synchronously drop every persisted entry
 * from @index onwards. */
static int ioMethodTruncate(struct raft_io *raft_io, raft_index index)
{
    struct io *io = raft_io->impl;
    size_t n;
    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }
    n = (size_t)(index - 1); /* Number of entries left after truncation */
    if (n > 0) {
        struct raft_entry *entries;

        /* Create a new array of entries holding the non-truncated entries */
        entries = raft_malloc(n * sizeof *entries);
        if (entries == NULL) {
            return RAFT_NOMEM;
        }
        /* NOTE(review): assumes io->entries is non-NULL whenever n > 0,
         * i.e. index never exceeds the persisted log length — confirm. */
        memcpy(entries, io->entries, n * sizeof *io->entries);

        /* Release any truncated entry */
        if (io->entries != NULL) {
            size_t i;
            for (i = n; i < io->n; i++) {
                raft_free(io->entries[i].buf.base);
            }
            raft_free(io->entries);
        }

        io->entries = entries;
    } else {
        /* Release everything we have */
        if (io->entries != NULL) {
            size_t i;
            for (i = 0; i < io->n; i++) {
                raft_free(io->entries[i].buf.base);
            }
            raft_free(io->entries);
            io->entries = NULL;
        }
    }
    io->n = n;
    return 0;
}

/* raft_io->snapshot_put implementation: queue the request with disk latency. */
static int ioMethodSnapshotPut(struct raft_io *raft_io,
                               unsigned trailing,
                               struct raft_io_snapshot_put *req,
                               const struct raft_snapshot *snapshot,
                               raft_io_snapshot_put_cb cb)
{
    struct io *io = raft_io->impl;
    struct snapshot_put *r;
    r = raft_malloc(sizeof *r);
    assert(r != NULL);
    r->type = SNAPSHOT_PUT;
    r->req = req;
    r->req->cb = cb;
    r->snapshot = snapshot;
    r->completion_time = *io->time + io->disk_latency;
    r->trailing = trailing;
    QUEUE_PUSH(&io->requests, &r->queue);
    return 0;
}

/* raft_io->snapshot_get implementation: queue the request with disk latency. */
static int ioMethodSnapshotGet(struct raft_io *raft_io,
                               struct raft_io_snapshot_get *req,
                               raft_io_snapshot_get_cb cb)
{
    struct io *io = raft_io->impl;
    struct snapshot_get *r;
    r = raft_malloc(sizeof *r);
    assert(r != NULL);
    r->type = SNAPSHOT_GET;
    r->req = req;
    r->req->cb = cb;
    r->completion_time = *io->time + io->disk_latency;
    QUEUE_PUSH(&io->requests, &r->queue);
    return 0;
}

/* raft_io->time implementation: return the shared global cluster time. */
static raft_time ioMethodTime(struct raft_io *raft_io)
{
    struct io *io = raft_io->impl;
    return *io->time;
}

/* raft_io->random implementation: deterministic stub, always returns the
 * server's pre-assigned randomized election timeout. */
static int ioMethodRandom(struct raft_io *raft_io, int min, int max)
{
    struct io *io;
    (void)min;
    (void)max;
    io = raft_io->impl;
    return (int)io->randomized_election_timeout;
}

/* Queue up a request which will be processed later, when io_stub_flush()
 * is invoked. */
static int ioMethodSend(struct raft_io *raft_io,
                        struct raft_io_send *req,
                        const struct raft_message *message,
                        raft_io_send_cb cb)
{
    struct io *io = raft_io->impl;
    struct send *r;
    if (ioFaultTick(io)) {
        return RAFT_IOERR;
    }
    /* tracef("io: send: %s to server %d", describeMessage(message),
       message->server_id); */
    r = raft_malloc(sizeof *r);
    assert(r != NULL);
    r->type = SEND;
    r->req = req;
    r->message = *message;
    r->req->cb = cb;
    /* TODO: simulate the presence of an OS send buffer, whose available size
     * might delay the completion of send requests */
    r->completion_time = *io->time;
    QUEUE_PUSH(&io->requests, &r->queue);
    return 0;
}

/* Deliver a message to this server's recv callback and bump the counter. */
static void ioReceive(struct io *io, struct raft_message *message)
{
    /* tracef("io: recv: %s from server %d", describeMessage(message),
       message->server_id); */
    io->recv_cb(io->io, message);
    io->n_recv[message->type]++;
}

/* Deliver (or drop) a queued transmit object to its destination peer. */
static void ioDeliverTransmit(struct io *io, struct transmit *transmit)
{
    struct raft_message *message = &transmit->message;
    struct peer *peer; /* Destination peer */

    /* If this message type is in the drop list, let's discard it */
    if (io->drop[message->type - 1]) {
        ioDestroyTransmit(transmit);
        return;
    }

    peer = ioGetPeer(io, message->server_id);

    /* We don't have any peer with this ID or it's disconnected or if the
     * connection is saturated, let's drop the message */
    if (peer == NULL || !peer->connected || peer->saturated) {
        ioDestroyTransmit(transmit);
        return;
    }

    /* Update the message object with our details. */
    message->server_id = io->id;
    message->server_address = io->address;

    ioReceive(peer->io, message);
    raft_free(transmit);
}

/* Connect @raft_io to @other, enabling delivery of messages sent from @io to
 * @other.
 */
static void ioConnect(struct raft_io *raft_io, struct raft_io *other)
{
    struct io *io = raft_io->impl;
    struct io *io_other = other->impl;
    assert(io->n_peers < MAX_PEERS);
    io->peers[io->n_peers].io = io_other;
    io->peers[io->n_peers].connected = true;
    io->peers[io->n_peers].saturated = false;
    io->n_peers++;
}

/* Return whether the connection with the given peer is saturated. */
static bool ioSaturated(struct raft_io *raft_io, struct raft_io *other)
{
    struct io *io = raft_io->impl;
    struct io *io_other = other->impl;
    struct peer *peer;
    peer = ioGetPeer(io, io_other->id);
    return peer != NULL && peer->saturated;
}

/* Disconnect @raft_io and @other, causing calls to @io->send() to fail
 * asynchronously when sending messages to @other. */
static void ioDisconnect(struct raft_io *raft_io, struct raft_io *other)
{
    struct io *io = raft_io->impl;
    struct io *io_other = other->impl;
    struct peer *peer;
    peer = ioGetPeer(io, io_other->id);
    assert(peer != NULL);
    peer->connected = false;
}

/* Reconnect @raft_io and @other. */
static void ioReconnect(struct raft_io *raft_io, struct raft_io *other)
{
    struct io *io = raft_io->impl;
    struct io *io_other = other->impl;
    struct peer *peer;
    peer = ioGetPeer(io, io_other->id);
    assert(peer != NULL);
    peer->connected = true;
}

/* Saturate the connection from @io to @other, causing messages sent from @io to
 * @other to be dropped. */
static void ioSaturate(struct raft_io *io, struct raft_io *other)
{
    struct io *s;
    struct io *s_other;
    struct peer *peer;
    s = io->impl;
    s_other = other->impl;
    peer = ioGetPeer(s, s_other->id);
    assert(peer != NULL && peer->connected);
    peer->saturated = true;
}

/* Desaturate the connection from @raft_io to @other, re-enabling delivery of
 * messages sent from @raft_io to @other. */
static void ioDesaturate(struct raft_io *raft_io, struct raft_io *other)
{
    struct io *io = raft_io->impl;
    struct io *io_other = other->impl;
    struct peer *peer;
    peer = ioGetPeer(io, io_other->id);
    assert(peer != NULL && peer->connected);
    peer->saturated = false;
}

/* Enable or disable silently dropping all outgoing messages of type @type. */
void ioDrop(struct io *io, int type, bool flag)
{
    io->drop[type - 1] = flag;
}

/* Allocate and initialize a stub I/O instance, wiring all raft_io methods. */
static int ioInit(struct raft_io *raft_io, unsigned index, raft_time *time)
{
    struct io *io;
    io = raft_malloc(sizeof *io);
    assert(io != NULL);
    io->io = raft_io;
    io->index = index;
    io->time = time;
    io->term = 0;
    io->voted_for = 0;
    io->snapshot = NULL;
    io->entries = NULL;
    io->n = 0;
    QUEUE_INIT(&io->requests);
    io->n_peers = 0;
    /* Stagger the election timeouts by server index so elections in the
     * fixture are deterministic. */
    io->randomized_election_timeout = ELECTION_TIMEOUT + index * 100;
    io->network_latency = NETWORK_LATENCY;
    io->disk_latency = DISK_LATENCY;
    io->fault.countdown = -1;
    io->fault.n = -1;
    memset(io->drop, 0, sizeof io->drop);
    memset(io->n_send, 0, sizeof io->n_send);
    memset(io->n_recv, 0, sizeof io->n_recv);
    io->n_append = 0;
    raft_io->impl = io;
    raft_io->init = ioMethodInit;
    raft_io->close = ioMethodClose;
    raft_io->start = ioMethodStart;
    raft_io->load = ioMethodLoad;
    raft_io->bootstrap = ioMethodBootstrap;
    raft_io->recover = ioMethodRecover;
    raft_io->set_term = ioMethodSetTerm;
    raft_io->set_vote = ioMethodSetVote;
    raft_io->append = ioMethodAppend;
    raft_io->truncate = ioMethodTruncate;
    raft_io->send = ioMethodSend;
    raft_io->snapshot_put = ioMethodSnapshotPut;
    raft_io->snapshot_get = ioMethodSnapshotGet;
    raft_io->time = ioMethodTime;
    raft_io->random = ioMethodRandom;
    return 0;
}

/* Release all memory held by the given stub I/O implementation.
*/ void ioClose(struct raft_io *raft_io) { struct io *io = raft_io->impl; size_t i; for (i = 0; i < io->n; i++) { struct raft_entry *entry = &io->entries[i]; raft_free(entry->buf.base); } if (io->entries != NULL) { raft_free(io->entries); } if (io->snapshot != NULL) { snapshotClose(io->snapshot); raft_free(io->snapshot); } raft_free(io); } /* Custom emit tracer function which include the server ID. */ static void emit(struct raft_tracer *t, const char *file, int line, const char *message) { unsigned id = *(unsigned *)t->impl; fprintf(stderr, "%d: %30s:%*d - %s\n", id, file, 3, line, message); } static int serverInit(struct raft_fixture *f, unsigned i, struct raft_fsm *fsm) { int rv; struct raft_fixture_server *s = &f->servers[i]; s->alive = true; s->id = i + 1; sprintf(s->address, "%llu", s->id); rv = ioInit(&s->io, i, &f->time); if (rv != 0) { return rv; } rv = raft_init(&s->raft, &s->io, fsm, s->id, s->address); if (rv != 0) { return rv; } raft_set_election_timeout(&s->raft, ELECTION_TIMEOUT); raft_set_heartbeat_timeout(&s->raft, HEARTBEAT_TIMEOUT); raft_set_install_snapshot_timeout(&s->raft, INSTALL_SNAPSHOT_TIMEOUT); s->tracer.impl = (void *)&s->id; s->tracer.emit = emit; s->raft.tracer = &s->tracer; raft_tracer_maybe_enable(&s->tracer, true); return 0; } static void serverClose(struct raft_fixture_server *s) { raft_close(&s->raft, NULL); ioClose(&s->io); } /* Connect the server with the given index to all others */ static void serverConnectToAll(struct raft_fixture *f, unsigned i) { unsigned j; for (j = 0; j < f->n; j++) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; if (i == j) { continue; } ioConnect(io1, io2); } } int raft_fixture_init(struct raft_fixture *f, unsigned n, struct raft_fsm *fsms) { unsigned i; int rc; assert(n >= 1); f->time = 0; f->n = n; /* Initialize all servers */ for (i = 0; i < n; i++) { rc = serverInit(f, i, &fsms[i]); if (rc != 0) { return rc; } } /* Connect all servers to each another */ for (i = 
0; i < f->n; i++) { serverConnectToAll(f, i); } logInit(&f->log); f->commit_index = 0; f->hook = NULL; return 0; } void raft_fixture_close(struct raft_fixture *f) { unsigned i; for (i = 0; i < f->n; i++) { struct io *io = f->servers[i].io.impl; ioFlushAll(io); } for (i = 0; i < f->n; i++) { serverClose(&f->servers[i]); } logClose(&f->log); } int raft_fixture_configuration(struct raft_fixture *f, unsigned n_voting, struct raft_configuration *configuration) { unsigned i; assert(f->n > 0); assert(n_voting > 0); assert(n_voting <= f->n); raft_configuration_init(configuration); for (i = 0; i < f->n; i++) { struct raft_fixture_server *s; int role = i < n_voting ? RAFT_VOTER : RAFT_STANDBY; int rv; s = &f->servers[i]; rv = raft_configuration_add(configuration, s->id, s->address, role); if (rv != 0) { return rv; } } return 0; } int raft_fixture_bootstrap(struct raft_fixture *f, struct raft_configuration *configuration) { unsigned i; for (i = 0; i < f->n; i++) { struct raft *raft = raft_fixture_get(f, i); int rv; rv = raft_bootstrap(raft, configuration); if (rv != 0) { return rv; } } return 0; } int raft_fixture_start(struct raft_fixture *f) { unsigned i; int rv; for (i = 0; i < f->n; i++) { struct raft_fixture_server *s = &f->servers[i]; rv = raft_start(&s->raft); if (rv != 0) { return rv; } } return 0; } unsigned raft_fixture_n(struct raft_fixture *f) { return f->n; } raft_time raft_fixture_time(struct raft_fixture *f) { return f->time; } struct raft *raft_fixture_get(struct raft_fixture *f, unsigned i) { assert(i < f->n); return &f->servers[i].raft; } bool raft_fixture_alive(struct raft_fixture *f, unsigned i) { assert(i < f->n); return f->servers[i].alive; } unsigned raft_fixture_leader_index(struct raft_fixture *f) { if (f->leader_id != 0) { return (unsigned)(f->leader_id - 1); } return f->n; } raft_id raft_fixture_voted_for(struct raft_fixture *f, unsigned i) { struct io *io = f->servers[i].io.impl; return io->voted_for; } /* Update the leader and check for election 
 safety.
 *
 * From figure 3.2:
 *
 *   Election Safety -> At most one leader can be elected in a given
 *   term.
 *
 * Return true if the current leader turns out to be different from the one at
 * the time this function was called.
 */
static bool updateLeaderAndCheckElectionSafety(struct raft_fixture *f)
{
    raft_id leader_id = 0;
    unsigned leader_i = 0;
    raft_term leader_term = 0;
    unsigned i;
    bool changed;

    for (i = 0; i < f->n; i++) {
        struct raft *raft = raft_fixture_get(f, i);
        unsigned j;

        /* If the server is not alive or is not the leader, skip to the next
         * server. */
        if (!raft_fixture_alive(f, i) || raft_state(raft) != RAFT_LEADER) {
            continue;
        }

        /* Check that no other server is leader for this term. */
        for (j = 0; j < f->n; j++) {
            struct raft *other = raft_fixture_get(f, j);
            if (other->id == raft->id || other->state != RAFT_LEADER) {
                continue;
            }
            if (other->current_term == raft->current_term) {
                fprintf(stderr,
                        "server %llu and %llu are both leaders in term %llu",
                        raft->id, other->id, raft->current_term);
                abort();
            }
        }

        /* Remember the leader with the highest term seen so far. */
        if (raft->current_term > leader_term) {
            leader_id = raft->id;
            leader_i = i;
            leader_term = raft->current_term;
        }
    }

    /* Check that the leader is stable, in the sense that it has been
     * acknowledged by all alive servers connected to it, and those servers
     * together with the leader form a majority. */
    if (leader_id != 0) {
        unsigned n_acks = 0;
        bool acked = true;
        unsigned n_quorum = 0;

        for (i = 0; i < f->n; i++) {
            struct raft *raft = raft_fixture_get(f, i);
            const struct raft_server *server =
                configurationGet(&raft->configuration, raft->id);

            /* If the server is not in the configuration or is idle, then don't
             * count it. */
            if (server == NULL || server->role == RAFT_SPARE) {
                continue;
            }
            n_quorum++;

            /* If this server is itself the leader, or it's not alive or it's
             * not connected to the leader, then don't count it in for
             * stability. */
            if (i == leader_i || !raft_fixture_alive(f, i) ||
                raft_fixture_saturated(f, leader_i, i)) {
                continue;
            }

            /* A server acknowledges the leader only if it's a follower in the
             * leader's term which recognizes exactly that leader. */
            if (raft->current_term != leader_term) {
                acked = false;
                break;
            }
            if (raft->state != RAFT_FOLLOWER) {
                acked = false;
                break;
            }
            if (raft->follower_state.current_leader.id == 0) {
                acked = false;
                break;
            }
            if (raft->follower_state.current_leader.id != leader_id) {
                acked = false;
                break;
            }

            n_acks++;
        }

        if (!acked || n_acks < (n_quorum / 2)) {
            leader_id = 0;
        }
    }

    changed = leader_id != f->leader_id;
    f->leader_id = leader_id;

    return changed;
}

/* Check for leader append-only.
 *
 * From figure 3.2:
 *
 *   Leader Append-Only -> A leader never overwrites or deletes entries in its
 *   own log; it only appends new entries.
 */
static void checkLeaderAppendOnly(struct raft_fixture *f)
{
    struct raft *raft;
    raft_index index;
    raft_index last = logLastIndex(&f->log);

    /* If the cached log is empty it means there was no leader before. */
    if (last == 0) {
        return;
    }

    /* If there's no new leader, just return. */
    if (f->leader_id == 0) {
        return;
    }

    raft = raft_fixture_get(f, (unsigned)f->leader_id - 1);
    last = logLastIndex(&f->log);
    for (index = 1; index <= last; index++) {
        const struct raft_entry *entry1;
        const struct raft_entry *entry2;
        size_t i;
        entry1 = logGet(&f->log, index);
        entry2 = logGet(&raft->log, index);
        assert(entry1 != NULL);

        /* Check if the entry was snapshotted. */
        if (entry2 == NULL) {
            assert(raft->log.snapshot.last_index >= index);
            continue;
        }

        /* Entry was not overwritten. */
        assert(entry1->type == entry2->type);
        assert(entry1->term == entry2->term);
        for (i = 0; i < entry1->buf.len; i++) {
            assert(((uint8_t *)entry1->buf.base)[i] ==
                   ((uint8_t *)entry2->buf.base)[i]);
        }
    }
}

/* Make a copy of the current leader log, in order to perform the Leader
 * Append-Only check at the next iteration.
 */
static void copyLeaderLog(struct raft_fixture *f)
{
    struct raft *raft = raft_fixture_get(f, (unsigned)f->leader_id - 1);
    struct raft_entry *entries;
    unsigned n;
    size_t i;
    int rv;

    /* Discard the previous cached copy and rebuild it from scratch. */
    logClose(&f->log);
    logInit(&f->log);
    rv = logAcquire(&raft->log, 1, &entries, &n);
    assert(rv == 0);
    for (i = 0; i < n; i++) {
        struct raft_entry *entry = &entries[i];
        struct raft_buffer buf;
        buf.len = entry->buf.len;
        buf.base = raft_malloc(buf.len);
        assert(buf.base != NULL);
        memcpy(buf.base, entry->buf.base, buf.len);
        rv = logAppend(&f->log, entry->term, entry->type, &buf, NULL);
        assert(rv == 0);
    }
    logRelease(&raft->log, 1, entries, n);
}

/* Update the commit index to match the one from the current leader. */
static void updateCommitIndex(struct raft_fixture *f)
{
    struct raft *raft = raft_fixture_get(f, (unsigned)f->leader_id - 1);
    if (raft->commit_index > f->commit_index) {
        f->commit_index = raft->commit_index;
    }
}

/* Return the lowest tick time across all servers, along with the associated
 * server index */
static void getLowestTickTime(struct raft_fixture *f, raft_time *t, unsigned *i)
{
    unsigned j;
    *t = (raft_time)-1 /* Maximum value */;
    for (j = 0; j < f->n; j++) {
        struct io *io = f->servers[j].io.impl;
        if (io->next_tick < *t) {
            *t = io->next_tick;
            *i = j;
        }
    }
}

/* Return the completion time of the request with the lowest completion time
 * across all servers, along with the associated server index. */
static void getLowestRequestCompletionTime(struct raft_fixture *f,
                                           raft_time *t,
                                           unsigned *i)
{
    unsigned j;
    *t = (raft_time)-1 /* Maximum value */;
    for (j = 0; j < f->n; j++) {
        struct io *io = f->servers[j].io.impl;
        queue *head;
        QUEUE_FOREACH(head, &io->requests)
        {
            struct ioRequest *r = QUEUE_DATA(head, struct ioRequest, queue);
            if (r->completion_time < *t) {
                *t = r->completion_time;
                *i = j;
            }
        }
    }
}

/* Fire the tick callback of the i'th server.
 */
static void fireTick(struct raft_fixture *f, unsigned i)
{
    struct io *io = f->servers[i].io.impl;
    /* Advance the global clock to this server's tick time. */
    f->time = io->next_tick;
    f->event.server_index = i;
    f->event.type = RAFT_FIXTURE_TICK;
    io->next_tick += io->tick_interval;
    io->tick_cb(io->io);
}

/* Complete the first request with completion time @t on the @i'th server. */
static void completeRequest(struct raft_fixture *f, unsigned i, raft_time t)
{
    struct io *io = f->servers[i].io.impl;
    queue *head;
    struct ioRequest *r = NULL;
    bool found = false;
    f->time = t;
    f->event.server_index = i;
    QUEUE_FOREACH(head, &io->requests)
    {
        r = QUEUE_DATA(head, struct ioRequest, queue);
        if (r->completion_time == t) {
            found = true;
            break;
        }
    }
    assert(found);
    QUEUE_REMOVE(head);
    switch (r->type) {
        case APPEND:
            ioFlushAppend(io, (struct append *)r);
            f->event.type = RAFT_FIXTURE_DISK;
            break;
        case SEND:
            ioFlushSend(io, (struct send *)r);
            f->event.type = RAFT_FIXTURE_NETWORK;
            break;
        case TRANSMIT:
            ioDeliverTransmit(io, (struct transmit *)r);
            f->event.type = RAFT_FIXTURE_NETWORK;
            break;
        case SNAPSHOT_PUT:
            ioFlushSnapshotPut(io, (struct snapshot_put *)r);
            f->event.type = RAFT_FIXTURE_DISK;
            break;
        case SNAPSHOT_GET:
            ioFlushSnapshotGet(io, (struct snapshot_get *)r);
            f->event.type = RAFT_FIXTURE_DISK;
            break;
        default:
            assert(0);
    }
}

struct raft_fixture_event *raft_fixture_step(struct raft_fixture *f)
{
    raft_time tick_time;
    raft_time completion_time;
    unsigned i = f->n;
    unsigned j = f->n;

    getLowestTickTime(f, &tick_time, &i);
    getLowestRequestCompletionTime(f, &completion_time, &j);

    assert(i < f->n || j < f->n);

    /* Fire whichever event is earliest; ties are broken in favor of ticks
     * on lower-indexed servers. */
    if (tick_time < completion_time ||
        (tick_time == completion_time && i <= j)) {
        fireTick(f, i);
    } else {
        completeRequest(f, j, completion_time);
    }

    /* If the leader has not changed check the Leader Append-Only
     * guarantee. */
    if (!updateLeaderAndCheckElectionSafety(f)) {
        checkLeaderAppendOnly(f);
    }

    /* If we have a leader, update leader-related state. */
    if (f->leader_id != 0) {
        copyLeaderLog(f);
        updateCommitIndex(f);
    }

    if (f->hook != NULL) {
        f->hook(f, &f->event);
    }

    return &f->event;
}

struct raft_fixture_event *raft_fixture_step_n(struct raft_fixture *f,
                                               unsigned n)
{
    unsigned i;
    assert(n > 0);
    for (i = 0; i < n - 1; i++) {
        raft_fixture_step(f);
    }
    return raft_fixture_step(f);
}

bool raft_fixture_step_until(struct raft_fixture *f,
                             bool (*stop)(struct raft_fixture *f, void *arg),
                             void *arg,
                             unsigned max_msecs)
{
    raft_time start = f->time;
    while (!stop(f, arg) && (f->time - start) < max_msecs) {
        raft_fixture_step(f);
    }
    /* Return whether the stop condition was met before the deadline. */
    return f->time - start < max_msecs;
}

/* A step function which return always false, forcing raft_fixture_step_n to
 * advance time at each iteration. */
static bool spin(struct raft_fixture *f, void *arg)
{
    (void)f;
    (void)arg;
    return false;
}

void raft_fixture_step_until_elapsed(struct raft_fixture *f, unsigned msecs)
{
    raft_fixture_step_until(f, spin, NULL, msecs);
}

static bool hasLeader(struct raft_fixture *f, void *arg)
{
    (void)arg;
    return f->leader_id != 0;
}

bool raft_fixture_step_until_has_leader(struct raft_fixture *f,
                                        unsigned max_msecs)
{
    return raft_fixture_step_until(f, hasLeader, NULL, max_msecs);
}

static bool hasNoLeader(struct raft_fixture *f, void *arg)
{
    (void)arg;
    return f->leader_id == 0;
}

bool raft_fixture_step_until_has_no_leader(struct raft_fixture *f,
                                           unsigned max_msecs)
{
    return raft_fixture_step_until(f, hasNoLeader, NULL, max_msecs);
}

/* Enable/disable dropping outgoing messages of a certain type from all servers
 * except one. */
static void dropAllExcept(struct raft_fixture *f, int type, bool flag, unsigned i)
{
    unsigned j;
    for (j = 0; j < f->n; j++) {
        struct raft_fixture_server *s = &f->servers[j];
        if (j == i) {
            continue;
        }
        ioDrop(s->io.impl, type, flag);
    }
}

/* Set the randomized election timeout of the given server to the minimum value
 * compatible with its current state and timers.
 */
static void minimizeRandomizedElectionTimeout(struct raft_fixture *f, unsigned i)
{
    struct raft *raft = &f->servers[i].raft;
    raft_time now = raft->io->time(raft->io);
    unsigned timeout = raft->election_timeout;
    assert(raft->state == RAFT_FOLLOWER);

    /* If the minimum election timeout value would make the timer expire in the
     * past, cap it. */
    if (now - raft->election_timer_start > timeout) {
        timeout = (unsigned)(now - raft->election_timer_start);
    }

    raft->follower_state.randomized_election_timeout = timeout;
}

/* Set the randomized election timeout to the maximum value on all servers
 * except the given one. */
static void maximizeAllRandomizedElectionTimeoutsExcept(struct raft_fixture *f,
                                                        unsigned i)
{
    unsigned j;
    for (j = 0; j < f->n; j++) {
        struct raft *raft = &f->servers[j].raft;
        unsigned timeout = raft->election_timeout * 2;
        if (j == i) {
            continue;
        }
        assert(raft->state == RAFT_FOLLOWER);
        raft->follower_state.randomized_election_timeout = timeout;
    }
}

void raft_fixture_hook(struct raft_fixture *f, raft_fixture_event_cb hook)
{
    f->hook = hook;
}

void raft_fixture_elect(struct raft_fixture *f, unsigned i)
{
    struct raft *raft = raft_fixture_get(f, i);
    unsigned j;

    /* Make sure there's currently no leader. */
    assert(f->leader_id == 0);

    /* Make sure that the given server is voting. */
    assert(configurationGet(&raft->configuration, raft->id)->role ==
           RAFT_VOTER);

    /* Make sure all servers are currently followers. */
    for (j = 0; j < f->n; j++) {
        assert(raft_state(&f->servers[j].raft) == RAFT_FOLLOWER);
    }

    /* Pretend that the last randomized election timeout was set at the maximum
     * value on all servers except the one to be elected, which is instead set
     * to the minimum possible value compatible with its current state. */
    minimizeRandomizedElectionTimeout(f, i);
    maximizeAllRandomizedElectionTimeoutsExcept(f, i);

    raft_fixture_step_until_has_leader(f, ELECTION_TIMEOUT * 20);
    assert(f->leader_id == raft->id);
}

void raft_fixture_depose(struct raft_fixture *f)
{
    unsigned leader_i;

    /* Make sure there's a leader. */
    assert(f->leader_id != 0);
    leader_i = (unsigned)f->leader_id - 1;
    assert(raft_state(&f->servers[leader_i].raft) == RAFT_LEADER);

    /* Set a very large election timeout on all followers, to prevent them from
     * starting an election. */
    maximizeAllRandomizedElectionTimeoutsExcept(f, leader_i);

    /* Prevent all servers from sending append entries results, so the leader
     * will eventually step down. */
    dropAllExcept(f, RAFT_IO_APPEND_ENTRIES_RESULT, true, leader_i);

    raft_fixture_step_until_has_no_leader(f, ELECTION_TIMEOUT * 3);
    assert(f->leader_id == 0);

    /* Re-enable append entries results. */
    dropAllExcept(f, RAFT_IO_APPEND_ENTRIES_RESULT, false, leader_i);
}

/* Argument for hasAppliedIndex(): if i == f->n, all servers must have
 * applied the index, otherwise only server i. */
struct step_apply
{
    unsigned i;
    raft_index index;
};

static bool hasAppliedIndex(struct raft_fixture *f, void *arg)
{
    struct step_apply *apply = (struct step_apply *)arg;
    struct raft *raft;
    unsigned n = 0;
    unsigned i;

    if (apply->i < f->n) {
        raft = raft_fixture_get(f, apply->i);
        return raft_last_applied(raft) >= apply->index;
    }

    for (i = 0; i < f->n; i++) {
        raft = raft_fixture_get(f, i);
        if (raft_last_applied(raft) >= apply->index) {
            n++;
        }
    }
    return n == f->n;
}

bool raft_fixture_step_until_applied(struct raft_fixture *f,
                                     unsigned i,
                                     raft_index index,
                                     unsigned max_msecs)
{
    struct step_apply apply = {i, index};
    return raft_fixture_step_until(f, hasAppliedIndex, &apply, max_msecs);
}

struct step_state
{
    unsigned i;
    int state;
};

static bool hasState(struct raft_fixture *f, void *arg)
{
    struct step_state *target = (struct step_state *)arg;
    struct raft *raft;
    raft = raft_fixture_get(f, target->i);
    return raft_state(raft) == target->state;
}

bool raft_fixture_step_until_state_is(struct raft_fixture *f,
                                      unsigned i,
                                      int state,
                                      unsigned max_msecs)
{
    struct step_state target = {i, state};
    return raft_fixture_step_until(f, hasState, &target, max_msecs);
}

struct step_term
{
    unsigned i;
    raft_term term;
};

static bool hasTerm(struct raft_fixture *f, void *arg)
{
    struct step_term *target = (struct step_term *)arg;
    struct raft *raft;
    raft = raft_fixture_get(f, target->i);
    return raft->current_term == target->term;
}

bool raft_fixture_step_until_term_is(struct raft_fixture *f,
                                     unsigned i,
                                     raft_term term,
                                     unsigned max_msecs)
{
    struct step_term target = {i, term};
    return raft_fixture_step_until(f, hasTerm, &target, max_msecs);
}

struct step_vote
{
    unsigned i;
    unsigned j;
};

static bool hasVotedFor(struct raft_fixture *f, void *arg)
{
    struct step_vote *target = (struct step_vote *)arg;
    struct raft *raft;
    raft = raft_fixture_get(f, target->i);
    /* Server IDs are index + 1 (see serverInit). */
    return raft->voted_for == target->j + 1;
}

bool raft_fixture_step_until_voted_for(struct raft_fixture *f,
                                       unsigned i,
                                       unsigned j,
                                       unsigned max_msecs)
{
    struct step_vote target = {i, j};
    return raft_fixture_step_until(f, hasVotedFor, &target, max_msecs);
}

struct step_deliver
{
    unsigned i;
    unsigned j;
};

/* Return true when server i has no pending send/transmit addressed to j. */
static bool hasDelivered(struct raft_fixture *f, void *arg)
{
    struct step_deliver *target = (struct step_deliver *)arg;
    struct raft *raft;
    struct io *io;
    struct raft_message *message;
    queue *head;
    raft = raft_fixture_get(f, target->i);
    io = raft->io->impl;
    QUEUE_FOREACH(head, &io->requests)
    {
        struct ioRequest *r;
        r = QUEUE_DATA(head, struct ioRequest, queue);
        message = NULL;
        switch (r->type) {
            case SEND:
                message = &((struct send *)r)->message;
                break;
            case TRANSMIT:
                message = &((struct transmit *)r)->message;
                break;
        }
        if (message != NULL && message->server_id == target->j + 1) {
            return false;
        }
    }
    return true;
}

bool raft_fixture_step_until_delivered(struct raft_fixture *f,
                                       unsigned i,
                                       unsigned j,
                                       unsigned max_msecs)
{
    struct step_deliver target = {i, j};
    return raft_fixture_step_until(f, hasDelivered, &target, max_msecs);
}

void raft_fixture_disconnect(struct raft_fixture *f, unsigned i, unsigned j)
{
    struct
raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; ioDisconnect(io1, io2); } void raft_fixture_reconnect(struct raft_fixture *f, unsigned i, unsigned j) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; ioReconnect(io1, io2); } void raft_fixture_saturate(struct raft_fixture *f, unsigned i, unsigned j) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; ioSaturate(io1, io2); } static void disconnectFromAll(struct raft_fixture *f, unsigned i) { unsigned j; for (j = 0; j < f->n; j++) { if (j == i) { continue; } raft_fixture_saturate(f, i, j); raft_fixture_saturate(f, j, i); } } bool raft_fixture_saturated(struct raft_fixture *f, unsigned i, unsigned j) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; return ioSaturated(io1, io2); } void raft_fixture_desaturate(struct raft_fixture *f, unsigned i, unsigned j) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; ioDesaturate(io1, io2); } void raft_fixture_kill(struct raft_fixture *f, unsigned i) { disconnectFromAll(f, i); f->servers[i].alive = false; } int raft_fixture_grow(struct raft_fixture *f, struct raft_fsm *fsm) { unsigned i; unsigned j; int rc; i = f->n; f->n++; rc = serverInit(f, i, fsm); if (rc != 0) { return rc; } serverConnectToAll(f, i); for (j = 0; j < f->n; j++) { struct raft_io *io1 = &f->servers[i].io; struct raft_io *io2 = &f->servers[j].io; ioConnect(io2, io1); } return 0; } void raft_fixture_set_randomized_election_timeout(struct raft_fixture *f, unsigned i, unsigned msecs) { struct io *io = f->servers[i].io.impl; io->randomized_election_timeout = msecs; } void raft_fixture_set_network_latency(struct raft_fixture *f, unsigned i, unsigned msecs) { struct io *io = f->servers[i].io.impl; io->network_latency = msecs; } void raft_fixture_set_disk_latency(struct raft_fixture *f, unsigned i, unsigned msecs) { struct io *io = f->servers[i].io.impl; 
io->disk_latency = msecs; } void raft_fixture_set_term(struct raft_fixture *f, unsigned i, raft_term term) { struct io *io = f->servers[i].io.impl; io->term = term; } void raft_fixture_set_snapshot(struct raft_fixture *f, unsigned i, struct raft_snapshot *snapshot) { struct io *io = f->servers[i].io.impl; io->snapshot = snapshot; } void raft_fixture_add_entry(struct raft_fixture *f, unsigned i, struct raft_entry *entry) { struct io *io = f->servers[i].io.impl; struct raft_entry *entries; entries = raft_realloc(io->entries, (io->n + 1) * sizeof *entries); assert(entries != NULL); entries[io->n] = *entry; io->entries = entries; io->n++; } void raft_fixture_io_fault(struct raft_fixture *f, unsigned i, int delay, int repeat) { struct io *io = f->servers[i].io.impl; io->fault.countdown = delay; io->fault.n = repeat; } unsigned raft_fixture_n_send(struct raft_fixture *f, unsigned i, int type) { struct io *io = f->servers[i].io.impl; return io->n_send[type]; } unsigned raft_fixture_n_recv(struct raft_fixture *f, unsigned i, int type) { struct io *io = f->servers[i].io.impl; return io->n_recv[type]; } #undef tracef raft-0.11.3/src/heap.c000066400000000000000000000043071415614527300144140ustar00rootroot00000000000000#include "heap.h" #include #include "../include/raft.h" static void *defaultMalloc(void *data, size_t size) { (void)data; return malloc(size); } static void defaultFree(void *data, void *ptr) { (void)data; free(ptr); } static void *defaultCalloc(void *data, size_t nmemb, size_t size) { (void)data; return calloc(nmemb, size); } static void *defaultRealloc(void *data, void *ptr, size_t size) { (void)data; return realloc(ptr, size); } static void *defaultAlignedAlloc(void *data, size_t alignment, size_t size) { (void)data; return aligned_alloc(alignment, size); } static void defaultAlignedFree(void *data, size_t alignment, void *ptr) { (void)alignment; defaultFree(data, ptr); } static struct raft_heap defaultHeap = { NULL, /* data */ defaultMalloc, /* malloc */ 
defaultFree, /* free */ defaultCalloc, /* calloc */ defaultRealloc, /* realloc */ defaultAlignedAlloc, /* aligned_alloc */ defaultAlignedFree /* aligned_free */ }; static struct raft_heap *currentHeap = &defaultHeap; void *HeapMalloc(size_t size) { return currentHeap->malloc(currentHeap->data, size); } void HeapFree(void *ptr) { if (ptr == NULL) { return; } currentHeap->free(currentHeap->data, ptr); } void *HeapCalloc(size_t nmemb, size_t size) { return currentHeap->calloc(currentHeap->data, nmemb, size); } void *HeapRealloc(void *ptr, size_t size) { return currentHeap->realloc(currentHeap->data, ptr, size); } void *raft_malloc(size_t size) { return HeapMalloc(size); } void raft_free(void *ptr) { HeapFree(ptr); } void *raft_calloc(size_t nmemb, size_t size) { return HeapCalloc(nmemb, size); } void *raft_realloc(void *ptr, size_t size) { return HeapRealloc(ptr, size); } void *raft_aligned_alloc(size_t alignment, size_t size) { return currentHeap->aligned_alloc(currentHeap->data, alignment, size); } void raft_aligned_free(size_t alignment, void *ptr) { currentHeap->aligned_free(currentHeap->data, alignment, ptr); } void raft_heap_set(struct raft_heap *heap) { currentHeap = heap; } void raft_heap_set_default(void) { currentHeap = &defaultHeap; } raft-0.11.3/src/heap.h000066400000000000000000000003731415614527300144200ustar00rootroot00000000000000/* Internal heap APIs. */ #ifndef HEAP_H_ #define HEAP_H_ #include void *HeapMalloc(size_t size); void *HeapCalloc(size_t nmemb, size_t size); void *HeapRealloc(void *ptr, size_t size); void HeapFree(void *ptr); #endif /* HEAP_H_ */ raft-0.11.3/src/log.c000066400000000000000000000606141415614527300142630ustar00rootroot00000000000000#include "log.h" #include #include "../include/raft.h" #include "assert.h" #include "configuration.h" /* Calculate the reference count hash table key for the given log entry index in * an hash table of the given size. * * The hash is simply the log entry index minus one modulo the size. 
This * minimizes conflicts in the most frequent case, where a new log entry is * simply appended to the log and can use the hash table bucket next to the * bucket for the entry with the previous index (possibly resizing the table if * its cap is reached). */ static size_t refsKey(const raft_index index, const size_t size) { assert(index > 0); assert(size > 0); return (size_t)((index - 1) % size); } /* Try to insert a new reference count item for the given log entry index into * the given reference count hash table. * * A collision happens when the bucket associated with the hash key of the given * log entry index is already used to refcount log entries with a different * index. In that case the collision output parameter will be set to true and no * new reference count item is inserted into the hash table. * * If two log entries have the same index but different terms, the associated * bucket will be grown accordingly. */ static int refsTryInsert(struct raft_entry_ref *table, const size_t size, const raft_term term, const raft_index index, const unsigned short count, bool *collision) { struct raft_entry_ref *bucket; /* Bucket associated with this index. */ struct raft_entry_ref *next_slot; /* For traversing the bucket slots. */ struct raft_entry_ref *last_slot; /* To track the last traversed slot. */ struct raft_entry_ref *slot; /* Actual slot to use for this entry. */ size_t key; assert(table != NULL); assert(size > 0); assert(term > 0); assert(index > 0); assert(count > 0); assert(collision != NULL); /* Calculate the hash table key for the given index. */ key = refsKey(index, size); bucket = &table[key]; /* If a bucket is empty, then there's no collision and we can fill its first * slot. */ if (bucket->count == 0) { assert(bucket->next == NULL); slot = bucket; goto fill; } /* If the bucket is already used to refcount entries with a different * index, then we have a collision and we must abort here. 
*/ if (bucket->index != index) { *collision = true; return 0; } /* If we get here it means that the bucket is in use to refcount one or more * entries with the same index as the given one, but different terms. * * We must append a newly allocated slot to refcount the entry with this * term. * * So first let's find the last slot in the bucket. */ for (next_slot = bucket; next_slot != NULL; next_slot = next_slot->next) { /* All entries in a bucket must have the same index. */ assert(next_slot->index == index); /* It should never happen that two entries with the same index and term * get appended. So no existing slot in this bucket must track an entry * with the same term as the given one. */ assert(next_slot->term != term); last_slot = next_slot; } /* The last slot must have no next slot. */ assert(last_slot->next == NULL); slot = raft_malloc(sizeof *slot); if (slot == NULL) { return RAFT_NOMEM; } last_slot->next = slot; fill: slot->term = term; slot->index = index; slot->count = count; slot->next = NULL; *collision = false; return 0; } /* Move the slots of the given bucket into the given reference count hash * table. The key of the bucket to use in the given table will be re-calculated * according to the given size. */ static int refsMove(struct raft_entry_ref *bucket, struct raft_entry_ref *table, const size_t size) { struct raft_entry_ref *slot; struct raft_entry_ref *next_slot; assert(bucket != NULL); assert(table != NULL); assert(size > 0); /* Only non-empty buckets should be moved. */ assert(bucket->count > 0); /* For each slot in the bucket, insert the relevant entry in the given * table, then free it. */ next_slot = bucket; while (next_slot != NULL) { bool collision; int rv; slot = next_slot; /* Insert the reference count for this entry into the new table. */ rv = refsTryInsert(table, size, slot->term, slot->index, slot->count, &collision); next_slot = slot->next; /* Unless this is the very first slot in the bucket, we need to free the * slot. 
*/ if (slot != bucket) { raft_free(slot); } if (rv != 0) { return rv; } /* The given hash table is assumed to be large enough to hold all ref * counts without any conflict. */ assert(!collision); }; return 0; } /* Grow the size of the reference count hash table. */ static int refsGrow(struct raft_log *l) { struct raft_entry_ref *table; /* New hash table. */ size_t size; /* Size of the new hash table. */ size_t i; assert(l != NULL); assert(l->refs_size > 0); size = l->refs_size * 2; /* Double the table size */ table = raft_calloc(size, sizeof *table); if (table == NULL) { return RAFT_NOMEM; } /* Populate the new hash table, inserting all entries existing in the * current hash table. Each bucket will have a different key in the new hash * table, since the size has changed. */ for (i = 0; i < l->refs_size; i++) { struct raft_entry_ref *bucket = &l->refs[i]; if (bucket->count > 0) { int rv = refsMove(bucket, table, size); if (rv != 0) { return rv; } } else { /* If the count is zero, we expect that the bucket is unused. */ assert(bucket->next == NULL); } } raft_free(l->refs); l->refs = table; l->refs_size = size; return 0; } /* Initialize the reference count of the entry with the given index, setting it * to 1. */ static int refsInit(struct raft_log *l, const raft_term term, const raft_index index) { int i; assert(l != NULL); assert(term > 0); assert(index > 0); /* Initialize the hash map with a reasonable size */ if (l->refs == NULL) { l->refs_size = LOG__REFS_INITIAL_SIZE; l->refs = raft_calloc(l->refs_size, sizeof *l->refs); if (l->refs == NULL) { return RAFT_NOMEM; } } /* Check if the bucket associated with the given index is available * (i.e. there are no collisions), or grow the table and re-key it * otherwise. * * We limit the number of times we try to grow the table to 10, to avoid * eating up too much memory. In practice, there should never be a case * where this is not enough. 
*/ for (i = 0; i < 10; i++) { bool collision; int rc; rc = refsTryInsert(l->refs, l->refs_size, term, index, 1, &collision); if (rc != 0) { return RAFT_NOMEM; } if (!collision) { return 0; } rc = refsGrow(l); if (rc != 0) { return rc; } }; return RAFT_NOMEM; } /* Increment the refcount of the entry with the given term and index. */ static void refsIncr(struct raft_log *l, const raft_term term, const raft_index index) { size_t key; /* Hash table key for the given index. */ struct raft_entry_ref *slot; /* Slot for the given term/index */ assert(l != NULL); assert(term > 0); assert(index > 0); key = refsKey(index, l->refs_size); /* Lookup the slot associated with the given term/index, which must have * been previously inserted. */ slot = &l->refs[key]; while (1) { assert(slot != NULL); assert(slot->index == index); if (slot->term == term) { break; } slot = slot->next; } assert(slot != NULL); slot->count++; } /* Decrement the refcount of the entry with the given index. Return a boolean * indicating whether the entry has now zero references. */ static bool refsDecr(struct raft_log *l, const raft_term term, const raft_index index) { size_t key; /* Hash table key for the given index. */ struct raft_entry_ref *slot; /* Slot for the given term/index */ struct raft_entry_ref *prev_slot; /* Slot preceeding the one to decrement */ assert(l != NULL); assert(term > 0); assert(index > 0); key = refsKey(index, l->refs_size); prev_slot = NULL; /* Lookup the slot associated with the given term/index, keeping track of * its previous slot in the bucket list. */ slot = &l->refs[key]; while (1) { assert(slot != NULL); assert(slot->index == index); if (slot->term == term) { break; } prev_slot = slot; slot = slot->next; } slot->count--; if (slot->count > 0) { /* The entry is still referenced. */ return false; } /* If the refcount has dropped to zero, delete the slot. */ if (prev_slot != NULL) { /* This isn't the very first slot, simply unlink it from the slot * list. 
*/ prev_slot->next = slot->next; raft_free(slot); } else if (slot->next != NULL) { /* This is the very first slot, and slot list is not empty. Copy the * second slot into the first one, then delete it. */ struct raft_entry_ref *second_slot = slot->next; *slot = *second_slot; raft_free(second_slot); } return true; } void logInit(struct raft_log *l) { assert(l != NULL); l->entries = NULL; l->size = 0; l->front = l->back = 0; l->offset = 0; l->refs = NULL; l->refs_size = 0; l->snapshot.last_index = 0; l->snapshot.last_term = 0; } /* Return the index of the i'th entry in the log. */ static raft_index indexAt(struct raft_log *l, size_t i) { return l->offset + i + 1; } /* Return the circular buffer position of the i'th entry in the log. */ static size_t positionAt(struct raft_log *l, size_t i) { return (l->front + i) % l->size; } /* Return the i'th entry in the log. */ static struct raft_entry *entryAt(struct raft_log *l, size_t i) { return &l->entries[positionAt(l, i)]; } void logClose(struct raft_log *l) { void *batch = NULL; /* Last batch that has been freed */ assert(l != NULL); if (l->entries != NULL) { size_t i; size_t n = logNumEntries(l); for (i = 0; i < n; i++) { struct raft_entry *entry = entryAt(l, i); raft_index index = indexAt(l, i); size_t key = refsKey(index, l->refs_size); struct raft_entry_ref *slot = &l->refs[key]; /* We require that there are no outstanding references to active * entries. */ assert(slot->count == 1); /* TODO: we should support the case where the bucket has more than * one slot. */ assert(slot->next == NULL); /* Release the memory used by the entry data (either directly or via * a batch). */ if (entry->batch == NULL) { if (entry->buf.base != NULL) { raft_free(entry->buf.base); } } else { if (entry->batch != batch) { /* This batch was not released yet, so let's do it now. 
*/ batch = entry->batch; raft_free(entry->batch); } } } raft_free(l->entries); } if (l->refs != NULL) { raft_free(l->refs); } } void logStart(struct raft_log *l, raft_index snapshot_index, raft_term snapshot_term, raft_index start_index) { assert(logNumEntries(l) == 0); assert(start_index > 0); assert(start_index <= snapshot_index + 1); assert(snapshot_index == 0 || snapshot_term != 0); l->snapshot.last_index = snapshot_index; l->snapshot.last_term = snapshot_term; l->offset = start_index - 1; } /* Ensure that the entries array has enough free slots for adding a new entry. */ static int ensureCapacity(struct raft_log *l) { struct raft_entry *entries; /* New entries array */ size_t n; /* Current number of entries */ size_t size; /* Size of the new array */ size_t i; n = logNumEntries(l); if (n + 1 < l->size) { return 0; } /* Make the new size twice the current size plus one (for the new * entry). Over-allocating now avoids smaller allocations later. */ size = (l->size + 1) * 2; entries = raft_calloc(size, sizeof *entries); if (entries == NULL) { return RAFT_NOMEM; } /* Copy all active old entries to the beginning of the newly allocated * array. */ for (i = 0; i < n; i++) { memcpy(&entries[i], entryAt(l, i), sizeof *entries); } /* Release the old entries array. 
*/ if (l->entries != NULL) { raft_free(l->entries); } l->entries = entries; l->size = size; l->front = 0; l->back = n; return 0; } int logAppend(struct raft_log *l, const raft_term term, const unsigned short type, const struct raft_buffer *buf, void *batch) { int rv; struct raft_entry *entry; raft_index index; assert(l != NULL); assert(term > 0); assert(type == RAFT_CHANGE || type == RAFT_BARRIER || type == RAFT_COMMAND); assert(buf != NULL); rv = ensureCapacity(l); if (rv != 0) { return rv; } index = logLastIndex(l) + 1; rv = refsInit(l, term, index); if (rv != 0) { return rv; } entry = &l->entries[l->back]; entry->term = term; entry->type = type; entry->buf = *buf; entry->batch = batch; l->back += 1; l->back = l->back % l->size; return 0; } int logAppendCommands(struct raft_log *l, const raft_term term, const struct raft_buffer bufs[], const unsigned n) { unsigned i; int rv; assert(l != NULL); assert(term > 0); assert(bufs != NULL); assert(n > 0); for (i = 0; i < n; i++) { const struct raft_buffer *buf = &bufs[i]; rv = logAppend(l, term, RAFT_COMMAND, buf, NULL); if (rv != 0) { return rv; } } return 0; } int logAppendConfiguration(struct raft_log *l, const raft_term term, const struct raft_configuration *configuration) { struct raft_buffer buf; int rv; assert(l != NULL); assert(term > 0); assert(configuration != NULL); /* Encode the configuration into a buffer. */ rv = configurationEncode(configuration, &buf); if (rv != 0) { goto err; } /* Append the new entry to the log. */ rv = logAppend(l, term, RAFT_CHANGE, &buf, NULL); if (rv != 0) { goto err_after_encode; } return 0; err_after_encode: raft_free(buf.base); err: assert(rv != 0); return rv; } size_t logNumEntries(struct raft_log *l) { assert(l != NULL); /* The circular buffer is not wrapped. */ if (l->front <= l->back) { return l->back - l->front; } /* The circular buffer is wrapped. 
*/ return l->size - l->front + l->back; } raft_index logLastIndex(struct raft_log *l) { /* If there are no entries in the log, but there is a snapshot available * check that it's last index is consistent with the offset. */ if (logNumEntries(l) == 0 && l->snapshot.last_index != 0) { assert(l->offset <= l->snapshot.last_index); } return l->offset + logNumEntries(l); } /* Return the position of the entry with the given index in the entries array. * * If no entry with the given index is in the log return the size of the entries * array. */ static size_t locateEntry(struct raft_log *l, const raft_index index) { size_t n = logNumEntries(l); if (n == 0 || index < indexAt(l, 0) || index > indexAt(l, n - 1)) { return l->size; } /* Get the circular buffer position of the desired entry. Log indexes start * at 1, so we subtract one to get array indexes. We also need to subtract * any index offset this log might start at. */ return positionAt(l, (size_t)((index - 1) - l->offset)); } raft_term logTermOf(struct raft_log *l, const raft_index index) { size_t i; assert(index > 0); assert(l->offset <= l->snapshot.last_index); if ((index < l->offset + 1 && index != l->snapshot.last_index) || index > logLastIndex(l)) { return 0; } if (index == l->snapshot.last_index) { assert(l->snapshot.last_term != 0); /* Coherence check that if we still have the entry at last_index, its term * matches the one in the snapshot. */ i = locateEntry(l, index); if (i != l->size) { assert(l->entries[i].term == l->snapshot.last_term); } return l->snapshot.last_term; } i = locateEntry(l, index); assert(i < l->size); return l->entries[i].term; } raft_index logSnapshotIndex(struct raft_log *l) { return l->snapshot.last_index; } raft_term logLastTerm(struct raft_log *l) { raft_index last_index; last_index = logLastIndex(l); return last_index > 0 ? 
logTermOf(l, last_index) : 0; } const struct raft_entry *logGet(struct raft_log *l, const raft_index index) { size_t i; assert(l != NULL); /* Get the array index of the desired entry. */ i = locateEntry(l, index); if (i == l->size) { return NULL; } assert(i < l->size); return &l->entries[i]; } int logAcquire(struct raft_log *l, const raft_index index, struct raft_entry *entries[], unsigned *n) { size_t i; size_t j; assert(l != NULL); assert(index > 0); assert(entries != NULL); assert(n != NULL); /* Get the array index of the first entry to acquire. */ i = locateEntry(l, index); if (i == l->size) { *n = 0; *entries = NULL; return 0; } if (i < l->back) { /* The last entry does not wrap with respect to i, so the number of * entries is simply the length of the range [i...l->back). */ *n = (unsigned)(l->back - i); } else { /* The last entry wraps with respect to i, so the number of entries is * the sum of the lengths of the ranges [i...l->size) and [0...l->back), * which is l->size - i + l->back.*/ *n = (unsigned)(l->size - i + l->back); } assert(*n > 0); *entries = raft_calloc(*n, sizeof **entries); if (*entries == NULL) { return RAFT_NOMEM; } for (j = 0; j < *n; j++) { size_t k = (i + j) % l->size; struct raft_entry *entry = &(*entries)[j]; *entry = l->entries[k]; refsIncr(l, entry->term, index + j); } return 0; } /* Return true if the given batch is referenced by any entry currently in the * log. */ static bool isBatchReferenced(struct raft_log *l, const void *batch) { size_t i; /* Iterate through all live entries to see if there's one * belonging to the same batch. This is slightly inefficient but * this code path should be taken very rarely in practice. 
*/ for (i = 0; i < logNumEntries(l); i++) { struct raft_entry *entry = entryAt(l, i); if (entry->batch == batch) { return true; } } return false; } void logRelease(struct raft_log *l, const raft_index index, struct raft_entry entries[], const unsigned n) { size_t i; void *batch = NULL; /* Last batch whose memory was freed */ assert(l != NULL); assert((entries == NULL && n == 0) || (entries != NULL && n > 0)); for (i = 0; i < n; i++) { struct raft_entry *entry = &entries[i]; bool unref; unref = refsDecr(l, entry->term, index + i); /* If there are no outstanding references to this entry, free its * payload if it's not part of a batch, or check if we can free the * batch itself. */ if (unref) { if (entries[i].batch == NULL) { if (entry->buf.base != NULL) { raft_free(entries[i].buf.base); } } else { if (entry->batch != batch) { if (!isBatchReferenced(l, entry->batch)) { batch = entry->batch; raft_free(batch); } } } } } if (entries != NULL) { raft_free(entries); } } /* Clear the log if it became empty. */ static void clearIfEmpty(struct raft_log *l) { if (logNumEntries(l) > 0) { return; } raft_free(l->entries); l->entries = NULL; l->size = 0; l->front = 0; l->back = 0; } /* Destroy an entry, possibly releasing the memory of its buffer. */ static void destroyEntry(struct raft_log *l, struct raft_entry *entry) { if (entry->batch == NULL) { if (entry->buf.base != NULL) { raft_free(entry->buf.base); } } else { if (!isBatchReferenced(l, entry->batch)) { raft_free(entry->batch); } } } /* Core logic of @logTruncate and @logDiscard, removing all log entries from * @index onward. If @destroy is true, also destroy the removed entries. 
*/ static void removeSuffix(struct raft_log *l, const raft_index index, bool destroy) { size_t i; size_t n; raft_index start = index; assert(l != NULL); assert(index > l->offset); assert(index <= logLastIndex(l)); /* Number of entries to delete */ n = (size_t)(logLastIndex(l) - start) + 1; for (i = 0; i < n; i++) { struct raft_entry *entry; bool unref; if (l->back == 0) { l->back = l->size - 1; } else { l->back--; } entry = &l->entries[l->back]; unref = refsDecr(l, entry->term, start + n - i - 1); if (unref && destroy) { destroyEntry(l, entry); } } clearIfEmpty(l); } void logTruncate(struct raft_log *l, const raft_index index) { if (logNumEntries(l) == 0) { return; } removeSuffix(l, index, true); } void logDiscard(struct raft_log *l, const raft_index index) { removeSuffix(l, index, false); } /* Delete all entries up to the given index (included). */ static void removePrefix(struct raft_log *l, const raft_index index) { size_t i; size_t n; assert(l != NULL); assert(index > 0); assert(index <= logLastIndex(l)); /* Number of entries to delete */ n = (size_t)(index - indexAt(l, 0)) + 1; for (i = 0; i < n; i++) { struct raft_entry *entry; bool unref; entry = &l->entries[l->front]; if (l->front == l->size - 1) { l->front = 0; } else { l->front++; } l->offset++; unref = refsDecr(l, entry->term, l->offset); if (unref) { destroyEntry(l, entry); } } clearIfEmpty(l); } void logSnapshot(struct raft_log *l, raft_index last_index, unsigned trailing) { raft_term last_term = logTermOf(l, last_index); /* We must have an entry at this index */ assert(last_term != 0); l->snapshot.last_index = last_index; l->snapshot.last_term = last_term; /* If we have not at least n entries preceeding the given last index, then * there's nothing to remove and we're done. 
*/ if (last_index <= trailing || locateEntry(l, last_index - trailing) == l->size) { return; } removePrefix(l, last_index - trailing); } void logRestore(struct raft_log *l, raft_index last_index, raft_term last_term) { size_t n = logNumEntries(l); assert(last_index > 0); assert(last_term > 0); if (n > 0) { logTruncate(l, logLastIndex(l) - n + 1); } l->snapshot.last_index = last_index; l->snapshot.last_term = last_term; l->offset = last_index; } raft-0.11.3/src/log.h000066400000000000000000000107021415614527300142610ustar00rootroot00000000000000/* In-memory cache of the persistent raft log stored on disk. */ #ifndef RAFT_LOG_H_ #define RAFT_LOG_H_ #include "../include/raft.h" /* Initial size of the entry reference count hash table. */ #define LOG__REFS_INITIAL_SIZE 256 /* Initialize an empty in-memory log of raft entries. */ void logInit(struct raft_log *l); /* Release all memory used by the given log object. */ void logClose(struct raft_log *l); /* Called at startup when populating the log with entries loaded from disk. It * sets the starting state of the log. The start index must be lower or equal * than snapshot_index + 1. */ void logStart(struct raft_log *l, raft_index snapshot_index, raft_term snapshot_term, raft_index start_index); /* Get the number of entries the log currently contains. */ size_t logNumEntries(struct raft_log *l); /* Get the index of the last entry in the log. Return #0 if the log is empty. */ raft_index logLastIndex(struct raft_log *l); /* Get the term of the last entry in the log. Return #0 if the log is empty. */ raft_term logLastTerm(struct raft_log *l); /* Get the term of the entry with the given index. Return #0 if @index is * * greater than the last index of the log, or if it's lower than oldest index we * know the term of (either because it's outstanding or because it's the last * entry in the most recent snapshot). */ raft_term logTermOf(struct raft_log *l, raft_index index); /* Get the last index of the most recent snapshot. 
Return #0 if there are no * * snapshots. */ raft_index logSnapshotIndex(struct raft_log *l); /* Get the entry with the given index. * The returned pointer remains valid only * as long as no API that might delete the entry with the given index is * invoked. Return #NULL if there is no such entry. */ const struct raft_entry *logGet(struct raft_log *l, const raft_index index); /* Append a new entry to the log. */ int logAppend(struct raft_log *l, raft_term term, unsigned short type, const struct raft_buffer *buf, void *batch); /* Convenience to append a series of #RAFT_COMMAND entries. */ int logAppendCommands(struct raft_log *l, const raft_term term, const struct raft_buffer bufs[], const unsigned n); /* Convenience to encode and append a single #RAFT_CHANGE entry. */ int logAppendConfiguration(struct raft_log *l, const raft_term term, const struct raft_configuration *configuration); /* Acquire an array of entries from the given index onwards. * The payload * memory referenced by the @buf attribute of the returned entries is guaranteed * to be valid until logRelease() is called. */ int logAcquire(struct raft_log *l, raft_index index, struct raft_entry *entries[], unsigned *n); /* Release a previously acquired array of entries. */ void logRelease(struct raft_log *l, raft_index index, struct raft_entry entries[], unsigned n); /* Delete all entries from the given index (included) onwards. If the log is * empty this is a no-op. If @index is lower than or equal to the index of the * first entry in the log, then the log will become empty. */ void logTruncate(struct raft_log *l, const raft_index index); /* Discard all entries from the given index (included) onwards. This is exactly * the same as truncate, but the memory of the entries does not gets * released. This is called as part of error handling, when reverting the effect * of previous logAppend calls. */ void logDiscard(struct raft_log *l, const raft_index index); /* To be called when taking a new snapshot. 
The log must contain an entry at * last_index, which is the index of the last entry included in the * snapshot. The function will update the last snapshot information and delete * all entries up last_index - trailing (included). If the log contains no entry * a last_index - trailing, then no entry will be deleted. */ void logSnapshot(struct raft_log *l, raft_index last_index, unsigned trailing); /* To be called when installing a snapshot. * * The log can be in any state. All outstanding entries will be discarded, the * last index and last term of the most recent snapshot will be set to the given * values, and the offset adjusted accordingly. */ void logRestore(struct raft_log *l, raft_index last_index, raft_term last_term); #endif /* RAFT_LOG_H_ */ raft-0.11.3/src/membership.c000066400000000000000000000145321415614527300156330ustar00rootroot00000000000000#include "membership.h" #include "../include/raft.h" #include "assert.h" #include "configuration.h" #include "err.h" #include "log.h" #include "progress.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) int membershipCanChangeConfiguration(struct raft *r) { int rv; if (r->state != RAFT_LEADER || r->transfer != NULL) { tracef("NOT LEADER"); rv = RAFT_NOTLEADER; goto err; } if (r->configuration_uncommitted_index != 0) { tracef("r->configuration_uncommitted_index %llu", r->configuration_uncommitted_index); rv = RAFT_CANTCHANGE; goto err; } if (r->leader_state.promotee_id != 0) { tracef("r->leader_state.promotee_id %llu", r->leader_state.promotee_id); rv = RAFT_CANTCHANGE; goto err; } /* In order to become leader at all we are supposed to have committed at * least the initial configuration at index 1. */ assert(r->configuration_index > 0); /* The index of the last committed configuration can't be greater than the * last log index. */ assert(logLastIndex(&r->log) >= r->configuration_index); /* No catch-up round should be in progress. 
*/ assert(r->leader_state.round_number == 0); assert(r->leader_state.round_index == 0); assert(r->leader_state.round_start == 0); return 0; err: assert(rv != 0); ErrMsgFromCode(r->errmsg, rv); return rv; } bool membershipUpdateCatchUpRound(struct raft *r) { unsigned server_index; raft_index match_index; raft_index last_index; raft_time now = r->io->time(r->io); raft_time round_duration; bool is_up_to_date; bool is_fast_enough; assert(r->state == RAFT_LEADER); assert(r->leader_state.promotee_id != 0); server_index = configurationIndexOf(&r->configuration, r->leader_state.promotee_id); assert(server_index < r->configuration.n); match_index = progressMatchIndex(r, server_index); /* If the server did not reach the target index for this round, it did not * catch up. */ if (match_index < r->leader_state.round_index) { tracef("member (index: %u) not yet caught up match_index:%llu round_index:%llu", server_index, match_index, r->leader_state.round_index); return false; } last_index = logLastIndex(&r->log); round_duration = now - r->leader_state.round_start; is_up_to_date = match_index == last_index; is_fast_enough = round_duration < r->election_timeout; tracef("member is_up_to_date:%d is_fast_enough:%d", is_up_to_date, is_fast_enough); /* If the server's log is fully up-to-date or the round that just terminated * was fast enough, then the server as caught up. */ if (is_up_to_date || is_fast_enough) { r->leader_state.round_number = 0; r->leader_state.round_index = 0; r->leader_state.round_start = 0; return true; } /* If we get here it means that this catch-up round is complete, but there * are more entries to replicate, or it was not fast enough. Let's start a * new round. 
*/ r->leader_state.round_number++; r->leader_state.round_index = last_index; r->leader_state.round_start = now; return false; } int membershipUncommittedChange(struct raft *r, const raft_index index, const struct raft_entry *entry) { struct raft_configuration configuration; int rv; assert(r != NULL); assert(r->state == RAFT_FOLLOWER); assert(entry != NULL); assert(entry->type == RAFT_CHANGE); raft_configuration_init(&configuration); rv = configurationDecode(&entry->buf, &configuration); if (rv != 0) { tracef("failed to decode configuration at index:%llu", index); goto err; } configurationTrace(r, &configuration, "uncommitted config change"); raft_configuration_close(&r->configuration); r->configuration = configuration; r->configuration_uncommitted_index = index; return 0; err: assert(rv != 0); return rv; } int membershipRollback(struct raft *r) { const struct raft_entry *entry; int rv; assert(r != NULL); assert(r->state == RAFT_FOLLOWER); assert(r->configuration_uncommitted_index > 0); tracef("roll back membership"); /* Fetch the last committed configuration entry. */ assert(r->configuration_index != 0); entry = logGet(&r->log, r->configuration_index); assert(entry != NULL); /* Replace the current configuration with the last committed one. 
*/ raft_configuration_close(&r->configuration); raft_configuration_init(&r->configuration); rv = configurationDecode(&entry->buf, &r->configuration); if (rv != 0) { return rv; } configurationTrace(r, &r->configuration, "roll back config"); r->configuration_uncommitted_index = 0; return 0; } void membershipLeadershipTransferInit(struct raft *r, struct raft_transfer *req, raft_id id, raft_transfer_cb cb) { req->cb = cb; req->id = id; req->start = r->io->time(r->io); req->send.data = NULL; r->transfer = req; } int membershipLeadershipTransferStart(struct raft *r) { const struct raft_server *server; struct raft_message message; int rv; assert(r->transfer->send.data == NULL); server = configurationGet(&r->configuration, r->transfer->id); assert(server != NULL); if (server == NULL) { tracef("transferee server not found in configuration"); return -1; } message.type = RAFT_IO_TIMEOUT_NOW; message.server_id = server->id; message.server_address = server->address; message.timeout_now.term = r->current_term; message.timeout_now.last_log_index = logLastIndex(&r->log); message.timeout_now.last_log_term = logLastTerm(&r->log); r->transfer->send.data = r; rv = r->io->send(r->io, &r->transfer->send, &message, NULL); if (rv != 0) { ErrMsgTransferf(r->io->errmsg, r->errmsg, "send timeout now to %llu", server->id); return rv; } return 0; } void membershipLeadershipTransferClose(struct raft *r) { struct raft_transfer *req = r->transfer; raft_transfer_cb cb = req->cb; r->transfer = NULL; if (cb != NULL) { cb(req); } } raft-0.11.3/src/membership.h000066400000000000000000000041161415614527300156350ustar00rootroot00000000000000/* Membership-related APIs. */ #ifndef MEMBERSHIP_H_ #define MEMBERSHIP_H_ #include "../include/raft.h" /* Helper returning an error if the configuration can't be changed, either * because this node is not the leader or because a configuration change is * already in progress. 
*/ int membershipCanChangeConfiguration(struct raft *r); /* Update the information about the progress that the non-voting server * currently being promoted is making in catching with logs. * * Return false if the server being promoted did not yet catch-up with logs, and * true if it did. * * This function must be called only by leaders after a @raft_assign request * has been submitted. */ bool membershipUpdateCatchUpRound(struct raft *r); /* Update the local configuration replacing it with the content of the given * RAFT_CHANGE entry, which has just been received in as part of an * AppendEntries RPC request. The uncommitted configuration index will be * updated accordingly. * * It must be called only by followers. */ int membershipUncommittedChange(struct raft *r, const raft_index index, const struct raft_entry *entry); /* Rollback any promotion configuration change that was applied locally, but * failed to be committed. It must be called by followers after they receive an * AppendEntries RPC request that instructs them to evict the uncommitted entry * from their log. */ int membershipRollback(struct raft *r); /* Initialize the state of a leadership transfer request. */ void membershipLeadershipTransferInit(struct raft *r, struct raft_transfer *req, raft_id id, raft_transfer_cb cb); /* Start the leadership transfer by sending a TimeoutNow message to the target * server. */ int membershipLeadershipTransferStart(struct raft *r); /* Finish a leadership transfer (whether successful or not), resetting the * leadership transfer state and firing the user callback. */ void membershipLeadershipTransferClose(struct raft *r); #endif /* MEMBERSHIP_H_ */ raft-0.11.3/src/progress.c000066400000000000000000000217141415614527300153440ustar00rootroot00000000000000#include "progress.h" #include "assert.h" #include "configuration.h" #include "log.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) #ifndef max #define max(a, b) ((a) < (b) ? 
(b) : (a)) #endif #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif /* Initialize a single progress object. */ static void initProgress(struct raft_progress *p, raft_index last_index) { p->next_index = last_index + 1; p->match_index = 0; p->snapshot_index = 0; p->last_send = 0; p->snapshot_last_send = 0; p->recent_recv = false; p->state = PROGRESS__PROBE; } int progressBuildArray(struct raft *r) { struct raft_progress *progress; unsigned i; raft_index last_index = logLastIndex(&r->log); progress = raft_malloc(r->configuration.n * sizeof *progress); if (progress == NULL) { return RAFT_NOMEM; } for (i = 0; i < r->configuration.n; i++) { initProgress(&progress[i], last_index); if (r->configuration.servers[i].id == r->id) { progress[i].match_index = r->last_stored; } } r->leader_state.progress = progress; return 0; } int progressRebuildArray(struct raft *r, const struct raft_configuration *configuration) { raft_index last_index = logLastIndex(&r->log); struct raft_progress *progress; unsigned i; unsigned j; raft_id id; progress = raft_malloc(configuration->n * sizeof *progress); if (progress == NULL) { return RAFT_NOMEM; } /* First copy the progress information for the servers that exists both in * the current and in the new configuration. */ for (i = 0; i < r->configuration.n; i++) { id = r->configuration.servers[i].id; j = configurationIndexOf(configuration, id); if (j == configuration->n) { /* This server is not present in the new configuration, so we just * skip it. */ continue; } progress[j] = r->leader_state.progress[i]; } /* Then reset the replication state for servers that are present in the new * configuration, but not in the current one. */ for (i = 0; i < configuration->n; i++) { id = configuration->servers[i].id; j = configurationIndexOf(&r->configuration, id); if (j < r->configuration.n) { /* This server is present both in the new and in the current * configuration, so we have already copied its next/match index * value in the loop above. 
*/ continue; } assert(j == r->configuration.n); initProgress(&progress[i], last_index); } raft_free(r->leader_state.progress); r->leader_state.progress = progress; return 0; } bool progressIsUpToDate(struct raft *r, unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; raft_index last_index = logLastIndex(&r->log); return p->next_index == last_index + 1; } bool progressShouldReplicate(struct raft *r, unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; raft_time now = r->io->time(r->io); bool needs_heartbeat = now - p->last_send >= r->heartbeat_timeout; raft_index last_index = logLastIndex(&r->log); bool result = false; /* We must be in a valid state. */ assert(p->state == PROGRESS__PROBE || p->state == PROGRESS__PIPELINE || p->state == PROGRESS__SNAPSHOT); /* The next index to send must be lower than the highest index in our * log. */ assert(p->next_index <= last_index + 1); switch (p->state) { case PROGRESS__SNAPSHOT: /* Snapshot timed out, move to PROBE */ if (now - p->snapshot_last_send >= r->install_snapshot_timeout) { tracef("snapshot timed out for index:%u", i); result = true; progressAbortSnapshot(r, i); } else { /* Enforce Leadership during follower Snapshot installation */ result = needs_heartbeat; } break; case PROGRESS__PROBE: /* We send at most one message per heartbeat interval. */ result = needs_heartbeat; break; case PROGRESS__PIPELINE: /* In replication mode we send empty append entries messages only if * haven't sent anything in the last heartbeat interval. 
*/ result = !progressIsUpToDate(r, i) || needs_heartbeat; break; } return result; } raft_index progressNextIndex(struct raft *r, unsigned i) { return r->leader_state.progress[i].next_index; } raft_index progressMatchIndex(struct raft *r, unsigned i) { return r->leader_state.progress[i].match_index; } void progressUpdateLastSend(struct raft *r, unsigned i) { r->leader_state.progress[i].last_send = r->io->time(r->io); } void progressUpdateSnapshotLastSend(struct raft *r, unsigned i) { r->leader_state.progress[i].snapshot_last_send = r->io->time(r->io); } bool progressResetRecentRecv(struct raft *r, const unsigned i) { bool prev = r->leader_state.progress[i].recent_recv; r->leader_state.progress[i].recent_recv = false; return prev; } void progressMarkRecentRecv(struct raft *r, const unsigned i) { r->leader_state.progress[i].recent_recv = true; } bool progressGetRecentRecv(const struct raft *r, const unsigned i) { return r->leader_state.progress[i].recent_recv; } void progressToSnapshot(struct raft *r, unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; p->state = PROGRESS__SNAPSHOT; p->snapshot_index = logSnapshotIndex(&r->log); } void progressAbortSnapshot(struct raft *r, const unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; p->snapshot_index = 0; p->state = PROGRESS__PROBE; } int progressState(struct raft *r, const unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; return p->state; } bool progressMaybeDecrement(struct raft *r, const unsigned i, raft_index rejected, raft_index last_index) { struct raft_progress *p = &r->leader_state.progress[i]; assert(p->state == PROGRESS__PROBE || p->state == PROGRESS__PIPELINE || p->state == PROGRESS__SNAPSHOT); if (p->state == PROGRESS__SNAPSHOT) { /* The rejection must be stale or spurious if the rejected index does * not match the last snapshot index. 
*/ if (rejected != p->snapshot_index) { return false; } progressAbortSnapshot(r, i); return true; } if (p->state == PROGRESS__PIPELINE) { /* The rejection must be stale if the rejected index is smaller than * the matched one. */ if (rejected <= p->match_index) { tracef("match index is up to date -> ignore "); return false; } /* Directly decrease next to match + 1 */ p->next_index = min(rejected, p->match_index + 1); progressToProbe(r, i); return true; } /* The rejection must be stale or spurious if the rejected index does not * match the next index minus one. */ if (rejected != p->next_index - 1) { tracef("rejected index %llu different from next index %lld -> ignore ", rejected, p->next_index); return false; } p->next_index = min(rejected, last_index + 1); p->next_index = max(p->next_index, 1); return true; } void progressOptimisticNextIndex(struct raft *r, unsigned i, raft_index next_index) { struct raft_progress *p = &r->leader_state.progress[i]; p->next_index = next_index; } bool progressMaybeUpdate(struct raft *r, unsigned i, raft_index last_index) { struct raft_progress *p = &r->leader_state.progress[i]; bool updated = false; if (p->match_index < last_index) { p->match_index = last_index; updated = true; } if (p->next_index < last_index + 1) { p->next_index = last_index + 1; } return updated; } void progressToProbe(struct raft *r, const unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; /* If the current state is snapshot, we know that the pending snapshot has * been sent to this peer successfully, so we probe from snapshot_index + * 1.*/ if (p->state == PROGRESS__SNAPSHOT) { assert(p->snapshot_index > 0); p->next_index = max(p->match_index + 1, p->snapshot_index); p->snapshot_index = 0; } else { p->next_index = p->match_index + 1; } p->state = PROGRESS__PROBE; } void progressToPipeline(struct raft *r, const unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; p->state = PROGRESS__PIPELINE; } bool progressSnapshotDone(struct 
raft *r, const unsigned i) { struct raft_progress *p = &r->leader_state.progress[i]; assert(p->state == PROGRESS__SNAPSHOT); return p->match_index >= p->snapshot_index; } #undef tracef raft-0.11.3/src/progress.h000066400000000000000000000103251415614527300153450ustar00rootroot00000000000000/* Track replication progress on followers. */ #ifndef PROGRESS_H_ #define PROGRESS_H_ #include "../include/raft.h" /* Possible values for the state field of struct raft_progress. */ enum { PROGRESS__PROBE = 0, /* At most one AppendEntries per heartbeat interval */ PROGRESS__PIPELINE, /* Optimistically stream AppendEntries */ PROGRESS__SNAPSHOT /* Sending a snapshot */ }; /* Create and initialize the array of progress objects used by the leader to * * track followers. The match index will be set to zero, and the next index to * the current last index plus 1. */ int progressBuildArray(struct raft *r); /* Re-build the progress array against a new configuration. * * Progress information for servers existing both in the new and in the current * configuration will remain unchanged. * * Progress information for servers existing only in the new configuration will * be initialized as in progressBuildArray().*/ int progressRebuildArray(struct raft *r, const struct raft_configuration *configuration); /* Whether the log of the i'th server in the configuration up-to-date with * ours. */ bool progressIsUpToDate(struct raft *r, unsigned i); /* Whether a new AppendEntries or InstallSnapshot message should be sent to the * i'th server at this time. * * See the docstring of replicationProgress() for details about how the decision * is taken. */ bool progressShouldReplicate(struct raft *r, unsigned i); /* Return the index of the next entry that should be sent to the i'th server. */ raft_index progressNextIndex(struct raft *r, unsigned i); /* Return the index of the most recent entry that the i'th server has reported * as replicated. 
*/ raft_index progressMatchIndex(struct raft *r, unsigned i); /* Update the last_send timestamp after an AppendEntries request has been * sent. */ void progressUpdateLastSend(struct raft *r, unsigned i); /* Update the snapshot_last_send timestamp after an InstallSnaphot request has * been sent. */ void progressUpdateSnapshotLastSend(struct raft *r, unsigned i); /* Reset to false the recent_recv flag of the server at the given index, * returning the previous value. * * To be called once every election_timeout milliseconds. */ bool progressResetRecentRecv(struct raft *r, unsigned i); /* Set to true the recent_recv flag of the server at the given index. * * To be called whenever we receive an AppendEntries RPC result */ void progressMarkRecentRecv(struct raft *r, unsigned i); /* Return the value of the recent_recv flag. */ bool progressGetRecentRecv(const struct raft *r, unsigned i); /* Convert to the i'th server to snapshot mode. */ void progressToSnapshot(struct raft *r, unsigned i); /* Convert to probe mode. */ void progressToProbe(struct raft *r, unsigned i); /* Convert to pipeline mode. */ void progressToPipeline(struct raft *r, unsigned i); /* Abort snapshot mode and switch to back to probe. * * Called after sending the snapshot has failed or timed out. */ void progressAbortSnapshot(struct raft *r, unsigned i); /* Return the progress mode code for the i'th server. */ int progressState(struct raft *r, unsigned i); /* Optimistically update the next index of the given server. * * Called in pipeline mode after sending new entries. */ void progressOptimisticNextIndex(struct raft *r, unsigned i, raft_index next_index); /* Return false if the given @index comes from an outdated message. Otherwise * update the progress and returns true. To be called when receiving a * successful AppendEntries RPC response. */ bool progressMaybeUpdate(struct raft *r, unsigned i, raft_index last_index); /* Return false if the given rejected index comes from an out of order * message. 
Otherwise decrease the progress next index to min(rejected, * last_index) and returns true. To be called when receiving an unsuccessful * AppendEntries RPC response. */ bool progressMaybeDecrement(struct raft *r, unsigned i, raft_index rejected, raft_index last_index); /* Return true if match_index is equal or higher than the snapshot_index. */ bool progressSnapshotDone(struct raft *r, unsigned i); #endif /* PROGRESS_H_ */ raft-0.11.3/src/queue.h000066400000000000000000000033151415614527300146260ustar00rootroot00000000000000#ifndef QUEUE_H_ #define QUEUE_H_ #include typedef void *queue[2]; /* Private macros. */ #define QUEUE_NEXT(q) (*(queue **)&((*(q))[0])) #define QUEUE_PREV(q) (*(queue **)&((*(q))[1])) #define QUEUE_PREV_NEXT(q) (QUEUE_NEXT(QUEUE_PREV(q))) #define QUEUE_NEXT_PREV(q) (QUEUE_PREV(QUEUE_NEXT(q))) /* Initialize an empty queue. */ #define QUEUE_INIT(q) \ { \ QUEUE_NEXT(q) = (q); \ QUEUE_PREV(q) = (q); \ } /* Return true if the queue has no element. */ #define QUEUE_IS_EMPTY(q) ((const queue *)(q) == (const queue *)QUEUE_NEXT(q)) /* Insert an element at the back of a queue. */ #define QUEUE_PUSH(q, e) \ { \ QUEUE_NEXT(e) = (q); \ QUEUE_PREV(e) = QUEUE_PREV(q); \ QUEUE_PREV_NEXT(e) = (e); \ QUEUE_PREV(q) = (e); \ } /* Remove the given element from the queue. Any element can be removed at any * * time. */ #define QUEUE_REMOVE(e) \ { \ QUEUE_PREV_NEXT(e) = QUEUE_NEXT(e); \ QUEUE_NEXT_PREV(e) = QUEUE_PREV(e); \ } /* Return the element at the front of the queue. */ #define QUEUE_HEAD(q) (QUEUE_NEXT(q)) /* Return the element at the back of the queue. */ #define QUEUE_TAIL(q) (QUEUE_PREV(q)) /* Iterate over the element of a queue. * Mutating the queue while iterating * results in undefined behavior. */ #define QUEUE_FOREACH(q, e) \ for ((q) = QUEUE_NEXT(e); (q) != (e); (q) = QUEUE_NEXT(q)) /* Return the structure holding the given element. 
*/ #define QUEUE_DATA(e, type, field) ((type *)((void*)((char *)(e)-offsetof(type, field)))) #endif /* QUEUE_H_*/ raft-0.11.3/src/raft.c000066400000000000000000000126711415614527300144360ustar00rootroot00000000000000#include "../include/raft.h" #include #include "assert.h" #include "byte.h" #include "configuration.h" #include "convert.h" #include "election.h" #include "err.h" #include "heap.h" #include "log.h" #include "membership.h" #include "tracing.h" #define DEFAULT_ELECTION_TIMEOUT 1000 /* One second */ #define DEFAULT_HEARTBEAT_TIMEOUT 100 /* One tenth of a second */ #define DEFAULT_INSTALL_SNAPSHOT_TIMEOUT 30000 /* 30 seconds */ #define DEFAULT_SNAPSHOT_THRESHOLD 1024 #define DEFAULT_SNAPSHOT_TRAILING 2048 /* Number of milliseconds after which a server promotion will be aborted if the * server hasn't caught up with the logs yet. */ #define DEFAULT_MAX_CATCH_UP_ROUNDS 10 #define DEFAULT_MAX_CATCH_UP_ROUND_DURATION (5 * 1000) int raft_init(struct raft *r, struct raft_io *io, struct raft_fsm *fsm, const raft_id id, const char *address) { int rv; assert(r != NULL); r->io = io; r->io->data = r; r->fsm = fsm; r->tracer = &StderrTracer; raft_tracer_maybe_enable(r->tracer, true); r->id = id; /* Make a copy of the address */ r->address = HeapMalloc(strlen(address) + 1); if (r->address == NULL) { rv = RAFT_NOMEM; goto err; } strcpy(r->address, address); r->current_term = 0; r->voted_for = 0; logInit(&r->log); raft_configuration_init(&r->configuration); r->configuration_index = 0; r->configuration_uncommitted_index = 0; r->election_timeout = DEFAULT_ELECTION_TIMEOUT; r->heartbeat_timeout = DEFAULT_HEARTBEAT_TIMEOUT; r->install_snapshot_timeout = DEFAULT_INSTALL_SNAPSHOT_TIMEOUT; r->commit_index = 0; r->last_applied = 0; r->last_stored = 0; r->state = RAFT_UNAVAILABLE; r->transfer = NULL; r->snapshot.pending.term = 0; r->snapshot.threshold = DEFAULT_SNAPSHOT_THRESHOLD; r->snapshot.trailing = DEFAULT_SNAPSHOT_TRAILING; r->snapshot.put.data = NULL; r->close_cb = NULL; 
memset(r->errmsg, 0, sizeof r->errmsg); r->pre_vote = false; r->max_catch_up_rounds = DEFAULT_MAX_CATCH_UP_ROUNDS; r->max_catch_up_round_duration = DEFAULT_MAX_CATCH_UP_ROUND_DURATION; rv = r->io->init(r->io, r->id, r->address); if (rv != 0) { ErrMsgTransfer(r->io->errmsg, r->errmsg, "io"); goto err_after_address_alloc; } return 0; err_after_address_alloc: HeapFree(r->address); err: assert(rv != 0); return rv; } static void ioCloseCb(struct raft_io *io) { struct raft *r = io->data; raft_free(r->address); logClose(&r->log); raft_configuration_close(&r->configuration); if (r->close_cb != NULL) { r->close_cb(r); } } void raft_close(struct raft *r, void (*cb)(struct raft *r)) { assert(r->close_cb == NULL); if (r->state != RAFT_UNAVAILABLE) { convertToUnavailable(r); } r->close_cb = cb; r->io->close(r->io, ioCloseCb); } void raft_set_election_timeout(struct raft *r, const unsigned msecs) { r->election_timeout = msecs; } void raft_set_heartbeat_timeout(struct raft *r, const unsigned msecs) { r->heartbeat_timeout = msecs; } void raft_set_install_snapshot_timeout(struct raft *r, const unsigned msecs) { r->install_snapshot_timeout = msecs; } void raft_set_snapshot_threshold(struct raft *r, unsigned n) { r->snapshot.threshold = n; } void raft_set_snapshot_trailing(struct raft *r, unsigned n) { r->snapshot.trailing = n; } void raft_set_max_catch_up_rounds(struct raft *r, unsigned n) { r->max_catch_up_rounds = n; } void raft_set_max_catch_up_round_duration(struct raft *r, unsigned msecs) { r->max_catch_up_round_duration = msecs; } void raft_set_pre_vote(struct raft *r, bool enabled) { r->pre_vote = enabled; } const char *raft_errmsg(struct raft *r) { return r->errmsg; } int raft_bootstrap(struct raft *r, const struct raft_configuration *conf) { int rv; if (r->state != RAFT_UNAVAILABLE) { return RAFT_BUSY; } rv = r->io->bootstrap(r->io, conf); if (rv != 0) { return rv; } return 0; } int raft_recover(struct raft *r, const struct raft_configuration *conf) { int rv; if (r->state 
!= RAFT_UNAVAILABLE) { return RAFT_BUSY; } rv = r->io->recover(r->io, conf); if (rv != 0) { return rv; } return 0; } const char *raft_strerror(int errnum) { return errCodeToString(errnum); } void raft_configuration_init(struct raft_configuration *c) { configurationInit(c); } void raft_configuration_close(struct raft_configuration *c) { configurationClose(c); } int raft_configuration_add(struct raft_configuration *c, const raft_id id, const char *address, const int role) { return configurationAdd(c, id, address, role); } int raft_configuration_encode(const struct raft_configuration *c, struct raft_buffer *buf) { return configurationEncode(c, buf); } unsigned long long raft_digest(const char *text, unsigned long long n) { struct byteSha1 sha1; uint8_t value[20]; uint64_t n64 = byteFlip64((uint64_t)n); uint64_t digest; byteSha1Init(&sha1); byteSha1Update(&sha1, (const uint8_t *)text, (uint32_t)strlen(text)); byteSha1Update(&sha1, (const uint8_t *)&n64, (uint32_t)(sizeof n64)); byteSha1Digest(&sha1, value); memcpy(&digest, value + (sizeof value - sizeof digest), sizeof digest); return byteFlip64(digest); } raft-0.11.3/src/recv.c000066400000000000000000000150521415614527300144350ustar00rootroot00000000000000#include "recv.h" #include "assert.h" #include "convert.h" #include "entry.h" #include "heap.h" #include "log.h" #include "membership.h" #include "recv_append_entries.h" #include "recv_append_entries_result.h" #include "recv_install_snapshot.h" #include "recv_request_vote.h" #include "recv_request_vote_result.h" #include "recv_timeout_now.h" #include "string.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Dispatch a single RPC message to the appropriate handler. 
*/ static int recvMessage(struct raft *r, struct raft_message *message) { int rv = 0; if (message->type < RAFT_IO_APPEND_ENTRIES || message->type > RAFT_IO_TIMEOUT_NOW) { tracef("received unknown message type type: %d", message->type); return 0; } /* tracef("%s from server %ld", message_descs[message->type - 1], message->server_id); */ switch (message->type) { case RAFT_IO_APPEND_ENTRIES: rv = recvAppendEntries(r, message->server_id, message->server_address, &message->append_entries); if (rv != 0) { entryBatchesDestroy(message->append_entries.entries, message->append_entries.n_entries); } break; case RAFT_IO_APPEND_ENTRIES_RESULT: rv = recvAppendEntriesResult(r, message->server_id, message->server_address, &message->append_entries_result); break; case RAFT_IO_REQUEST_VOTE: rv = recvRequestVote(r, message->server_id, message->server_address, &message->request_vote); break; case RAFT_IO_REQUEST_VOTE_RESULT: rv = recvRequestVoteResult(r, message->server_id, message->server_address, &message->request_vote_result); break; case RAFT_IO_INSTALL_SNAPSHOT: rv = recvInstallSnapshot(r, message->server_id, message->server_address, &message->install_snapshot); /* Already installing a snapshot, wait for it and ignore this one */ if (rv == RAFT_BUSY) { raft_free(message->install_snapshot.data.base); raft_configuration_close(&message->install_snapshot.conf); rv = 0; } break; case RAFT_IO_TIMEOUT_NOW: rv = recvTimeoutNow(r, message->server_id, message->server_address, &message->timeout_now); break; }; if (rv != 0 && rv != RAFT_NOCONNECTION) { tracef("recv: %d: %s", message->type, raft_strerror(rv)); return rv; } /* If there's a leadership transfer in progress, check if it has * completed. 
*/ if (r->transfer != NULL) { if (r->follower_state.current_leader.id == r->transfer->id) { membershipLeadershipTransferClose(r); } } return 0; } void recvCb(struct raft_io *io, struct raft_message *message) { struct raft *r = io->data; int rv; if (r->state == RAFT_UNAVAILABLE) { switch (message->type) { case RAFT_IO_APPEND_ENTRIES: entryBatchesDestroy(message->append_entries.entries, message->append_entries.n_entries); break; case RAFT_IO_INSTALL_SNAPSHOT: raft_configuration_close(&message->install_snapshot.conf); raft_free(message->install_snapshot.data.base); break; } return; } rv = recvMessage(r, message); if (rv != 0) { convertToUnavailable(r); } } int recvBumpCurrentTerm(struct raft *r, raft_term term) { int rv; char msg[128]; assert(r != NULL); assert(term > r->current_term); sprintf(msg, "remote term %lld is higher than %lld -> bump local term", term, r->current_term); if (r->state != RAFT_FOLLOWER) { strcat(msg, " and step down"); } tracef("%s", msg); /* Save the new term to persistent store, resetting the vote. */ rv = r->io->set_term(r->io, term); if (rv != 0) { return rv; } /* Update our cache too. */ r->current_term = term; r->voted_for = 0; if (r->state != RAFT_FOLLOWER) { /* Also convert to follower. */ convertToFollower(r); } return 0; } void recvCheckMatchingTerms(struct raft *r, raft_term term, int *match) { if (term < r->current_term) { *match = -1; } else if (term > r->current_term) { *match = 1; } else { *match = 0; } } int recvEnsureMatchingTerms(struct raft *r, raft_term term, int *match) { int rv; assert(r != NULL); assert(match != NULL); recvCheckMatchingTerms(r, term, match); if (*match == -1) { tracef("old term - current_term:%llu other_term:%llu", r->current_term, term); return 0; } /* From Figure 3.1: * * Rules for Servers: All Servers: If RPC request or response contains * term T > currentTerm: set currentTerm = T, convert to follower. 
* * From state diagram in Figure 3.3: * * [leader]: discovers server with higher term -> [follower] * * From Section 3.3: * * If a candidate or leader discovers that its term is out of date, it * immediately reverts to follower state. */ if (*match == 1) { rv = recvBumpCurrentTerm(r, term); if (rv != 0) { tracef("recvBumpCurrentTerm failed %d", rv); return rv; } } return 0; } int recvUpdateLeader(struct raft *r, const raft_id id, const char *address) { assert(r->state == RAFT_FOLLOWER); r->follower_state.current_leader.id = id; /* If the address of the current leader is the same as the given one, we're * done. */ if (r->follower_state.current_leader.address != NULL && strcmp(address, r->follower_state.current_leader.address) == 0) { return 0; } if (r->follower_state.current_leader.address != NULL) { HeapFree(r->follower_state.current_leader.address); } r->follower_state.current_leader.address = HeapMalloc(strlen(address) + 1); if (r->follower_state.current_leader.address == NULL) { return RAFT_NOMEM; } strcpy(r->follower_state.current_leader.address, address); return 0; } #undef tracef raft-0.11.3/src/recv.h000066400000000000000000000033431415614527300144420ustar00rootroot00000000000000/* Receive an RPC message. */ #ifndef RECV_H_ #define RECV_H_ #include "../include/raft.h" /* Callback to be passed to the raft_io implementation. It will be invoked upon * receiving an RPC message. */ void recvCb(struct raft_io *io, struct raft_message *message); /* Compare a request's term with the server's current term. * * The match output parameter will be set to 0 if the local term matches the * request's term, to -1 if the request's term is lower, and to 1 if the * request's term is higher. */ void recvCheckMatchingTerms(struct raft *r, raft_term term, int *match); /* Bump the current term and possibly step down from candidate or leader * state. 
*/ int recvBumpCurrentTerm(struct raft *r, raft_term term); /* Common logic for RPC handlers, comparing the request's term with the server's * current term and possibly deciding to reject the request or step down from * candidate or leader. * * From Section 3.3: * * If a candidate or leader discovers that its term is out of date, it * immediately reverts to follower state. If a server receives a request with * a stale term number, it rejects the request. * * The match output parameter will be set to 0 if the local term matches the * request's term, to -1 if the request's term is lower, and to 1 if the * request's term was higher but we have successfully bumped the local one to * match it (and stepped down to follower in that case, if we were not * follower already). */ int recvEnsureMatchingTerms(struct raft *r, raft_term term, int *match); /* If different from the current one, update information about the current * leader. Must be called only by followers. */ int recvUpdateLeader(struct raft *r, raft_id id, const char *address); #endif /* RECV_H_ */ raft-0.11.3/src/recv_append_entries.c000066400000000000000000000116221415614527300175140ustar00rootroot00000000000000#include "recv_append_entries.h" #include "assert.h" #include "convert.h" #include "entry.h" #include "heap.h" #include "log.h" #include "recv.h" #include "replication.h" #include "tracing.h" #define tracef(...) 
Tracef(r->tracer, __VA_ARGS__) static void recvSendAppendEntriesResultCb(struct raft_io_send *req, int status) { (void)status; HeapFree(req); } int recvAppendEntries(struct raft *r, raft_id id, const char *address, const struct raft_append_entries *args) { struct raft_io_send *req; struct raft_message message; struct raft_append_entries_result *result = &message.append_entries_result; int match; bool async; int rv; assert(r != NULL); assert(id > 0); assert(args != NULL); assert(address != NULL); tracef("self:%llu from:%llu@%s leader_commit:%llu n_entries:%d prev_log_index:%llu prev_log_term:%llu, term:%llu", r->id, id, address, args->leader_commit, args->n_entries, args->prev_log_index, args->prev_log_term, args->term); result->rejected = args->prev_log_index; result->last_log_index = logLastIndex(&r->log); rv = recvEnsureMatchingTerms(r, args->term, &match); if (rv != 0) { return rv; } /* From Figure 3.1: * * AppendEntries RPC: Receiver implementation: Reply false if term < * currentTerm. */ if (match < 0) { tracef("local term is higher -> reject "); goto reply; } /* If we get here it means that the term in the request matches our current * term or it was higher and we have possibly stepped down, because we * discovered the current leader: * * From Figure 3.1: * * Rules for Servers: Candidates: if AppendEntries RPC is received from * new leader: convert to follower. * * From Section 3.4: * * While waiting for votes, a candidate may receive an AppendEntries RPC * from another server claiming to be leader. If the leader's term * (included in its RPC) is at least as large as the candidate's current * term, then the candidate recognizes the leader as legitimate and * returns to follower state. If the term in the RPC is smaller than the * candidate's current term, then the candidate rejects the RPC and * continues in candidate state. 
* * From state diagram in Figure 3.3: * * [candidate]: discovers current leader -> [follower] * * Note that it should not be possible for us to be in leader state, because * the leader that is sending us the request should have either a lower term * (and in that case we reject the request above), or a higher term (and in * that case we step down). It can't have the same term because at most one * leader can be elected at any given term. */ assert(r->state == RAFT_FOLLOWER || r->state == RAFT_CANDIDATE); assert(r->current_term == args->term); if (r->state == RAFT_CANDIDATE) { /* The current term and the peer one must match, otherwise we would have * either rejected the request or stepped down to followers. */ assert(match == 0); tracef("discovered leader -> step down "); convertToFollower(r); } assert(r->state == RAFT_FOLLOWER); /* Update current leader because the term in this AppendEntries RPC is up to * date. */ rv = recvUpdateLeader(r, id, address); if (rv != 0) { return rv; } /* Reset the election timer. */ r->election_timer_start = r->io->time(r->io); /* If we are installing a snapshot, ignore these entries. TODO: we should do * something smarter, e.g. buffering the entries in the I/O backend, which * should be in charge of serializing everything. */ if (replicationInstallSnapshotBusy(r) && args->n_entries > 0) { tracef("ignoring AppendEntries RPC during snapshot install"); entryBatchesDestroy(args->entries, args->n_entries); return 0; } rv = replicationAppend(r, args, &result->rejected, &async); if (rv != 0) { return rv; } if (async) { return 0; } /* Echo back to the leader the point that we reached. */ result->last_log_index = r->last_stored; reply: result->term = r->current_term; /* Free the entries batch, if any. 
*/ if (args->n_entries > 0 && args->entries[0].batch != NULL) { raft_free(args->entries[0].batch); } if (args->entries != NULL) { raft_free(args->entries); } message.type = RAFT_IO_APPEND_ENTRIES_RESULT; message.server_id = id; message.server_address = address; req = HeapMalloc(sizeof *req); if (req == NULL) { return RAFT_NOMEM; } req->data = r; rv = r->io->send(r->io, req, &message, recvSendAppendEntriesResultCb); if (rv != 0) { raft_free(req); return rv; } return 0; } #undef tracef raft-0.11.3/src/recv_append_entries.h000066400000000000000000000006271415614527300175240ustar00rootroot00000000000000/* Receive an AppendEntries message. */ #ifndef RECV_APPEND_ENTRIES_H_ #define RECV_APPEND_ENTRIES_H_ #include "../include/raft.h" /* Process an AppendEntries RPC from the given server. */ int recvAppendEntries(struct raft *r, raft_id id, const char *address, const struct raft_append_entries *args); #endif /* RECV_APPEND_ENTRIES_H_ */ raft-0.11.3/src/recv_append_entries_result.c000066400000000000000000000035141415614527300211130ustar00rootroot00000000000000#include "recv_append_entries_result.h" #include "assert.h" #include "configuration.h" #include "tracing.h" #include "recv.h" #include "replication.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) int recvAppendEntriesResult(struct raft *r, const raft_id id, const char *address, const struct raft_append_entries_result *result) { int match; const struct raft_server *server; int rv; assert(r != NULL); assert(id > 0); assert(address != NULL); assert(result != NULL); tracef("self:%llu from:%llu@%s last_log_index:%llu rejected:%llu term:%llu", r->id, id, address, result->last_log_index, result->rejected, result->term); if (r->state != RAFT_LEADER) { tracef("local server is not leader -> ignore"); return 0; } rv = recvEnsureMatchingTerms(r, result->term, &match); if (rv != 0) { return rv; } if (match < 0) { tracef("local term is higher -> ignore "); return 0; } /* If we have stepped down, abort here. 
* * From Figure 3.1: * * [Rules for Servers] All Servers: If RPC request or response contains * term T > currentTerm: set currentTerm = T, convert to follower. */ if (match > 0) { assert(r->state == RAFT_FOLLOWER); return 0; } assert(result->term == r->current_term); /* Ignore responses from servers that have been removed */ server = configurationGet(&r->configuration, id); if (server == NULL) { tracef("unknown server -> ignore"); return 0; } /* Update the progress of this server, possibly sending further entries. */ rv = replicationUpdate(r, server, result); if (rv != 0) { return rv; } return 0; } #undef tracef raft-0.11.3/src/recv_append_entries_result.h000066400000000000000000000007331415614527300211200ustar00rootroot00000000000000/* Receive an AppendEntries result message. */ #ifndef RECV_APPEND_ENTRIES_RESULT_H_ #define RECV_APPEND_ENTRIES_RESULT_H_ #include "../include/raft.h" /* Process an AppendEntries RPC result from the given server. */ int recvAppendEntriesResult(struct raft *r, raft_id id, const char *address, const struct raft_append_entries_result *result); #endif /* RECV_APPEND_ENTRIES_RESULT_H_ */ raft-0.11.3/src/recv_install_snapshot.c000066400000000000000000000051001415614527300200730ustar00rootroot00000000000000#include "recv_install_snapshot.h" #include "assert.h" #include "convert.h" #include "log.h" #include "recv.h" #include "replication.h" #include "tracing.h" #define tracef(...) 
Tracef(r->tracer, __VA_ARGS__) static void installSnapshotSendCb(struct raft_io_send *req, int status) { (void)status; raft_free(req); } int recvInstallSnapshot(struct raft *r, const raft_id id, const char *address, struct raft_install_snapshot *args) { struct raft_io_send *req; struct raft_message message; struct raft_append_entries_result *result = &message.append_entries_result; int rv; int match; bool async; assert(address != NULL); tracef("self:%llu from:%llu@%s conf_index:%llu last_index:%llu last_term:%llu term:%llu", r->id, id, address, args->conf_index, args->last_index, args->last_term, args->term); result->rejected = args->last_index; result->last_log_index = logLastIndex(&r->log); rv = recvEnsureMatchingTerms(r, args->term, &match); if (rv != 0) { return rv; } if (match < 0) { tracef("local term is higher -> reject "); goto reply; } /* TODO: this logic duplicates the one in the AppendEntries handler */ assert(r->state == RAFT_FOLLOWER || r->state == RAFT_CANDIDATE); assert(r->current_term == args->term); if (r->state == RAFT_CANDIDATE) { assert(match == 0); tracef("discovered leader -> step down "); convertToFollower(r); } rv = recvUpdateLeader(r, id, address); if (rv != 0) { return rv; } r->election_timer_start = r->io->time(r->io); rv = replicationInstallSnapshot(r, args, &result->rejected, &async); if (rv != 0) { tracef("replicationInstallSnapshot failed %d", rv); return rv; } if (async) { return 0; } if (result->rejected == 0) { /* Echo back to the leader the point that we reached. */ result->last_log_index = args->last_index; } reply: result->term = r->current_term; /* Free the snapshot data. 
*/ raft_configuration_close(&args->conf); raft_free(args->data.base); message.type = RAFT_IO_APPEND_ENTRIES_RESULT; message.server_id = id; message.server_address = address; req = raft_malloc(sizeof *req); if (req == NULL) { return RAFT_NOMEM; } req->data = r; rv = r->io->send(r->io, req, &message, installSnapshotSendCb); if (rv != 0) { raft_free(req); return rv; } return 0; } #undef tracef raft-0.11.3/src/recv_install_snapshot.h000066400000000000000000000006371415614527300201120ustar00rootroot00000000000000/* InstallSnapshot RPC handlers. */ #ifndef RECV_INSTALL_SNAPSHOT_H_ #define RECV_INSTALL_SNAPSHOT_H_ #include "../include/raft.h" /* Process an InstallSnapshot RPC from the given server. */ int recvInstallSnapshot(struct raft *r, raft_id id, const char *address, struct raft_install_snapshot *args); #endif /* RECV_INSTALL_SNAPSHOT_H_ */ raft-0.11.3/src/recv_request_vote.c000066400000000000000000000072611415614527300172450ustar00rootroot00000000000000#include "recv_request_vote.h" #include "assert.h" #include "election.h" #include "recv.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) static void requestVoteSendCb(struct raft_io_send *req, int status) { (void)status; raft_free(req); } int recvRequestVote(struct raft *r, const raft_id id, const char *address, const struct raft_request_vote *args) { struct raft_io_send *req; struct raft_message message; struct raft_request_vote_result *result = &message.request_vote_result; bool has_leader; int match; int rv; assert(r != NULL); assert(id > 0); assert(args != NULL); tracef("self:%llu from:%llu@%s candidate_id:%llu disrupt_leader:%d last_log_index:%llu " "last_log_term:%llu pre_vote:%d term:%llu", r->id, id, address, args->candidate_id, args->disrupt_leader, args->last_log_index, args->last_log_term, args->pre_vote, args->term); result->vote_granted = false; result->pre_vote = TO_RAFT_TRIBOOL(args->pre_vote); /* Reject the request if we have a leader. 
* * From Section 4.2.3: * * [Removed] servers should not be able to disrupt a leader whose cluster * is receiving heartbeats. [...] If a server receives a RequestVote * request within the minimum election timeout of hearing from a current * leader, it does not update its term or grant its vote * * From Section 4.2.3: * * This change conflicts with the leadership transfer mechanism as * described in Chapter 3, in which a server legitimately starts an * election without waiting an election timeout. In that case, RequestVote * messages should be processed by other servers even when they believe a * current cluster leader exists. Those RequestVote requests can include a * special flag to indicate this behavior ("I have permission to disrupt * the leader - it told me to!"). */ has_leader = r->state == RAFT_LEADER || (r->state == RAFT_FOLLOWER && r->follower_state.current_leader.id != 0); if (has_leader && !args->disrupt_leader) { tracef("local server has a leader -> reject "); goto reply; } /* If this is a pre-vote request, don't actually increment our term or * persist the vote. */ if (args->pre_vote) { recvCheckMatchingTerms(r, args->term, &match); } else { rv = recvEnsureMatchingTerms(r, args->term, &match); if (rv != 0) { return rv; } } /* From Figure 3.1: * * RequestVote RPC: Receiver implementation: Reply false if * term < currentTerm. * */ if (match < 0) { tracef("local term is higher -> reject "); goto reply; } /* Unless this is a pre-vote request, at this point our term must be the * same as the request term (otherwise we would have rejected the request or * bumped our term). 
*/ if (!args->pre_vote) { tracef("no pre_vote: current_term:%llu term:%llu", r->current_term, args->term); assert(r->current_term == args->term); } rv = electionVote(r, args, &result->vote_granted); if (rv != 0) { return rv; } reply: result->term = r->current_term; message.type = RAFT_IO_REQUEST_VOTE_RESULT; message.server_id = id; message.server_address = address; req = raft_malloc(sizeof *req); if (req == NULL) { return RAFT_NOMEM; } req->data = r; rv = r->io->send(r->io, req, &message, requestVoteSendCb); if (rv != 0) { raft_free(req); return rv; } return 0; } #undef tracef raft-0.11.3/src/recv_request_vote.h000066400000000000000000000005731415614527300172510ustar00rootroot00000000000000/* RequestVote RPC handler. */ #ifndef RECV_REQUEST_VOTE_H_ #define RECV_REQUEST_VOTE_H_ #include "../include/raft.h" /* Process a RequestVote RPC from the given server. */ int recvRequestVote(struct raft *r, raft_id id, const char *address, const struct raft_request_vote *args); #endif /* RECV_REQUEST_VOTE_H_ */ raft-0.11.3/src/recv_request_vote_result.c000066400000000000000000000115461415614527300206440ustar00rootroot00000000000000#include "recv_request_vote_result.h" #include "assert.h" #include "configuration.h" #include "convert.h" #include "election.h" #include "recv.h" #include "replication.h" #include "tracing.h" #define tracef(...) 
Tracef(r->tracer, __VA_ARGS__) int recvRequestVoteResult(struct raft *r, raft_id id, const char *address, const struct raft_request_vote_result *result) { size_t votes_index; int match; int rv; (void)address; assert(r != NULL); assert(id > 0); tracef("self:%llu from:%llu@%s term:%llu vote_granted:%d pre_vote:%d", r->id, id, address, result->term, result->vote_granted, result->pre_vote); votes_index = configurationIndexOfVoter(&r->configuration, id); if (votes_index == r->configuration.n) { tracef("non-voting or unknown server -> reject"); return 0; } /* Ignore responses if we are not candidate anymore */ if (r->state != RAFT_CANDIDATE) { tracef("local server is not candidate -> ignore"); return 0; } /* If we're in the pre-vote phase, don't actually increment our term right * now (we'll do it later, if we start the second phase), and also don't * step down if the peer is just one term ahead (this is okay as in the * request we sent our current term plus one). */ if (r->candidate_state.in_pre_vote) { recvCheckMatchingTerms(r, result->term, &match); } else { rv = recvEnsureMatchingTerms(r, result->term, &match); if (rv != 0) { return rv; } } /* Converted to follower as a result of seeing a higher term. */ if (r->state != RAFT_CANDIDATE) { tracef("no longer candidate -> ignore"); return 0; } if (match < 0) { /* If the term in the result is older than ours, this is an old message * we should ignore, because the node who voted for us would have * obtained our term. This happens if the network is pretty choppy. */ tracef("local term is higher -> ignore"); return 0; } /* Avoid counting pre-vote votes as regular votes. 
*/ if (!r->candidate_state.in_pre_vote && result->pre_vote == raft_tribool_true) { tracef("receive stale pre-vote response -> ignore"); return 0; } /* This can happen when a candidate wins a pre-vote, bumps its term, * sends real RequestVote RPCs, crashes, comes online, starts a pre-vote * and then receives the response to the RequestVote RPC it sent * out before crashing. */ if (r->candidate_state.in_pre_vote && result->pre_vote == raft_tribool_false) { tracef("receive vote response during pre-vote -> ignore"); return 0; } /* If we're in the pre-vote phase, check that the peer's is at most one term * ahead (possibly stepping down). If we're the actual voting phase, we * expect our term must to be the same as the response term (otherwise we * would have either ignored the result bumped our term). */ if (r->candidate_state.in_pre_vote) { if (match > 0) { if (result->term > r->current_term + 1) { assert(!result->vote_granted); rv = recvBumpCurrentTerm(r, result->term); return rv; } } } else { assert(result->term == r->current_term); } /* If the vote was granted and we reached quorum, convert to leader. * * From Figure 3.1: * * If votes received from majority of severs: become leader. * * From state diagram in Figure 3.3: * * [candidate]: receives votes from majority of servers -> [leader] * * From Section 3.4: * * A candidate wins an election if it receives votes from a majority of * the servers in the full cluster for the same term. Each server will * vote for at most one candidate in a given term, on a * firstcome-first-served basis [...]. Once a candidate wins an election, * it becomes leader. 
*/ if (result->vote_granted) { if (electionTally(r, votes_index)) { if (r->candidate_state.in_pre_vote) { tracef("votes quorum reached -> pre-vote successful"); r->candidate_state.in_pre_vote = false; rv = electionStart(r); if (rv != 0) { return rv; } } else { tracef("votes quorum reached -> convert to leader"); rv = convertToLeader(r); if (rv != 0) { return rv; } /* Send initial heartbeat. */ replicationHeartbeat(r); } } else { tracef("votes quorum not reached"); } } else { tracef("vote was not granted"); } return 0; } #undef tracef raft-0.11.3/src/recv_request_vote_result.h000066400000000000000000000007021415614527300206410ustar00rootroot00000000000000/* Receive a RequestVote result. */ #ifndef RECV_REQUEST_VOTE_RESULT_H_ #define RECV_REQUEST_VOTE_RESULT_H_ #include "../include/raft.h" /* Process a RequestVote RPC result from the given server. */ int recvRequestVoteResult(struct raft *r, raft_id id, const char *address, const struct raft_request_vote_result *result); #endif /* RAFT_RECV_REQUEST_VOTE_RESULT_H_ */ raft-0.11.3/src/recv_timeout_now.c000066400000000000000000000037631415614527300170740ustar00rootroot00000000000000#include "recv_timeout_now.h" #include "assert.h" #include "configuration.h" #include "convert.h" #include "log.h" #include "recv.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) int recvTimeoutNow(struct raft *r, const raft_id id, const char *address, const struct raft_timeout_now *args) { const struct raft_server *local_server; raft_index local_last_index; raft_term local_last_term; int match; int rv; assert(r != NULL); assert(id > 0); assert(args != NULL); (void)address; tracef("self:%llu from:%llu@%s last_log_index:%llu last_log_term:%llu term:%llu", r->id, id, address, args->last_log_index, args->last_log_term, args->term); /* Ignore the request if we are not voters. 
*/ local_server = configurationGet(&r->configuration, r->id); if (local_server == NULL || local_server->role != RAFT_VOTER) { tracef("non-voter"); return 0; } /* Ignore the request if we are not follower, or we have different * leader. */ if (r->state != RAFT_FOLLOWER || r->follower_state.current_leader.id != id) { tracef("Ignore - r->state:%d current_leader.id:%llu", r->state, r->follower_state.current_leader.id); return 0; } /* Possibly update our term. Ignore the request if it turns out we have a * higher term. */ rv = recvEnsureMatchingTerms(r, args->term, &match); if (rv != 0) { return rv; } if (match < 0) { return 0; } /* Ignore the request if we our log is not up-to-date. */ local_last_index = logLastIndex(&r->log); local_last_term = logLastTerm(&r->log); if (local_last_index != args->last_log_index || local_last_term != args->last_log_term) { return 0; } /* Convert to candidate and start a new election. */ rv = convertToCandidate(r, true /* disrupt leader */); if (rv != 0) { return rv; } return 0; } #undef tracef raft-0.11.3/src/recv_timeout_now.h000066400000000000000000000005671415614527300171000ustar00rootroot00000000000000/* Receive a TimeoutNow message. */ #ifndef RECV_TIMEOUT_NOW_H_ #define RECV_TIMEOUT_NOW_H_ #include "../include/raft.h" /* Process a TimeoutNow RPC from the given server. */ int recvTimeoutNow(struct raft *r, raft_id id, const char *address, const struct raft_timeout_now *args); #endif /* RECV_TIMEOUT_NOW_H_ */ raft-0.11.3/src/replication.c000066400000000000000000001362441415614527300160160ustar00rootroot00000000000000#include #include "assert.h" #include "configuration.h" #include "convert.h" #include "entry.h" #ifdef __GLIBC__ #include "error.h" #endif #include "err.h" #include "heap.h" #include "log.h" #include "membership.h" #include "progress.h" #include "queue.h" #include "replication.h" #include "request.h" #include "snapshot.h" #include "tracing.h" #define tracef(...) 
Tracef(r->tracer, __VA_ARGS__) #ifndef max #define max(a, b) ((a) < (b) ? (b) : (a)) #endif #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif /* Context of a RAFT_IO_APPEND_ENTRIES request that was submitted with * raft_io_>send(). */ struct sendAppendEntries { struct raft *raft; /* Instance sending the entries. */ struct raft_io_send send; /* Underlying I/O send request. */ raft_index index; /* Index of the first entry in the request. */ struct raft_entry *entries; /* Entries referenced in the request. */ unsigned n; /* Length of the entries array. */ raft_id server_id; /* Destination server. */ }; /* Callback invoked after request to send an AppendEntries RPC has completed. */ static void sendAppendEntriesCb(struct raft_io_send *send, const int status) { struct sendAppendEntries *req = send->data; struct raft *r = req->raft; unsigned i = configurationIndexOf(&r->configuration, req->server_id); if (r->state == RAFT_LEADER && i < r->configuration.n) { if (status != 0) { tracef("failed to send append entries to server %llu: %s", req->server_id, raft_strerror(status)); /* Go back to probe mode. */ progressToProbe(r, i); } } /* Tell the log that we're done referencing these entries. */ logRelease(&r->log, req->index, req->entries, req->n); raft_free(req); } /* Send an AppendEntries message to the i'th server, including all log entries * from the given point onwards. 
*/ static int sendAppendEntries(struct raft *r, const unsigned i, const raft_index prev_index, const raft_term prev_term) { struct raft_server *server = &r->configuration.servers[i]; struct raft_message message; struct raft_append_entries *args = &message.append_entries; struct sendAppendEntries *req; raft_index next_index = prev_index + 1; int rv; args->term = r->current_term; args->prev_log_index = prev_index; args->prev_log_term = prev_term; /* TODO: implement a limit to the total size of the entries being sent */ rv = logAcquire(&r->log, next_index, &args->entries, &args->n_entries); if (rv != 0) { goto err; } /* From Section 3.5: * * The leader keeps track of the highest index it knows to be committed, * and it includes that index in future AppendEntries RPCs (including * heartbeats) so that the other servers eventually find out. Once a * follower learns that a log entry is committed, it applies the entry to * its local state machine (in log order) */ args->leader_commit = r->commit_index; tracef("send %u entries starting at %llu to server %llu (last index %llu)", args->n_entries, args->prev_log_index, server->id, logLastIndex(&r->log)); message.type = RAFT_IO_APPEND_ENTRIES; message.server_id = server->id; message.server_address = server->address; req = raft_malloc(sizeof *req); if (req == NULL) { rv = RAFT_NOMEM; goto err_after_entries_acquired; } req->raft = r; req->index = args->prev_log_index + 1; req->entries = args->entries; req->n = args->n_entries; req->server_id = server->id; req->send.data = req; rv = r->io->send(r->io, &req->send, &message, sendAppendEntriesCb); if (rv != 0) { goto err_after_req_alloc; } if (progressState(r, i) == PROGRESS__PIPELINE) { /* Optimistically update progress. 
*/ progressOptimisticNextIndex(r, i, req->index + req->n); } progressUpdateLastSend(r, i); return 0; err_after_req_alloc: raft_free(req); err_after_entries_acquired: logRelease(&r->log, next_index, args->entries, args->n_entries); err: assert(rv != 0); return rv; } /* Context of a RAFT_IO_INSTALL_SNAPSHOT request that was submitted with * raft_io_>send(). */ struct sendInstallSnapshot { struct raft *raft; /* Instance sending the snapshot. */ struct raft_io_snapshot_get get; /* Snapshot get request. */ struct raft_io_send send; /* Underlying I/O send request. */ struct raft_snapshot *snapshot; /* Snapshot to send. */ raft_id server_id; /* Destination server. */ }; static void sendInstallSnapshotCb(struct raft_io_send *send, int status) { struct sendInstallSnapshot *req = send->data; struct raft *r = req->raft; const struct raft_server *server; server = configurationGet(&r->configuration, req->server_id); if (status != 0) { tracef("send install snapshot: %s", raft_strerror(status)); if (r->state == RAFT_LEADER && server != NULL) { unsigned i; i = configurationIndexOf(&r->configuration, req->server_id); progressAbortSnapshot(r, i); } } snapshotClose(req->snapshot); raft_free(req->snapshot); raft_free(req); } static void sendSnapshotGetCb(struct raft_io_snapshot_get *get, struct raft_snapshot *snapshot, int status) { struct sendInstallSnapshot *req = get->data; struct raft *r = req->raft; struct raft_message message; struct raft_install_snapshot *args = &message.install_snapshot; const struct raft_server *server = NULL; bool progress_state_is_snapshot = false; unsigned i = 0; int rv; if (status != 0) { tracef("get snapshot %s", raft_strerror(status)); goto abort; } if (r->state != RAFT_LEADER) { goto abort_with_snapshot; } server = configurationGet(&r->configuration, req->server_id); if (server == NULL) { /* Probably the server was removed in the meantime. 
*/ goto abort_with_snapshot; } i = configurationIndexOf(&r->configuration, req->server_id); progress_state_is_snapshot = progressState(r, i) == PROGRESS__SNAPSHOT; if (!progress_state_is_snapshot) { /* Something happened in the meantime. */ goto abort_with_snapshot; } assert(snapshot->n_bufs == 1); message.type = RAFT_IO_INSTALL_SNAPSHOT; message.server_id = server->id; message.server_address = server->address; args->term = r->current_term; args->last_index = snapshot->index; args->last_term = snapshot->term; args->conf_index = snapshot->configuration_index; args->conf = snapshot->configuration; args->data = snapshot->bufs[0]; req->snapshot = snapshot; req->send.data = req; tracef("sending snapshot with last index %llu to %llu", snapshot->index, server->id); rv = r->io->send(r->io, &req->send, &message, sendInstallSnapshotCb); if (rv != 0) { goto abort_with_snapshot; } goto out; abort_with_snapshot: snapshotClose(snapshot); raft_free(snapshot); abort: if (r->state == RAFT_LEADER && server != NULL && progress_state_is_snapshot) { progressAbortSnapshot(r, i); } raft_free(req); out: return; } /* Send the latest snapshot to the i'th server */ static int sendSnapshot(struct raft *r, const unsigned i) { struct raft_server *server = &r->configuration.servers[i]; struct sendInstallSnapshot *request; int rv; progressToSnapshot(r, i); request = raft_malloc(sizeof *request); if (request == NULL) { rv = RAFT_NOMEM; goto err; } request->raft = r; request->server_id = server->id; request->get.data = request; /* TODO: make sure that the I/O implementation really returns the latest * snapshot *at this time* and not any snapshot that might be stored at a * later point. Otherwise the progress snapshot_index would be wrong. 
*/ rv = r->io->snapshot_get(r->io, &request->get, sendSnapshotGetCb); if (rv != 0) { goto err_after_req_alloc; } progressUpdateSnapshotLastSend(r, i); return 0; err_after_req_alloc: raft_free(request); err: progressAbortSnapshot(r, i); assert(rv != 0); return rv; } int replicationProgress(struct raft *r, unsigned i) { struct raft_server *server = &r->configuration.servers[i]; bool progress_state_is_snapshot = progressState(r, i) == PROGRESS__SNAPSHOT; raft_index snapshot_index = logSnapshotIndex(&r->log); raft_index next_index = progressNextIndex(r, i); raft_index prev_index; raft_term prev_term; assert(r->state == RAFT_LEADER); assert(server->id != r->id); assert(next_index >= 1); if (!progressShouldReplicate(r, i)) { return 0; } /* From Section 3.5: * * When sending an AppendEntries RPC, the leader includes the index and * term of the entry in its log that immediately precedes the new * entries. If the follower does not find an entry in its log with the * same index and term, then it refuses the new entries. The consistency * check acts as an induction step: the initial empty state of the logs * satisfies the Log Matching Property, and the consistency check * preserves the Log Matching Property whenever logs are extended. As a * result, whenever AppendEntries returns successfully, the leader knows * that the follower's log is identical to its own log up through the new * entries (Log Matching Property in Figure 3.2). */ if (next_index == 1) { /* We're including the very first entry, so prevIndex and prevTerm are * null. If the first entry is not available anymore, send the last * snapshot if we're not already sending one. */ if (snapshot_index > 0 && !progress_state_is_snapshot) { raft_index last_index = logLastIndex(&r->log); assert(last_index > 0); /* The log can't be empty */ goto send_snapshot; } prev_index = 0; prev_term = 0; } else { /* Set prevIndex and prevTerm to the index and term of the entry at * next_index - 1. 
*/ prev_index = next_index - 1; prev_term = logTermOf(&r->log, prev_index); /* If the entry is not anymore in our log, send the last snapshot if we're * not doing so already. */ if (prev_term == 0 && !progress_state_is_snapshot) { assert(prev_index < snapshot_index); tracef("missing entry at index %lld -> send snapshot", prev_index); goto send_snapshot; } } /* Send empty AppendEntries RPC when installing a snaphot */ if (progress_state_is_snapshot) { prev_index = logLastIndex(&r->log); prev_term = logLastTerm(&r->log); } return sendAppendEntries(r, i, prev_index, prev_term); send_snapshot: if (progressGetRecentRecv(r, i)) { /* Only send a snapshot when we have heard from the server */ return sendSnapshot(r, i); } else { /* Send empty AppendEntries RPC when we haven't heard from the server */ prev_index = logLastIndex(&r->log); prev_term = logLastTerm(&r->log); return sendAppendEntries(r, i, prev_index, prev_term); } } /* Possibly trigger I/O requests for newly appended log entries or heartbeats. * * This function loops through all followers and triggers replication on them. * * It must be called only by leaders. */ static int triggerAll(struct raft *r) { unsigned i; int rv; assert(r->state == RAFT_LEADER); /* Trigger replication for servers we didn't hear from recently. */ for (i = 0; i < r->configuration.n; i++) { struct raft_server *server = &r->configuration.servers[i]; if (server->id == r->id) { continue; } /* Skip spare servers, unless they're being promoted. */ if (server->role == RAFT_SPARE && server->id != r->leader_state.promotee_id) { continue; } rv = replicationProgress(r, i); if (rv != 0 && rv != RAFT_NOCONNECTION) { /* This is not a critical failure, let's just log it. */ tracef("failed to send append entries to server %llu: %s (%d)", server->id, raft_strerror(rv), rv); } } return 0; } int replicationHeartbeat(struct raft *r) { return triggerAll(r); } /* Context for a write log entries request that was submitted by a leader. 
*/ struct appendLeader { struct raft *raft; /* Instance that has submitted the request */ raft_index index; /* Index of the first entry in the request. */ struct raft_entry *entries; /* Entries referenced in the request. */ unsigned n; /* Length of the entries array. */ struct raft_io_append req; }; /* Called after a successful append entries I/O request to update the index of * the last entry stored on disk. Return how many new entries that are still * present in our in-memory log were stored. */ static size_t updateLastStored(struct raft *r, raft_index first_index, struct raft_entry *entries, size_t n_entries) { size_t i; /* Check which of these entries is still in our in-memory log */ for (i = 0; i < n_entries; i++) { struct raft_entry *entry = &entries[i]; raft_index index = first_index + i; raft_term local_term = logTermOf(&r->log, index); /* If we have no entry at this index, or if the entry we have now has a * different term, it means that this entry got truncated, so let's stop * here. */ if (local_term == 0 || (local_term > 0 && local_term != entry->term)) { break; } /* If we do have an entry at this index, its term must match the one of * the entry we wrote on disk. */ assert(local_term != 0 && local_term == entry->term); } r->last_stored += i; return i; } /* Get the request matching the given index and type, if any. */ static struct request *getRequest(struct raft *r, const raft_index index, int type) { queue *head; struct request *req; if (r->state != RAFT_LEADER) { return NULL; } QUEUE_FOREACH(head, &r->leader_state.requests) { req = QUEUE_DATA(head, struct request, queue); if (req->index == index) { assert(req->type == type); QUEUE_REMOVE(head); return req; } } return NULL; } /* Invoked once a disk write request for new entries has been completed. 
*/ static void appendLeaderCb(struct raft_io_append *req, int status) { struct appendLeader *request = req->data; struct raft *r = request->raft; size_t server_index; int rv; tracef("leader: written %u entries starting at %lld: status %d", request->n, request->index, status); /* In case of a failed disk write, if we were the leader creating these * entries in the first place, truncate our log too (since we have appended * these entries to it) and fire the request callback. */ if (status != 0) { struct raft_apply *apply; ErrMsgTransfer(r->io->errmsg, r->errmsg, "io"); apply = (struct raft_apply *)getRequest(r, request->index, RAFT_COMMAND); if (apply != NULL) { if (apply->cb != NULL) { apply->cb(apply, status, NULL); } } goto out; } updateLastStored(r, request->index, request->entries, request->n); /* If we are not leader anymore, just discard the result. */ if (r->state != RAFT_LEADER) { tracef("local server is not leader -> ignore write log result"); goto out; } /* Only update the next index if we are part of the current * configuration. The only case where this is not true is when we were * asked to remove ourselves from the cluster. * * From Section 4.2.2: * * there will be a period of time (while it is committing Cnew) when a * leader can manage a cluster that does not include itself; it * replicates log entries but does not count itself in majorities. */ server_index = configurationIndexOf(&r->configuration, r->id); if (server_index < r->configuration.n) { r->leader_state.progress[server_index].match_index = r->last_stored; } /* Check if we can commit some new entries. */ replicationQuorum(r, r->last_stored); rv = replicationApply(r); if (rv != 0) { /* TODO: just log the error? */ } out: /* Tell the log that we're done referencing these entries. */ logRelease(&r->log, request->index, request->entries, request->n); if (status != 0) { logTruncate(&r->log, request->index); } raft_free(request); } /* Submit a disk write for all entries from the given index onward. 
 */
static int appendLeader(struct raft *r, raft_index index)
{
    struct raft_entry *entries;
    unsigned n;
    struct appendLeader *request;
    int rv;
    assert(r->state == RAFT_LEADER);
    assert(index > 0);
    assert(index > r->last_stored);
    /* Acquire all the entries from the given index onwards. */
    rv = logAcquire(&r->log, index, &entries, &n);
    if (rv != 0) {
        goto err;
    }
    /* We expect this function to be called only when there are actually
     * some entries to write. */
    if (n == 0) {
        assert(false);
        tracef("No log entries found at index %llu", index);
        ErrMsgPrintf(r->errmsg, "No log entries found at index %llu", index);
        rv = RAFT_SHUTDOWN;
        goto err_after_entries_acquired;
    }
    /* Allocate a new request. Ownership passes to appendLeaderCb, which
     * releases the acquired entries and frees it. */
    request = raft_malloc(sizeof *request);
    if (request == NULL) {
        rv = RAFT_NOMEM;
        goto err_after_entries_acquired;
    }
    request->raft = r;
    request->index = index;
    request->entries = entries;
    request->n = n;
    request->req.data = request;
    rv = r->io->append(r->io, &request->req, entries, n, appendLeaderCb);
    if (rv != 0) {
        ErrMsgTransfer(r->io->errmsg, r->errmsg, "io");
        goto err_after_request_alloc;
    }
    return 0;

err_after_request_alloc:
    raft_free(request);
err_after_entries_acquired:
    logRelease(&r->log, index, entries, n);
err:
    assert(rv != 0);
    return rv;
}

/* Start persisting entries from the given index onwards locally and
 * replicate them to all followers. */
int replicationTrigger(struct raft *r, raft_index index)
{
    int rv;
    rv = appendLeader(r, index);
    if (rv != 0) {
        return rv;
    }
    return triggerAll(r);
}

/* Helper to be invoked after a promotion of a non-voting server has been
 * requested via @raft_assign and that server has caught up with logs.
 *
 * This function changes the local configuration marking the server being
 * promoted as actually voting, appends the a RAFT_CHANGE entry with the new
 * configuration to the local log and triggers its replication.
 */
static int triggerActualPromotion(struct raft *r)
{
    raft_index index;
    raft_term term = r->current_term;
    size_t server_index;
    struct raft_server *server;
    int old_role;
    int rv;
    assert(r->state == RAFT_LEADER);
    assert(r->leader_state.promotee_id != 0);
    server_index =
        configurationIndexOf(&r->configuration, r->leader_state.promotee_id);
    assert(server_index < r->configuration.n);
    server = &r->configuration.servers[server_index];
    assert(server->role != RAFT_VOTER);
    /* Update our current configuration. */
    old_role = server->role;
    server->role = RAFT_VOTER;
    /* Index of the entry being appended. */
    index = logLastIndex(&r->log) + 1;
    /* Encode the new configuration and append it to the log. */
    rv = logAppendConfiguration(&r->log, term, &r->configuration);
    if (rv != 0) {
        goto err;
    }
    /* Start writing the new log entry to disk and send it to the followers. */
    rv = replicationTrigger(r, index);
    if (rv != 0) {
        goto err_after_log_append;
    }
    r->leader_state.promotee_id = 0;
    r->configuration_uncommitted_index = logLastIndex(&r->log);
    return 0;

err_after_log_append:
    logTruncate(&r->log, index);
err:
    /* Roll back the in-memory role change performed above. */
    server->role = old_role;
    assert(rv != 0);
    return rv;
}

int replicationUpdate(struct raft *r,
                      const struct raft_server *server,
                      const struct raft_append_entries_result *result)
{
    bool is_being_promoted;
    raft_index last_index;
    unsigned i;
    int rv;
    i = configurationIndexOf(&r->configuration, server->id);
    assert(r->state == RAFT_LEADER);
    assert(i < r->configuration.n);
    progressMarkRecentRecv(r, i);
    /* If the RPC failed because of a log mismatch, retry.
     *
     * From Figure 3.1:
     *
     *   [Rules for servers] Leaders:
     *
     *   - If AppendEntries fails because of log inconsistency:
     *     decrement nextIndex and retry.
     */
    if (result->rejected > 0) {
        bool retry;
        retry = progressMaybeDecrement(r, i, result->rejected,
                                       result->last_log_index);
        if (retry) {
            /* Retry, ignoring errors. */
            tracef("log mismatch -> send old entries to %llu", server->id);
            replicationProgress(r, i);
        }
        return 0;
    }
    /* In case of success the remote server is expected to send us back the
     * value of prevLogIndex + len(entriesToAppend). If it has a longer log,
     * it might be a leftover from previous terms. */
    last_index = result->last_log_index;
    if (last_index > logLastIndex(&r->log)) {
        last_index = logLastIndex(&r->log);
    }
    /* If the RPC succeeded, update our counters for this server.
     *
     * From Figure 3.1:
     *
     *   [Rules for servers] Leaders:
     *
     *   If successful update nextIndex and matchIndex for follower.
     */
    if (!progressMaybeUpdate(r, i, last_index)) {
        return 0;
    }
    switch (progressState(r, i)) {
        case PROGRESS__SNAPSHOT:
            /* If a snapshot has been installed, transition back to probe */
            if (progressSnapshotDone(r, i)) {
                progressToProbe(r, i);
            }
            break;
        case PROGRESS__PROBE:
            /* Transition to pipeline */
            progressToPipeline(r, i);
            /* NOTE(review): no default case; PROGRESS__PIPELINE (and the
             * PROBE case above) simply fall out of the switch. */
    }
    /* If the server is currently being promoted and is catching up with
     * logs, update the information about the current catch-up round, and
     * possibly proceed with the promotion. */
    is_being_promoted = r->leader_state.promotee_id != 0 &&
                        r->leader_state.promotee_id == server->id;
    if (is_being_promoted) {
        bool is_up_to_date = membershipUpdateCatchUpRound(r);
        if (is_up_to_date) {
            rv = triggerActualPromotion(r);
            if (rv != 0) {
                return rv;
            }
        }
    }
    /* Check if we can commit some new entries. */
    replicationQuorum(r, r->last_stored);
    rv = replicationApply(r);
    if (rv != 0) {
        /* TODO: just log the error? */
    }
    /* Abort here if we have been removed and we are not leaders anymore. */
    if (r->state != RAFT_LEADER) {
        goto out;
    }
    /* Get again the server index since it might have been removed from the
     * configuration. */
    i = configurationIndexOf(&r->configuration, server->id);
    if (i < r->configuration.n) {
        /* If we are transferring leadership to this follower, check if its
         * log is now up-to-date and, if so, send it a TimeoutNow RPC (unless
         * we already did). */
        if (r->transfer != NULL && r->transfer->id == server->id) {
            if (progressIsUpToDate(r, i) && r->transfer->send.data == NULL) {
                rv = membershipLeadershipTransferStart(r);
                if (rv != 0) {
                    membershipLeadershipTransferClose(r);
                }
            }
        }
        /* If this follower is in pipeline mode, send it more entries. */
        if (progressState(r, i) == PROGRESS__PIPELINE) {
            replicationProgress(r, i);
        }
    }
out:
    return 0;
}

static void sendAppendEntriesResultCb(struct raft_io_send *req, int status)
{
    (void)status;
    HeapFree(req);
}

/* Send an AppendEntries result message to the current leader. Allocation or
 * send failures are silently ignored: the leader will just retry later. */
static void sendAppendEntriesResult(
    struct raft *r,
    const struct raft_append_entries_result *result)
{
    struct raft_message message;
    struct raft_io_send *req;
    int rv;
    message.type = RAFT_IO_APPEND_ENTRIES_RESULT;
    message.server_id = r->follower_state.current_leader.id;
    message.server_address = r->follower_state.current_leader.address;
    message.append_entries_result = *result;
    req = raft_malloc(sizeof *req);
    if (req == NULL) {
        return;
    }
    req->data = r;
    rv = r->io->send(r->io, req, &message, sendAppendEntriesResultCb);
    if (rv != 0) {
        raft_free(req);
    }
}

/* Context for a write log entries request that was submitted by a follower. */
struct appendFollower
{
    struct raft *raft; /* Instance that has submitted the request */
    raft_index index;  /* Index of the first entry in the request.
                        */
    struct raft_append_entries args; /* Original request (entries re-acquired
                                      * from the in-memory log). */
    struct raft_io_append req;       /* I/O request handle. */
};

/* Completion callback for the disk write submitted by a follower via
 * replicationAppend(). On success, advance last_stored, apply uncommitted
 * configuration changes, possibly advance the commit index, and notify the
 * leader of the outcome. */
static void appendFollowerCb(struct raft_io_append *req, int status)
{
    struct appendFollower *request = req->data;
    struct raft *r = request->raft;
    struct raft_append_entries *args = &request->args;
    struct raft_append_entries_result result;
    size_t i;
    size_t j;
    int rv;
    tracef("I/O completed on follower: status %d", status);
    assert(args->entries != NULL);
    assert(args->n_entries > 0);
    result.term = r->current_term;
    if (status != 0) {
        if (r->state != RAFT_FOLLOWER) {
            tracef("local server is not follower -> ignore I/O failure");
            goto out;
        }
        result.rejected = args->prev_log_index + 1;
        goto respond;
    }
    /* If we're shutting down or have errored, ignore the result. */
    if (r->state == RAFT_UNAVAILABLE) {
        tracef("local server is unavailable -> ignore I/O result");
        goto out;
    }
    /* We received an InstallSnapshot RPC while these entries were being
     * persisted to disk */
    if (replicationInstallSnapshotBusy(r)) {
        goto out;
    }
    i = updateLastStored(r, request->index, args->entries, args->n_entries);
    /* If none of the entries that we persisted is present anymore in our
     * in-memory log, there's nothing to report or to do. We just discard
     * them. */
    if (i == 0 || r->state != RAFT_FOLLOWER) {
        goto out;
    }
    /* Possibly apply configuration changes as uncommitted. */
    for (j = 0; j < i; j++) {
        struct raft_entry *entry = &args->entries[j];
        raft_index index = request->index + j;
        raft_term local_term = logTermOf(&r->log, index);
        assert(local_term != 0 && local_term == entry->term);
        if (entry->type == RAFT_CHANGE) {
            rv = membershipUncommittedChange(r, index, entry);
            if (rv != 0) {
                goto out;
            }
        }
    }
    /* From Figure 3.1:
     *
     *   AppendEntries RPC: Receiver implementation: If leaderCommit >
     *   commitIndex, set commitIndex = min(leaderCommit, index of last new
     *   entry).
     */
    if (args->leader_commit > r->commit_index) {
        r->commit_index = min(args->leader_commit, r->last_stored);
        rv = replicationApply(r);
        if (rv != 0) {
            goto out;
        }
    }
    if (r->state != RAFT_FOLLOWER) {
        tracef("local server is not follower -> don't send result");
        goto out;
    }
    result.rejected = 0;
respond:
    result.last_log_index = r->last_stored;
    sendAppendEntriesResult(r, &result);
out:
    logRelease(&r->log, request->index, request->args.entries,
               request->args.n_entries);
    raft_free(request);
}

/* Check the log matching property against an incoming AppendEntries request.
 *
 * From Figure 3.1:
 *
 *   [AppendEntries RPC] Receiver implementation:
 *
 *   2. Reply false if log doesn't contain an entry at prevLogIndex whose
 *      term matches prevLogTerm.
 *
 * Return 0 if the check passed.
 *
 * Return 1 if the check did not pass and the request needs to be rejected.
 *
 * Return -1 if there's a conflict and we need to shutdown. */
static int checkLogMatchingProperty(struct raft *r,
                                    const struct raft_append_entries *args)
{
    raft_term local_prev_term;
    /* If this is the very first entry, there's nothing to check. */
    if (args->prev_log_index == 0) {
        return 0;
    }
    local_prev_term = logTermOf(&r->log, args->prev_log_index);
    if (local_prev_term == 0) {
        tracef("no entry at index %llu -> reject", args->prev_log_index);
        return 1;
    }
    if (local_prev_term != args->prev_log_term) {
        if (args->prev_log_index <= r->commit_index) {
            /* Should never happen; something is seriously wrong! */
            tracef(
                "conflicting terms %llu and %llu for entry %llu (commit "
                "index %llu) -> shutdown",
                local_prev_term, args->prev_log_term, args->prev_log_index,
                r->commit_index);
            return -1;
        }
        tracef("previous term mismatch -> reject");
        return 1;
    }
    return 0;
}

/* Delete from our log all entries that conflict with the ones in the given
 * AppendEntries request.
 *
 * From Figure 3.1:
 *
 *   [AppendEntries RPC] Receiver implementation:
 *
 *   3.
 If an existing entry conflicts with a new one (same index but
 *      different terms), delete the existing entry and all that follow it.
 *
 * The i output parameter will be set to the array index of the first new log
 * entry that we don't have yet in our log, among the ones included in the
 * given AppendEntries request. */
static int deleteConflictingEntries(struct raft *r,
                                    const struct raft_append_entries *args,
                                    size_t *i)
{
    size_t j;
    int rv;
    for (j = 0; j < args->n_entries; j++) {
        struct raft_entry *entry = &args->entries[j];
        raft_index entry_index = args->prev_log_index + 1 + j;
        raft_term local_term = logTermOf(&r->log, entry_index);
        if (local_term > 0 && local_term != entry->term) {
            if (entry_index <= r->commit_index) {
                /* Should never happen; something is seriously wrong! */
                tracef("new index conflicts with committed entry -> shutdown");
                return RAFT_SHUTDOWN;
            }
            tracef("log mismatch -> truncate (%llu)", entry_index);
            /* Possibly discard uncommitted configuration changes. */
            if (r->configuration_uncommitted_index >= entry_index) {
                rv = membershipRollback(r);
                if (rv != 0) {
                    return rv;
                }
            }
            /* Delete all entries from this index on because they don't
             * match. */
            rv = r->io->truncate(r->io, entry_index);
            if (rv != 0) {
                return rv;
            }
            logTruncate(&r->log, entry_index);
            /* Drop information about previously stored entries that have
             * just been discarded. */
            if (r->last_stored >= entry_index) {
                r->last_stored = entry_index - 1;
            }
            /* We want to append all entries from here on, replacing anything
             * that we had before. */
            break;
        } else if (local_term == 0) {
            /* We don't have an entry at this index, so we want to append
             * this new one and all the subsequent ones. */
            break;
        }
    }
    *i = j;
    return 0;
}

int replicationAppend(struct raft *r,
                      const struct raft_append_entries *args,
                      raft_index *rejected,
                      bool *async)
{
    struct appendFollower *request;
    int match;
    size_t n;
    size_t i;
    size_t j;
    int rv;
    assert(r != NULL);
    assert(args != NULL);
    assert(rejected != NULL);
    assert(async != NULL);
    assert(r->state == RAFT_FOLLOWER);
    *rejected = args->prev_log_index;
    *async = false;
    /* Check the log matching property. */
    match = checkLogMatchingProperty(r, args);
    if (match != 0) {
        assert(match == 1 || match == -1);
        return match == 1 ? 0 : RAFT_SHUTDOWN;
    }
    /* Delete conflicting entries. */
    rv = deleteConflictingEntries(r, args, &i);
    if (rv != 0) {
        return rv;
    }
    *rejected = 0;
    n = args->n_entries - i; /* Number of new entries */
    /* If this is an empty AppendEntries, there's nothing to write. However
     * we still want to check if we can commit some entry. However, don't
     * commit anything while a snapshot install is busy, r->last_stored will
     * be 0 in that case.
     *
     * From Figure 3.1:
     *
     *   AppendEntries RPC: Receiver implementation: If leaderCommit >
     *   commitIndex, set commitIndex = min(leaderCommit, index of last new
     *   entry).
     */
    if (n == 0) {
        if ((args->leader_commit > r->commit_index) &&
            !replicationInstallSnapshotBusy(r)) {
            r->commit_index = min(args->leader_commit, r->last_stored);
            rv = replicationApply(r);
            if (rv != 0) {
                return rv;
            }
        }
        return 0;
    }
    *async = true;
    request = raft_malloc(sizeof *request);
    if (request == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    request->raft = r;
    request->args = *args;
    /* Index of first new entry */
    request->index = args->prev_log_index + 1 + i;
    /* Update our in-memory log to reflect that we received these entries.
     * We'll notify the leader of a successful append once the write entries
     * request that we issue below actually completes.
     */
    for (j = 0; j < n; j++) {
        struct raft_entry *entry = &args->entries[i + j];
        /* TODO This copy should not strictly be necessary, as the batch
         * logic will take care of freeing the batch buffer in which the
         * entries are received. However, this would lead to memory spikes in
         * certain edge cases.
         * https://github.com/canonical/dqlite/issues/276 */
        struct raft_entry copy = {0};
        rv = entryCopy(entry, &copy);
        if (rv != 0) {
            goto err_after_request_alloc;
        }
        rv = logAppend(&r->log, copy.term, copy.type, &copy.buf, NULL);
        if (rv != 0) {
            goto err_after_request_alloc;
        }
    }
    /* Acquire the relevant entries from the log. */
    rv = logAcquire(&r->log, request->index, &request->args.entries,
                    &request->args.n_entries);
    if (rv != 0) {
        goto err_after_request_alloc;
    }
    assert(request->args.n_entries == n);
    if (request->args.n_entries == 0) {
        tracef("No log entries found at index %llu", request->index);
        ErrMsgPrintf(r->errmsg, "No log entries found at index %llu",
                     request->index);
        rv = RAFT_SHUTDOWN;
        goto err_after_acquire_entries;
    }
    request->req.data = request;
    rv = r->io->append(r->io, &request->req, request->args.entries,
                       request->args.n_entries, appendFollowerCb);
    if (rv != 0) {
        ErrMsgTransfer(r->io->errmsg, r->errmsg, "io");
        goto err_after_acquire_entries;
    }
    entryBatchesDestroy(args->entries, args->n_entries);
    return 0;

err_after_acquire_entries:
    /* Release the entries related to the IO request */
    logRelease(&r->log, request->index, request->args.entries,
               request->args.n_entries);
err_after_request_alloc:
    /* Release all entries added to the in-memory log, making
     * sure the in-memory log and disk don't diverge, leading
     * to future log entries not being persisted to disk. */
    if (j != 0) {
        logTruncate(&r->log, request->index);
    }
    raft_free(request);
err:
    assert(rv != 0);
    return rv;
}

/* Context for an InstallSnapshot request submitted by a follower. */
struct recvInstallSnapshot
{
    struct raft *raft;             /* Instance that received the snapshot. */
    struct raft_snapshot snapshot; /* Snapshot being persisted/restored. */
};

/* Completion callback for the snapshot_put submitted by
 * replicationInstallSnapshot(): restore the snapshot into the FSM and report
 * the outcome back to the leader. */
static void installSnapshotCb(struct raft_io_snapshot_put *req, int status)
{
    struct recvInstallSnapshot *request = req->data;
    struct raft *r = request->raft;
    struct raft_snapshot *snapshot = &request->snapshot;
    struct raft_append_entries_result result;
    int rv;
    r->snapshot.put.data = NULL;
    result.term = r->current_term;
    /* If we are shutting down, let's discard the result. TODO: what about
     * other states? */
    if (r->state == RAFT_UNAVAILABLE) {
        goto discard;
    }
    if (status != 0) {
        result.rejected = snapshot->index;
        tracef("save snapshot %llu: %s", snapshot->index,
               raft_strerror(status));
        goto discard;
    }
    /* From Figure 5.3:
     *
     *   7. Discard the entire log
     *   8. Reset state machine using snapshot contents (and load lastConfig
     *      as cluster configuration).
     */
    rv = snapshotRestore(r, snapshot);
    if (rv != 0) {
        result.rejected = snapshot->index;
        tracef("restore snapshot %llu: %s", snapshot->index,
               raft_strerror(status));
        goto discard;
    }
    tracef("restored snapshot with last index %llu", snapshot->index);
    result.rejected = 0;
    goto respond;
discard:
    /* In case of error we must also free the snapshot data buffer and free
     * the configuration.
     */
    raft_free(snapshot->bufs[0].base);
    raft_configuration_close(&snapshot->configuration);
respond:
    if (r->state != RAFT_UNAVAILABLE) {
        result.last_log_index = r->last_stored;
        sendAppendEntriesResult(r, &result);
    }
    raft_free(request);
}

int replicationInstallSnapshot(struct raft *r,
                               const struct raft_install_snapshot *args,
                               raft_index *rejected,
                               bool *async)
{
    struct recvInstallSnapshot *request;
    struct raft_snapshot *snapshot;
    raft_term local_term;
    int rv;
    assert(r->state == RAFT_FOLLOWER);
    *rejected = args->last_index;
    *async = false;
    /* If we are taking a snapshot ourselves or installing a snapshot, ignore
     * the request, the leader will eventually retry. TODO: we should do
     * something smarter. */
    if (r->snapshot.pending.term != 0 || r->snapshot.put.data != NULL) {
        *async = true;
        tracef("already taking or installing snapshot");
        return RAFT_BUSY;
    }
    /* If our last snapshot is more up-to-date, this is a no-op */
    if (r->log.snapshot.last_index >= args->last_index) {
        tracef("have more recent snapshot");
        *rejected = 0;
        return 0;
    }
    /* If we already have all entries in the snapshot, this is a no-op */
    local_term = logTermOf(&r->log, args->last_index);
    if (local_term != 0 && local_term >= args->last_term) {
        tracef("have all entries");
        *rejected = 0;
        return 0;
    }
    *async = true;
    /* Preemptively update our in-memory state: the in-memory log is reset
     * and last_stored drops to 0 until the disk write completes (this is
     * what replicationInstallSnapshotBusy() detects). */
    logRestore(&r->log, args->last_index, args->last_term);
    r->last_stored = 0;
    request = raft_malloc(sizeof *request);
    if (request == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    request->raft = r;
    snapshot = &request->snapshot;
    snapshot->term = args->last_term;
    snapshot->index = args->last_index;
    snapshot->configuration_index = args->conf_index;
    snapshot->configuration = args->conf;
    snapshot->bufs = raft_malloc(sizeof *snapshot->bufs);
    if (snapshot->bufs == NULL) {
        rv = RAFT_NOMEM;
        goto err_after_request_alloc;
    }
    snapshot->bufs[0] = args->data;
    snapshot->n_bufs = 1;
    assert(r->snapshot.put.data == NULL);
    r->snapshot.put.data = request;
    rv = r->io->snapshot_put(r->io,
                             0 /* zero trailing means replace everything */,
                             &r->snapshot.put, snapshot, installSnapshotCb);
    if (rv != 0) {
        tracef("snapshot_put failed %d", rv);
        goto err_after_bufs_alloc;
    }
    return 0;

err_after_bufs_alloc:
    raft_free(snapshot->bufs);
    r->snapshot.put.data = NULL;
err_after_request_alloc:
    raft_free(request);
err:
    assert(rv != 0);
    return rv;
}

/* Apply a RAFT_COMMAND entry that has been committed. */
static int applyCommand(struct raft *r,
                        const raft_index index,
                        const struct raft_buffer *buf)
{
    struct raft_apply *req;
    void *result;
    int rv;
    rv = r->fsm->apply(r->fsm, buf, &result);
    if (rv != 0) {
        return rv;
    }
    /* Fire the callback of the client request associated with this entry,
     * if any. */
    req = (struct raft_apply *)getRequest(r, index, RAFT_COMMAND);
    if (req != NULL && req->cb != NULL) {
        req->cb(req, 0, result);
    }
    return 0;
}

/* Fire the callback of a barrier request whose entry has been committed. */
static void applyBarrier(struct raft *r, const raft_index index)
{
    struct raft_barrier *req;
    req = (struct raft_barrier *)getRequest(r, index, RAFT_BARRIER);
    if (req != NULL && req->cb != NULL) {
        req->cb(req, 0);
    }
}

/* Apply a RAFT_CHANGE entry that has been committed.
 */
static void applyChange(struct raft *r, const raft_index index)
{
    struct raft_change *req;
    assert(index > 0);
    /* If this is an uncommitted configuration that we had already applied
     * when submitting the configuration change (for leaders) or upon
     * receiving it via an AppendEntries RPC (for followers), then reset the
     * uncommitted index, since that uncommitted configuration is now
     * committed. */
    if (r->configuration_uncommitted_index == index) {
        r->configuration_uncommitted_index = 0;
    }
    r->configuration_index = index;
    if (r->state == RAFT_LEADER) {
        const struct raft_server *server;
        req = r->leader_state.change;
        r->leader_state.change = NULL;
        /* If we are leader but not part of this new configuration, step
         * down.
         *
         * From Section 4.2.2:
         *
         *   In this approach, a leader that is removed from the
         *   configuration steps down once the Cnew entry is committed.
         */
        server = configurationGet(&r->configuration, r->id);
        if (server == NULL || server->role != RAFT_VOTER) {
            tracef("leader removed from config or no longer voter server: %p",
                   (void*)server);
            convertToFollower(r);
        }
        if (req != NULL && req->cb != NULL) {
            req->cb(req, 0);
        }
    }
}

/* Return true when enough entries have been applied since the last snapshot
 * to warrant taking a new one. */
static bool shouldTakeSnapshot(struct raft *r)
{
    /* If we are shutting down, let's not do anything. */
    if (r->state == RAFT_UNAVAILABLE) {
        return false;
    }
    /* If a snapshot is already in progress or we're installing a snapshot,
     * we don't want to start another one. */
    if (r->snapshot.pending.term != 0 || r->snapshot.put.data != NULL) {
        return false;
    };
    /* If we didn't reach the threshold yet, do nothing. */
    if (r->last_applied - r->log.snapshot.last_index < r->snapshot.threshold) {
        return false;
    }
    return true;
}

/* Completion callback for the snapshot_put submitted by takeSnapshot(): on
 * success compact the in-memory log, then release the pending snapshot. */
static void takeSnapshotCb(struct raft_io_snapshot_put *req, int status)
{
    struct raft *r = req->data;
    struct raft_snapshot *snapshot;
    r->snapshot.put.data = NULL;
    snapshot = &r->snapshot.pending;
    if (status != 0) {
        tracef("snapshot %lld at term %lld: %s", snapshot->index,
               snapshot->term, raft_strerror(status));
        goto out;
    }
    logSnapshot(&r->log, snapshot->index, r->snapshot.trailing);
out:
    snapshotClose(&r->snapshot.pending);
    r->snapshot.pending.term = 0;
}

/* Ask the FSM for a snapshot at last_applied and start persisting it. */
static int takeSnapshot(struct raft *r)
{
    struct raft_snapshot *snapshot;
    unsigned i;
    int rv;
    tracef("take snapshot at %lld", r->last_applied);
    snapshot = &r->snapshot.pending;
    snapshot->index = r->last_applied;
    snapshot->term = logTermOf(&r->log, r->last_applied);
    rv = configurationCopy(&r->configuration, &snapshot->configuration);
    if (rv != 0) {
        goto abort;
    }
    snapshot->configuration_index = r->configuration_index;
    rv = r->fsm->snapshot(r->fsm, &snapshot->bufs, &snapshot->n_bufs);
    if (rv != 0) {
        /* Ignore transient errors. We'll retry next time. */
        if (rv == RAFT_BUSY) {
            rv = 0;
        }
        goto abort_after_config_copy;
    }
    assert(r->snapshot.put.data == NULL);
    r->snapshot.put.data = r;
    rv = r->io->snapshot_put(r->io, r->snapshot.trailing, &r->snapshot.put,
                             snapshot, takeSnapshotCb);
    if (rv != 0) {
        goto abort_after_fsm_snapshot;
    }
    return 0;

abort_after_fsm_snapshot:
    for (i = 0; i < snapshot->n_bufs; i++) {
        raft_free(snapshot->bufs[i].base);
    }
    raft_free(snapshot->bufs);
abort_after_config_copy:
    raft_configuration_close(&snapshot->configuration);
abort:
    r->snapshot.pending.term = 0;
    return rv;
}

int replicationApply(struct raft *r)
{
    raft_index index;
    int rv = 0;
    assert(r->state == RAFT_LEADER || r->state == RAFT_FOLLOWER);
    assert(r->last_applied <= r->commit_index);
    if (r->last_applied == r->commit_index) {
        /* Nothing to do.
*/ return 0; } for (index = r->last_applied + 1; index <= r->commit_index; index++) { const struct raft_entry *entry = logGet(&r->log, index); if (entry == NULL) { /* This can happen while installing a snapshot */ tracef("replicationApply - ENTRY NULL"); return 0; } assert(entry->type == RAFT_COMMAND || entry->type == RAFT_BARRIER || entry->type == RAFT_CHANGE); switch (entry->type) { case RAFT_COMMAND: rv = applyCommand(r, index, &entry->buf); break; case RAFT_BARRIER: applyBarrier(r, index); rv = 0; break; case RAFT_CHANGE: applyChange(r, index); rv = 0; break; default: rv = 0; /* For coverity. This case can't be taken. */ break; } if (rv != 0) { break; } r->last_applied = index; } if (shouldTakeSnapshot(r)) { rv = takeSnapshot(r); } return rv; } void replicationQuorum(struct raft *r, const raft_index index) { size_t votes = 0; size_t i; assert(r->state == RAFT_LEADER); if (index <= r->commit_index) { return; } /* TODO: fuzzy-test --seed 0x8db5fccc replication/entries/partitioned * fails the assertion below. */ if (logTermOf(&r->log, index) == 0) { return; } // assert(logTermOf(&r->log, index) > 0); assert(logTermOf(&r->log, index) <= r->current_term); for (i = 0; i < r->configuration.n; i++) { struct raft_server *server = &r->configuration.servers[i]; if (server->role != RAFT_VOTER) { continue; } if (r->leader_state.progress[i].match_index >= index) { votes++; } } if (votes > configurationVoterCount(&r->configuration) / 2) { r->commit_index = index; tracef("new commit index %llu", r->commit_index); } return; } inline bool replicationInstallSnapshotBusy(struct raft *r) { return r->last_stored == 0 && r->snapshot.put.data != NULL; } #undef tracef raft-0.11.3/src/replication.h000066400000000000000000000075421415614527300160210ustar00rootroot00000000000000/* Log replication logic and helpers. 
 */
#ifndef REPLICATION_H_
#define REPLICATION_H_

#include "../include/raft.h"

/* Send AppendEntries RPC messages to all followers to which no AppendEntries
 * was sent in the last heartbeat interval. */
int replicationHeartbeat(struct raft *r);

/* Start a local disk write for entries from the given index onwards, and
 * trigger replication against all followers, typically sending AppendEntries
 * RPC messages with outstanding log entries. */
int replicationTrigger(struct raft *r, raft_index index);

/* Possibly send an AppendEntries or an InstallSnapshot RPC message to the
 * server with the given index.
 *
 * The rules to decide whether or not to send a message are:
 *
 * - If we have sent an InstallSnapshot RPC recently and we haven't yet
 *   received a response, then don't send any new message.
 *
 * - If we are probing the follower (i.e. we haven't received a successful
 *   response during the last heartbeat interval), then send a message only
 *   if haven't sent any during the last heartbeat interval.
 *
 * - If we are pipelining entries to the follower, then send any new entries
 *   haven't yet sent.
 *
 * If a message should be sent, the rules to decide what type of message to
 * send and what it should contain are:
 *
 * - If we don't have anymore the first entry that should be sent to the
 *   follower, then send an InstallSnapshot RPC with the last snapshot.
 *
 * - If we still have the first entry to send, then send all entries from
 *   that index onward (possibly zero).
 *
 * This function must be called only by leaders. */
int replicationProgress(struct raft *r, unsigned i);

/* Update the replication state (match and next indexes) for the given server
 * using the given AppendEntries RPC result.
 *
 * Possibly send to the server a new set of entries or a snapshot if the
 * result was unsuccessful because of missing entries or if new entries were
 * added to our log in the meantime.
 *
 * It must be called only by leaders.
 */
int replicationUpdate(struct raft *r,
                      const struct raft_server *server,
                      const struct raft_append_entries_result *result);

/* Append the log entries in the given request if the Log Matching Property
 * is satisfied.
 *
 * The rejected output parameter will be set to 0 if the Log Matching
 * Property was satisfied, or to args->prev_log_index if not.
 *
 * The async output parameter will be set to true if some of the entries in
 * the request were not present in our log, and a disk write was started to
 * persist them to disk. The entries will still be appended immediately to
 * our in-memory copy of the log, but an AppendEntries result message will be
 * sent only once the disk write completes and the I/O callback is invoked.
 *
 * It must be called only by followers. */
int replicationAppend(struct raft *r,
                      const struct raft_append_entries *args,
                      raft_index *rejected,
                      bool *async);

int replicationInstallSnapshot(struct raft *r,
                               const struct raft_install_snapshot *args,
                               raft_index *rejected,
                               bool *async);

/* Returns `true` if the raft instance is currently installing a snapshot */
bool replicationInstallSnapshotBusy(struct raft *r);

/* Apply any committed entry that was not applied yet.
 *
 * It must be called by leaders or followers. */
int replicationApply(struct raft *r);

/* Check if a quorum has been reached for the given log index, and update the
 * commit index accordingly if so.
* * From Figure 3.1: * * [Rules for servers] Leaders: * * If there exists an N such that N > commitIndex, a majority of * matchIndex[i] >= N, and log[N].term == currentTerm: set commitIndex = N */ void replicationQuorum(struct raft *r, const raft_index index); #endif /* REPLICATION_H_ */ raft-0.11.3/src/request.h000066400000000000000000000004221415614527300151660ustar00rootroot00000000000000#ifndef REQUEST_H_ #define REQUEST_H_ #include "../include/raft.h" /* Abstract request type */ struct request { /* Must be kept in sync with RAFT__REQUEST in raft.h */ void *data; int type; raft_index index; void *queue[2]; }; #endif /* REQUEST_H_ */ raft-0.11.3/src/snapshot.c000066400000000000000000000042371415614527300153400ustar00rootroot00000000000000#include "snapshot.h" #include #include #include "assert.h" #include "configuration.h" #include "err.h" #include "log.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) void snapshotClose(struct raft_snapshot *s) { unsigned i; configurationClose(&s->configuration); for (i = 0; i < s->n_bufs; i++) { raft_free(s->bufs[i].base); } raft_free(s->bufs); } void snapshotDestroy(struct raft_snapshot *s) { snapshotClose(s); raft_free(s); } int snapshotRestore(struct raft *r, struct raft_snapshot *snapshot) { int rv; assert(snapshot->n_bufs == 1); rv = r->fsm->restore(r->fsm, &snapshot->bufs[0]); if (rv != 0) { tracef("restore snapshot %llu: %s", snapshot->index, errCodeToString(rv)); return rv; } configurationClose(&r->configuration); r->configuration = snapshot->configuration; r->configuration_index = snapshot->configuration_index; configurationTrace(r, &r->configuration, "configuration restore from snapshot"); r->commit_index = snapshot->index; r->last_applied = snapshot->index; r->last_stored = snapshot->index; /* Don't free the snapshot data buffer, as ownership has been transferred to * the fsm. 
*/ raft_free(snapshot->bufs); return 0; } int snapshotCopy(const struct raft_snapshot *src, struct raft_snapshot *dst) { int rv; unsigned i; size_t size; uint8_t *cursor; dst->term = src->term; dst->index = src->index; rv = configurationCopy(&src->configuration, &dst->configuration); if (rv != 0) { return rv; } size = 0; for (i = 0; i < src->n_bufs; i++) { size += src->bufs[i].len; } dst->bufs = raft_malloc(sizeof *dst->bufs); assert(dst->bufs != NULL); dst->bufs[0].base = raft_malloc(size); dst->bufs[0].len = size; if (dst->bufs[0].base == NULL) { return RAFT_NOMEM; } cursor = dst->bufs[0].base; for (i = 0; i < src->n_bufs; i++) { memcpy(cursor, src->bufs[i].base, src->bufs[i].len); cursor += src->bufs[i].len; } dst->n_bufs = 1; return 0; } #undef tracef raft-0.11.3/src/snapshot.h000066400000000000000000000017131415614527300153410ustar00rootroot00000000000000#ifndef RAFT_SNAPSHOT_H_ #define RAFT_SNAPSHOT_H_ #include "../include/raft.h" /* Release all memory associated with the given snapshot. */ void snapshotClose(struct raft_snapshot *s); /* Like snapshotClose(), but also release the snapshot object itself. */ void snapshotDestroy(struct raft_snapshot *s); /* Restore a snapshot. * * This will reset the current state of the server as if the last entry * contained in the snapshot had just been persisted, committed and applied. * * The in-memory log must be empty when calling this function. * * If no error occurs, the memory of the snapshot object gets released. */ int snapshotRestore(struct raft *r, struct raft_snapshot *snapshot); /* Make a full deep copy of a snapshot object. * * All data buffers in the source snapshot will be compacted in a single buffer * in the destination snapshot. 
*/ int snapshotCopy(const struct raft_snapshot *src, struct raft_snapshot *dst); #endif /* RAFT_SNAPSHOT_H */ raft-0.11.3/src/start.c000066400000000000000000000157001415614527300146330ustar00rootroot00000000000000#include "../include/raft.h" #include "assert.h" #include "configuration.h" #include "convert.h" #include "entry.h" #include "err.h" #include "log.h" #include "recv.h" #include "snapshot.h" #include "tick.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Restore the most recent configuration. */ static int restoreMostRecentConfiguration(struct raft *r, struct raft_entry *entry, raft_index index) { struct raft_configuration configuration; int rv; raft_configuration_init(&configuration); rv = configurationDecode(&entry->buf, &configuration); if (rv != 0) { raft_configuration_close(&configuration); return rv; } configurationTrace(r, &configuration, "restore most recent configuration"); raft_configuration_close(&r->configuration); r->configuration = configuration; r->configuration_index = index; return 0; } /* Restore the entries that were loaded from persistent storage. The most recent * configuration entry will be restored as well, if any. * * Note that we don't care whether the most recent configuration entry was * actually committed or not. We don't allow more than one pending uncommitted * configuration change at a time, plus * * when adding or removing just a single server, it is safe to switch directly * to the new configuration. * * and * * The new configuration takes effect on each server as soon as it is added to * that server's log: the C_new entry is replicated to the C_new servers, and * a majority of the new configuration is used to determine the C_new entry's * commitment. This means that servers do notwait for configuration entries to * be committed, and each server always uses the latest configuration found in * its log. * * as explained in section 4.1. 
* * TODO: we should probably set configuration_uncommitted_index as well, since we * can't be sure a configuration change has been committed and we need to be * ready to roll back to the last committed configuration. */ static int restoreEntries(struct raft *r, raft_index snapshot_index, raft_term snapshot_term, raft_index start_index, struct raft_entry *entries, size_t n) { struct raft_entry *conf = NULL; raft_index conf_index = 0; size_t i; int rv; logStart(&r->log, snapshot_index, snapshot_term, start_index); r->last_stored = start_index - 1; for (i = 0; i < n; i++) { struct raft_entry *entry = &entries[i]; rv = logAppend(&r->log, entry->term, entry->type, &entry->buf, entry->batch); if (rv != 0) { goto err; } r->last_stored++; if (entry->type == RAFT_CHANGE) { conf = entry; conf_index = r->last_stored; } } if (conf != NULL) { rv = restoreMostRecentConfiguration(r, conf, conf_index); if (rv != 0) { goto err; } } raft_free(entries); return 0; err: if (logNumEntries(&r->log) > 0) { logDiscard(&r->log, r->log.offset + 1); } return rv; } /* If we're the only voting server in the configuration, automatically * self-elect ourselves and convert to leader without waiting for the election * timeout. */ static int maybeSelfElect(struct raft *r) { const struct raft_server *server; int rv; server = configurationGet(&r->configuration, r->id); if (server == NULL || server->role != RAFT_VOTER || configurationVoterCount(&r->configuration) > 1) { return 0; } /* Converting to candidate will notice that we're the only voter and * automatically convert to leader. 
*/ rv = convertToCandidate(r, false /* disrupt leader */); if (rv != 0) { return rv; } assert(r->state == RAFT_LEADER); return 0; } int raft_start(struct raft *r) { struct raft_snapshot *snapshot; raft_index snapshot_index = 0; raft_term snapshot_term = 0; raft_index start_index; struct raft_entry *entries; size_t n_entries; int rv; assert(r != NULL); assert(r->state == RAFT_UNAVAILABLE); assert(r->heartbeat_timeout != 0); assert(r->heartbeat_timeout < r->election_timeout); assert(r->install_snapshot_timeout != 0); assert(logNumEntries(&r->log) == 0); assert(logSnapshotIndex(&r->log) == 0); assert(r->last_stored == 0); tracef("starting"); rv = r->io->load(r->io, &r->current_term, &r->voted_for, &snapshot, &start_index, &entries, &n_entries); if (rv != 0) { ErrMsgTransfer(r->io->errmsg, r->errmsg, "io"); return rv; } assert(start_index >= 1); tracef("current_term:%llu voted_for:%llu start_index:%llu n_entries:%zu", r->current_term, r->voted_for, start_index, n_entries); /* If we have a snapshot, let's restore it. */ if (snapshot != NULL) { tracef("restore snapshot with last index %llu and last term %llu", snapshot->index, snapshot->term); rv = snapshotRestore(r, snapshot); if (rv != 0) { snapshotDestroy(snapshot); entryBatchesDestroy(entries, n_entries); return rv; } snapshot_index = snapshot->index; snapshot_term = snapshot->term; raft_free(snapshot); } else if (n_entries > 0) { /* If we don't have a snapshot and the on-disk log is not empty, then * the first entry must be a configuration entry. */ assert(start_index == 1); assert(entries[0].type == RAFT_CHANGE); /* As a small optimization, bump the commit index to 1 since we require * the first entry to be the same on all servers. */ r->commit_index = 1; r->last_applied = 1; } /* Append the entries to the log, possibly restoring the last * configuration. 
*/ tracef("restore %zu entries starting at %llu", n_entries, start_index); rv = restoreEntries(r, snapshot_index, snapshot_term, start_index, entries, n_entries); if (rv != 0) { entryBatchesDestroy(entries, n_entries); return rv; } /* Start the I/O backend. The tickCb function is expected to fire every * r->heartbeat_timeout milliseconds and recvCb whenever an RPC is * received. */ rv = r->io->start(r->io, r->heartbeat_timeout, tickCb, recvCb); if (rv != 0) { tracef("io start failed %d", rv); return rv; } /* By default we start as followers. */ convertToFollower(r); /* If there's only one voting server, and that is us, it's safe to convert * to leader right away. If that is not us, we're either joining the cluster * or we're simply configured as non-voter, and we'll stay follower. */ rv = maybeSelfElect(r); if (rv != 0) { return rv; } return 0; } #undef tracef raft-0.11.3/src/state.c000066400000000000000000000017041415614527300146150ustar00rootroot00000000000000#include "assert.h" #include "configuration.h" #include "election.h" #include "log.h" #include "queue.h" int raft_state(struct raft *r) { return r->state; } void raft_leader(struct raft *r, raft_id *id, const char **address) { switch (r->state) { case RAFT_UNAVAILABLE: case RAFT_CANDIDATE: *id = 0; *address = NULL; return; case RAFT_FOLLOWER: *id = r->follower_state.current_leader.id; *address = r->follower_state.current_leader.address; return; case RAFT_LEADER: if (r->transfer != NULL) { *id = 0; *address = NULL; return; } *id = r->id; *address = r->address; return; } } raft_index raft_last_index(struct raft *r) { return logLastIndex(&r->log); } raft_index raft_last_applied(struct raft *r) { return r->last_applied; } raft-0.11.3/src/syscall.c000066400000000000000000000027151415614527300151520ustar00rootroot00000000000000#include "syscall.h" #if HAVE_LINUX_AIO_ABI_H || HAVE_LINUX_IO_URING_H #include #include #endif #if HAVE_LINUX_AIO_ABI_H int io_setup(unsigned nr_events, aio_context_t *ctx_idp) { return 
(int)syscall(__NR_io_setup, nr_events, ctx_idp); } int io_destroy(aio_context_t ctx_id) { return (int)syscall(__NR_io_destroy, ctx_id); } int io_submit(aio_context_t ctx_id, long nr, struct iocb **iocbpp) { return (int)syscall(__NR_io_submit, ctx_id, nr, iocbpp); } int io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) { return (int)syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); } #endif #if HAVE_LINUX_IO_URING_H int io_uring_register(int fd, unsigned int opcode, const void *arg, unsigned int nr_args) { return (int)syscall(__NR_io_uring_register, fd, opcode, arg, nr_args); } int io_uring_setup(unsigned int entries, struct io_uring_params *p) { return (int)syscall(__NR_io_uring_setup, entries, p); } int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig) { return (int)syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags, sig, _NSIG / 8); } #endif raft-0.11.3/src/syscall.h000066400000000000000000000021561415614527300151560ustar00rootroot00000000000000/* Wrappers for system calls not yet defined in libc. 
*/ #ifndef SYSCALL_H_ #define SYSCALL_H_ #if HAVE_LINUX_AIO_ABI_H #include #include #include #endif #if HAVE_LINUX_IO_URING_H #include #endif #if HAVE_LINUX_AIO_ABI_H /* AIO */ int io_setup(unsigned nr_events, aio_context_t *ctx_idp); int io_destroy(aio_context_t ctx_id); int io_submit(aio_context_t ctx_id, long nr, struct iocb **iocbpp); int io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout); #endif #if HAVE_LINUX_IO_URING_H /* uring */ int io_uring_register(int fd, unsigned int opcode, const void *arg, unsigned int nr_args); int io_uring_setup(unsigned int entries, struct io_uring_params *p); int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig); #endif #endif /* SYSCALL_ */ raft-0.11.3/src/tick.c000066400000000000000000000165661415614527300144430ustar00rootroot00000000000000#include "../include/raft.h" #include "assert.h" #include "configuration.h" #include "convert.h" #include "election.h" #include "membership.h" #include "progress.h" #include "replication.h" #include "tracing.h" #define tracef(...) Tracef(r->tracer, __VA_ARGS__) /* Apply time-dependent rules for followers (Figure 3.1). */ static int tickFollower(struct raft *r) { const struct raft_server *server; int rv; assert(r != NULL); assert(r->state == RAFT_FOLLOWER); server = configurationGet(&r->configuration, r->id); /* If we have been removed from the configuration, or maybe we didn't * receive one yet, just stay follower. */ if (server == NULL) { return 0; } /* Check if we need to start an election. * * From Section 3.3: * * If a follower receives no communication over a period of time called * the election timeout, then it assumes there is no viable leader and * begins an election to choose a new leader. * * Figure 3.1: * * If election timeout elapses without receiving AppendEntries RPC from * current leader or granting vote to candidate, convert to candidate. 
*/ if (electionTimerExpired(r) && server->role == RAFT_VOTER) { tracef("convert to candidate and start new election"); rv = convertToCandidate(r, false /* disrupt leader */); if (rv != 0) { tracef("convert to candidate: %s", raft_strerror(rv)); return rv; } } return 0; } /* Apply time-dependent rules for candidates (Figure 3.1). */ static int tickCandidate(struct raft *r) { assert(r != NULL); assert(r->state == RAFT_CANDIDATE); /* Check if we need to start an election. * * From Section 3.4: * * The third possible outcome is that a candidate neither wins nor loses * the election: if many followers become candidates at the same time, * votes could be split so that no candidate obtains a majority. When this * happens, each candidate will time out and start a new election by * incrementing its term and initiating another round of RequestVote RPCs */ if (electionTimerExpired(r)) { tracef("start new election"); return electionStart(r); } return 0; } /* Return true if we received an AppendEntries RPC result from a majority of * voting servers since we became leaders or since the last time this function * was called. * * For each server the function checks the recent_recv flag of the associated * progress object, and resets the flag after the check. It returns true if a * majority of voting server had the flag set to true. */ static bool checkContactQuorum(struct raft *r) { unsigned i; unsigned contacts = 0; assert(r->state == RAFT_LEADER); for (i = 0; i < r->configuration.n; i++) { struct raft_server *server = &r->configuration.servers[i]; bool recent_recv = progressResetRecentRecv(r, i); if ((server->role == RAFT_VOTER && recent_recv) || server->id == r->id) { contacts++; } } return contacts > configurationVoterCount(&r->configuration) / 2; } /* Apply time-dependent rules for leaders (Figure 3.1). */ static int tickLeader(struct raft *r) { raft_time now = r->io->time(r->io); assert(r->state == RAFT_LEADER); /* Check if we still can reach a majority of servers. 
* * From Section 6.2: * * A leader in Raft steps down if an election timeout elapses without a * successful round of heartbeats to a majority of its cluster; this * allows clients to retry their requests with another server. */ if (now - r->election_timer_start >= r->election_timeout) { if (!checkContactQuorum(r)) { tracef("unable to contact majority of cluster -> step down"); convertToFollower(r); return 0; } r->election_timer_start = r->io->time(r->io); } /* Possibly send heartbeats. * * From Figure 3.1: * * Send empty AppendEntries RPC during idle periods to prevent election * timeouts. */ replicationHeartbeat(r); /* If a server is being promoted, increment the timer of the current * round or abort the promotion. * * From Section 4.2.1: * * The algorithm waits a fixed number of rounds (such as 10). If the last * round lasts less than an election timeout, then the leader adds the new * server to the cluster, under the assumption that there are not enough * unreplicated entries to create a significant availability * gap. Otherwise, the leader aborts the configuration change with an * error. */ if (r->leader_state.promotee_id != 0) { raft_id id = r->leader_state.promotee_id; unsigned server_index; raft_time round_duration = now - r->leader_state.round_start; bool is_too_slow; bool is_unresponsive; /* If a promotion is in progress, we expect that our configuration * contains an entry for the server being promoted, and that the server * is not yet considered as voting. */ server_index = configurationIndexOf(&r->configuration, id); assert(server_index < r->configuration.n); assert(r->configuration.servers[server_index].role != RAFT_VOTER); is_too_slow = (r->leader_state.round_number == r->max_catch_up_rounds && round_duration > r->election_timeout); is_unresponsive = round_duration > r->max_catch_up_round_duration; /* Abort the promotion if we are at the 10'th round and it's still * taking too long, or if the server is unresponsive. 
*/ if (is_too_slow || is_unresponsive) { tracef("server_index:%d is_too_slow:%d is_unresponsive:%d", server_index, is_too_slow, is_unresponsive); struct raft_change *change; r->leader_state.promotee_id = 0; r->leader_state.round_index = 0; r->leader_state.round_number = 0; r->leader_state.round_start = 0; change = r->leader_state.change; r->leader_state.change = NULL; if (change != NULL && change->cb != NULL) { change->cb(change, RAFT_NOCONNECTION); } } } return 0; } static int tick(struct raft *r) { int rv = -1; assert(r->state == RAFT_UNAVAILABLE || r->state == RAFT_FOLLOWER || r->state == RAFT_CANDIDATE || r->state == RAFT_LEADER); /* If we are not available, let's do nothing. */ if (r->state == RAFT_UNAVAILABLE) { return 0; } switch (r->state) { case RAFT_FOLLOWER: rv = tickFollower(r); break; case RAFT_CANDIDATE: rv = tickCandidate(r); break; case RAFT_LEADER: rv = tickLeader(r); break; } return rv; } void tickCb(struct raft_io *io) { struct raft *r; int rv; r = io->data; rv = tick(r); if (rv != 0) { convertToUnavailable(r); return; } /* For all states: if there is a leadership transfer request in progress, * check if it's expired. */ if (r->transfer != NULL) { raft_time now = r->io->time(r->io); if (now - r->transfer->start >= r->election_timeout) { membershipLeadershipTransferClose(r); } } } #undef tracef raft-0.11.3/src/tick.h000066400000000000000000000004711415614527300144340ustar00rootroot00000000000000/* Logic to be invoked periodically. */ #ifndef TICK_H_ #define TICK_H_ #include "../include/raft.h" /* Callback to be passed to the @raft_io implementation. It notifies us that a * certain amount of time has elapsed and will be invoked periodically. 
*/ void tickCb(struct raft_io *io); #endif /* TICK_H_ */ raft-0.11.3/src/tracing.c000066400000000000000000000022171415614527300151240ustar00rootroot00000000000000#include #include #include #include "tracing.h" static inline void noopTracerEmit(struct raft_tracer *t, const char *file, int line, const char *message) { (void)t; (void)file; (void)line; (void)message; } struct raft_tracer NoopTracer = {.impl = NULL, .enabled = false, .emit = noopTracerEmit}; static inline void stderrTracerEmit(struct raft_tracer *t, const char *file, int line, const char *message) { (void)t; struct timespec ts = {0}; /* ignore errors */ clock_gettime(CLOCK_REALTIME, &ts); int64_t ns = ts.tv_sec * 1000000000 + ts.tv_nsec; fprintf(stderr, "LIBRAFT %" PRId64 " %s:%d %s\n", ns, file, line, message); } struct raft_tracer StderrTracer = {.impl = NULL, .enabled = false, .emit = stderrTracerEmit}; void raft_tracer_maybe_enable(struct raft_tracer *tracer, bool enabled) { if (getenv(LIBRAFT_TRACE) != NULL) { tracer->enabled = enabled; } } raft-0.11.3/src/tracing.h000066400000000000000000000020761415614527300151340ustar00rootroot00000000000000/* Tracing functions and helpers. */ #ifndef TRACING_H_ #define TRACING_H_ /* If an env var with this name is found, tracing can be enabled */ #define LIBRAFT_TRACE "LIBRAFT_TRACE" #include "../include/raft.h" extern struct raft_tracer NoopTracer; /* Default stderr tracer. */ extern struct raft_tracer StderrTracer; /* Emit a debug message with the given tracer. */ #define Tracef(TRACER, ...) 
\ do { \ if (TRACER != NULL && TRACER->emit != NULL && TRACER->enabled) { \ static char _msg[1024]; \ snprintf(_msg, sizeof _msg, __VA_ARGS__); \ TRACER->emit(TRACER, __FILE__, __LINE__, _msg); \ } \ } while (0) /* Enable the tracer if the env variable is set or disable the tracer */ void raft_tracer_maybe_enable(struct raft_tracer *tracer, bool enabled); #endif /* TRACING_H_ */ raft-0.11.3/src/uv.c000066400000000000000000000475641415614527300141450ustar00rootroot00000000000000#include "../include/raft/uv.h" #include #include #include #include #include #include #include "../include/raft.h" #include "assert.h" #include "byte.h" #include "configuration.h" #include "entry.h" #include "heap.h" #include "snapshot.h" #include "tracing.h" #include "uv.h" #include "uv_encoding.h" #include "uv_os.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) /* Retry to connect to peer servers every second. * * TODO: implement an exponential backoff instead. */ #define CONNECT_RETRY_DELAY 1000 /* Cleans up files that are no longer used by the system */ static int uvMaintenance(const char *dir, char* errmsg) { struct uv_fs_s req; struct uv_dirent_s entry; int n; int i; int rv; int rv2; n = uv_fs_scandir(NULL, &req, dir, 0, NULL); if (n < 0) { ErrMsgPrintf(errmsg, "scan data directory: %s", uv_strerror(n)); return RAFT_IOERR; } rv = 0; for (i = 0; i < n; i++) { const char *filename; rv = uv_fs_scandir_next(&req, &entry); assert(rv == 0); /* Can't fail in libuv */ filename = entry.name; /* Remove leftover tmp-files */ if (strncmp(filename, TMP_FILE_PREFIX, strlen(TMP_FILE_PREFIX)) == 0) { UvFsRemoveFile(dir, filename, errmsg); /* Ignore errors */ continue; } /* Remove orphaned snapshot files */ bool orphan = false; if ((UvSnapshotIsOrphan(dir, filename, &orphan) == 0) && orphan) { UvFsRemoveFile(dir, filename, errmsg); /* Ignore errors */ continue; } /* Remove orphaned snapshot metadata files */ if ((UvSnapshotMetaIsOrphan(dir, filename, &orphan) == 0) && orphan) { UvFsRemoveFile(dir, 
filename, errmsg); /* Ignore errors */ } } rv2 = uv_fs_scandir_next(&req, &entry); assert(rv2 == UV_EOF); return rv; } /* Implementation of raft_io->config. */ static int uvInit(struct raft_io *io, raft_id id, const char *address) { struct uv *uv; size_t direct_io; struct uvMetadata metadata; int rv; uv = io->impl; uv->id = id; rv = UvFsCheckDir(uv->dir, io->errmsg); if (rv != 0) { return rv; } /* Probe file system capabilities */ rv = UvFsProbeCapabilities(uv->dir, &direct_io, &uv->async_io, io->errmsg); if (rv != 0) { return rv; } uv->direct_io = direct_io != 0; uv->block_size = direct_io != 0 ? direct_io : 4096; rv = uvMaintenance(uv->dir, io->errmsg); if (rv != 0) { return rv; } rv = uvMetadataLoad(uv->dir, &metadata, io->errmsg); if (rv != 0) { return rv; } uv->metadata = metadata; rv = uv->transport->init(uv->transport, id, address); if (rv != 0) { ErrMsgTransfer(uv->transport->errmsg, io->errmsg, "transport"); return rv; } uv->transport->data = uv; rv = uv_timer_init(uv->loop, &uv->timer); assert(rv == 0); /* This should never fail */ uv->timer.data = uv; return 0; } /* Periodic timer callback */ static void uvTickTimerCb(uv_timer_t *timer) { struct uv *uv; uv = timer->data; if (uv->tick_cb != NULL) { uv->tick_cb(uv->io); } } /* Implementation of raft_io->start. 
*/ static int uvStart(struct raft_io *io, unsigned msecs, raft_io_tick_cb tick_cb, raft_io_recv_cb recv_cb) { struct uv *uv; int rv; uv = io->impl; uv->state = UV__ACTIVE; uv->tick_cb = tick_cb; uv->recv_cb = recv_cb; rv = UvRecvStart(uv); if (rv != 0) { return rv; } rv = uv_timer_start(&uv->timer, uvTickTimerCb, msecs, msecs); assert(rv == 0); return 0; } void uvMaybeFireCloseCb(struct uv *uv) { if (!uv->closing) { return; } if (uv->transport->data != NULL) { return; } if (uv->timer.data != NULL) { return; } if (!QUEUE_IS_EMPTY(&uv->append_segments)) { return; } if (!QUEUE_IS_EMPTY(&uv->finalize_reqs)) { return; } if (uv->finalize_work.data != NULL) { return; } if (uv->prepare_inflight != NULL) { return; } if (uv->barrier != NULL) { return; } if (uv->snapshot_put_work.data != NULL) { return; } if (!QUEUE_IS_EMPTY(&uv->snapshot_get_reqs)) { return; } if (!QUEUE_IS_EMPTY(&uv->aborting)) { return; } assert(uv->truncate_work.data == NULL); if (uv->close_cb != NULL) { uv->close_cb(uv->io); } } static void uvTickTimerCloseCb(uv_handle_t *handle) { struct uv *uv = handle->data; assert(uv->closing); uv->timer.data = NULL; uvMaybeFireCloseCb(uv); } static void uvTransportCloseCb(struct raft_uv_transport *transport) { struct uv *uv = transport->data; assert(uv->closing); uv->transport->data = NULL; uvMaybeFireCloseCb(uv); } /* Implementation of raft_io->stop. */ static void uvClose(struct raft_io *io, raft_io_close_cb cb) { struct uv *uv; uv = io->impl; assert(!uv->closing); uv->close_cb = cb; uv->closing = true; UvSendClose(uv); UvRecvClose(uv); uvAppendClose(uv); if (uv->transport->data != NULL) { uv->transport->close(uv->transport, uvTransportCloseCb); } if (uv->timer.data != NULL) { uv_close((uv_handle_t *)&uv->timer, uvTickTimerCloseCb); } uvMaybeFireCloseCb(uv); } /* Filter the given segment list to find the most recent contiguous chunk of * closed segments that overlaps with the given snapshot last index. 
*/ static int uvFilterSegments(struct uv *uv, raft_index last_index, const char *snapshot_filename, struct uvSegmentInfo **segments, size_t *n) { struct uvSegmentInfo *segment; size_t i; /* First valid closed segment. */ size_t j; /* Last valid closed segment. */ /* If there are not segments at all, or only open segments, there's nothing * to do. */ if (*segments == NULL || (*segments)[0].is_open) { return 0; } /* Find the index of the most recent closed segment. */ for (j = 0; j < *n; j++) { segment = &(*segments)[j]; if (segment->is_open) { break; } } assert(j > 0); j--; segment = &(*segments)[j]; tracef("most recent closed segment is %s", segment->filename); /* If the end index of the last closed segment is lower than the last * snapshot index, there might be no entry that we can keep. We return an * empty segment list, unless there is at least one open segment, in that * case we keep everything hoping that they contain all the entries since * the last closed segment (TODO: we should encode the starting entry in the * open segment). */ if (segment->end_index < last_index) { if (!(*segments)[*n - 1].is_open) { tracef( "discarding all closed segments, since most recent is behind " "last snapshot"); raft_free(*segments); *segments = NULL; *n = 0; return 0; } tracef( "most recent closed segment %s is behind last snapshot, " "yet there are open segments", segment->filename); } /* Now scan the segments backwards, searching for the longest list of * contiguous closed segments. */ if (j >= 1) { for (i = j; i > 0; i--) { struct uvSegmentInfo *newer; struct uvSegmentInfo *older; newer = &(*segments)[i]; older = &(*segments)[i - 1]; if (older->end_index != newer->first_index - 1) { tracef("discarding non contiguous segment %s", older->filename); break; } } } else { i = j; } /* Make sure that the first index of the first valid closed segment is not * greater than the snapshot's last index plus one (so there are no * missing entries). 
*/ segment = &(*segments)[i]; if (segment->first_index > last_index + 1) { ErrMsgPrintf(uv->io->errmsg, "closed segment %s is past last snapshot %s", segment->filename, snapshot_filename); return RAFT_CORRUPT; } if (i != 0) { size_t new_n = *n - i; struct uvSegmentInfo *new_segments; new_segments = raft_malloc(new_n * sizeof *new_segments); if (new_segments == NULL) { return RAFT_NOMEM; } memcpy(new_segments, &(*segments)[i], new_n * sizeof *new_segments); raft_free(*segments); *segments = new_segments; *n = new_n; } return 0; } /* Load the last snapshot (if any) and all entries contained in all segment * files of the data directory. */ static int uvLoadSnapshotAndEntries(struct uv *uv, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry *entries[], size_t *n) { struct uvSnapshotInfo *snapshots; struct uvSegmentInfo *segments; size_t n_snapshots; size_t n_segments; int rv; *snapshot = NULL; *start_index = 1; *entries = NULL; *n = 0; /* List available snapshots and segments. */ rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments, uv->io->errmsg); if (rv != 0) { goto err; } /* Load the most recent snapshot, if any. */ if (snapshots != NULL) { char snapshot_filename[UV__FILENAME_LEN]; *snapshot = HeapMalloc(sizeof **snapshot); if (*snapshot == NULL) { rv = RAFT_NOMEM; goto err; } rv = UvSnapshotLoad(uv, &snapshots[n_snapshots - 1], *snapshot, uv->io->errmsg); if (rv != 0) { HeapFree(*snapshot); *snapshot = NULL; goto err; } uvSnapshotFilenameOf(&snapshots[n_snapshots - 1], snapshot_filename); tracef("most recent snapshot at %lld", (*snapshot)->index); HeapFree(snapshots); snapshots = NULL; /* Update the start index. If there are closed segments on disk let's * make sure that the first index of the first closed segment is not * greater than the snapshot's last index plus one (so there are no * missing entries), and update the start index accordingly. 
*/ rv = uvFilterSegments(uv, (*snapshot)->index, snapshot_filename, &segments, &n_segments); if (rv != 0) { goto err; } if (segments != NULL) { if (segments[0].is_open) { *start_index = (*snapshot)->index + 1; } else { *start_index = segments[0].first_index; } } else { *start_index = (*snapshot)->index + 1; } } /* Read data from segments, closing any open segments. */ if (segments != NULL) { raft_index last_index; rv = uvSegmentLoadAll(uv, *start_index, segments, n_segments, entries, n); if (rv != 0) { goto err; } /* Check if all entries that we loaded are actually behind the last * snapshot. This can happen if the last closed segment was behind the * last snapshot and there were open segments, but the entries in the * open segments turned out to be behind the snapshot as well. */ last_index = *start_index + *n - 1; if (*snapshot != NULL && last_index < (*snapshot)->index) { ErrMsgPrintf(uv->io->errmsg, "last entry on disk has index %llu, which is behind " "last snapshot's index %llu", last_index, (*snapshot)->index); rv = RAFT_CORRUPT; goto err; } raft_free(segments); segments = NULL; } return 0; err: assert(rv != 0); if (*snapshot != NULL) { snapshotDestroy(*snapshot); *snapshot = NULL; } if (snapshots != NULL) { raft_free(snapshots); } if (segments != NULL) { raft_free(segments); } if (*entries != NULL) { entryBatchesDestroy(*entries, *n); *entries = NULL; *n = 0; } return rv; } /* Implementation of raft_io->load. 
*/ static int uvLoad(struct raft_io *io, raft_term *term, raft_id *voted_for, struct raft_snapshot **snapshot, raft_index *start_index, struct raft_entry **entries, size_t *n_entries) { struct uv *uv; raft_index last_index; int rv; uv = io->impl; *term = uv->metadata.term; *voted_for = uv->metadata.voted_for; *snapshot = NULL; rv = uvLoadSnapshotAndEntries(uv, snapshot, start_index, entries, n_entries); if (rv != 0) { return rv; } tracef("start index %lld, %zu entries", *start_index, *n_entries); if (*snapshot == NULL) { tracef("no snapshot"); } last_index = *start_index + *n_entries - 1; /* Set the index of the next entry that will be appended. */ uv->append_next_index = last_index + 1; return 0; } /* Implementation of raft_io->set_term. */ static int uvSetTerm(struct raft_io *io, const raft_term term) { struct uv *uv; int rv; uv = io->impl; uv->metadata.version++; uv->metadata.term = term; uv->metadata.voted_for = 0; rv = uvMetadataStore(uv, &uv->metadata); if (rv != 0) { return rv; } return 0; } /* Implementation of raft_io->set_term. */ static int uvSetVote(struct raft_io *io, const raft_id server_id) { struct uv *uv; int rv; uv = io->impl; uv->metadata.version++; uv->metadata.voted_for = server_id; rv = uvMetadataStore(uv, &uv->metadata); if (rv != 0) { return rv; } return 0; } /* Implementation of raft_io->bootstrap. */ static int uvBootstrap(struct raft_io *io, const struct raft_configuration *configuration) { struct uv *uv; int rv; uv = io->impl; /* We shouldn't have written anything else yet. */ if (uv->metadata.term != 0) { ErrMsgPrintf(io->errmsg, "metadata contains term %lld", uv->metadata.term); return RAFT_CANTBOOTSTRAP; } /* Write the term */ rv = uvSetTerm(io, 1); if (rv != 0) { return rv; } /* Create the first closed segment file, containing just one entry. */ rv = uvSegmentCreateFirstClosed(uv, configuration); if (rv != 0) { return rv; } return 0; } /* Implementation of raft_io->recover. 
*/ static int uvRecover(struct raft_io *io, const struct raft_configuration *conf) { struct uv *uv = io->impl; struct raft_snapshot *snapshot; raft_index start_index; raft_index next_index; struct raft_entry *entries; size_t n_entries; int rv; /* Load the current state. This also closes any leftover open segment. */ rv = uvLoadSnapshotAndEntries(uv, &snapshot, &start_index, &entries, &n_entries); if (rv != 0) { return rv; } /* We don't care about the actual data, just index of the last entry. */ if (snapshot != NULL) { snapshotDestroy(snapshot); } if (entries != NULL) { entryBatchesDestroy(entries, n_entries); } assert(start_index > 0); next_index = start_index + n_entries; rv = uvSegmentCreateClosedWithConfiguration(uv, next_index, conf); if (rv != 0) { return rv; } return 0; } /* Implementation of raft_io->time. */ static raft_time uvTime(struct raft_io *io) { struct uv *uv; uv = io->impl; return uv_now(uv->loop); } /* Implementation of raft_io->random. */ static int uvRandom(struct raft_io *io, int min, int max) { static bool initialized = false; if (!initialized) { struct uv *uv = io->impl; srand((unsigned)uv_now(uv->loop) + (unsigned)uv->id); initialized = true; } return min + (abs(rand()) % (max - min)); } int raft_uv_init(struct raft_io *io, struct uv_loop_s *loop, const char *dir, struct raft_uv_transport *transport) { struct uv *uv; void *data; int rv; assert(io != NULL); assert(loop != NULL); assert(dir != NULL); assert(transport != NULL); data = io->data; memset(io, 0, sizeof *io); io->data = data; /* Ensure that the given path doesn't exceed our static buffer limit. 
*/ if (!UV__DIR_HAS_VALID_LEN(dir)) { ErrMsgPrintf(io->errmsg, "directory path too long"); return RAFT_NAMETOOLONG; } /* Allocate the raft_io_uv object */ uv = raft_malloc(sizeof *uv); if (uv == NULL) { rv = RAFT_NOMEM; goto err; } memset(uv, 0, sizeof(struct uv)); uv->io = io; uv->loop = loop; strncpy(uv->dir, dir, sizeof(uv->dir)-1); uv->dir[sizeof(uv->dir)-1] = '\0'; uv->transport = transport; uv->transport->data = NULL; uv->tracer = &StderrTracer; uv->id = 0; /* Set by raft_io->config() */ uv->state = UV__PRISTINE; uv->errored = false; uv->direct_io = false; uv->async_io = false; #ifdef LZ4_ENABLED uv->snapshot_compression = true; #else uv->snapshot_compression = false; #endif uv->segment_size = UV__MAX_SEGMENT_SIZE; uv->block_size = 0; QUEUE_INIT(&uv->clients); QUEUE_INIT(&uv->servers); uv->connect_retry_delay = CONNECT_RETRY_DELAY; uv->prepare_inflight = NULL; QUEUE_INIT(&uv->prepare_reqs); QUEUE_INIT(&uv->prepare_pool); uv->prepare_next_counter = 1; uv->append_next_index = 1; QUEUE_INIT(&uv->append_segments); QUEUE_INIT(&uv->append_pending_reqs); QUEUE_INIT(&uv->append_writing_reqs); uv->barrier = NULL; QUEUE_INIT(&uv->finalize_reqs); uv->finalize_work.data = NULL; uv->truncate_work.data = NULL; QUEUE_INIT(&uv->snapshot_get_reqs); uv->snapshot_put_work.data = NULL; uv->timer.data = NULL; uv->tick_cb = NULL; /* Set by raft_io->start() */ uv->recv_cb = NULL; /* Set by raft_io->start() */ QUEUE_INIT(&uv->aborting); uv->closing = false; uv->close_cb = NULL; /* Set the raft_io implementation. 
*/ io->version = 1; /* future-proof'ing */ io->impl = uv; io->init = uvInit; io->close = uvClose; io->start = uvStart; io->load = uvLoad; io->bootstrap = uvBootstrap; io->recover = uvRecover; io->set_term = uvSetTerm; io->set_vote = uvSetVote; io->append = UvAppend; io->truncate = UvTruncate; io->send = UvSend; io->snapshot_put = UvSnapshotPut; io->snapshot_get = UvSnapshotGet; io->time = uvTime; io->random = uvRandom; return 0; err: assert(rv != 0); if (rv == RAFT_NOMEM) { ErrMsgOom(io->errmsg); } return rv; } void raft_uv_close(struct raft_io *io) { struct uv *uv; uv = io->impl; raft_free(uv); } void raft_uv_set_segment_size(struct raft_io *io, size_t size) { struct uv *uv; uv = io->impl; uv->segment_size = size; } void raft_uv_set_block_size(struct raft_io *io, size_t size) { struct uv *uv; uv = io->impl; uv->block_size = size; } int raft_uv_set_snapshot_compression(struct raft_io *io, bool compressed) { struct uv *uv; uv = io->impl; #ifndef LZ4_AVAILABLE if (compressed) { return RAFT_INVALID; } #endif uv->snapshot_compression = compressed; return 0; } void raft_uv_set_connect_retry_delay(struct raft_io *io, unsigned msecs) { struct uv *uv; uv = io->impl; uv->connect_retry_delay = msecs; } void raft_uv_set_tracer(struct raft_io *io, struct raft_tracer *tracer) { struct uv *uv; uv = io->impl; uv->tracer = tracer; } #undef tracef raft-0.11.3/src/uv.h000066400000000000000000000370641415614527300141440ustar00rootroot00000000000000/* Implementation of the @raft_io interface based on libuv. */ #ifndef UV_H_ #define UV_H_ #include "../include/raft.h" #include "err.h" #include "queue.h" #include "tracing.h" #include "uv_fs.h" #include "uv_os.h" /* 8 Megabytes */ #define UV__MAX_SEGMENT_SIZE (8 * 1024 * 1024) /* Template string for closed segment filenames: start index (inclusive), end * index (inclusive). */ #define UV__CLOSED_TEMPLATE "%016llu-%016llu" /* Template string for open segment filenames: incrementing counter. 
*/ #define UV__OPEN_TEMPLATE "open-%llu" /* Enough to hold a segment filename (either open or closed) */ #define UV__SEGMENT_FILENAME_BUF_SIZE 34 /* Template string for snapshot filenames: snapshot term, snapshot index, * creation timestamp (milliseconds since epoch). */ #define UV__SNAPSHOT_TEMPLATE "snapshot-%llu-%llu-%llu" #define UV__SNAPSHOT_META_SUFFIX ".meta" /* Template string for snapshot metadata filenames: snapshot term, snapshot * index, creation timestamp (milliseconds since epoch). */ #define UV__SNAPSHOT_META_TEMPLATE UV__SNAPSHOT_TEMPLATE UV__SNAPSHOT_META_SUFFIX /* State codes. */ enum { UV__PRISTINE, /* Metadata cache populated and I/O capabilities probed */ UV__ACTIVE, UV__CLOSED }; /* Open segment counter type */ typedef unsigned long long uvCounter; /* Information persisted in a single metadata file. */ struct uvMetadata { unsigned long long version; /* Monotonically increasing version */ raft_term term; /* Current term */ raft_id voted_for; /* Server ID of last vote, or 0 */ }; /* Hold state of a libuv-based raft_io implementation. */ struct uv { struct raft_io *io; /* I/O object we're implementing */ struct uv_loop_s *loop; /* UV event loop */ char dir[UV__DIR_LEN]; /* Data directory */ struct raft_uv_transport *transport; /* Network transport */ struct raft_tracer *tracer; /* Debug tracing */ raft_id id; /* Server ID */ int state; /* Current state */ bool snapshot_compression; /* If compression is enabled */ bool errored; /* If a disk I/O error was hit */ bool direct_io; /* Whether direct I/O is supported */ bool async_io; /* Whether async I/O is supported */ size_t segment_size; /* Initial size of open segments. */ size_t block_size; /* Block size of the data dir */ queue clients; /* Outbound connections */ queue servers; /* Inbound connections */ unsigned connect_retry_delay; /* Client connection retry delay */ void *prepare_inflight; /* Segment being prepared */ queue prepare_reqs; /* Pending prepare requests. 
*/ queue prepare_pool; /* Prepared open segments */ uvCounter prepare_next_counter; /* Counter of next open segment */ raft_index append_next_index; /* Index of next entry to append */ queue append_segments; /* Open segments in use. */ queue append_pending_reqs; /* Pending append requests. */ queue append_writing_reqs; /* Append requests in flight */ struct UvBarrier *barrier; /* Inflight barrier request */ queue finalize_reqs; /* Segments waiting to be closed */ struct uv_work_s finalize_work; /* Resize and rename segments */ struct uv_work_s truncate_work; /* Execute truncate log requests */ queue snapshot_get_reqs; /* Inflight get snapshot requests */ struct uv_work_s snapshot_put_work; /* Execute snapshot put requests */ struct uvMetadata metadata; /* Cache of metadata on disk */ struct uv_timer_s timer; /* Timer for periodic ticks */ raft_io_tick_cb tick_cb; /* Invoked when the timer expires */ raft_io_recv_cb recv_cb; /* Invoked when upon RPC messages */ queue aborting; /* Cleanups upon errors or shutdown */ bool closing; /* True if we are closing */ raft_io_close_cb close_cb; /* Invoked when finishing closing */ }; /* Implementation of raft_io->truncate. */ int UvTruncate(struct raft_io *io, raft_index index); /* Load Raft metadata from disk, choosing the most recent version (either the * metadata1 or metadata2 file). */ int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, char *errmsg); /* Store the given metadata to disk, writing the appropriate metadata file * according to the metadata version (if the version is odd, write metadata1, * otherwise write metadata2). */ int uvMetadataStore(struct uv *uv, const struct uvMetadata *metadata); /* Metadata about a segment file. 
*/ struct uvSegmentInfo { bool is_open; /* Whether the segment is open */ union { struct { raft_index first_index; /* First index in a closed segment */ raft_index end_index; /* Last index in a closed segment */ }; struct { unsigned long long counter; /* Open segment counter */ }; }; char filename[UV__SEGMENT_FILENAME_BUF_SIZE]; /* Segment filename */ }; /* Append a new item to the given segment info list if the given filename * matches either the one of a closed segment (xxx-yyy) or the one of an open * segment (open-xxx). */ int uvSegmentInfoAppendIfMatch(const char *filename, struct uvSegmentInfo *infos[], size_t *n_infos, bool *appended); /* Sort the given list of segments by comparing their filenames. Closed segments * come before open segments. */ void uvSegmentSort(struct uvSegmentInfo *infos, size_t n_infos); /* Keep only the closed segments whose entries are within the given trailing * amount past the given snapshot last index. If the given trailing amount is 0, * unconditionally delete all closed segments. */ int uvSegmentKeepTrailing(struct uv *uv, struct uvSegmentInfo *segments, size_t n, raft_index last_index, size_t trailing, char *errmsg); /* Load all entries contained in the given closed segment. */ int uvSegmentLoadClosed(struct uv *uv, struct uvSegmentInfo *segment, struct raft_entry *entries[], size_t *n); /* Load raft entries from the given segments. The @start_index is the expected * index of the first entry of the first segment. */ int uvSegmentLoadAll(struct uv *uv, const raft_index start_index, struct uvSegmentInfo *segments, size_t n_segments, struct raft_entry **entries, size_t *n_entries); /* Return the number of blocks in a segments. */ #define uvSegmentBlocks(UV) (UV->segment_size / UV->block_size) /* A dynamically allocated buffer holding data to be written into a segment * file. * * The memory is aligned at disk block boundary, to allow for direct I/O. 
*/ struct uvSegmentBuffer { size_t block_size; /* Disk block size for direct I/O */ uv_buf_t arena; /* Previously allocated memory that can be re-used */ size_t n; /* Write offset */ }; /* Initialize an empty buffer. */ void uvSegmentBufferInit(struct uvSegmentBuffer *b, size_t block_size); /* Release all memory used by the buffer. */ void uvSegmentBufferClose(struct uvSegmentBuffer *b); /* Encode the format version at the very beginning of the buffer. This function * must be called when the buffer is empty. */ int uvSegmentBufferFormat(struct uvSegmentBuffer *b); /* Extend the segment's buffer by encoding the given entries. * * Previous data in the buffer will be retained, and data for these new entries * will be appended. */ int uvSegmentBufferAppend(struct uvSegmentBuffer *b, const struct raft_entry entries[], unsigned n_entries); /* After all entries to write have been encoded, finalize the buffer by zeroing * the unused memory of the last block. The out parameter will point to the * memory to write. */ void uvSegmentBufferFinalize(struct uvSegmentBuffer *b, uv_buf_t *out); /* Reset the buffer preparing it for the next segment write. * * If the retain parameter is greater than zero, then the data of the retain'th * block will be copied at the beginning of the buffer and the write offset will * be set accordingly. */ void uvSegmentBufferReset(struct uvSegmentBuffer *b, unsigned retain); /* Write a closed segment, containing just one entry at the given index * for the given configuration. */ int uvSegmentCreateClosedWithConfiguration( struct uv *uv, raft_index index, const struct raft_configuration *configuration); /* Write the first closed segment, containing just one entry for the given * configuration. */ int uvSegmentCreateFirstClosed(struct uv *uv, const struct raft_configuration *configuration); /* Truncate a segment that was already closed. 
*/ int uvSegmentTruncate(struct uv *uv, struct uvSegmentInfo *segment, raft_index index); /* Info about a persisted snapshot stored in snapshot metadata file. */ struct uvSnapshotInfo { raft_term term; raft_index index; unsigned long long timestamp; char filename[UV__FILENAME_LEN]; }; /* Render the filename of the data file of a snapshot */ void uvSnapshotFilenameOf(struct uvSnapshotInfo *info, char *filename); /* Upon success `orphan` will be true if filename is a snapshot file without a * sibling .meta file */ int UvSnapshotIsOrphan(const char *dir, const char *filename, bool *orphan); /* Upon success `orphan` will be true if filename is a snapshot .meta file * without a sibling snapshot file */ int UvSnapshotMetaIsOrphan(const char *dir, const char *filename, bool *orphan); /* Append a new item to the given snapshot info list if the given filename * matches the pattern of a snapshot metadata file (snapshot-xxx-yyy-zzz.meta) * and there is actually a matching non-empty snapshot file on disk. */ int UvSnapshotInfoAppendIfMatch(struct uv *uv, const char *filename, struct uvSnapshotInfo *infos[], size_t *n_infos, bool *appended); /* Sort the given list of snapshots by comparing their filenames. Older * snapshots will come first. */ void UvSnapshotSort(struct uvSnapshotInfo *infos, size_t n_infos); /* Load the snapshot associated with the given metadata. */ int UvSnapshotLoad(struct uv *uv, struct uvSnapshotInfo *meta, struct raft_snapshot *snapshot, char *errmsg); /* Implementation raft_io->snapshot_put (defined in uv_snapshot.c). */ int UvSnapshotPut(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb); /* Implementation of raft_io->snapshot_get (defined in uv_snapshot.c). */ int UvSnapshotGet(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb); /* Return a list of all snapshots and segments found in the data directory. 
Both * snapshots and segments are ordered by filename (closed segments come before * open ones). */ int UvList(struct uv *uv, struct uvSnapshotInfo *snapshots[], size_t *n_snapshots, struct uvSegmentInfo *segments[], size_t *n_segments, char *errmsg); /* Request to obtain a newly prepared open segment. */ struct uvPrepare; typedef void (*uvPrepareCb)(struct uvPrepare *req, int status); struct uvPrepare { void *data; /* User data */ uv_file fd; /* Resulting segment file descriptor */ unsigned long long counter; /* Resulting segment counter */ uvPrepareCb cb; /* Completion callback */ queue queue; /* Links in uv_io->prepare_reqs */ }; /* Get a prepared open segment ready for writing. If a prepared open segment is * already available in the pool, it will be returned immediately using the fd * and counter pointers and the request callback won't be invoked. Otherwise the * request will be queued and its callback invoked once a newly prepared segment * is available. */ int UvPrepare(struct uv *uv, uv_file *fd, uvCounter *counter, struct uvPrepare *req, uvPrepareCb cb); /* Cancel all pending prepare requests and start removing all unused prepared * open segments. If a segment currently being created, wait for it to complete * and then remove it immediately. */ void UvPrepareClose(struct uv *uv); /* Implementation of raft_io->append.*/ int UvAppend(struct raft_io *io, struct raft_io_append *req, const struct raft_entry entries[], unsigned n, raft_io_append_cb cb); /* Pause request object and callback. */ struct UvBarrier; typedef void (*UvBarrierCb)(struct UvBarrier *req); struct UvBarrier { void *data; /* User data */ UvBarrierCb cb; /* Completion callback */ }; /* Submit a barrier request to interrupt the normal flow of append * operations. * * The following will happen: * * - Replace uv->append_next_index with the given next_index, so the next entry * that will be appended will have the new index. 
* * - Execution of new writes for subsequent append requests will be blocked * until UvUnblock is called. * * - Wait for all currently pending and inflight append requests against all * open segments to complete, and for those open segments to be finalized, * then invoke the barrier callback. * * This API is used to implement truncate and snapshot install operations, which * need to wait until all pending writes have settled and modify the log state, * changing the next index. */ int UvBarrier(struct uv *uv, raft_index next_index, struct UvBarrier *barrier, UvBarrierCb cb); /* Returns @true if there are no more segments referencing uv->barrier */ bool UvBarrierReady(struct uv *uv); /* Resume writing append requests after UvBarrier has been called. */ void UvUnblock(struct uv *uv); /* Cancel all pending write requests and request the current segment to be * finalized. Must be invoked at closing time. */ void uvAppendClose(struct uv *uv); /* Submit a request to finalize the open segment with the given counter. * * Requests are processed one at a time, to avoid ending up closing open segment * N + 1 before closing open segment N. */ int UvFinalize(struct uv *uv, unsigned long long counter, size_t used, raft_index first_index, raft_index last_index); /* Implementation of raft_io->send. */ int UvSend(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb); /* Stop all clients by closing the outbound stream handles and canceling all * pending send requests. */ void UvSendClose(struct uv *uv); /* Start receiving messages from new incoming connections. */ int UvRecvStart(struct uv *uv); /* Stop all servers by closing the inbound stream handles and aborting all * requests being received. 
*/ void UvRecvClose(struct uv *uv); void uvMaybeFireCloseCb(struct uv *uv); #endif /* UV_H_ */ raft-0.11.3/src/uv_append.c000066400000000000000000000631741415614527300154670ustar00rootroot00000000000000#include "assert.h" #include "byte.h" #include "heap.h" #include "queue.h" #include "uv.h" #include "uv_encoding.h" #include "uv_writer.h" /* The happy path for an append request is: * * - If there is a current segment and it is has enough spare capacity to hold * the entries in the request, then queue the request, linking it to the * current segment. * * - If there is no current segment, or it hasn't enough spare capacity to hold * the entries in the request, then request a new open segment to be prepared, * queue the request and link it to the newly requested segment. * * - Wait for any pending write against the current segment to complete, and * also for the prepare request if we asked for a new segment. Also wait for * any in progress barrier to be removed. * * - Submit a write request for the entries in this append request. The write * request might contain other append requests targeted to the current segment * that might have accumulated in the meantime, if we have been waiting for a * segment to be prepared, or for the previous write to complete or for a * barrier to be removed. * * - Wait for the write request to finish and fire the append request's * callback. * * Possible failure modes are: * * - The request to prepare a new segment fails. * - The write request fails. * - The request to finalize a new segment fails to be submitted. * * In all these cases we mark the instance as errored and fire the relevant * callbacks. **/ /* An open segment being written or waiting to be written. 
*/
struct uvAliveSegment
{
    struct uv *uv;                  /* Our writer */
    struct uvPrepare prepare;       /* Prepare segment file request */
    struct UvWriter writer;         /* Writer to perform async I/O */
    struct UvWriterReq write;       /* Write request */
    unsigned long long counter;     /* Open segment counter */
    raft_index first_index;         /* Index of the first entry written */
    raft_index pending_last_index;  /* Index of the last entry written */
    size_t size;                    /* Total number of bytes used */
    unsigned next_block;            /* Next segment block to write */
    struct uvSegmentBuffer pending; /* Buffer for data yet to be written */
    uv_buf_t buf;                   /* Write buffer for current write */
    raft_index last_index;          /* Last entry actually written */
    size_t written;                 /* Number of bytes actually written */
    queue queue;                    /* Segment queue */
    struct UvBarrier *barrier;      /* Barrier waiting on this segment */
    bool finalize;                  /* Finalize the segment after writing */
};

/* A single raft_io->append request, linked to the alive segment it has been
 * assigned to and queued until it can be written. */
struct uvAppend
{
    struct raft_io_append *req;       /* User request */
    const struct raft_entry *entries; /* Entries to write */
    unsigned n;                       /* Number of entries */
    struct uvAliveSegment *segment;   /* Segment to write to */
    queue queue;
};

/* Close callback of the segment's writer: release the segment's buffer and
 * memory, then give the uv instance a chance to fire its close callback. */
static void uvAliveSegmentWriterCloseCb(struct UvWriter *writer)
{
    struct uvAliveSegment *segment = writer->data;
    struct uv *uv = segment->uv;
    uvSegmentBufferClose(&segment->pending);
    HeapFree(segment);
    uvMaybeFireCloseCb(uv);
}

/* Submit a request to close the current open segment. */
static void uvAliveSegmentFinalize(struct uvAliveSegment *s)
{
    struct uv *uv = s->uv;
    int rv;

    rv = UvFinalize(uv, s->counter, s->written, s->first_index, s->last_index);
    if (rv != 0) {
        uv->errored = true;
        /* We failed to submit the finalize request, but let's still close the
         * file handle and release the segment memory. */
    }

    QUEUE_REMOVE(&s->queue);
    UvWriterClose(&s->writer, uvAliveSegmentWriterCloseCb);
}

/* Flush the append requests in the given queue, firing their callbacks with the
 * given status.
*/
static void uvAppendFinishRequestsInQueue(struct uv *uv, queue *q, int status)
{
    queue queue_copy;
    struct uvAppend *append;

    QUEUE_INIT(&queue_copy);

    /* First pass: drain q into a local copy. This guarantees that all index
     * rollbacks happen before any user callback runs, and that callbacks
     * which mutate q cannot affect the iteration below. */
    while (!QUEUE_IS_EMPTY(q)) {
        queue *head;
        head = QUEUE_HEAD(q);
        append = QUEUE_DATA(head, struct uvAppend, queue);
        /* Rollback the append next index if the result was unsuccessful. */
        if (status != 0) {
            uv->append_next_index -= append->n;
        }
        QUEUE_REMOVE(head);
        QUEUE_PUSH(&queue_copy, head);
    }

    /* Second pass: release each request object, then invoke its callback.
     * The request is freed before the callback, so the callback must not
     * touch the uvAppend wrapper (it only receives the user's req). */
    while (!QUEUE_IS_EMPTY(&queue_copy)) {
        queue *head;
        struct raft_io_append *req;
        head = QUEUE_HEAD(&queue_copy);
        append = QUEUE_DATA(head, struct uvAppend, queue);
        QUEUE_REMOVE(head);
        req = append->req;
        HeapFree(append);
        req->cb(req, status);
    }
}

/* Flush the append requests in the writing queue, firing their callbacks with
 * the given status. */
static void uvAppendFinishWritingRequests(struct uv *uv, int status)
{
    uvAppendFinishRequestsInQueue(uv, &uv->append_writing_reqs, status);
}

/* Flush the append requests in the pending queue, firing their callbacks with
 * the given status. */
static void uvAppendFinishPendingRequests(struct uv *uv, int status)
{
    uvAppendFinishRequestsInQueue(uv, &uv->append_pending_reqs, status);
}

/* Return the segment currently being written, or NULL when no segment has been
 * written yet. The current segment is the head of the append_segments queue. */
static struct uvAliveSegment *uvGetCurrentAliveSegment(struct uv *uv)
{
    queue *head;
    if (QUEUE_IS_EMPTY(&uv->append_segments)) {
        return NULL;
    }
    head = QUEUE_HEAD(&uv->append_segments);
    return QUEUE_DATA(head, struct uvAliveSegment, queue);
}

/* Extend the segment's write buffer by encoding the entries in the given
 * request into it. IOW, previous data in the write buffer will be retained, and
 * data for these new entries will be appended.
*/
static int uvAliveSegmentEncodeEntriesToWriteBuf(struct uvAliveSegment *segment,
                                                 struct uvAppend *append)
{
    int rv;
    assert(append->segment == segment);

    /* If this is the very first write to the segment, we need to include the
     * format version */
    if (segment->pending.n == 0 && segment->next_block == 0) {
        rv = uvSegmentBufferFormat(&segment->pending);
        if (rv != 0) {
            return rv;
        }
    }

    rv = uvSegmentBufferAppend(&segment->pending, append->entries, append->n);
    if (rv != 0) {
        return rv;
    }

    /* Advance the provisional last index; it becomes the definitive
     * last_index only after the write completes (see uvAliveSegmentWriteCb). */
    segment->pending_last_index += append->n;
    return 0;
}

/* Forward declaration, needed by the write callback below to kick off the
 * next batch of pending requests. */
static int uvAppendMaybeStart(struct uv *uv);

/* Completion callback of a segment write: update write markers, fire the
 * callbacks of the requests fulfilled by this write, and schedule further
 * pending work. */
static void uvAliveSegmentWriteCb(struct UvWriterReq *write, const int status)
{
    struct uvAliveSegment *s = write->data;
    struct uv *uv = s->uv;
    unsigned n_blocks;
    int rv;

    assert(uv->state != UV__CLOSED);
    assert(s->buf.len % uv->block_size == 0);
    assert(s->buf.len >= uv->block_size);

    /* Check if the write was successful. */
    if (status != 0) {
        Tracef(uv->tracer, "write: %s", uv->io->errmsg);
        uv->errored = true;
        goto out;
    }

    s->written = s->next_block * uv->block_size + s->pending.n;
    s->last_index = s->pending_last_index;

    /* Update our write markers.
     *
     * We have four cases:
     *
     * - The data fit completely in the leftover space of the first block that
     *   we wrote and there is more space left. In this case we just keep the
     *   scheduled marker unchanged.
     *
     * - The data fit completely in the leftover space of the first block that
     *   we wrote and there is no space left. In this case we advance the
     *   current block counter, reset the first write block and set the
     *   scheduled marker to 0.
     *
     * - The data did not fit completely in the leftover space of the first
     *   block that we wrote, so we wrote more than one block. The last block
     *   that we wrote was not filled completely and has leftover space. In
     *   this case we advance the current block counter and copy the memory
     *   used for the last block to the head of the write arena list, updating
     *   the scheduled marker accordingly.
     *
     * - The data did not fit completely in the leftover space of the first
     *   block that we wrote, so we wrote more than one block. The last block
     *   that we wrote was filled exactly and has no leftover space. In this
     *   case we advance the current block counter, reset the first buffer and
     *   set the scheduled marker to 0.
     */
    n_blocks =
        (unsigned)(s->buf.len / uv->block_size); /* Number of blocks written. */
    if (s->pending.n < uv->block_size) {
        /* Nothing to do */
        assert(n_blocks == 1);
    } else if (s->pending.n == uv->block_size) {
        assert(n_blocks == 1);
        s->next_block++;
        uvSegmentBufferReset(&s->pending, 0);
    } else {
        assert(s->pending.n > uv->block_size);
        assert(s->buf.len > uv->block_size);
        if (s->pending.n % uv->block_size > 0) {
            s->next_block += n_blocks - 1;
            uvSegmentBufferReset(&s->pending, n_blocks - 1);
        } else {
            s->next_block += n_blocks;
            uvSegmentBufferReset(&s->pending, 0);
        }
    }

out:
    /* Fire the callbacks of all requests that were fulfilled with this
     * write. */
    uvAppendFinishWritingRequests(uv, status);

    /* During the closing sequence we should have already canceled all pending
     * request. */
    if (uv->closing) {
        assert(QUEUE_IS_EMPTY(&uv->append_pending_reqs));
        assert(s->finalize);
        uvAliveSegmentFinalize(s);
        return;
    }

    /* Possibly process waiting requests. */
    if (!QUEUE_IS_EMPTY(&uv->append_pending_reqs)) {
        rv = uvAppendMaybeStart(uv);
        if (rv != 0) {
            uv->errored = true;
        }
    } else if (s->finalize) {
        /* If there are no more append_pending_reqs, this segment
         * must be finalized here in case we don't receive AppendEntries
         * RPCs anymore (could happen during a Snapshot install, causing
         * the BarrierCb to never fire) */
        uvAliveSegmentFinalize(s);
    }
}

/* Submit a file write request to append the entries encoded in the write buffer
 * of the given segment.
*/
static int uvAliveSegmentWrite(struct uvAliveSegment *s)
{
    int rv;
    assert(s->counter != 0);
    assert(s->pending.n > 0);

    /* Zero-fill the unused tail of the last block and get the aligned buffer
     * to submit. */
    uvSegmentBufferFinalize(&s->pending, &s->buf);
    rv = UvWriterSubmit(&s->writer, &s->write, &s->buf, 1,
                        s->next_block * s->uv->block_size,
                        uvAliveSegmentWriteCb);
    if (rv != 0) {
        return rv;
    }
    return 0;
}

/* Start writing all pending append requests for the current segment, unless we
 * are already writing, or the segment itself has not yet been prepared or we
 * are blocked on a barrier. If there are no more requests targeted at the
 * current segment, make sure it's marked to be finalize and try with the next
 * segment. */
static int uvAppendMaybeStart(struct uv *uv)
{
    struct uvAliveSegment *segment;
    struct uvAppend *append;
    unsigned n_reqs;
    queue *head;
    queue q;
    int rv;

    assert(!uv->closing);
    assert(!QUEUE_IS_EMPTY(&uv->append_pending_reqs));

    /* If we are already writing, let's wait. */
    if (!QUEUE_IS_EMPTY(&uv->append_writing_reqs)) {
        return 0;
    }

start:
    segment = uvGetCurrentAliveSegment(uv);
    assert(segment != NULL);

    /* If the preparer isn't done yet, let's wait. */
    if (segment->counter == 0) {
        return 0;
    }

    /* If there's a barrier in progress, and it's not waiting for this segment
     * to be finalized, let's wait. */
    if (uv->barrier != NULL && segment->barrier != uv->barrier) {
        return 0;
    }

    /* If there's no barrier in progress and this segment is marked with a
     * barrier, it means that this was a pending barrier, which we can become
     * the current barrier now. */
    if (uv->barrier == NULL && segment->barrier != NULL) {
        uv->barrier = segment->barrier;
    }

    /* Let's add to the segment's write buffer all pending requests targeted to
     * this segment. */
    QUEUE_INIT(&q);
    n_reqs = 0;
    while (!QUEUE_IS_EMPTY(&uv->append_pending_reqs)) {
        head = QUEUE_HEAD(&uv->append_pending_reqs);
        append = QUEUE_DATA(head, struct uvAppend, queue);
        assert(append->segment != NULL);
        if (append->segment != segment) {
            break; /* Not targeted to this segment */
        }
        QUEUE_REMOVE(head);
        QUEUE_PUSH(&q, head);
        n_reqs++;
        rv = uvAliveSegmentEncodeEntriesToWriteBuf(segment, append);
        if (rv != 0) {
            goto err;
        }
    }

    /* If we have no more requests for this segment, let's check if it has been
     * marked for closing, and in that case finalize it and possibly trigger a
     * write against the next segment (unless there is a truncate request, in
     * that case we need to wait for it). Otherwise it must mean we have
     * exhausted the queue of pending append requests. */
    if (n_reqs == 0) {
        assert(QUEUE_IS_EMPTY(&uv->append_writing_reqs));
        if (segment->finalize) {
            uvAliveSegmentFinalize(segment);
            if (!QUEUE_IS_EMPTY(&uv->append_pending_reqs)) {
                goto start;
            }
        }
        assert(QUEUE_IS_EMPTY(&uv->append_pending_reqs));
        return 0;
    }

    /* Move the encoded requests to the writing queue and submit the write. */
    while (!QUEUE_IS_EMPTY(&q)) {
        head = QUEUE_HEAD(&q);
        QUEUE_REMOVE(head);
        QUEUE_PUSH(&uv->append_writing_reqs, head);
    }
    rv = uvAliveSegmentWrite(segment);
    if (rv != 0) {
        goto err;
    }
    return 0;

err:
    assert(rv != 0);
    return rv;
}

/* Invoked when a newly added open segment becomes ready for writing, after the
 * associated UvPrepare request completes (either synchronously or
 * asynchronously.
*/
static int uvAliveSegmentReady(struct uv *uv,
                               uv_file fd,
                               uvCounter counter,
                               struct uvAliveSegment *segment)
{
    int rv;
    rv = UvWriterInit(&segment->writer, uv->loop, fd, uv->direct_io,
                      uv->async_io, 1, uv->io->errmsg);
    if (rv != 0) {
        ErrMsgWrapf(uv->io->errmsg, "setup writer for open-%llu", counter);
        return rv;
    }
    /* A non-zero counter marks the segment as prepared and writable (see the
     * counter == 0 check in uvAppendMaybeStart). */
    segment->counter = counter;
    return 0;
}

/* Completion callback of the asynchronous UvPrepare request issued for a new
 * open segment. */
static void uvAliveSegmentPrepareCb(struct uvPrepare *req, int status)
{
    struct uvAliveSegment *segment = req->data;
    struct uv *uv = segment->uv;
    int rv;

    assert(segment->counter == 0);
    assert(segment->written == 0);

    /* If we have been closed, let's discard the segment. */
    if (uv->closing) {
        QUEUE_REMOVE(&segment->queue);
        assert(status == RAFT_CANCELED); /* UvPrepare cancels pending reqs */
        uvSegmentBufferClose(&segment->pending);
        HeapFree(segment);
        return;
    }

    if (status != 0) {
        rv = status;
        goto err;
    }

    assert(req->counter > 0);
    assert(req->fd >= 0);

    /* There must be pending appends that were waiting for this prepare
     * requests. */
    assert(!QUEUE_IS_EMPTY(&uv->append_pending_reqs));

    rv = uvAliveSegmentReady(uv, req->fd, req->counter, segment);
    if (rv != 0) {
        goto err;
    }

    rv = uvAppendMaybeStart(uv);
    if (rv != 0) {
        goto err;
    }

    return;

err:
    /* Unlink and release the failed segment, then fail every pending append
     * request. */
    QUEUE_REMOVE(&segment->queue);
    HeapFree(segment);
    uv->errored = true;
    uvAppendFinishPendingRequests(uv, rv);
}

/* Initialize a new open segment object. */
static void uvAliveSegmentInit(struct uvAliveSegment *s, struct uv *uv)
{
    s->uv = uv;
    s->prepare.data = s;
    s->writer.data = s;
    s->write.data = s;
    s->counter = 0;
    s->first_index = uv->append_next_index;
    s->pending_last_index = s->first_index - 1;
    s->last_index = 0;
    s->size = sizeof(uint64_t) /* Format version */;
    s->next_block = 0;
    uvSegmentBufferInit(&s->pending, uv->block_size);
    s->written = 0;
    s->barrier = NULL;
    s->finalize = false;
}

/* Add a new active open segment, since the append request being submitted does
 * not fit in the last segment we scheduled writes for, or no segment had been
 * previously requested at all.
*/
static int uvAppendPushAliveSegment(struct uv *uv)
{
    struct uvAliveSegment *segment;
    uv_file fd;
    uvCounter counter;
    int rv;

    segment = HeapMalloc(sizeof *segment);
    if (segment == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    uvAliveSegmentInit(segment, uv);

    QUEUE_PUSH(&uv->append_segments, &segment->queue);

    rv = UvPrepare(uv, &fd, &counter, &segment->prepare,
                   uvAliveSegmentPrepareCb);
    if (rv != 0) {
        goto err_after_alloc;
    }

    /* If we've been returned a ready prepared segment right away, start writing
     * to it immediately. */
    if (fd != -1) {
        rv = uvAliveSegmentReady(uv, fd, counter, segment);
        if (rv != 0) {
            goto err_after_prepare;
        }
    }
    return 0;

err_after_prepare:
    /* Close the file handle and hand the already-created segment file over to
     * the finalizer so it gets removed. */
    UvOsClose(fd);
    UvFinalize(uv, counter, 0, 0, 0);
err_after_alloc:
    QUEUE_REMOVE(&segment->queue);
    HeapFree(segment);
err:
    assert(rv != 0);
    return rv;
}

/* Return the last segment that we have requested to prepare. */
static struct uvAliveSegment *uvGetLastAliveSegment(struct uv *uv)
{
    queue *tail;
    if (QUEUE_IS_EMPTY(&uv->append_segments)) {
        return NULL;
    }
    tail = QUEUE_TAIL(&uv->append_segments);
    return QUEUE_DATA(tail, struct uvAliveSegment, queue);
}

/* Return #true if the remaining capacity of the given segment is equal or
 * greater than @size. */
static bool uvAliveSegmentHasEnoughSpareCapacity(struct uvAliveSegment *s,
                                                 size_t size)
{
    return s->size + size <= s->uv->segment_size;
}

/* Add @size bytes to the number of bytes that the segment will hold. The actual
 * write will happen when the previous write completes, if any. */
static void uvAliveSegmentReserveSegmentCapacity(struct uvAliveSegment *s,
                                                 size_t size)
{
    s->size += size;
}

/* Return the number of bytes needed to store the batch of entries of this
 * append request on disk.
*/
static size_t uvAppendSize(struct uvAppend *a)
{
    size_t size = sizeof(uint32_t) * 2; /* CRC checksums */
    unsigned i;
    size += uvSizeofBatchHeader(a->n); /* Batch header */
    for (i = 0; i < a->n; i++) {       /* Entries data */
        size += bytePad64(a->entries[i].buf.len);
    }
    return size;
}

/* Enqueue an append entries request, assigning it to the appropriate active
 * open segment. */
static int uvAppendEnqueueRequest(struct uv *uv, struct uvAppend *append)
{
    struct uvAliveSegment *segment;
    size_t size;
    bool fits;
    int rv;

    assert(append->entries != NULL);
    assert(append->n > 0);
    assert(uv->append_next_index > 0);

    size = uvAppendSize(append);

    /* If we have no segments yet, it means this is the very first append, and
     * we need to add a new segment. Otherwise we check if the last segment has
     * enough room for this batch of entries. */
    segment = uvGetLastAliveSegment(uv);
    if (segment == NULL || segment->finalize) {
        fits = false;
    } else {
        fits = uvAliveSegmentHasEnoughSpareCapacity(segment, size);
        if (!fits) {
            segment->finalize = true; /* Finalize when all writes are done */
        }
    }

    /* If there's no segment or if this batch does not fit in this segment, we
     * need to add a new one. */
    if (!fits) {
        rv = uvAppendPushAliveSegment(uv);
        if (rv != 0) {
            goto err;
        }
    }

    segment = uvGetLastAliveSegment(uv); /* Get the last added segment */
    uvAliveSegmentReserveSegmentCapacity(segment, size);

    append->segment = segment;
    QUEUE_PUSH(&uv->append_pending_reqs, &append->queue);
    uv->append_next_index += append->n;

    return 0;

err:
    assert(rv != 0);
    return rv;
}

/* Implementation of raft_io->append: queue the entries for writing to the
 * current open segment and possibly kick off the write immediately. */
int UvAppend(struct raft_io *io,
             struct raft_io_append *req,
             const struct raft_entry entries[],
             unsigned n,
             raft_io_append_cb cb)
{
    struct uv *uv;
    struct uvAppend *append;
    int rv;

    uv = io->impl;
    assert(!uv->closing);

    append = HeapMalloc(sizeof *append);
    if (append == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    append->req = req;
    append->entries = entries;
    append->n = n;
    req->cb = cb;

    rv = uvAppendEnqueueRequest(uv, append);
    if (rv != 0) {
        goto err_after_req_alloc;
    }

    assert(append->segment != NULL);
    assert(!QUEUE_IS_EMPTY(&uv->append_pending_reqs));

    /* Try to write immediately.
     *
     * NOTE(review): on failure here the request has already been enqueued and
     * is not freed; presumably it is completed later through the error
     * handling of the queues — confirm there is no double completion. */
    rv = uvAppendMaybeStart(uv);
    if (rv != 0) {
        return rv;
    }

    return 0;

err_after_req_alloc:
    HeapFree(append);
err:
    assert(rv != 0);
    return rv;
}

/* Finalize the current segment as soon as all its pending or inflight append
 * requests get completed. */
static void uvFinalizeCurrentAliveSegmentOnceIdle(struct uv *uv)
{
    struct uvAliveSegment *s;
    queue *head;
    bool has_pending_reqs;
    bool has_writing_reqs;

    s = uvGetCurrentAliveSegment(uv);
    if (s == NULL) {
        return;
    }

    /* Check if there are pending append requests targeted to the current
     * segment. */
    has_pending_reqs = false;
    QUEUE_FOREACH(head, &uv->append_pending_reqs)
    {
        struct uvAppend *r = QUEUE_DATA(head, struct uvAppend, queue);
        if (r->segment == s) {
            has_pending_reqs = true;
            break;
        }
    }
    has_writing_reqs = !QUEUE_IS_EMPTY(&uv->append_writing_reqs);

    /* If there is no pending append request or inflight write against the
     * current segment, we can submit a request for it to be closed
     * immediately. Otherwise, we set the finalize flag.
* * TODO: is it actually possible to have pending requests with no writing * requests? Probably no. */ if (!has_pending_reqs && !has_writing_reqs) { uvAliveSegmentFinalize(s); } else { s->finalize = true; } } bool UvBarrierReady(struct uv *uv) { if (uv->barrier == NULL) { return true; } queue *head; QUEUE_FOREACH(head, &uv->append_segments) { struct uvAliveSegment *segment; segment = QUEUE_DATA(head, struct uvAliveSegment, queue); if (segment->barrier == uv->barrier) { return false; } } return true; } int UvBarrier(struct uv *uv, raft_index next_index, struct UvBarrier *barrier, UvBarrierCb cb) { queue *head; assert(!uv->closing); /* The next entry will be appended at this index. */ uv->append_next_index = next_index; /* Arrange for all open segments not already involved in other barriers to * be finalized as soon as their append requests get completed and mark them * as involved in this specific barrier request. */ QUEUE_FOREACH(head, &uv->append_segments) { struct uvAliveSegment *segment; segment = QUEUE_DATA(head, struct uvAliveSegment, queue); if (segment->barrier != NULL) { continue; } segment->barrier = barrier; if (segment == uvGetCurrentAliveSegment(uv)) { uvFinalizeCurrentAliveSegmentOnceIdle(uv); continue; } segment->finalize = true; } barrier->cb = cb; if (uv->barrier == NULL) { uv->barrier = barrier; /* If there's no pending append-related activity, we can fire the * callback immediately. * * TODO: find a way to avoid invoking this synchronously. 
*/ if (QUEUE_IS_EMPTY(&uv->append_segments) && QUEUE_IS_EMPTY(&uv->finalize_reqs) && uv->finalize_work.data == NULL) { barrier->cb(barrier); } } return 0; } void UvUnblock(struct uv *uv) { assert(uv->barrier != NULL); uv->barrier = NULL; if (uv->closing) { uvMaybeFireCloseCb(uv); return; } if (!QUEUE_IS_EMPTY(&uv->append_pending_reqs)) { int rv; rv = uvAppendMaybeStart(uv); if (rv != 0) { uv->errored = true; } } } /* Fire all pending barrier requests, the barrier callback will notice that * we're closing and abort there. */ static void uvBarrierClose(struct uv *uv) { struct UvBarrier *barrier = NULL; queue *head; assert(uv->closing); QUEUE_FOREACH(head, &uv->append_segments) { struct uvAliveSegment *segment; segment = QUEUE_DATA(head, struct uvAliveSegment, queue); if (segment->barrier != NULL && segment->barrier != barrier) { barrier = segment->barrier; barrier->cb(barrier); if (segment->barrier == uv->barrier) { uv->barrier = NULL; } } segment->barrier = NULL; } /* There might still still be a current barrier set on uv->barrier, meaning * that the open segment it was associated with has started to be finalized * and is not anymore in the append_segments queue. Let's cancel that * too. */ if (uv->barrier != NULL) { uv->barrier->cb(uv->barrier); uv->barrier = NULL; } } void uvAppendClose(struct uv *uv) { struct uvAliveSegment *segment; assert(uv->closing); uvBarrierClose(uv); UvPrepareClose(uv); uvAppendFinishPendingRequests(uv, RAFT_CANCELED); uvFinalizeCurrentAliveSegmentOnceIdle(uv); /* Also finalize the segments that we didn't write at all and are just * sitting in the append_segments queue waiting for writes against the * current segment to complete. 
*/ while (!QUEUE_IS_EMPTY(&uv->append_segments)) { segment = uvGetLastAliveSegment(uv); assert(segment != NULL); if (segment == uvGetCurrentAliveSegment(uv)) { break; /* We reached the head of the queue */ } assert(segment->written == 0); uvAliveSegmentFinalize(segment); } } raft-0.11.3/src/uv_encoding.c000066400000000000000000000376351415614527300160110ustar00rootroot00000000000000#include "uv_encoding.h" #include #include "../include/raft/uv.h" #include "assert.h" #include "byte.h" #include "configuration.h" /** * Size of the request preamble. */ #define RAFT_IO_UV__PREAMBLE_SIZE \ (sizeof(uint64_t) /* Message type. */ + \ sizeof(uint64_t) /* Message size. */) static size_t sizeofRequestVoteV1(void) { return sizeof(uint64_t) + /* Term. */ sizeof(uint64_t) + /* Candidate ID. */ sizeof(uint64_t) + /* Last log index. */ sizeof(uint64_t) /* Last log term. */; } static size_t sizeofRequestVote(void) { return sizeofRequestVoteV1() + sizeof(uint64_t) /* Leadership transfer. */; } static size_t sizeofRequestVoteResultV1(void) { return sizeof(uint64_t) + /* Term. */ sizeof(uint64_t) /* Vote granted. */; } static size_t sizeofRequestVoteResult(void) { return sizeofRequestVoteResultV1() + sizeof(uint64_t) /* Flags. */; } static size_t sizeofAppendEntries(const struct raft_append_entries *p) { return sizeof(uint64_t) + /* Leader's term. */ sizeof(uint64_t) + /* Leader ID */ sizeof(uint64_t) + /* Previous log entry index */ sizeof(uint64_t) + /* Previous log entry term */ sizeof(uint64_t) + /* Leader's commit index */ sizeof(uint64_t) + /* Number of entries in the batch */ 16 * p->n_entries /* One header per entry */; } static size_t sizeofAppendEntriesResult(void) { return sizeof(uint64_t) + /* Term. */ sizeof(uint64_t) + /* Success. */ sizeof(uint64_t) /* Last log index. */; } static size_t sizeofInstallSnapshot(const struct raft_install_snapshot *p) { size_t conf_size = configurationEncodedSize(&p->conf); return sizeof(uint64_t) + /* Leader's term. 
*/ sizeof(uint64_t) + /* Leader ID */ sizeof(uint64_t) + /* Snapshot's last index */ sizeof(uint64_t) + /* Term of last index */ sizeof(uint64_t) + /* Configuration's index */ sizeof(uint64_t) + /* Length of configuration */ conf_size + /* Configuration data */ sizeof(uint64_t); /* Length of snapshot data */ } static size_t sizeofTimeoutNow(void) { return sizeof(uint64_t) + /* Term. */ sizeof(uint64_t) + /* Last log index. */ sizeof(uint64_t) /* Last log term. */; } size_t uvSizeofBatchHeader(size_t n) { return 8 + /* Number of entries in the batch, little endian */ 16 * n /* One header per entry */; } static void encodeRequestVote(const struct raft_request_vote *p, void *buf) { void *cursor = buf; uint64_t flags = 0; if (p->disrupt_leader) { flags |= 1 << 0; } if (p->pre_vote) { flags |= 1 << 1; } bytePut64(&cursor, p->term); bytePut64(&cursor, p->candidate_id); bytePut64(&cursor, p->last_log_index); bytePut64(&cursor, p->last_log_term); bytePut64(&cursor, flags); } static void encodeRequestVoteResult(const struct raft_request_vote_result *p, void *buf) { void *cursor = buf; uint64_t flags = 0; if (p->pre_vote == raft_tribool_true) { flags |= (1 << 0); } bytePut64(&cursor, p->term); bytePut64(&cursor, p->vote_granted); bytePut64(&cursor, flags); } static void encodeAppendEntries(const struct raft_append_entries *p, void *buf) { void *cursor; cursor = buf; bytePut64(&cursor, p->term); /* Leader's term. */ bytePut64(&cursor, p->prev_log_index); /* Previous index. */ bytePut64(&cursor, p->prev_log_term); /* Previous term. */ bytePut64(&cursor, p->leader_commit); /* Commit index. 
*/ uvEncodeBatchHeader(p->entries, p->n_entries, cursor); } static void encodeAppendEntriesResult( const struct raft_append_entries_result *p, void *buf) { void *cursor = buf; bytePut64(&cursor, p->term); bytePut64(&cursor, p->rejected); bytePut64(&cursor, p->last_log_index); } static void encodeInstallSnapshot(const struct raft_install_snapshot *p, void *buf) { void *cursor; size_t conf_size = configurationEncodedSize(&p->conf); cursor = buf; bytePut64(&cursor, p->term); /* Leader's term. */ bytePut64(&cursor, p->last_index); /* Snapshot last index. */ bytePut64(&cursor, p->last_term); /* Term of last index. */ bytePut64(&cursor, p->conf_index); /* Configuration index. */ bytePut64(&cursor, conf_size); /* Configuration length. */ configurationEncodeToBuf(&p->conf, cursor); cursor = (uint8_t *)cursor + conf_size; bytePut64(&cursor, p->data.len); /* Snapshot data size. */ } static void encodeTimeoutNow(const struct raft_timeout_now *p, void *buf) { void *cursor = buf; bytePut64(&cursor, p->term); bytePut64(&cursor, p->last_log_index); bytePut64(&cursor, p->last_log_term); } int uvEncodeMessage(const struct raft_message *message, uv_buf_t **bufs, unsigned *n_bufs) { uv_buf_t header; void *cursor; /* Figure out the length of the header for this request and allocate a * buffer for it. 
*/ header.len = RAFT_IO_UV__PREAMBLE_SIZE; switch (message->type) { case RAFT_IO_REQUEST_VOTE: header.len += sizeofRequestVote(); break; case RAFT_IO_REQUEST_VOTE_RESULT: header.len += sizeofRequestVoteResult(); break; case RAFT_IO_APPEND_ENTRIES: header.len += sizeofAppendEntries(&message->append_entries); break; case RAFT_IO_APPEND_ENTRIES_RESULT: header.len += sizeofAppendEntriesResult(); break; case RAFT_IO_INSTALL_SNAPSHOT: header.len += sizeofInstallSnapshot(&message->install_snapshot); break; case RAFT_IO_TIMEOUT_NOW: header.len += sizeofTimeoutNow(); break; default: return RAFT_MALFORMED; }; header.base = raft_malloc(header.len); if (header.base == NULL) { goto oom; } cursor = header.base; /* Encode the request preamble, with message type and message size. */ bytePut64(&cursor, message->type); bytePut64(&cursor, header.len - RAFT_IO_UV__PREAMBLE_SIZE); /* Encode the request header. */ switch (message->type) { case RAFT_IO_REQUEST_VOTE: encodeRequestVote(&message->request_vote, cursor); break; case RAFT_IO_REQUEST_VOTE_RESULT: encodeRequestVoteResult(&message->request_vote_result, cursor); break; case RAFT_IO_APPEND_ENTRIES: encodeAppendEntries(&message->append_entries, cursor); break; case RAFT_IO_APPEND_ENTRIES_RESULT: encodeAppendEntriesResult(&message->append_entries_result, cursor); break; case RAFT_IO_INSTALL_SNAPSHOT: encodeInstallSnapshot(&message->install_snapshot, cursor); break; case RAFT_IO_TIMEOUT_NOW: encodeTimeoutNow(&message->timeout_now, cursor); break; }; *n_bufs = 1; /* For AppendEntries request we also send the entries payload. */ if (message->type == RAFT_IO_APPEND_ENTRIES) { *n_bufs += message->append_entries.n_entries; } /* For InstallSnapshot request we also send the snapshot payload. 
*/ if (message->type == RAFT_IO_INSTALL_SNAPSHOT) { *n_bufs += 1; } *bufs = raft_calloc(*n_bufs, sizeof **bufs); if (*bufs == NULL) { goto oom_after_header_alloc; } (*bufs)[0] = header; if (message->type == RAFT_IO_APPEND_ENTRIES) { unsigned i; for (i = 0; i < message->append_entries.n_entries; i++) { const struct raft_entry *entry = &message->append_entries.entries[i]; (*bufs)[i + 1].base = entry->buf.base; (*bufs)[i + 1].len = entry->buf.len; } } if (message->type == RAFT_IO_INSTALL_SNAPSHOT) { (*bufs)[1].base = message->install_snapshot.data.base; (*bufs)[1].len = message->install_snapshot.data.len; } return 0; oom_after_header_alloc: raft_free(header.base); oom: return RAFT_NOMEM; } void uvEncodeBatchHeader(const struct raft_entry *entries, unsigned n, void *buf) { unsigned i; void *cursor = buf; /* Number of entries in the batch, little endian */ bytePut64(&cursor, n); for (i = 0; i < n; i++) { const struct raft_entry *entry = &entries[i]; /* Term in which the entry was created, little endian. */ bytePut64(&cursor, entry->term); /* Message type (Either RAFT_COMMAND or RAFT_CHANGE) */ bytePut8(&cursor, (uint8_t)entry->type); cursor = (uint8_t *)cursor + 3; /* Unused */ /* Size of the log entry data, little endian. */ bytePut32(&cursor, (uint32_t)entry->buf.len); } } static void decodeRequestVote(const uv_buf_t *buf, struct raft_request_vote *p) { const void *cursor; cursor = buf->base; p->term = byteGet64(&cursor); p->candidate_id = byteGet64(&cursor); p->last_log_index = byteGet64(&cursor); p->last_log_term = byteGet64(&cursor); /* Support for legacy request vote that doesn't have disrupt_leader. 
*/ if (buf->len == sizeofRequestVoteV1()) { p->disrupt_leader = false; p->pre_vote = false; } else { uint64_t flags = byteGet64(&cursor); p->disrupt_leader = (bool)(flags & 1 << 0); p->pre_vote = (bool)(flags & 1 << 1); } } static void decodeRequestVoteResult(const uv_buf_t *buf, struct raft_request_vote_result *p) { const void *cursor; cursor = buf->base; p->term = byteGet64(&cursor); p->vote_granted = byteGet64(&cursor); /* Support legacy RequestVoteResultV1 */ p->pre_vote = raft_tribool_unknown; if (buf->len > sizeofRequestVoteResultV1()) { uint64_t flags = byteGet64(&cursor); p->pre_vote = TO_RAFT_TRIBOOL(flags & (1 << 0)); } } int uvDecodeBatchHeader(const void *batch, struct raft_entry **entries, unsigned *n) { const void *cursor = batch; size_t i; int rv; *n = (unsigned)byteGet64(&cursor); if (*n == 0) { *entries = NULL; return 0; } *entries = raft_malloc(*n * sizeof **entries); if (*entries == NULL) { rv = RAFT_NOMEM; goto err; } for (i = 0; i < *n; i++) { struct raft_entry *entry = &(*entries)[i]; entry->term = byteGet64(&cursor); entry->type = byteGet8(&cursor); if (entry->type != RAFT_COMMAND && entry->type != RAFT_BARRIER && entry->type != RAFT_CHANGE) { rv = RAFT_MALFORMED; goto err_after_alloc; } cursor = (uint8_t *)cursor + 3; /* Unused */ /* Size of the log entry data, little endian. 
*/ entry->buf.len = byteGet32(&cursor); } return 0; err_after_alloc: raft_free(*entries); *entries = NULL; err: assert(rv != 0); return rv; } static int decodeAppendEntries(const uv_buf_t *buf, struct raft_append_entries *args) { const void *cursor; int rv; assert(buf != NULL); assert(args != NULL); cursor = buf->base; args->term = byteGet64(&cursor); args->prev_log_index = byteGet64(&cursor); args->prev_log_term = byteGet64(&cursor); args->leader_commit = byteGet64(&cursor); rv = uvDecodeBatchHeader(cursor, &args->entries, &args->n_entries); if (rv != 0) { return rv; } return 0; } static void decodeAppendEntriesResult(const uv_buf_t *buf, struct raft_append_entries_result *p) { const void *cursor; cursor = buf->base; p->term = byteGet64(&cursor); p->rejected = byteGet64(&cursor); p->last_log_index = byteGet64(&cursor); } static int decodeInstallSnapshot(const uv_buf_t *buf, struct raft_install_snapshot *args) { const void *cursor; struct raft_buffer conf; int rv; assert(buf != NULL); assert(args != NULL); cursor = buf->base; args->term = byteGet64(&cursor); args->last_index = byteGet64(&cursor); args->last_term = byteGet64(&cursor); args->conf_index = byteGet64(&cursor); conf.len = (size_t)byteGet64(&cursor); conf.base = (void *)cursor; configurationInit(&args->conf); rv = configurationDecode(&conf, &args->conf); if (rv != 0) { return rv; } cursor = (uint8_t *)cursor + conf.len; args->data.len = (size_t)byteGet64(&cursor); return 0; } static void decodeTimeoutNow(const uv_buf_t *buf, struct raft_timeout_now *p) { const void *cursor; cursor = buf->base; p->term = byteGet64(&cursor); p->last_log_index = byteGet64(&cursor); p->last_log_term = byteGet64(&cursor); } int uvDecodeMessage(const unsigned long type, const uv_buf_t *header, struct raft_message *message, size_t *payload_len) { unsigned i; int rv = 0; /* TODO: check type overflow */ message->type = (unsigned short)type; *payload_len = 0; /* Decode the header. 
*/ switch (type) { case RAFT_IO_REQUEST_VOTE: decodeRequestVote(header, &message->request_vote); break; case RAFT_IO_REQUEST_VOTE_RESULT: decodeRequestVoteResult(header, &message->request_vote_result); break; case RAFT_IO_APPEND_ENTRIES: rv = decodeAppendEntries(header, &message->append_entries); for (i = 0; i < message->append_entries.n_entries; i++) { *payload_len += message->append_entries.entries[i].buf.len; } break; case RAFT_IO_APPEND_ENTRIES_RESULT: decodeAppendEntriesResult(header, &message->append_entries_result); break; case RAFT_IO_INSTALL_SNAPSHOT: rv = decodeInstallSnapshot(header, &message->install_snapshot); *payload_len += message->install_snapshot.data.len; break; case RAFT_IO_TIMEOUT_NOW: decodeTimeoutNow(header, &message->timeout_now); break; default: rv = RAFT_IOERR; break; }; return rv; } void uvDecodeEntriesBatch(uint8_t *batch, size_t offset, struct raft_entry *entries, unsigned n) { uint8_t *cursor; size_t i; assert(batch != NULL); cursor = batch + offset; for (i = 0; i < n; i++) { struct raft_entry *entry = &entries[i]; entry->batch = batch; if (entry->buf.len == 0) { entry->buf.base = NULL; continue; } entry->buf.base = cursor; cursor = cursor + entry->buf.len; if (entry->buf.len % 8 != 0) { /* Add padding */ cursor = cursor + 8 - (entry->buf.len % 8); } } } int uvEncodeSnapshotMeta(const struct raft_configuration *conf, raft_index conf_index, struct raft_buffer *buf) { size_t conf_len; void *cursor; uint64_t *header; void *conf_buf; unsigned crc; conf_len = configurationEncodedSize(conf); buf->len = sizeof(*header) * 4; /* Format, CRC, configuration index/len */ buf->len += conf_len; buf->base = raft_malloc(buf->len); if (buf->base == NULL) { return RAFT_NOMEM; } header = buf->base; conf_buf = header + 4; configurationEncodeToBuf(conf, conf_buf); cursor = header; bytePut64(&cursor, UV__DISK_FORMAT); bytePut64(&cursor, 0); bytePut64(&cursor, conf_index); bytePut64(&cursor, conf_len); crc = byteCrc32(&header[2], sizeof(uint64_t) * 2, 0); /* 
Conf index/len */ crc = byteCrc32(conf_buf, conf_len, crc); cursor = &header[1]; bytePut64(&cursor, crc); return 0; } raft-0.11.3/src/uv_encoding.h000066400000000000000000000042301415614527300157770ustar00rootroot00000000000000/* Encoding routines for the the libuv-based @raft_io backend. */ #ifndef UV_ENCODING_H_ #define UV_ENCODING_H_ #include #include "../include/raft.h" /* Current disk format version. */ #define UV__DISK_FORMAT 1 int uvEncodeMessage(const struct raft_message *message, uv_buf_t **bufs, unsigned *n_bufs); int uvDecodeMessage(unsigned long type, const uv_buf_t *header, struct raft_message *message, size_t *payload_len); int uvDecodeBatchHeader(const void *batch, struct raft_entry **entries, unsigned *n); void uvDecodeEntriesBatch(uint8_t *batch, size_t offset, struct raft_entry *entries, unsigned n); /** * The layout of the memory pointed at by a @batch pointer is the following: * * [8 bytes] Number of entries in the batch, little endian. * [header1] Header data of the first entry of the batch. * [ ... ] More headers * [headerN] Header data of the last entry of the batch. * [data1 ] Payload data of the first entry of the batch. * [ ... ] More data * [dataN ] Payload data of the last entry of the batch. * * An entry header is 16-byte long and has the following layout: * * [8 bytes] Term in which the entry was created, little endian. * [1 byte ] Message type (Either RAFT_COMMAND or RAFT_CHANGE) * [3 bytes] Currently unused. * [4 bytes] Size of the log entry data, little endian. * * A payload data section for an entry is simply a sequence of bytes of * arbitrary lengths, possibly padded with extra bytes to reach 8-byte boundary * (which means that all entry data pointers are 8-byte aligned). */ size_t uvSizeofBatchHeader(size_t n); void uvEncodeBatchHeader(const struct raft_entry *entries, unsigned n, void *buf); /* Encode the content of a snapshot metadata file. 
*/ int uvEncodeSnapshotMeta(const struct raft_configuration *conf, raft_index conf_index, struct raft_buffer *buf); #endif /* UV_ENCODING_H_ */ raft-0.11.3/src/uv_finalize.c000066400000000000000000000111671415614527300160140ustar00rootroot00000000000000#include "assert.h" #include "heap.h" #include "queue.h" #include "uv.h" #include "uv_os.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) /* Metadata about an open segment not used anymore and that should be closed or * remove (if not written at all). */ struct uvDyingSegment { struct uv *uv; uvCounter counter; /* Segment counter */ size_t used; /* Number of used bytes */ raft_index first_index; /* Index of first entry */ raft_index last_index; /* Index of last entry */ int status; /* Status code of blocking syscalls */ queue queue; /* Link to finalize queue */ }; /* Run all blocking syscalls involved in closing a used open segment. * * An open segment is closed by truncating its length to the number of bytes * that were actually written into it and then renaming it. */ static void uvFinalizeWorkCb(uv_work_t *work) { struct uvDyingSegment *segment = work->data; struct uv *uv = segment->uv; char filename1[UV__FILENAME_LEN]; char filename2[UV__FILENAME_LEN]; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; sprintf(filename1, UV__OPEN_TEMPLATE, segment->counter); sprintf(filename2, UV__CLOSED_TEMPLATE, segment->first_index, segment->last_index); tracef("finalize %s into %s", filename1, filename2); /* If the segment hasn't actually been used (because the writer has been * closed or aborted before making any write), just remove it. 
*/ if (segment->used == 0) { rv = UvFsRemoveFile(uv->dir, filename1, errmsg); if (rv != 0) { goto err; } goto sync; } /* Truncate and rename the segment.*/ rv = UvFsTruncateAndRenameFile(uv->dir, segment->used, filename1, filename2, errmsg); if (rv != 0) { goto err; } sync: rv = UvFsSyncDir(uv->dir, errmsg); if (rv != 0) { goto err; } segment->status = 0; return; err: tracef("truncate segment %s: %s", filename1, errmsg); assert(rv != 0); segment->status = rv; } static int uvFinalizeStart(struct uvDyingSegment *segment); static void uvFinalizeAfterWorkCb(uv_work_t *work, int status) { struct uvDyingSegment *segment = work->data; struct uv *uv = segment->uv; queue *head; int rv; assert(status == 0); /* We don't cancel worker requests */ uv->finalize_work.data = NULL; if (segment->status != 0) { uv->errored = true; } HeapFree(segment); /* If we have no more dismissed segments to close, check if there's a * barrier to unblock or if we are done closing. */ if (QUEUE_IS_EMPTY(&uv->finalize_reqs)) { if (uv->barrier != NULL && UvBarrierReady(uv)) { uv->barrier->cb(uv->barrier); } uvMaybeFireCloseCb(uv); return; } /* Grab a new dismissed segment to close. */ head = QUEUE_HEAD(&uv->finalize_reqs); segment = QUEUE_DATA(head, struct uvDyingSegment, queue); QUEUE_REMOVE(&segment->queue); rv = uvFinalizeStart(segment); if (rv != 0) { HeapFree(segment); uv->errored = true; } } /* Start finalizing an open segment. 
*/ static int uvFinalizeStart(struct uvDyingSegment *segment) { struct uv *uv = segment->uv; int rv; assert(uv->finalize_work.data == NULL); assert(segment->counter > 0); uv->finalize_work.data = segment; rv = uv_queue_work(uv->loop, &uv->finalize_work, uvFinalizeWorkCb, uvFinalizeAfterWorkCb); if (rv != 0) { ErrMsgPrintf(uv->io->errmsg, "start to truncate segment file %llu: %s", segment->counter, uv_strerror(rv)); return RAFT_IOERR; } return 0; } int UvFinalize(struct uv *uv, unsigned long long counter, size_t used, raft_index first_index, raft_index last_index) { struct uvDyingSegment *segment; int rv; if (used > 0) { assert(first_index > 0); assert(last_index >= first_index); } segment = HeapMalloc(sizeof *segment); if (segment == NULL) { return RAFT_NOMEM; } segment->uv = uv; segment->counter = counter; segment->used = used; segment->first_index = first_index; segment->last_index = last_index; /* If we're already processing a segment, let's put the request in the queue * and wait. */ if (uv->finalize_work.data != NULL) { QUEUE_PUSH(&uv->finalize_reqs, &segment->queue); return 0; } rv = uvFinalizeStart(segment); if (rv != 0) { HeapFree(segment); return rv; } return 0; } #undef tracef raft-0.11.3/src/uv_fs.c000066400000000000000000000465221415614527300146260ustar00rootroot00000000000000#include "uv_fs.h" #include #include #include #include #include "assert.h" #include "compress.h" #include "err.h" #include "heap.h" #include "uv_os.h" int UvFsCheckDir(const char *dir, char *errmsg) { struct uv_fs_s req; int rv; /* Make sure we have a directory we can write into. 
*/ rv = uv_fs_stat(NULL, &req, dir, NULL); if (rv != 0) { switch (rv) { case UV_ENOENT: ErrMsgPrintf((char *)errmsg, "directory '%s' does not exist", dir); return RAFT_NOTFOUND; case UV_EACCES: ErrMsgPrintf((char *)errmsg, "can't access directory '%s'", dir); return RAFT_UNAUTHORIZED; case UV_ENOTDIR: ErrMsgPrintf((char *)errmsg, "path '%s' is not a directory", dir); return RAFT_INVALID; } ErrMsgPrintf((char *)errmsg, "can't stat '%s': %s", dir, uv_strerror(rv)); return RAFT_IOERR; } if (!(req.statbuf.st_mode & S_IFDIR)) { ErrMsgPrintf((char *)errmsg, "path '%s' is not a directory", dir); return RAFT_INVALID; } if (!(req.statbuf.st_mode & S_IWRITE)) { ErrMsgPrintf((char *)errmsg, "directory '%s' is not writable", dir); return RAFT_INVALID; } return 0; } int UvFsSyncDir(const char *dir, char *errmsg) { uv_file fd; int rv; rv = UvOsOpen(dir, UV_FS_O_RDONLY | UV_FS_O_DIRECTORY, 0, &fd); if (rv != 0) { UvOsErrMsg(errmsg, "open directory", rv); return RAFT_IOERR; } rv = UvOsFsync(fd); UvOsClose(fd); if (rv != 0) { UvOsErrMsg(errmsg, "fsync directory", rv); return RAFT_IOERR; } return 0; } int UvFsFileExists(const char *dir, const char *filename, bool *exists, char *errmsg) { uv_stat_t sb; char path[UV__PATH_SZ]; int rv; UvOsJoin(dir, filename, path); rv = UvOsStat(path, &sb); if (rv != 0) { if (rv == UV_ENOENT) { *exists = false; goto out; } UvOsErrMsg(errmsg, "stat", rv); return RAFT_IOERR; } *exists = true; out: return 0; } /* Get the size of the given file. */ int UvFsFileSize(const char *dir, const char *filename, off_t *size, char *errmsg) { uv_stat_t sb; char path[UV__PATH_SZ]; int rv; UvOsJoin(dir, filename, path); rv = UvOsStat(path, &sb); if (rv != 0) { UvOsErrMsg(errmsg, "stat", rv); return RAFT_IOERR; } *size = (off_t)sb.st_size; return 0; } int UvFsFileIsEmpty(const char *dir, const char *filename, bool *empty, char *errmsg) { off_t size; int rv; rv = UvFsFileSize(dir, filename, &size, errmsg); if (rv != 0) { return rv; } *empty = size == 0 ? 
true : false; return 0; } /* Open a file in a directory. */ static int uvFsOpenFile(const char *dir, const char *filename, int flags, int mode, uv_file *fd, char *errmsg) { char path[UV__PATH_SZ]; int rv; UvOsJoin(dir, filename, path); rv = UvOsOpen(path, flags, mode, fd); if (rv != 0) { UvOsErrMsg(errmsg, "open", rv); return RAFT_IOERR; } return 0; } int UvFsOpenFileForReading(const char *dir, const char *filename, uv_file *fd, char *errmsg) { char path[UV__PATH_SZ]; int flags = O_RDONLY; UvOsJoin(dir, filename, path); return uvFsOpenFile(dir, filename, flags, 0, fd, errmsg); } int UvFsAllocateFile(const char *dir, const char *filename, size_t size, uv_file *fd, char *errmsg) { char path[UV__PATH_SZ]; int flags = O_WRONLY | O_CREAT | O_EXCL; /* Common open flags */ int rv = 0; UvOsJoin(dir, filename, path); /* TODO: use RWF_DSYNC instead, if available. */ flags |= O_DSYNC; rv = uvFsOpenFile(dir, filename, flags, S_IRUSR | S_IWUSR, fd, errmsg); if (rv != 0) { goto err; } /* Allocate the desired size. 
*/ rv = UvOsFallocate(*fd, 0, (off_t)size); if (rv != 0) { switch (rv) { case UV_ENOSPC: ErrMsgPrintf(errmsg, "not enough space to allocate %zu bytes", size); rv = RAFT_NOSPACE; break; default: UvOsErrMsg(errmsg, "posix_allocate", rv); rv = RAFT_IOERR; break; } goto err_after_open; } return 0; err_after_open: UvOsClose(*fd); UvOsUnlink(path); err: assert(rv != 0); return rv; } static int uvFsWriteFile(const char *dir, const char *filename, int flags, struct raft_buffer *bufs, unsigned n_bufs, char *errmsg) { uv_file fd; int rv; size_t size; unsigned i; size = 0; for (i = 0; i < n_bufs; i++) { size += bufs[i].len; } rv = uvFsOpenFile(dir, filename, flags, S_IRUSR | S_IWUSR, &fd, errmsg); if (rv != 0) { goto err; } rv = UvOsWrite(fd, (const uv_buf_t *)bufs, n_bufs, 0); if (rv != (int)(size)) { if (rv < 0) { UvOsErrMsg(errmsg, "write", rv); } else { ErrMsgPrintf(errmsg, "short write: %d only bytes written", rv); } goto err_after_file_open; } rv = UvOsFsync(fd); if (rv != 0) { UvOsErrMsg(errmsg, "fsync", rv); goto err_after_file_open; } rv = UvOsClose(fd); if (rv != 0) { UvOsErrMsg(errmsg, "close", rv); goto err; } return 0; err_after_file_open: UvOsClose(fd); err: return rv; } int UvFsMakeFile(const char *dir, const char *filename, struct raft_buffer *bufs, unsigned n_bufs, char *errmsg) { int rv; char tmp_filename[UV__FILENAME_LEN+1] = {0}; char path[UV__PATH_SZ] = {0}; char tmp_path[UV__PATH_SZ] = {0}; /* Create a temp file with the given content * TODO as of libuv 1.34.0, use `uv_fs_mkstemp` */ size_t sz = sizeof(tmp_filename); rv = snprintf(tmp_filename, sz, TMP_FILE_FMT, filename); if (rv < 0 || rv >= (int) sz) { return rv; } int flags = UV_FS_O_WRONLY | UV_FS_O_CREAT | UV_FS_O_EXCL; rv = uvFsWriteFile(dir, tmp_filename, flags, bufs, n_bufs, errmsg); if (rv != 0) { goto err_after_tmp_create; } /* Check if the file exists */ bool exists = false; rv = UvFsFileExists(dir, filename, &exists, errmsg); if (rv != 0) { goto err_after_tmp_create; } if (exists) { rv = -1; 
goto err_after_tmp_create; } /* Rename the temp file. Remark that there is a race between the existence * check and the rename, there is no `renameat2` equivalent in libuv. * However, in the current implementation this should pose no problems.*/ UvOsJoin(dir, tmp_filename, tmp_path); UvOsJoin(dir, filename, path); rv = UvOsRename(tmp_path, path); if (rv != 0) { UvOsErrMsg(errmsg, "rename", rv); goto err_after_tmp_create; } rv = UvFsSyncDir(dir, errmsg); if (rv != 0) { char ignored[RAFT_ERRMSG_BUF_SIZE]; UvFsRemoveFile(dir, filename, ignored); return rv; } return 0; err_after_tmp_create: UvFsRemoveFile(dir, tmp_filename, errmsg); return rv; } int UvFsMakeOrOverwriteFile(const char *dir, const char *filename, const struct raft_buffer *buf, char *errmsg) { char path[UV__PATH_SZ]; int flags = UV_FS_O_WRONLY; int mode = 0; bool exists = true; uv_file fd; int rv; UvOsJoin(dir, filename, path); open: rv = UvOsOpen(path, flags, mode, &fd); if (rv != 0) { if (rv == UV_ENOENT && !(flags & UV_FS_O_CREAT)) { exists = false; flags |= UV_FS_O_CREAT; mode = S_IRUSR | S_IWUSR; goto open; } goto err; } rv = UvOsWrite(fd, (const uv_buf_t *)buf, 1, 0); if (rv != (int)(buf->len)) { if (rv < 0) { UvOsErrMsg(errmsg, "write", rv); } else { ErrMsgPrintf(errmsg, "short write: %d only bytes written", rv); } goto err_after_file_open; } if (exists) { rv = UvOsFdatasync(fd); if (rv != 0) { UvOsErrMsg(errmsg, "fsync", rv); goto err_after_file_open; } } else { rv = UvOsFsync(fd); if (rv != 0) { UvOsErrMsg(errmsg, "fsync", rv); goto err_after_file_open; } } rv = UvOsClose(fd); if (rv != 0) { UvOsErrMsg(errmsg, "close", rv); goto err; } if (!exists) { rv = UvFsSyncDir(dir, errmsg); if (rv != 0) { goto err; } } return 0; err_after_file_open: UvOsClose(fd); err: return RAFT_IOERR; } int UvFsReadInto(uv_file fd, struct raft_buffer *buf, char *errmsg) { int rv; /* TODO: use uv_fs_read() */ rv = (int)read(fd, buf->base, buf->len); if (rv == -1) { UvOsErrMsg(errmsg, "read", -errno); return RAFT_IOERR; } 
assert(rv >= 0); if ((size_t)rv < buf->len) { ErrMsgPrintf(errmsg, "short read: %d bytes instead of %zu", rv, buf->len); return RAFT_IOERR; } return 0; } int UvFsReadFile(const char *dir, const char *filename, struct raft_buffer *buf, char *errmsg) { uv_stat_t sb; char path[UV__PATH_SZ]; uv_file fd; int rv; UvOsJoin(dir, filename, path); rv = UvOsStat(path, &sb); if (rv != 0) { UvOsErrMsg(errmsg, "stat", rv); rv = RAFT_IOERR; goto err; } rv = uvFsOpenFile(dir, filename, O_RDONLY, 0, &fd, errmsg); if (rv != 0) { goto err; } buf->len = (size_t)sb.st_size; buf->base = HeapMalloc(buf->len); if (buf->base == NULL) { ErrMsgOom(errmsg); rv = RAFT_NOMEM; goto err_after_open; } rv = UvFsReadInto(fd, buf, errmsg); if (rv != 0) { goto err_after_buf_alloc; } UvOsClose(fd); return 0; err_after_buf_alloc: HeapFree(buf->base); err_after_open: UvOsClose(fd); err: return rv; } int UvFsReadFileInto(const char *dir, const char *filename, struct raft_buffer *buf, char *errmsg) { char path[UV__PATH_SZ]; uv_file fd; int rv; UvOsJoin(dir, filename, path); rv = uvFsOpenFile(dir, filename, O_RDONLY, 0, &fd, errmsg); if (rv != 0) { goto err; } rv = UvFsReadInto(fd, buf, errmsg); if (rv != 0) { goto err_after_open; } UvOsClose(fd); return 0; err_after_open: UvOsClose(fd); err: return rv; } int UvFsRemoveFile(const char *dir, const char *filename, char *errmsg) { char path[UV__PATH_SZ]; int rv; UvOsJoin(dir, filename, path); rv = UvOsUnlink(path); if (rv != 0) { UvOsErrMsg(errmsg, "unlink", rv); return RAFT_IOERR; } return 0; } int UvFsTruncateAndRenameFile(const char *dir, size_t size, const char *filename1, const char *filename2, char *errmsg) { char path1[UV__PATH_SZ]; char path2[UV__PATH_SZ]; uv_file fd; int rv; UvOsJoin(dir, filename1, path1); UvOsJoin(dir, filename2, path2); /* Truncate and rename. 
*/ rv = UvOsOpen(path1, UV_FS_O_RDWR, 0, &fd); if (rv != 0) { UvOsErrMsg(errmsg, "open", rv); goto err; } rv = UvOsTruncate(fd, (off_t)size); if (rv != 0) { UvOsErrMsg(errmsg, "truncate", rv); goto err_after_open; } rv = UvOsFsync(fd); if (rv != 0) { UvOsErrMsg(errmsg, "fsync", rv); goto err_after_open; } UvOsClose(fd); rv = UvOsRename(path1, path2); if (rv != 0) { UvOsErrMsg(errmsg, "rename", rv); goto err; } return 0; err_after_open: UvOsClose(fd); err: return RAFT_IOERR; } /* Check if direct I/O is possible on the given fd. */ static int probeDirectIO(int fd, size_t *size, char *errmsg) { struct statfs fs_info; /* To check the file system type. */ void *buf; /* Buffer to use for the probe write. */ int rv; rv = UvOsSetDirectIo(fd); if (rv != 0) { if (rv != UV_EINVAL) { /* UNTESTED: the parameters are ok, so this should never happen. */ UvOsErrMsg(errmsg, "fnctl", rv); return RAFT_IOERR; } rv = fstatfs(fd, &fs_info); if (rv == -1) { /* UNTESTED: in practice ENOMEM should be the only failure mode */ UvOsErrMsg(errmsg, "fstatfs", -errno); return RAFT_IOERR; } switch (fs_info.f_type) { case 0x01021994: /* TMPFS_MAGIC */ case 0x2fc12fc1: /* ZFS magic */ case 0x24051905: /* UBIFS Support magic */ *size = 0; return 0; default: /* UNTESTED: this is an unsupported file system. */ #if defined(__s390x__) ErrMsgPrintf(errmsg, "unsupported file system: %ux", fs_info.f_type); #else ErrMsgPrintf(errmsg, "unsupported file system: %zx", fs_info.f_type); #endif return RAFT_IOERR; } } /* Try to perform direct I/O, using various buffer size. */ *size = 4096; while (*size >= 512) { buf = raft_aligned_alloc(*size, *size); if (buf == NULL) { ErrMsgOom(errmsg); return RAFT_NOMEM; } memset(buf, 0, *size); rv = (int)write(fd, buf, *size); raft_aligned_free(*size, buf); if (rv > 0) { /* Since we fallocate'ed the file, we should never fail because of * lack of disk space, and all bytes should have been written. 
*/ assert(rv == (int)(*size)); return 0; } assert(rv == -1); if (errno != EIO && errno != EOPNOTSUPP) { /* UNTESTED: this should basically fail only because of disk errors, * since we allocated the file with posix_fallocate. */ /* FIXME: this is a workaround because shiftfs doesn't return EINVAL * in the fnctl call above, for example when the underlying fs is * ZFS. */ if (errno == EINVAL && *size == 4096) { *size = 0; return 0; } UvOsErrMsg(errmsg, "write", -errno); return RAFT_IOERR; } *size = *size / 2; } *size = 0; return 0; } #if defined(RWF_NOWAIT) /* Check if fully non-blocking async I/O is possible on the given fd. */ static int probeAsyncIO(int fd, size_t size, bool *ok, char *errmsg) { void *buf; /* Buffer to use for the probe write */ aio_context_t ctx = 0; /* KAIO context handle */ struct iocb iocb; /* KAIO request object */ struct iocb *iocbs = &iocb; /* Because the io_submit() API sucks */ struct io_event event; /* KAIO response object */ int n_events; int rv; /* Setup the KAIO context handle */ rv = UvOsIoSetup(1, &ctx); if (rv != 0) { UvOsErrMsg(errmsg, "io_setup", rv); /* UNTESTED: in practice this should fail only with ENOMEM */ return RAFT_IOERR; } /* Allocate the write buffer */ buf = raft_aligned_alloc(size, size); if (buf == NULL) { ErrMsgOom(errmsg); return RAFT_NOMEM; } memset(buf, 0, size); /* Prepare the KAIO request object */ memset(&iocb, 0, sizeof iocb); iocb.aio_lio_opcode = IOCB_CMD_PWRITE; *((void **)(&iocb.aio_buf)) = buf; iocb.aio_nbytes = size; iocb.aio_offset = 0; iocb.aio_fildes = (uint32_t)fd; iocb.aio_reqprio = 0; iocb.aio_rw_flags |= RWF_NOWAIT | RWF_DSYNC; /* Submit the KAIO request */ rv = UvOsIoSubmit(ctx, 1, &iocbs); if (rv != 0) { /* UNTESTED: in practice this should fail only with ENOMEM */ raft_aligned_free(size, buf); UvOsIoDestroy(ctx); /* On ZFS 0.8 this is not properly supported yet. Also, when running on * older kernels a binary compiled on a kernel with RWF_NOWAIT support, * we might get EINVAL. 
*/ if (errno == EOPNOTSUPP || errno == EINVAL) { *ok = false; return 0; } UvOsErrMsg(errmsg, "io_submit", rv); return RAFT_IOERR; } /* Fetch the response: will block until done. */ n_events = UvOsIoGetevents(ctx, 1, 1, &event, NULL); assert(n_events == 1); if (n_events != 1) { /* UNTESTED */ UvOsErrMsg(errmsg, "UvOsIoGetevents", n_events); return RAFT_IOERR; } /* Release the write buffer. */ raft_aligned_free(size, buf); /* Release the KAIO context handle. */ rv = UvOsIoDestroy(ctx); if (rv != 0) { UvOsErrMsg(errmsg, "io_destroy", rv); return RAFT_IOERR; } if (event.res > 0) { assert(event.res == (int)size); *ok = true; } else { /* UNTESTED: this should basically fail only because of disk errors, * since we allocated the file with posix_fallocate and the block size * is supposed to be correct. */ *ok = false; } return 0; } #endif /* RWF_NOWAIT */ #define UV__FS_PROBE_FILE ".probe" #define UV__FS_PROBE_FILE_SIZE 4096 int UvFsProbeCapabilities(const char *dir, size_t *direct, bool *async, char *errmsg) { int fd; /* File descriptor of the probe file */ int rv; char ignored[RAFT_ERRMSG_BUF_SIZE]; /* Create a temporary probe file. */ UvFsRemoveFile(dir, UV__FS_PROBE_FILE, ignored); rv = UvFsAllocateFile(dir, UV__FS_PROBE_FILE, UV__FS_PROBE_FILE_SIZE, &fd, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "create I/O capabilities probe file"); goto err; } UvFsRemoveFile(dir, UV__FS_PROBE_FILE, ignored); /* Check if we can use direct I/O. */ rv = probeDirectIO(fd, direct, errmsg); if (rv != 0) { goto err_after_file_open; } #if !defined(RWF_NOWAIT) /* We can't have fully async I/O, since io_submit might potentially block. */ *async = false; #else /* If direct I/O is not possible, we can't perform fully asynchronous * I/O, because io_submit might potentially block. 
*/ if (*direct == 0) { *async = false; goto out; } rv = probeAsyncIO(fd, *direct, async, errmsg); if (rv != 0) { goto err_after_file_open; } #endif /* RWF_NOWAIT */ #if defined(RWF_NOWAIT) out: #endif /* RWF_NOWAIT */ close(fd); return 0; err_after_file_open: close(fd); err: return rv; } raft-0.11.3/src/uv_fs.h000066400000000000000000000076351415614527300146350ustar00rootroot00000000000000/* File system related utilities. */ #ifndef UV_FS_H_ #define UV_FS_H_ #include #include "../include/raft.h" #include "err.h" #define TMP_FILE_PREFIX "tmp-" #define TMP_FILE_FMT TMP_FILE_PREFIX "%s" /* Check that the given directory can be used. */ int UvFsCheckDir(const char *dir, char *errmsg); /* Sync the given directory by calling fsync(). */ int UvFsSyncDir(const char *dir, char *errmsg); /* Check whether a the given file exists. */ int UvFsFileExists(const char *dir, const char *filename, bool *exists, char *errmsg); /* Get the size of the given file. */ int UvFsFileSize(const char *dir, const char *filename, off_t *size, char *errmsg); /* Check whether the given file in the given directory is empty. */ int UvFsFileIsEmpty(const char *dir, const char *filename, bool *empty, char *errmsg); /* Create the given file in the given directory and allocate the given size to * it, returning its file descriptor. The file must not exist yet. */ int UvFsAllocateFile(const char *dir, const char *filename, size_t size, uv_file *fd, char *errmsg); /* Create a file and write the given content into it. */ int UvFsMakeFile(const char *dir, const char *filename, struct raft_buffer *bufs, unsigned n_bufs, char *errmsg); /* Create or overwrite a file. * * If the file does not exists yet, it gets created, the given content written * to it, and then fully persisted to disk by fsync()'ing the file and the * dir. * * If the file already exists, it gets overwritten. 
The assumption is that the * file size will stay the same and its content will change, so only fdatasync() * will be used */ int UvFsMakeOrOverwriteFile(const char *dir, const char *filename, const struct raft_buffer *buf, char *errmsg); /* Open a file for reading. */ int UvFsOpenFileForReading(const char *dir, const char *filename, uv_file *fd, char *errmsg); /* Read exactly buf->len bytes from the given file descriptor into buf->base. Fail if less than buf->len bytes are read. */ int UvFsReadInto(uv_file fd, struct raft_buffer *buf, char *errmsg); /* Read all the content of the given file. */ int UvFsReadFile(const char *dir, const char *filename, struct raft_buffer *buf, char *errmsg); /* Read exactly buf->len bytes from the given file into buf->base. Fail if less * than buf->len bytes are read. */ int UvFsReadFileInto(const char *dir, const char *filename, struct raft_buffer *buf, char *errmsg); /* Synchronously remove a file, calling the unlink() system call. */ int UvFsRemoveFile(const char *dir, const char *filename, char *errmsg); /* Synchronously truncate a file to the given size and then rename it. */ int UvFsTruncateAndRenameFile(const char *dir, size_t size, const char *filename1, const char *filename2, char *errmsg); /* Return information about the I/O capabilities of the underlying file * system. * * The @direct parameter will be set to zero if direct I/O is not possible, or * to the block size to use for direct I/O otherwise. * * The @async parameter will be set to true if fully asynchronous I/O is * possible using the KAIO API. 
*/ int UvFsProbeCapabilities(const char *dir, size_t *direct, bool *async, char *errmsg); #endif /* UV_FS_H_ */ raft-0.11.3/src/uv_ip.c000066400000000000000000000012001415614527300146060ustar00rootroot00000000000000#include #include #include #include "../include/raft.h" #include "uv_ip.h" int uvIpParse(const char *address, struct sockaddr_in *addr) { char buf[256]; size_t n; char *host; char *port; char *colon = ":"; int rv; /* TODO: turn this poor man parsing into proper one */ n = sizeof(buf)-1; strncpy(buf, address, n); buf[n] = '\0'; host = strtok(buf, colon); port = strtok(NULL, ":"); if (port == NULL) { port = "8080"; } rv = uv_ip4_addr(host, atoi(port), addr); if (rv != 0) { return RAFT_NOCONNECTION; } return 0; } raft-0.11.3/src/uv_ip.h000066400000000000000000000003631415614527300146240ustar00rootroot00000000000000/* IP-related utils. */ #ifndef UV_IP_H_ #define UV_IP_H_ #include /* Split @address into @host and @port and populate @addr accordingly. */ int uvIpParse(const char *address, struct sockaddr_in *addr); #endif /* UV_IP_H */ raft-0.11.3/src/uv_list.c000066400000000000000000000056721415614527300151720ustar00rootroot00000000000000#include #include "assert.h" #include "uv.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) static const char *uvListIgnored[] = {".", "..", "metadata1", "metadata2", NULL}; /* Return true if the given filename should be ignored. 
*/ static bool uvListShouldIgnore(const char *filename) { const char **cursor = uvListIgnored; bool result = false; if (strlen(filename) >= UV__FILENAME_LEN) { return true; } while (*cursor != NULL) { if (strcmp(filename, *cursor) == 0) { result = true; break; } cursor++; } return result; } int UvList(struct uv *uv, struct uvSnapshotInfo *snapshots[], size_t *n_snapshots, struct uvSegmentInfo *segments[], size_t *n_segments, char *errmsg) { struct uv_fs_s req; struct uv_dirent_s entry; int n; int i; int rv; int rv2; n = uv_fs_scandir(NULL, &req, uv->dir, 0, NULL); if (n < 0) { ErrMsgPrintf(errmsg, "scan data directory: %s", uv_strerror(n)); return RAFT_IOERR; } *snapshots = NULL; *n_snapshots = 0; *segments = NULL; *n_segments = 0; rv = 0; for (i = 0; i < n; i++) { const char *filename; bool appended; rv = uv_fs_scandir_next(&req, &entry); assert(rv == 0); /* Can't fail in libuv */ filename = entry.name; /* If an error occurred while processing a preceeding entry or if we * know that this is not a segment filename, just free it and skip to * the next one. */ if (rv != 0 || uvListShouldIgnore(filename)) { if (rv == 0) { tracef("ignore %s", filename); } continue; } /* Append to the snapshot list if it's a snapshot metadata filename and * a valid associated snapshot file exists. 
*/ rv = UvSnapshotInfoAppendIfMatch(uv, filename, snapshots, n_snapshots, &appended); if (appended || rv != 0) { if (rv == 0) { tracef("snapshot %s", filename); } continue; } /* Append to the segment list if it's a segment filename */ rv = uvSegmentInfoAppendIfMatch(entry.name, segments, n_segments, &appended); if (appended || rv != 0) { if (rv == 0) { tracef("segment %s", filename); } continue; } tracef("ignore %s", filename); } rv2 = uv_fs_scandir_next(&req, &entry); assert(rv2 == UV_EOF); if (rv != 0 && *segments != NULL) { raft_free(*segments); } if (*snapshots != NULL) { UvSnapshotSort(*snapshots, *n_snapshots); } if (*segments != NULL) { uvSegmentSort(*segments, *n_segments); } return rv; } #undef tracef raft-0.11.3/src/uv_metadata.c000066400000000000000000000136251415614527300157740ustar00rootroot00000000000000#include "assert.h" #include "byte.h" #include "uv.h" #include "uv_encoding.h" /* We have metadata1 and metadata2. */ #define METADATA_FILENAME_PREFIX "metadata" #define METADATA_FILENAME_SIZE (sizeof(METADATA_FILENAME_PREFIX) + 2) /* Format, version, term, vote */ #define METADATA_CONTENT_SIZE (8 * 4) /* Encode the content of a metadata file. */ static void uvMetadataEncode(const struct uvMetadata *metadata, void *buf) { void *cursor = buf; bytePut64(&cursor, UV__DISK_FORMAT); bytePut64(&cursor, metadata->version); bytePut64(&cursor, metadata->term); bytePut64(&cursor, metadata->voted_for); } /* Decode the content of a metadata file. 
*/ static int uvMetadataDecode(const void *buf, struct uvMetadata *metadata, char *errmsg) { const void *cursor = buf; uint64_t format; format = byteGet64(&cursor); if (format != UV__DISK_FORMAT) { ErrMsgPrintf(errmsg, "bad format version %ju", format); return RAFT_MALFORMED; } metadata->version = byteGet64(&cursor); metadata->term = byteGet64(&cursor); metadata->voted_for = byteGet64(&cursor); /* Coherence checks that values make sense */ if (metadata->version == 0) { ErrMsgPrintf(errmsg, "version is set to zero"); return RAFT_CORRUPT; } return 0; } /* Render the filename of the metadata file with index @n. */ static void uvMetadataFilename(const unsigned short n, char *filename) { sprintf(filename, METADATA_FILENAME_PREFIX "%d", n); } /* Read the n'th metadata file (with n equal to 1 or 2) and decode the content * of the file, populating the given metadata buffer accordingly. */ static int uvMetadataLoadN(const char *dir, const unsigned short n, struct uvMetadata *metadata, char *errmsg) { char filename[METADATA_FILENAME_SIZE]; /* Filename of the metadata file */ uint8_t content[METADATA_CONTENT_SIZE]; /* Content of metadata file */ off_t size; struct raft_buffer buf; bool exists; int rv; assert(n == 1 || n == 2); /* Render the metadata path */ uvMetadataFilename(n, filename); rv = UvFsFileExists(dir, filename, &exists, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "check if %s exists", filename); return rv; } memset(metadata, 0, sizeof *metadata); /* If the file does not exist, just return. */ if (!exists) { return 0; } /* If the file exists but has less bytes than expected assume that the * server crashed while writing this metadata file, and pretend it has not * been written at all. If it has more file than expected, return an * error. 
*/ rv = UvFsFileSize(dir, filename, &size, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "check size of %s", filename); return rv; } if (size != sizeof content) { if ((size_t)size < sizeof content) { rv = UvFsRemoveFile(dir, filename, errmsg); if (rv != 0) { return rv; } return 0; } ErrMsgPrintf(errmsg, "%s has size %ju instead of %zu", filename, size, sizeof content); return RAFT_CORRUPT; } /* Read the content of the metadata file. */ buf.base = content; buf.len = sizeof content; rv = UvFsReadFileInto(dir, filename, &buf, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "read content of %s", filename); return rv; }; /* Decode the content of the metadata file. */ rv = uvMetadataDecode(content, metadata, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "decode content of %s", filename); return rv; } return 0; } int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, char *errmsg) { struct uvMetadata metadata1; struct uvMetadata metadata2; int rv; /* Read the two metadata files (if available). */ rv = uvMetadataLoadN(dir, 1, &metadata1, errmsg); if (rv != 0) { return rv; } rv = uvMetadataLoadN(dir, 2, &metadata2, errmsg); if (rv != 0) { return rv; } /* Check the versions. */ if (metadata1.version == 0 && metadata2.version == 0) { /* Neither metadata file exists: have a brand new server. */ metadata->version = 0; metadata->term = 0; metadata->voted_for = 0; } else if (metadata1.version == metadata2.version) { /* The two metadata files can't have the same version. */ ErrMsgPrintf(errmsg, "metadata1 and metadata2 are both at version %llu", metadata1.version); return RAFT_CORRUPT; } else { /* Pick the metadata with the grater version. */ if (metadata1.version > metadata2.version) { *metadata = metadata1; } else { *metadata = metadata2; } } return 0; } /* Return the metadata file index associated with the given version. */ static unsigned short uvMetadataFileIndex(unsigned long long version) { return version % 2 == 1 ? 
1 : 2; } int uvMetadataStore(struct uv *uv, const struct uvMetadata *metadata) { char filename[METADATA_FILENAME_SIZE]; /* Filename of the metadata file */ uint8_t content[METADATA_CONTENT_SIZE]; /* Content of metadata file */ struct raft_buffer buf; unsigned short n; int rv; assert(metadata->version > 0); /* Encode the given metadata. */ uvMetadataEncode(metadata, content); /* Render the metadata file name. */ n = uvMetadataFileIndex(metadata->version); uvMetadataFilename(n, filename); /* Write the metadata file, creating it if it does not exist. */ buf.base = content; buf.len = sizeof content; rv = UvFsMakeOrOverwriteFile(uv->dir, filename, &buf, uv->io->errmsg); if (rv != 0) { ErrMsgWrapf(uv->io->errmsg, "persist %s", filename); return rv; } return 0; } raft-0.11.3/src/uv_os.c000066400000000000000000000114501415614527300146270ustar00rootroot00000000000000#include "uv_os.h" #include #include #include #include #include #include #include #include #include #include #include #include "assert.h" #include "err.h" #include "syscall.h" /* Default permissions when creating a directory. */ #define DEFAULT_DIR_PERM 0700 int UvOsOpen(const char *path, int flags, int mode, uv_file *fd) { struct uv_fs_s req; int rv; rv = uv_fs_open(NULL, &req, path, flags, mode, NULL); if (rv < 0) { return rv; } *fd = rv; return 0; } int UvOsClose(uv_file fd) { struct uv_fs_s req; return uv_fs_close(NULL, &req, fd, NULL); } /* Emulate fallocate(). Mostly taken from glibc's implementation. 
*/ static int uvOsFallocateEmulation(int fd, off_t offset, off_t len) { ssize_t increment; struct statfs f; int rv; rv = fstatfs(fd, &f); if (rv != 0) { return errno; } if (f.f_bsize == 0) { increment = 512; } else if (f.f_bsize < 4096) { increment = f.f_bsize; } else { increment = 4096; } for (offset += (len - 1) % increment; len > 0; offset += increment) { len -= increment; rv = (int)pwrite(fd, "", 1, offset); if (rv != 1) return errno; } return 0; } int UvOsFallocate(uv_file fd, off_t offset, off_t len) { int rv; rv = posix_fallocate(fd, offset, len); if (rv != 0) { /* From the manual page: * * posix_fallocate() returns zero on success, or an error number on * failure. Note that errno is not set. */ if (rv != EOPNOTSUPP) { return -rv; } /* This might be a libc implementation (e.g. musl) that doesn't * implement a transparent fallback if fallocate() is not supported * by the underlying file system. */ rv = uvOsFallocateEmulation(fd, offset, len); if (rv != 0) { return -EOPNOTSUPP; } } return 0; } int UvOsTruncate(uv_file fd, off_t offset) { struct uv_fs_s req; return uv_fs_ftruncate(NULL, &req, fd, offset, NULL); } int UvOsFsync(uv_file fd) { struct uv_fs_s req; return uv_fs_fsync(NULL, &req, fd, NULL); } int UvOsFdatasync(uv_file fd) { struct uv_fs_s req; return uv_fs_fdatasync(NULL, &req, fd, NULL); } int UvOsStat(const char *path, uv_stat_t *sb) { struct uv_fs_s req; int rv; rv = uv_fs_stat(NULL, &req, path, NULL); if (rv != 0) { return rv; } memcpy(sb, &req.statbuf, sizeof *sb); return 0; } int UvOsWrite(uv_file fd, const uv_buf_t bufs[], unsigned int nbufs, int64_t offset) { struct uv_fs_s req; return uv_fs_write(NULL, &req, fd, bufs, nbufs, offset, NULL); } int UvOsUnlink(const char *path) { struct uv_fs_s req; return uv_fs_unlink(NULL, &req, path, NULL); } int UvOsRename(const char *path1, const char *path2) { struct uv_fs_s req; return uv_fs_rename(NULL, &req, path1, path2, NULL); } void UvOsJoin(const char *dir, const char *filename, char *path) { 
assert(UV__DIR_HAS_VALID_LEN(dir)); assert(UV__FILENAME_HAS_VALID_LEN(filename)); strcpy(path, dir); strcat(path, "/"); strcat(path, filename); } int UvOsIoSetup(unsigned nr, aio_context_t *ctxp) { int rv; rv = io_setup(nr, ctxp); if (rv == -1) { return -errno; } return 0; } int UvOsIoDestroy(aio_context_t ctx) { int rv; rv = io_destroy(ctx); if (rv == -1) { return -errno; } return 0; } int UvOsIoSubmit(aio_context_t ctx, long nr, struct iocb **iocbpp) { int rv; rv = io_submit(ctx, nr, iocbpp); if (rv == -1) { return -errno; } assert(rv == nr); /* TODO: can something else be returned? */ return 0; } int UvOsIoGetevents(aio_context_t ctx, long min_nr, long max_nr, struct io_event *events, struct timespec *timeout) { int rv; do { rv = io_getevents(ctx, min_nr, max_nr, events, timeout); } while (rv == -1 && errno == EINTR); if (rv == -1) { return -errno; } assert(rv >= min_nr); assert(rv <= max_nr); return rv; } int UvOsEventfd(unsigned int initval, int flags) { int rv; /* At the moment only UV_FS_O_NONBLOCK is supported */ assert(flags == UV_FS_O_NONBLOCK); flags = EFD_NONBLOCK | EFD_CLOEXEC; rv = eventfd(initval, flags); if (rv == -1) { return -errno; } return rv; } int UvOsSetDirectIo(uv_file fd) { int flags; /* Current fcntl flags */ int rv; flags = fcntl(fd, F_GETFL); rv = fcntl(fd, F_SETFL, flags | UV_FS_O_DIRECT); if (rv == -1) { return -errno; } return 0; } raft-0.11.3/src/uv_os.h000066400000000000000000000063761415614527300146470ustar00rootroot00000000000000/* Operating system related utilities. 
*/ #ifndef UV_OS_H_ #define UV_OS_H_ #include #include #include #include #include /* For backward compat with older libuv */ #if !defined(UV_FS_O_RDONLY) #define UV_FS_O_RDONLY O_RDONLY #endif #if !defined(UV_FS_O_DIRECTORY) #define UV_FS_O_DIRECTORY O_DIRECTORY #endif #if !defined(UV_FS_O_WRONLY) #define UV_FS_O_WRONLY O_WRONLY #endif #if !defined(UV_FS_O_RDWR) #define UV_FS_O_RDWR O_RDWR #endif #if !defined(UV_FS_O_CREAT) #define UV_FS_O_CREAT O_CREAT #endif #if !defined(UV_FS_O_TRUNC) #define UV_FS_O_TRUNC O_TRUNC #endif #if !defined(UV_FS_O_EXCL) #define UV_FS_O_EXCL O_EXCL #endif #if !defined(UV_FS_O_DIRECT) #define UV_FS_O_DIRECT O_DIRECT #endif #if !defined(UV_FS_O_NONBLOCK) #define UV_FS_O_NONBLOCK O_NONBLOCK #endif /* Maximum size of a full file system path string. */ #define UV__PATH_SZ 1024 /* Maximum length of a filename string. */ #define UV__FILENAME_LEN 128 /* Length of path separator. */ #define UV__SEP_LEN 1 /* strlen("/") */ /* True if STR's length is at most LEN. */ #define LEN_AT_MOST_(STR, LEN) (strnlen(STR, LEN + 1) <= LEN) /* Maximum length of a directory path string. */ #define UV__DIR_LEN (UV__PATH_SZ - UV__SEP_LEN - UV__FILENAME_LEN - 1) /* True if the given DIR string has at most UV__DIR_LEN chars. */ #define UV__DIR_HAS_VALID_LEN(DIR) LEN_AT_MOST_(DIR, UV__DIR_LEN) /* True if the given FILENAME string has at most UV__FILENAME_LEN chars. */ #define UV__FILENAME_HAS_VALID_LEN(FILENAME) \ LEN_AT_MOST_(FILENAME, UV__FILENAME_LEN) /* Portable open() */ int UvOsOpen(const char *path, int flags, int mode, uv_file *fd); /* Portable close() */ int UvOsClose(uv_file fd); /* TODO: figure a portable abstraction. 
*/ int UvOsFallocate(uv_file fd, off_t offset, off_t len); /* Portable truncate() */ int UvOsTruncate(uv_file fd, off_t offset); /* Portable fsync() */ int UvOsFsync(uv_file fd); /* Portable fdatasync() */ int UvOsFdatasync(uv_file fd); /* Portable stat() */ int UvOsStat(const char *path, uv_stat_t *sb); /* Portable write() */ int UvOsWrite(uv_file fd, const uv_buf_t bufs[], unsigned int nbufs, int64_t offset); /* Portable unlink() */ int UvOsUnlink(const char *path); /* Portable rename() */ int UvOsRename(const char *path1, const char *path2); /* Join dir and filename into a full OS path. */ void UvOsJoin(const char *dir, const char *filename, char *path); /* TODO: figure a portable abstraction. */ int UvOsIoSetup(unsigned nr, aio_context_t *ctxp); int UvOsIoDestroy(aio_context_t ctx); int UvOsIoSubmit(aio_context_t ctx, long nr, struct iocb **iocbpp); int UvOsIoGetevents(aio_context_t ctx, long min_nr, long max_nr, struct io_event *events, struct timespec *timeout); int UvOsEventfd(unsigned int initval, int flags); int UvOsSetDirectIo(uv_file fd); /* Format an error message caused by a failed system call or stdlib function. */ #define UvOsErrMsg(ERRMSG, SYSCALL, ERRNUM) \ { \ ErrMsgPrintf(ERRMSG, "%s", uv_strerror(ERRNUM)); \ ErrMsgWrapf(ERRMSG, SYSCALL); \ } #endif /* UV_OS_H_ */ raft-0.11.3/src/uv_prepare.c000066400000000000000000000226631415614527300156540ustar00rootroot00000000000000#include #include #include "assert.h" #include "heap.h" #include "uv.h" #include "uv_os.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) /* The happy path for UvPrepare is: * * - If there is an unused open segment available, return its fd and counter * immediately. * * - Otherwise, wait for the creation of a new open segment to complete, * possibly kicking off the creation logic if no segment is being created * currently. 
* * Possible failure modes are: * * - The create file request fails, in that case we fail all pending prepare * requests and we mark the uv instance as errored. * * On close: * * - Cancel all pending prepare requests. * - Remove unused prepared open segments. * - Wait for any pending internal segment creation and then discard the newly * created segment. */ /* Number of open segments that we try to keep ready for writing. */ #define UV__TARGET_POOL_SIZE 2 /* An open segment being prepared or sitting in the pool */ struct uvIdleSegment { struct uv *uv; /* Open segment file */ size_t size; /* Segment size */ struct uv_work_s work; /* To execute logic in the threadpool */ int status; /* Result of threadpool callback */ char errmsg[RAFT_ERRMSG_BUF_SIZE]; /* Error of threadpool callback */ unsigned long long counter; /* Segment counter */ char filename[UV__FILENAME_LEN]; /* Filename of the segment */ uv_file fd; /* File descriptor of prepared file */ queue queue; /* Pool */ }; static void uvPrepareWorkCb(uv_work_t *work) { struct uvIdleSegment *segment = work->data; struct uv *uv = segment->uv; int rv; rv = UvFsAllocateFile(uv->dir, segment->filename, segment->size, &segment->fd, segment->errmsg); if (rv != 0) { goto err; } rv = UvFsSyncDir(uv->dir, segment->errmsg); if (rv != 0) { goto err_after_allocate; } segment->status = 0; return; err_after_allocate: UvOsClose(segment->fd); err: assert(rv != 0); segment->status = rv; return; } /* Flush all pending requests, invoking their callbacks with the given * status. */ static void uvPrepareFinishAllRequests(struct uv *uv, int status) { while (!QUEUE_IS_EMPTY(&uv->prepare_reqs)) { queue *head; struct uvPrepare *req; head = QUEUE_HEAD(&uv->prepare_reqs); req = QUEUE_DATA(head, struct uvPrepare, queue); QUEUE_REMOVE(&req->queue); req->cb(req, status); } } /* Pop the oldest prepared segment in the pool and return its fd and counter * through the given pointers. 
*/
/* Hand the oldest pooled segment to the caller. The pool MUST be non-empty
 * (callers check before calling). Ownership of the fd passes to the caller;
 * the pool entry itself is freed here. */
static void uvPrepareConsume(struct uv *uv, uv_file *fd, uvCounter *counter)
{
    queue *head;
    struct uvIdleSegment *segment;
    /* Pop a segment from the pool. */
    head = QUEUE_HEAD(&uv->prepare_pool);
    segment = QUEUE_DATA(head, struct uvIdleSegment, queue);
    assert(segment->fd >= 0);
    QUEUE_REMOVE(&segment->queue);
    *fd = segment->fd;
    *counter = segment->counter;
    HeapFree(segment);
}

/* Finish the oldest pending prepare request using the next available prepared
 * segment. */
static void uvPrepareFinishOldestRequest(struct uv *uv)
{
    queue *head;
    struct uvPrepare *req;
    assert(!uv->closing);
    assert(!QUEUE_IS_EMPTY(&uv->prepare_reqs));
    assert(!QUEUE_IS_EMPTY(&uv->prepare_pool));
    /* Pop the head of the prepare requests queue. */
    head = QUEUE_HEAD(&uv->prepare_reqs);
    req = QUEUE_DATA(head, struct uvPrepare, queue);
    QUEUE_REMOVE(&req->queue);
    /* Finish the request */
    uvPrepareConsume(uv, &req->fd, &req->counter);
    req->cb(req, 0);
}

/* Return the number of ready prepared open segments in the pool. */
static unsigned uvPrepareCount(struct uv *uv)
{
    queue *head;
    unsigned n;
    n = 0;
    QUEUE_FOREACH(head, &uv->prepare_pool)
    {
        n++;
    }
    return n;
}

static void uvPrepareAfterWorkCb(uv_work_t *work, int status);

/* Start creating a new segment file. Only one creation may be in flight at a
 * time (tracked via uv->prepare_inflight); the actual file creation happens on
 * the libuv thread pool via uvPrepareWorkCb. */
static int uvPrepareStart(struct uv *uv)
{
    struct uvIdleSegment *segment;
    int rv;
    assert(uv->prepare_inflight == NULL);
    assert(uvPrepareCount(uv) < UV__TARGET_POOL_SIZE);
    segment = HeapMalloc(sizeof *segment);
    if (segment == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    memset(segment, 0, sizeof *segment);
    segment->uv = uv;
    segment->counter = uv->prepare_next_counter;
    segment->work.data = segment;
    segment->fd = -1;
    segment->size = uv->block_size * uvSegmentBlocks(uv);
    sprintf(segment->filename, UV__OPEN_TEMPLATE, segment->counter);
    tracef("create open segment %s", segment->filename);
    rv = uv_queue_work(uv->loop, &segment->work, uvPrepareWorkCb,
                       uvPrepareAfterWorkCb);
    if (rv != 0) {
        /* UNTESTED: with the current libuv implementation this can't fail.
         */
        tracef("can't create segment %s: %s", segment->filename,
               uv_strerror(rv));
        rv = RAFT_IOERR;
        goto err_after_segment_alloc;
    }
    uv->prepare_inflight = segment;
    uv->prepare_next_counter++;
    return 0;

err_after_segment_alloc:
    HeapFree(segment);
err:
    assert(rv != 0);
    return rv;
}

/* Completion callback for the thread-pool work queued by uvPrepareStart().
 * Runs on the loop thread, so it may safely touch the pool/request queues. */
static void uvPrepareAfterWorkCb(uv_work_t *work, int status)
{
    struct uvIdleSegment *segment = work->data;
    struct uv *uv = segment->uv;
    int rv;
    assert(status == 0);
    uv->prepare_inflight = NULL; /* Reset the creation in-progress marker. */
    /* If we are closing, let's discard the segment. All pending requests have
     * already being fired with RAFT_CANCELED. */
    if (uv->closing) {
        assert(QUEUE_IS_EMPTY(&uv->prepare_pool));
        assert(QUEUE_IS_EMPTY(&uv->prepare_reqs));
        if (segment->status == 0) {
            char errmsg[RAFT_ERRMSG_BUF_SIZE];
            UvOsClose(segment->fd);
            UvFsRemoveFile(uv->dir, segment->filename, errmsg);
        }
        tracef("canceled creation of %s", segment->filename);
        HeapFree(segment);
        uvMaybeFireCloseCb(uv);
        return;
    }
    /* If the request has failed, mark all pending requests as failed and don't
     * try to create any further segment.
     *
     * Note that if there's no pending request, we don't set the error message,
     * to avoid overwriting previous errors. */
    if (segment->status != 0) {
        if (!QUEUE_IS_EMPTY(&uv->prepare_reqs)) {
            ErrMsgTransferf(segment->errmsg, uv->io->errmsg,
                            "create segment %s", segment->filename);
            uvPrepareFinishAllRequests(uv, segment->status);
        }
        uv->errored = true;
        HeapFree(segment);
        return;
    }
    assert(segment->fd >= 0);
    tracef("completed creation of %s", segment->filename);
    QUEUE_PUSH(&uv->prepare_pool, &segment->queue);
    /* Let's process any pending request. */
    if (!QUEUE_IS_EMPTY(&uv->prepare_reqs)) {
        uvPrepareFinishOldestRequest(uv);
    }
    /* If we are already creating a segment, we're done. */
    if (uv->prepare_inflight != NULL) {
        return;
    }
    /* If we have already enough prepared open segments, we're done. There can't
     * be any outstanding prepare requests, since if the request queue was not
     * empty, we would have called uvPrepareFinishOldestRequest() above, thus
     * reducing the pool size and making it smaller than the target size. */
    if (uvPrepareCount(uv) >= UV__TARGET_POOL_SIZE) {
        assert(QUEUE_IS_EMPTY(&uv->prepare_reqs));
        return;
    }
    /* Let's start preparing a new open segment. */
    rv = uvPrepareStart(uv);
    if (rv != 0) {
        uvPrepareFinishAllRequests(uv, rv);
        uv->errored = true;
    }
}

/* Discard a prepared open segment, closing its file descriptor and removing the
 * underlying file. */
static void uvPrepareDiscard(struct uv *uv, uv_file fd, uvCounter counter)
{
    char errmsg[RAFT_ERRMSG_BUF_SIZE];
    char filename[UV__FILENAME_LEN];
    assert(counter > 0);
    assert(fd >= 0);
    sprintf(filename, UV__OPEN_TEMPLATE, counter);
    UvOsClose(fd);
    UvFsRemoveFile(uv->dir, filename, errmsg);
}

/* Acquire a prepared open segment: either synchronously from the pool (fd and
 * counter are filled in and 0 is returned), or asynchronously by queuing req
 * (fd is set to -1 and cb fires later). In both cases a background creation is
 * kicked off if none is in flight, to keep the pool topped up. */
int UvPrepare(struct uv *uv,
              uv_file *fd,
              uvCounter *counter,
              struct uvPrepare *req,
              uvPrepareCb cb)
{
    int rv;
    assert(!uv->closing);
    if (!QUEUE_IS_EMPTY(&uv->prepare_pool)) {
        uvPrepareConsume(uv, fd, counter);
        goto maybe_start;
    }
    *fd = -1;
    *counter = 0;
    req->cb = cb;
    QUEUE_PUSH(&uv->prepare_reqs, &req->queue);

maybe_start:
    /* If we are already creating a segment, let's just wait. */
    if (uv->prepare_inflight != NULL) {
        return 0;
    }
    rv = uvPrepareStart(uv);
    if (rv != 0) {
        goto err;
    }
    return 0;

err:
    /* *fd != -1 means we consumed a segment from the pool above: give it
     * back to the filesystem; otherwise undo the request enqueue. */
    if (*fd != -1) {
        uvPrepareDiscard(uv, *fd, *counter);
    } else {
        QUEUE_REMOVE(&req->queue);
    }
    assert(rv != 0);
    return rv;
}

void UvPrepareClose(struct uv *uv)
{
    assert(uv->closing);
    /* Cancel all pending prepare requests. */
    uvPrepareFinishAllRequests(uv, RAFT_CANCELED);
    /* Remove any unused prepared segment.
 */
    while (!QUEUE_IS_EMPTY(&uv->prepare_pool)) {
        queue *head;
        struct uvIdleSegment *segment;
        head = QUEUE_HEAD(&uv->prepare_pool);
        segment = QUEUE_DATA(head, struct uvIdleSegment, queue);
        QUEUE_REMOVE(&segment->queue);
        uvPrepareDiscard(uv, segment->fd, segment->counter);
        HeapFree(segment);
    }
}

#undef tracef
raft-0.11.3/src/uv_recv.c000066400000000000000000000300421415614527300151430ustar00rootroot00000000000000
/* NOTE(review): the line above is tar-archive residue from extraction; it is
 * not C code. The first #include below lost its <...> argument during
 * extraction (angle-bracket text stripped) — likely a system header such as
 * <string.h>, since this file uses strlen/strcpy/memset. Confirm against the
 * original archive before building. */
#include
#include "../include/raft/uv.h"
#include "assert.h"
#include "byte.h"
#include "configuration.h"
#include "err.h"
#include "heap.h"
#include "uv.h"
#include "uv_encoding.h"

#define tracef(...) Tracef(uv->tracer, __VA_ARGS__)

/* The happy path for a receiving an RPC message is:
 *
 * - When a peer server successfully establishes a new connection with us, the
 *   transport invokes our accept callback.
 *
 * - A new server object is created and added to the servers array. It starts
 *   reading from the stream handle of the new connection.
 *
 * - The RPC message preamble is read, which contains the message type and the
 *   message length.
 *
 * - The RPC message header is read, whose content depends on the message type.
 *
 * - Optionally, the RPC message payload is read (for AppendEntries requests).
 *
 * - The recv callback passed to raft_io->start() gets fired with the received
 *   message.
 *
 * Possible failure modes are:
 *
 * - The peer server disconnects. In this case the read callback will fire with
 *   UV_EOF, we'll close the stream handle and then release all memory
 *   associated with the server object.
 *
 * - The peer server sends us invalid data. In this case we close the stream
 *   handle and act like above.
*/

/* Per-connection receive state for one remote peer. */
struct uvServer
{
    struct uv *uv;               /* libuv I/O implementation object */
    raft_id id;                  /* ID of the remote server */
    char *address;               /* Address of the other server */
    struct uv_stream_s *stream;  /* Connection handle */
    uv_buf_t buf;                /* Sliding buffer for reading incoming data */
    uint64_t preamble[2];        /* Static buffer with the request preamble */
    uv_buf_t header;             /* Dynamic buffer with the request header */
    uv_buf_t payload;            /* Dynamic buffer with the request payload */
    struct raft_message message; /* The message being received */
    queue queue;                 /* Servers queue */
};

/* Initialize a new server object for reading requests from an incoming
 * connection. Takes ownership of @stream; copies @address. Returns RAFT_NOMEM
 * if the address copy can't be allocated. */
static int uvServerInit(struct uvServer *s,
                        struct uv *uv,
                        const raft_id id,
                        const char *address,
                        struct uv_stream_s *stream)
{
    s->uv = uv;
    s->id = id;
    s->address = HeapMalloc(strlen(address) + 1);
    if (s->address == NULL) {
        return RAFT_NOMEM;
    }
    strcpy(s->address, address);
    s->stream = stream;
    s->stream->data = s;
    s->buf.base = NULL;
    s->buf.len = 0;
    s->preamble[0] = 0;
    s->preamble[1] = 0;
    s->header.base = NULL;
    s->header.len = 0;
    s->message.type = 0;
    s->payload.base = NULL;
    s->payload.len = 0;
    QUEUE_PUSH(&uv->servers, &s->queue);
    return 0;
}

/* Release all resources held by a server object, including any partially
 * received message state. */
static void uvServerDestroy(struct uvServer *s)
{
    QUEUE_REMOVE(&s->queue);

    if (s->header.base != NULL) {
        /* This means we were interrupted while reading the header. */
        HeapFree(s->header.base);
        switch (s->message.type) {
            case RAFT_IO_APPEND_ENTRIES:
                HeapFree(s->message.append_entries.entries);
                break;
            case RAFT_IO_INSTALL_SNAPSHOT:
                configurationClose(&s->message.install_snapshot.conf);
                break;
        }
    }
    if (s->payload.base != NULL) {
        /* This means we were interrupted while reading the payload. */
        HeapFree(s->payload.base);
    }
    HeapFree(s->address);
    HeapFree(s->stream);
}

/* Invoked to initialize the read buffer for the next asynchronous read on the
 * socket. */
static void uvServerAllocCb(uv_handle_t *handle,
                            size_t suggested_size,
                            uv_buf_t *buf)
{
    struct uvServer *s = handle->data;
    (void)suggested_size;

    assert(!s->uv->closing);

    /* If this is the first read of the preamble, or of the header, or of the
     * payload, then initialize the read buffer, according to the chunk of data
     * that we expect next. */
    if (s->buf.len == 0) {
        assert(s->buf.base == NULL);

        /* Check if we expect the preamble. */
        if (s->header.len == 0) {
            assert(s->preamble[0] == 0);
            assert(s->preamble[1] == 0);
            s->buf.base = (char *)s->preamble;
            s->buf.len = sizeof s->preamble;
            goto out;
        }

        /* Check if we expect the header. */
        if (s->payload.len == 0) {
            assert(s->header.len > 0);
            assert(s->header.base == NULL);
            s->header.base = HeapMalloc(s->header.len);
            if (s->header.base == NULL) {
                /* Setting all buffer fields to 0 will make read_cb fail with
                 * ENOBUFS. */
                memset(buf, 0, sizeof *buf);
                return;
            }
            s->buf = s->header;
            goto out;
        }

        /* If we get here we should be expecting the payload. */
        assert(s->payload.len > 0);
        s->payload.base = HeapMalloc(s->payload.len);
        if (s->payload.base == NULL) {
            /* Setting all buffer fields to 0 will make read_cb fail with
             * ENOBUFS. */
            memset(buf, 0, sizeof *buf);
            return;
        }

        s->buf = s->payload;
    }

out:
    *buf = s->buf;
}

/* Callback invoked afer the stream handle of this server connection has been
 * closed. We can release all resources associated with the server object. */
static void uvServerStreamCloseCb(uv_handle_t *handle)
{
    struct uvServer *s = handle->data;
    struct uv *uv = s->uv;
    uvServerDestroy(s);
    HeapFree(s);
    uvMaybeFireCloseCb(uv);
}

/* Move the server to the aborting queue and start closing its stream; final
 * cleanup happens in uvServerStreamCloseCb. */
static void uvServerAbort(struct uvServer *s)
{
    struct uv *uv = s->uv;
    QUEUE_REMOVE(&s->queue);
    QUEUE_PUSH(&uv->aborting, &s->queue);
    uv_close((struct uv_handle_s *)s->stream, uvServerStreamCloseCb);
}

/* Invoke the receive callback. */
static void uvFireRecvCb(struct uvServer *s)
{
    s->uv->recv_cb(s->uv->io, &s->message);

    /* Reset our state as we'll start reading a new message.
 We don't need to
     * release the payload buffer, since ownership was transferred to the
     * user. */
    memset(s->preamble, 0, sizeof s->preamble);
    raft_free(s->header.base);
    s->message.type = 0;
    s->header.base = NULL;
    s->header.len = 0;
    s->payload.base = NULL;
    s->payload.len = 0;
}

/* Callback invoked when data has been read from the socket. Drives the
 * preamble -> header -> payload state machine; the current expected chunk is
 * tracked through s->buf (set up by uvServerAllocCb). */
static void uvServerReadCb(uv_stream_t *stream,
                           ssize_t nread,
                           const uv_buf_t *buf)
{
    struct uvServer *s = stream->data;
    int rv;
    (void)buf;

    assert(!s->uv->closing);

    /* If the read was successful, let's check if we have received all the data
     * we expected. */
    if (nread > 0) {
        size_t n = (size_t)nread;

        /* We shouldn't have read more data than the pending amount. */
        assert(n <= s->buf.len);

        /* Advance the read window */
        s->buf.base += n;
        s->buf.len -= n;

        /* If there's more data to read in order to fill the current
         * read buffer, just return, we'll be invoked again. */
        if (s->buf.len > 0) {
            return;
        }

        if (s->header.len == 0) {
            /* If the header buffer is not set, it means that we've just
             * completed reading the preamble. */
            assert(s->header.base == NULL);

            s->header.len = (size_t)byteFlip64(s->preamble[1]);

            /* The length of the header must be greater than zero. */
            if (s->header.len == 0) {
                Tracef(s->uv->tracer, "message has zero length");
                goto abort;
            }
        } else if (s->payload.len == 0) {
            /* If the payload buffer is not set, it means we just completed
             * reading the message header. */
            uint64_t type;

            assert(s->header.base != NULL);

            type = byteFlip64(s->preamble[0]);
            assert(type > 0);

            rv = uvDecodeMessage((unsigned long)type, &s->header, &s->message,
                                 &s->payload.len);
            if (rv != 0) {
                Tracef(s->uv->tracer, "decode message: %s",
                       errCodeToString(rv));
                goto abort;
            }

            s->message.server_id = s->id;
            s->message.server_address = s->address;

            /* If the message has no payload, we're done. */
            if (s->payload.len == 0) {
                uvFireRecvCb(s);
            }
        } else {
            /* If we get here it means that we've just completed reading the
             * payload. TODO: avoid converting from uv_buf_t */
            struct raft_buffer payload;
            assert(s->payload.base != NULL);
            assert(s->payload.len > 0);

            switch (s->message.type) {
                case RAFT_IO_APPEND_ENTRIES:
                    payload.base = s->payload.base;
                    payload.len = s->payload.len;
                    uvDecodeEntriesBatch(payload.base, 0,
                                         s->message.append_entries.entries,
                                         s->message.append_entries.n_entries);
                    break;
                case RAFT_IO_INSTALL_SNAPSHOT:
                    s->message.install_snapshot.data.base = s->payload.base;
                    break;
                default:
                    /* We should never have read a payload in the first place */
                    assert(0);
            }

            uvFireRecvCb(s);
        }

        /* Mark that we're done with this chunk. When the alloc callback will
         * trigger again it will notice that it needs to change the read
         * buffer. */
        assert(s->buf.len == 0);
        s->buf.base = NULL;

        return;
    }

    /* The if nread>0 condition above should always exit the function with a
     * goto abort or a return. */
    assert(nread <= 0);

    if (nread == 0) {
        /* Empty read */
        return;
    }
    if (nread != UV_EOF) {
        Tracef(s->uv->tracer, "receive data: %s", uv_strerror((int)nread));
    }

abort:
    uvServerAbort(s);
}

/* Start reading incoming requests. */
static int uvServerStart(struct uvServer *s)
{
    int rv;
    rv = uv_read_start(s->stream, uvServerAllocCb, uvServerReadCb);
    if (rv != 0) {
        Tracef(s->uv->tracer, "start reading: %s", uv_strerror(rv));
        return RAFT_IOERR;
    }
    return 0;
}

/* Create and register a server object for a newly accepted connection, then
 * start reading from it. On failure the stream is NOT closed here; the caller
 * (uvRecvAcceptCb) is responsible for that. */
static int uvAddServer(struct uv *uv,
                       raft_id id,
                       const char *address,
                       struct uv_stream_s *stream)
{
    struct uvServer *server;
    int rv;

    /* Initialize the new connection */
    server = HeapMalloc(sizeof *server);
    if (server == NULL) {
        rv = RAFT_NOMEM;
        goto err;
    }
    rv = uvServerInit(server, uv, id, address, stream);
    if (rv != 0) {
        goto err_after_server_alloc;
    }

    /* This will start reading requests.
 */
    rv = uvServerStart(server);
    if (rv != 0) {
        goto err_after_init_server;
    }

    return 0;

err_after_init_server:
    uvServerDestroy(server);
err_after_server_alloc:
    raft_free(server);
err:
    assert(rv != 0);
    return rv;
}

/* Transport accept callback: wrap the new connection in a server object. On
 * failure the stream handle is closed and freed here. */
static void uvRecvAcceptCb(struct raft_uv_transport *transport,
                           raft_id id,
                           const char *address,
                           struct uv_stream_s *stream)
{
    struct uv *uv = transport->data;
    int rv;
    assert(!uv->closing);
    rv = uvAddServer(uv, id, address, stream);
    if (rv != 0) {
        tracef("add server: %s", errCodeToString(rv));
        uv_close((struct uv_handle_s *)stream, (uv_close_cb)HeapFree);
    }
}

/* Start listening for incoming connections on the transport. */
int UvRecvStart(struct uv *uv)
{
    int rv;
    rv = uv->transport->listen(uv->transport, uvRecvAcceptCb);
    if (rv != 0) {
        return rv;
    }
    return 0;
}

/* Abort every active server connection; their resources are released in the
 * stream close callbacks. */
void UvRecvClose(struct uv *uv)
{
    while (!QUEUE_IS_EMPTY(&uv->servers)) {
        queue *head;
        struct uvServer *server;
        head = QUEUE_HEAD(&uv->servers);
        server = QUEUE_DATA(head, struct uvServer, queue);
        uvServerAbort(server);
    }
}

#undef tracef
raft-0.11.3/src/uv_segment.c000066400000000000000000000746321415614527300156610ustar00rootroot00000000000000
/* NOTE(review): the line above is tar-archive residue from extraction; it is
 * not C code. The five bare #include lines below lost their <...> arguments
 * during extraction (angle-bracket text stripped) — they were system headers.
 * Confirm against the original archive before building. */
#include
#include
#include
#include
#include
#include "array.h"
#include "assert.h"
#include "byte.h"
#include "configuration.h"
#include "entry.h"
#include "heap.h"
#include "uv.h"
#include "uv_encoding.h"

#define tracef(...) Tracef(uv->tracer, __VA_ARGS__)

/* Check if the given filename matches the one of a closed segment (xxx-yyy), or
 * of an open segment (open-xxx), and fill the given info structure if so.
 *
 * Return true if the filename matched, false otherwise.
 */
static bool uvSegmentInfoMatch(const char *filename, struct uvSegmentInfo *info)
{
    int consumed;
    int matched;
    size_t n;
    size_t filename_len = strnlen(filename, UV__FILENAME_LEN + 1);

    assert(filename_len < UV__FILENAME_LEN);

    /* %n records how many characters were consumed: a match only counts if
     * the template covered the whole filename (no trailing junk). */
    matched = sscanf(filename, UV__CLOSED_TEMPLATE "%n", &info->first_index,
                     &info->end_index, &consumed);
    if (matched == 2 && consumed == (int)filename_len) {
        info->is_open = false;
        goto match;
    }
    matched =
        sscanf(filename, UV__OPEN_TEMPLATE "%n", &info->counter, &consumed);
    if (matched == 1 && consumed == (int)filename_len) {
        info->is_open = true;
        goto match;
    }
    return false;

match:
    /* Bounded copy with explicit NUL termination. */
    n = sizeof(info->filename) - 1;
    strncpy(info->filename, filename, n);
    info->filename[n] = '\0';
    return true;
}

/* If @filename looks like a segment file, append its parsed info to the
 * dynamic @infos array; *appended reports whether it matched. */
int uvSegmentInfoAppendIfMatch(const char *filename,
                               struct uvSegmentInfo *infos[],
                               size_t *n_infos,
                               bool *appended)
{
    struct uvSegmentInfo info;
    bool matched;
    int rv;

    /* Check if it's a closed or open filename */
    matched = uvSegmentInfoMatch(filename, &info);

    /* If this is neither a closed or an open segment, return. */
    if (!matched) {
        *appended = false;
        return 0;
    }

    ARRAY__APPEND(struct uvSegmentInfo, info, infos, n_infos, rv);
    if (rv == -1) {
        return RAFT_NOMEM;
    }

    *appended = true;
    return 0;
}

/* Compare two segments to decide which one is more recent. */
static int uvSegmentInfoCompare(const void *p1, const void *p2)
{
    struct uvSegmentInfo *s1 = (struct uvSegmentInfo *)p1;
    struct uvSegmentInfo *s2 = (struct uvSegmentInfo *)p2;

    /* Closed segments are less recent than open segments. */
    if (s1->is_open && !s2->is_open) {
        return 1;
    }
    if (!s1->is_open && s2->is_open) {
        return -1;
    }

    /* If the segments are open, compare the counter. */
    if (s1->is_open) {
        assert(s2->is_open);
        assert(s1->counter != s2->counter);
        return s1->counter < s2->counter ? -1 : 1;
    }

    /* If the segments are closed, compare the first index. The index ranges
     * must be disjoint. */
    if (s2->first_index > s1->end_index) {
        return -1;
    }
    return 1;
}

/* Sort segments from least to most recent (closed first, then open). */
void uvSegmentSort(struct uvSegmentInfo *infos, size_t n_infos)
{
    qsort(infos, n_infos, sizeof *infos, uvSegmentInfoCompare);
}

/* Delete closed segments that fall entirely before the retention window of
 * @trailing entries ending at @last_index. Open segments and any newer closed
 * segments are kept. @segments must be sorted (oldest first). */
int uvSegmentKeepTrailing(struct uv *uv,
                          struct uvSegmentInfo *segments,
                          size_t n,
                          raft_index last_index,
                          size_t trailing,
                          char *errmsg)
{
    raft_index retain_index;
    size_t i;
    int rv;

    assert(last_index > 0);
    assert(n > 0);

    if (last_index <= trailing) {
        return 0;
    }

    /* Index of the oldest entry we want to retain. */
    retain_index = last_index - trailing + 1;

    for (i = 0; i < n; i++) {
        struct uvSegmentInfo *segment = &segments[i];
        if (segment->is_open) {
            break;
        }
        if (trailing == 0 || segment->end_index < retain_index) {
            rv = UvFsRemoveFile(uv->dir, segment->filename, errmsg);
            if (rv != 0) {
                ErrMsgWrapf(errmsg, "delete closed segment %s",
                            segment->filename);
                return rv;
            }
        } else {
            break;
        }
    }

    return 0;
}

/* Read a segment file and return its format version. On success the caller
 * owns buf->base and must free it. */
static int uvReadSegmentFile(struct uv *uv,
                             const char *filename,
                             struct raft_buffer *buf,
                             uint64_t *format)
{
    char errmsg[RAFT_ERRMSG_BUF_SIZE];
    int rv;
    rv = UvFsReadFile(uv->dir, filename, buf, errmsg);
    if (rv != 0) {
        ErrMsgTransfer(errmsg, uv->io->errmsg, "read file");
        return RAFT_IOERR;
    }
    if (buf->len < 8) {
        ErrMsgPrintf(uv->io->errmsg, "file has only %zu bytes", buf->len);
        HeapFree(buf->base);
        return RAFT_IOERR;
    }
    *format = byteFlip64(*(uint64_t *)buf->base);
    return 0;
}

/* Consume the content buffer, returning a pointer to the current position and
 * advancing the offset of n bytes. Return an error if not enough bytes are
 * available.
 */
static int uvConsumeContent(const struct raft_buffer *content,
                            size_t *offset,
                            size_t n,
                            void **data,
                            char *errmsg)
{
    if (*offset + n > content->len) {
        size_t remaining = content->len - *offset;
        ErrMsgPrintf(errmsg, "short read: %zu bytes instead of %zu", remaining,
                     n);
        return RAFT_IOERR;
    }
    if (data != NULL) {
        *data = &((uint8_t *)content->base)[*offset];
    }
    *offset += n;
    return 0;
}

/* Load a single batch of entries from a segment.
 *
 * Set @last to #true if the loaded batch is the last one.
 *
 * On-disk batch layout: two CRC32 checksums (header, data), then the batch
 * header (starting with the 64-bit entry count), then the entries' data. The
 * returned entries point INTO @content (no copy); on failure @offset is
 * restored to its value at entry so callers can report the batch position. */
static int uvLoadEntriesBatch(struct uv *uv,
                              const struct raft_buffer *content,
                              struct raft_entry **entries,
                              unsigned *n_entries,
                              size_t *offset, /* Offset of last batch */
                              bool *last)
{
    void *checksums;           /* CRC32 checksums */
    void *batch;               /* Entries batch */
    unsigned long n;           /* Number of entries in the batch */
    unsigned max_n;            /* Maximum number of entries we expect */
    unsigned i;                /* Iterate through the entries */
    struct raft_buffer header; /* Batch header */
    struct raft_buffer data;   /* Batch data */
    uint32_t crc1;             /* Target checksum */
    uint32_t crc2;             /* Actual checksum */
    char errmsg[RAFT_ERRMSG_BUF_SIZE];
    size_t start;
    int rv;

    /* Save the current offset, to provide more information when logging. */
    start = *offset;

    /* Read the checksums. */
    rv = uvConsumeContent(content, offset, sizeof(uint32_t) * 2, &checksums,
                          errmsg);
    if (rv != 0) {
        ErrMsgTransfer(errmsg, uv->io->errmsg, "read preamble");
        return RAFT_IOERR;
    }

    /* Read the first 8 bytes of the batch, which contains the number of entries
     * in the batch. */
    rv = uvConsumeContent(content, offset, sizeof(uint64_t), &batch, errmsg);
    if (rv != 0) {
        ErrMsgTransfer(errmsg, uv->io->errmsg, "read preamble");
        return RAFT_IOERR;
    }

    n = (size_t)byteFlip64(*(uint64_t *)batch);
    if (n == 0) {
        ErrMsgPrintf(uv->io->errmsg, "entries count in preamble is zero");
        rv = RAFT_CORRUPT;
        goto err;
    }

    /* Very optimistic upper bound of the number of entries we should
     * expect. This is mainly a protection against allocating too much
     * memory. Each entry will consume at least 4 words (for term, type, size
     * and payload). */
    max_n = UV__MAX_SEGMENT_SIZE / (sizeof(uint64_t) * 4);

    if (n > max_n) {
        ErrMsgPrintf(uv->io->errmsg, "entries count %lu in preamble is too high",
                     n);
        rv = RAFT_CORRUPT;
        goto err;
    }

    /* Consume the batch header, excluding the first 8 bytes containing the
     * number of entries, which we have already read. */
    header.len = uvSizeofBatchHeader(n);
    header.base = batch;

    rv = uvConsumeContent(content, offset,
                          uvSizeofBatchHeader(n) - sizeof(uint64_t), NULL,
                          errmsg);
    if (rv != 0) {
        ErrMsgTransfer(errmsg, uv->io->errmsg, "read header");
        rv = RAFT_IOERR;
        goto err;
    }

    /* Check batch header integrity. */
    crc1 = byteFlip32(((uint32_t *)checksums)[0]);
    crc2 = byteCrc32(header.base, header.len, 0);
    if (crc1 != crc2) {
        ErrMsgPrintf(uv->io->errmsg, "header checksum mismatch");
        rv = RAFT_CORRUPT;
        goto err;
    }

    /* Decode the batch header, allocating the entries array. */
    rv = uvDecodeBatchHeader(header.base, entries, n_entries);
    if (rv != 0) {
        goto err;
    }

    /* Calculate the total size of the batch data */
    data.len = 0;
    for (i = 0; i < n; i++) {
        data.len += (*entries)[i].buf.len;
    }
    data.base = (uint8_t *)content->base + *offset;

    /* Consume the batch data */
    rv = uvConsumeContent(content, offset, data.len, NULL, errmsg);
    if (rv != 0) {
        ErrMsgTransfer(errmsg, uv->io->errmsg, "read data");
        rv = RAFT_IOERR;
        goto err_after_header_decode;
    }

    /* Check batch data integrity. */
    crc1 = byteFlip32(((uint32_t *)checksums)[1]);
    crc2 = byteCrc32(data.base, data.len, 0);
    if (crc1 != crc2) {
        ErrMsgPrintf(uv->io->errmsg, "data checksum mismatch");
        rv = RAFT_CORRUPT;
        goto err_after_header_decode;
    }

    uvDecodeEntriesBatch(content->base, *offset - data.len, *entries,
                         *n_entries);

    *last = *offset == content->len;

    return 0;

err_after_header_decode:
    HeapFree(*entries);
err:
    *entries = NULL;
    *n_entries = 0;
    assert(rv != 0);
    *offset = start;
    return rv;
}

/* Append to @entries2 all entries in @entries1.
 */
static int extendEntries(const struct raft_entry *entries1,
                         const size_t n_entries1,
                         struct raft_entry **entries2,
                         size_t *n_entries2)
{
    struct raft_entry *entries; /* To re-allocate the given entries */
    size_t i;

    entries =
        raft_realloc(*entries2, (*n_entries2 + n_entries1) * sizeof *entries);
    if (entries == NULL) {
        return RAFT_NOMEM;
    }
    for (i = 0; i < n_entries1; i++) {
        entries[*n_entries2 + i] = entries1[i];
    }

    *entries2 = entries;
    *n_entries2 += n_entries1;

    return 0;
}

/* Load all entries of a closed segment, verifying that the number of entries
 * matches the index range encoded in the filename. On success the file content
 * buffer is retained: the entries reference it via their batch pointers. */
int uvSegmentLoadClosed(struct uv *uv,
                        struct uvSegmentInfo *info,
                        struct raft_entry *entries[],
                        size_t *n)
{
    bool empty;                     /* Whether the file is empty */
    uint64_t format;                /* Format version */
    bool last;                      /* Whether the last batch was reached */
    struct raft_entry *tmp_entries; /* Entries in current batch */
    struct raft_buffer buf;         /* Segment file content */
    size_t offset;                  /* Content read cursor */
    unsigned tmp_n;                 /* Number of entries in current batch */
    unsigned expected_n;            /* Number of entries that we expect to find */
    int i;
    char errmsg[RAFT_ERRMSG_BUF_SIZE];
    int rv;

    expected_n = (unsigned)(info->end_index - info->first_index + 1);

    /* If the segment is completely empty, just bail out. */
    rv = UvFsFileIsEmpty(uv->dir, info->filename, &empty, errmsg);
    if (rv != 0) {
        tracef("stat %s: %s", info->filename, errmsg);
        rv = RAFT_IOERR;
        goto err;
    }
    if (empty) {
        ErrMsgPrintf(uv->io->errmsg, "file is empty");
        rv = RAFT_CORRUPT;
        goto err;
    }

    /* Open the segment file. */
    rv = uvReadSegmentFile(uv, info->filename, &buf, &format);
    if (rv != 0) {
        goto err;
    }
    if (format != UV__DISK_FORMAT) {
        ErrMsgPrintf(uv->io->errmsg, "unexpected format version %ju", format);
        rv = RAFT_CORRUPT;
        goto err_after_read;
    }

    /* Load all batches in the segment. */
    *entries = NULL;
    *n = 0;

    last = false;
    offset = sizeof format;
    for (i = 1; !last; i++) {
        rv = uvLoadEntriesBatch(uv, &buf, &tmp_entries, &tmp_n, &offset, &last);
        if (rv != 0) {
            ErrMsgWrapf(uv->io->errmsg, "entries batch %u starting at byte %zu",
                        i, offset);
            goto err_after_read;
        }
        rv = extendEntries(tmp_entries, tmp_n, entries, n);
        if (rv != 0) {
            goto err_after_batch_load;
        }
        /* Only the array is freed: the entries still reference buf via their
         * batch pointers. */
        raft_free(tmp_entries);
    }

    if (*n != expected_n) {
        ErrMsgPrintf(uv->io->errmsg, "found %zu entries (expected %u)", *n,
                     expected_n);
        rv = RAFT_CORRUPT;
        goto err_after_extend_entries;
    }

    assert(i > 1);  /* At least one batch was loaded. */
    assert(*n > 0); /* At least one entry was loaded. */

    return 0;

err_after_batch_load:
    raft_free(tmp_entries[0].batch);
    raft_free(tmp_entries);

err_after_extend_entries:
    if (*entries != NULL) {
        HeapFree(*entries);
    }

err_after_read:
    HeapFree(buf.base);

err:
    assert(rv != 0);
    return rv;
}

/* Check if the content of the segment file contains all zeros from the current
 * offset onward. */
static bool uvContentHasOnlyTrailingZeros(const struct raft_buffer *buf,
                                          size_t offset)
{
    size_t i;
    for (i = offset; i < buf->len; i++) {
        if (((char *)buf->base)[i] != 0) {
            return false;
        }
    }
    return true;
}

/* Load all entries contained in an open segment.
 */
static int uvLoadOpenSegment(struct uv *uv,
                             struct uvSegmentInfo *info,
                             struct raft_entry *entries[],
                             size_t *n,
                             raft_index *next_index)
{
    raft_index first_index;         /* Index of first entry in segment */
    bool all_zeros;                 /* Whether the file is zero'ed */
    bool empty;                     /* Whether the segment file is empty */
    bool remove = false;            /* Whether to remove this segment */
    bool last = false;              /* Whether the last batch was reached */
    uint64_t format;                /* Format version */
    size_t n_batches = 0;           /* Number of loaded batches */
    struct raft_entry *tmp_entries; /* Entries in current batch */
    struct raft_buffer buf = {0};   /* Segment file content */
    size_t offset;                  /* Content read cursor */
    unsigned tmp_n_entries;         /* Number of entries in current batch */
    int i;
    char errmsg[RAFT_ERRMSG_BUF_SIZE];
    int rv;

    first_index = *next_index;

    rv = UvFsFileIsEmpty(uv->dir, info->filename, &empty, errmsg);
    if (rv != 0) {
        tracef("check if %s is empty: %s", info->filename, errmsg);
        rv = RAFT_IOERR;
        goto err;
    }

    if (empty) {
        /* Empty segment, let's discard it. */
        tracef("remove empty open segment %s", info->filename);
        remove = true;
        goto done;
    }

    rv = uvReadSegmentFile(uv, info->filename, &buf, &format);
    if (rv != 0) {
        goto err;
    }

    /* Check that the format is the expected one, or perhaps 0, indicating that
     * the segment was allocated but never written. */
    offset = sizeof format;
    if (format != UV__DISK_FORMAT) {
        if (format == 0) {
            all_zeros = uvContentHasOnlyTrailingZeros(&buf, offset);
            if (all_zeros) {
                /* This is equivalent to the empty case, let's remove the
                 * segment. */
                tracef("remove zeroed open segment %s", info->filename);
                remove = true;
                HeapFree(buf.base);
                buf.base = NULL;
                goto done;
            }
        }
        ErrMsgPrintf(uv->io->errmsg, "unexpected format version %ju", format);
        rv = RAFT_CORRUPT;
        goto err_after_read;
    }

    /* Load all batches in the segment. */
    for (i = 1; !last; i++) {
        rv = uvLoadEntriesBatch(uv, &buf, &tmp_entries, &tmp_n_entries, &offset,
                                &last);
        if (rv != 0) {
            /* If this isn't a decoding error, just bail out. */
            if (rv != RAFT_CORRUPT) {
                ErrMsgWrapf(uv->io->errmsg,
                            "entries batch %u starting at byte %zu", i, offset);
                goto err_after_read;
            }

            /* If this is a decoding error, and not an OS error, check if the
             * rest of the file is filled with zeros. In that case we assume
             * that the server shutdown uncleanly and we just truncate this
             * incomplete data. */
            all_zeros = uvContentHasOnlyTrailingZeros(&buf, offset);
            if (!all_zeros) {
                tracef("%s has non-zero trail", info->filename);
            }

            Tracef(uv->tracer,
                   "truncate open segment %s at %zu (batch %d), since it has "
                   "corrupted "
                   "entries",
                   info->filename, offset, i);

            break;
        }

        rv = extendEntries(tmp_entries, tmp_n_entries, entries, n);
        if (rv != 0) {
            goto err_after_batch_load;
        }
        raft_free(tmp_entries);

        n_batches++;
        *next_index += tmp_n_entries;
    }

    if (n_batches == 0) {
        HeapFree(buf.base);
        buf.base = NULL;
        remove = true;
    }

done:
    /* If the segment has no valid entries in it, we remove it. Otherwise we
     * rename it and keep it. */
    if (remove) {
        rv = UvFsRemoveFile(uv->dir, info->filename, errmsg);
        if (rv != 0) {
            tracef("unlink %s: %s", info->filename, errmsg);
            rv = RAFT_IOERR;
            goto err_after_read;
        }
    } else {
        char filename[UV__SEGMENT_FILENAME_BUF_SIZE];
        raft_index end_index = *next_index - 1;

        /* At least one entry was loaded */
        assert(end_index >= first_index);
        int nb = snprintf(filename, sizeof(filename), UV__CLOSED_TEMPLATE,
                          first_index, end_index);
        if ((nb < 0) || ((size_t)nb >= sizeof(filename))) {
            tracef("snprintf failed: %d", nb);
            rv = RAFT_IOERR;
            /* NOTE(review): `goto err` (not err_after_read) appears deliberate
             * here: once batches were loaded, buf.base is referenced by the
             * returned entries' batch pointers and is freed by the caller's
             * error cleanup — freeing it here would double-free. Confirm
             * against uvSegmentLoadAll()'s err path. */
            goto err;
        }
        tracef("finalize %s into %s", info->filename, filename);

        rv = UvFsTruncateAndRenameFile(uv->dir, (size_t)offset, info->filename,
                                       filename, errmsg);
        if (rv != 0) {
            tracef("finalize %s: %s", info->filename, errmsg);
            rv = RAFT_IOERR;
            goto err;
        }

        info->is_open = false;
        info->first_index = first_index;
        info->end_index = end_index;
        memset(info->filename, '\0', sizeof(info->filename));
        _Static_assert(sizeof(info->filename) >= sizeof(filename),
                       "Destination buffer too small");
        /* info->filename is zeroed out, info->filename is at least as large as
         * filename and we checked that nb < sizeof(filename) -> we won't
         * overflow and the result will be zero terminated. */
        memcpy(info->filename, filename, (size_t)nb);
    }

    return 0;

err_after_batch_load:
    raft_free(tmp_entries[0].batch);
    raft_free(tmp_entries);

err_after_read:
    if (buf.base != NULL) {
        HeapFree(buf.base);
    }

err:
    assert(rv != 0);
    return rv;
}

/* Ensure that the write buffer of the given segment is large enough to hold
 * the given number of bytes size.
 */
static int uvEnsureSegmentBufferIsLargeEnough(struct uvSegmentBuffer *b,
                                              size_t size)
{
    unsigned n = (unsigned)(size / b->block_size);
    void *base;
    size_t len;

    if (b->arena.len >= size) {
        assert(b->arena.base != NULL);
        return 0;
    }

    /* Round the allocation up to a whole number of blocks. */
    if (size % b->block_size != 0) {
        n++;
    }

    len = b->block_size * n;
    base = raft_aligned_alloc(b->block_size, len);
    if (base == NULL) {
        return RAFT_NOMEM;
    }
    memset(base, 0, len);

    /* If the current arena is initialized, we need to copy its content, since
     * it might have data that we want to retain in the next write. */
    if (b->arena.base != NULL) {
        assert(b->arena.len >= b->block_size);
        memcpy(base, b->arena.base, b->arena.len);
        raft_aligned_free(b->block_size, b->arena.base);
    }

    b->arena.base = base;
    b->arena.len = len;

    return 0;
}

/* Initialize an empty segment buffer with the given block size. */
void uvSegmentBufferInit(struct uvSegmentBuffer *b, size_t block_size)
{
    b->block_size = block_size;
    b->arena.base = NULL;
    b->arena.len = 0;
    b->n = 0;
}

/* Release the buffer's arena, if any. */
void uvSegmentBufferClose(struct uvSegmentBuffer *b)
{
    if (b->arena.base != NULL) {
        raft_aligned_free(b->block_size, b->arena.base);
    }
}

/* Write the on-disk format version marker at the start of an empty buffer. */
int uvSegmentBufferFormat(struct uvSegmentBuffer *b)
{
    int rv;
    void *cursor;
    size_t n;
    assert(b->n == 0);
    n = sizeof(uint64_t);
    rv = uvEnsureSegmentBufferIsLargeEnough(b, n);
    if (rv != 0) {
        return rv;
    }
    b->n = n;
    cursor = b->arena.base;
    bytePut64(&cursor, UV__DISK_FORMAT);
    return 0;
}

/* Encode a batch of entries (checksums + header + data) at the end of the
 * buffer. Entry data must be 8-byte padded. */
int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
                          const struct raft_entry entries[],
                          unsigned n_entries)
{
    size_t size;   /* Total size of the batch */
    uint32_t crc1; /* Header checksum */
    uint32_t crc2; /* Data checksum */
    void *crc1_p;  /* Pointer to header checksum slot */
    void *crc2_p;  /* Pointer to data checksum slot */
    void *header;  /* Pointer to the header section */
    void *cursor;
    unsigned i;
    int rv;

    size = sizeof(uint32_t) * 2;            /* CRC checksums */
    size += uvSizeofBatchHeader(n_entries); /* Batch header */
    for (i = 0; i < n_entries; i++) {       /* Entries data */
        size += bytePad64(entries[i].buf.len);
    }

    rv = uvEnsureSegmentBufferIsLargeEnough(b, b->n + size);
    if (rv != 0) {
        return rv;
    }
    cursor = b->arena.base + b->n;

    /* Placeholder of the checksums */
    crc1_p = cursor;
    bytePut32(&cursor, 0);
    crc2_p = cursor;
    bytePut32(&cursor, 0);

    /* Batch header */
    header = cursor;
    uvEncodeBatchHeader(entries, n_entries, cursor);
    crc1 = byteCrc32(header, uvSizeofBatchHeader(n_entries), 0);
    cursor = (uint8_t *)cursor + uvSizeofBatchHeader(n_entries);

    /* Batch data */
    crc2 = 0;
    for (i = 0; i < n_entries; i++) {
        const struct raft_entry *entry = &entries[i];
        /* TODO: enforce the requirement of 8-byte alignment also in the
         * higher-level APIs. */
        assert(entry->buf.len % sizeof(uint64_t) == 0);
        memcpy(cursor, entry->buf.base, entry->buf.len);
        crc2 = byteCrc32(cursor, entry->buf.len, crc2);
        cursor = (uint8_t *)cursor + entry->buf.len;
    }

    /* Back-fill the checksum placeholders now that both CRCs are known. */
    bytePut32(&crc1_p, crc1);
    bytePut32(&crc2_p, crc2);

    b->n += size;

    return 0;
}

/* Produce a write-ready uv_buf_t covering the buffer content, rounded up to a
 * whole number of blocks, with the tail of the last block zeroed. */
void uvSegmentBufferFinalize(struct uvSegmentBuffer *b, uv_buf_t *out)
{
    unsigned n_blocks;
    unsigned tail;

    n_blocks = (unsigned)(b->n / b->block_size);
    if (b->n % b->block_size != 0) {
        n_blocks++;
    }

    /* Set the remainder of the last block to 0 */
    tail = (unsigned)(b->n % b->block_size);
    if (tail != 0) {
        memset(b->arena.base + b->n, 0, b->block_size - tail);
    }

    out->base = b->arena.base;
    out->len = n_blocks * b->block_size;
}

/* Drop everything before block @retain, moving that block's content to the
 * front; retain == 0 resets the buffer entirely. */
void uvSegmentBufferReset(struct uvSegmentBuffer *b, unsigned retain)
{
    assert(b->n > 0);
    assert(b->arena.base != NULL);

    if (retain == 0) {
        b->n = 0;
        memset(b->arena.base, 0, b->block_size);
        return;
    }

    memcpy(b->arena.base, b->arena.base + retain * b->block_size,
           b->block_size);
    b->n = b->n % b->block_size;
}

/* Load all entries from the given (sorted) segments, starting at
 * @start_index. */
int uvSegmentLoadAll(struct uv *uv,
                     const raft_index start_index,
                     struct uvSegmentInfo *infos,
                     size_t n_infos,
                     struct raft_entry **entries,
                     size_t *n_entries)
{
    raft_index next_index;          /* Next entry to load from disk */
    struct raft_entry *tmp_entries; /* Entries in current segment */
    size_t tmp_n;                   /* Number of entries in current segment */
    size_t i;
    int rv;

    assert(start_index >= 1);
    assert(n_infos >
0); *entries = NULL; *n_entries = 0; next_index = start_index; for (i = 0; i < n_infos; i++) { struct uvSegmentInfo *info = &infos[i]; tracef("load segment %s", info->filename); if (info->is_open) { rv = uvLoadOpenSegment(uv, info, entries, n_entries, &next_index); ErrMsgWrapf(uv->io->errmsg, "load open segment %s", info->filename); if (rv != 0) { goto err; } } else { assert(info->first_index >= start_index); assert(info->first_index <= info->end_index); /* Check that the start index encoded in the name of the segment * matches what we expect and there are no gaps in the sequence. */ if (info->first_index != next_index) { ErrMsgPrintf(uv->io->errmsg, "unexpected closed segment %s: first index should " "have been %llu", info->filename, next_index); rv = RAFT_CORRUPT; goto err; } rv = uvSegmentLoadClosed(uv, info, &tmp_entries, &tmp_n); if (rv != 0) { ErrMsgWrapf(uv->io->errmsg, "load closed segment %s", info->filename); goto err; } assert(tmp_n > 0); rv = extendEntries(tmp_entries, tmp_n, entries, n_entries); if (rv != 0) { /* TODO: release memory of entries in tmp_entries */ goto err; } raft_free(tmp_entries); next_index += tmp_n; } } return 0; err: assert(rv != 0); /* Free any batch that we might have allocated and the entries array as * well. 
*/ if (*entries != NULL) { void *batch = NULL; for (i = 0; i < *n_entries; i++) { struct raft_entry *entry = &(*entries)[i]; if (entry->batch != batch) { batch = entry->batch; raft_free(batch); } } raft_free(*entries); *entries = NULL; *n_entries = 0; } return rv; } /* Write a closed segment */ static int uvWriteClosedSegment(struct uv *uv, raft_index first_index, raft_index last_index, const struct raft_buffer *conf) { char filename[UV__FILENAME_LEN]; struct uvSegmentBuffer buf; struct raft_buffer data; struct raft_entry entry; size_t cap; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; assert(first_index <= last_index); /* Render the path */ sprintf(filename, UV__CLOSED_TEMPLATE, first_index, last_index); /* Make sure that the given encoded configuration fits in the first * block */ cap = uv->block_size - (sizeof(uint64_t) /* Format version */ + sizeof(uint64_t) /* Checksums */ + uvSizeofBatchHeader(1)); if (conf->len > cap) { return RAFT_TOOBIG; } uvSegmentBufferInit(&buf, uv->block_size); rv = uvSegmentBufferFormat(&buf); if (rv != 0) { return rv; } entry.term = 1; entry.type = RAFT_CHANGE; entry.buf = *conf; rv = uvSegmentBufferAppend(&buf, &entry, 1); if (rv != 0) { uvSegmentBufferClose(&buf); return rv; } data.base = buf.arena.base; data.len = buf.n; rv = UvFsMakeFile(uv->dir, filename, &data, 1, errmsg); uvSegmentBufferClose(&buf); if (rv != 0) { tracef("write segment %s: %s", filename, errmsg); return RAFT_IOERR; } return 0; } int uvSegmentCreateFirstClosed(struct uv *uv, const struct raft_configuration *configuration) { return uvSegmentCreateClosedWithConfiguration(uv, 1, configuration); } int uvSegmentCreateClosedWithConfiguration( struct uv *uv, raft_index index, const struct raft_configuration *configuration) { struct raft_buffer buf; char filename[UV__FILENAME_LEN]; int rv; /* Render the path */ sprintf(filename, UV__CLOSED_TEMPLATE, index, index); /* Encode the given configuration. 
*/ rv = configurationEncode(configuration, &buf); if (rv != 0) { goto err; } /* Write the file */ rv = uvWriteClosedSegment(uv, index, index, &buf); if (rv != 0) { goto err_after_configuration_encode; } raft_free(buf.base); rv = UvFsSyncDir(uv->dir, uv->io->errmsg); if (rv != 0) { return RAFT_IOERR; } return 0; err_after_configuration_encode: raft_free(buf.base); err: assert(rv != 0); return rv; } int uvSegmentTruncate(struct uv *uv, struct uvSegmentInfo *segment, raft_index index) { char filename[UV__FILENAME_LEN]; struct raft_entry *entries; struct uvSegmentBuffer buf; struct raft_buffer data; size_t n; unsigned m; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; assert(!segment->is_open); tracef("truncate %llu-%llu at %llu", segment->first_index, segment->end_index, index); rv = uvSegmentLoadClosed(uv, segment, &entries, &n); if (rv != 0) { ErrMsgWrapf(uv->io->errmsg, "load closed segment %s", segment->filename); goto out; } /* Discard all entries after the truncate index (included) */ assert(index - segment->first_index < n); m = (unsigned)(index - segment->first_index); uvSegmentBufferInit(&buf, uv->block_size); rv = uvSegmentBufferFormat(&buf); if (rv != 0) { goto out_after_buffer_init; } rv = uvSegmentBufferAppend(&buf, entries, m); if (rv != 0) { goto out_after_buffer_init; } /* Render the path. * * TODO: we should use a temporary file name so in case of crash we don't * consider this segment as corrupted. 
*/ sprintf(filename, UV__CLOSED_TEMPLATE, segment->first_index, index - 1); data.base = buf.arena.base; data.len = buf.n; rv = UvFsMakeFile(uv->dir, filename, &data, 1, errmsg); if (rv != 0) { tracef("write %s: %s", filename, errmsg); rv = RAFT_IOERR; goto out_after_buffer_init; } out_after_buffer_init: uvSegmentBufferClose(&buf); entryBatchesDestroy(entries, n); out: return rv; } #undef tracef raft-0.11.3/src/uv_send.c000066400000000000000000000354641415614527300151520ustar00rootroot00000000000000#include #include "../include/raft/uv.h" #include "assert.h" #include "heap.h" #include "uv.h" #include "uv_encoding.h" #define tracef(...) Tracef(c->uv->tracer, __VA_ARGS__) /* The happy path for an raft_io_send request is: * * - Get the uvClient object whose address matches the one of target server. * - Encode the message and write it using the uvClient's TCP handle. * - Once the write completes, fire the send request callback. * * Possible failure modes are: * * - The uv->clients queue has no client object with a matching address. In this * case add a new client object to the array, add the send request to the * queue of pending requests and submit a connection request. Once the * connection request succeeds, try to write the encoded request to the * connected stream handle. If the connection request fails, schedule another * attempt. * * - The uv->clients queue has a client object which is not connected. Add the * send request to the pending queue, and, if there's no connection attempt * already in progress, start a new one. * * - The write request fails (either synchronously or asynchronously). In this * case we fire the request callback with an error, close the connection * stream, and start a re-connection attempt. */ /* Maximum number of requests that can be buffered. 
*/ #define UV__CLIENT_MAX_PENDING 3 struct uvClient { struct uv *uv; /* libuv I/O implementation object */ struct uv_timer_s timer; /* Schedule connection attempts */ struct raft_uv_connect connect; /* Connection request */ struct uv_stream_s *stream; /* Current connection handle */ struct uv_stream_s *old_stream; /* Connection handle being closed */ unsigned n_connect_attempt; /* Consecutive connection attempts */ raft_id id; /* ID of the other server */ char *address; /* Address of the other server */ queue pending; /* Pending send message requests */ queue queue; /* Clients queue */ bool closing; /* True after calling uvClientAbort */ }; /* Hold state for a single send RPC message request. */ struct uvSend { struct uvClient *client; /* Client connected to the target server */ struct raft_io_send *req; /* User request */ uv_buf_t *bufs; /* Encoded raft RPC message to send */ unsigned n_bufs; /* Number of buffers */ uv_write_t write; /* Stream write request */ queue queue; /* Pending send requests queue */ }; /* Free all memory used by the given send request object, including the object * itself. */ static void uvSendDestroy(struct uvSend *s) { if (s->bufs != NULL) { /* Just release the first buffer. Further buffers are entry or snapshot * payloads, which we were passed but we don't own. */ HeapFree(s->bufs[0].base); /* Release the buffers array. */ HeapFree(s->bufs); } HeapFree(s); } /* Initialize a new client associated with the given server. 
*/ static int uvClientInit(struct uvClient *c, struct uv *uv, raft_id id, const char *address) { int rv; c->uv = uv; c->timer.data = c; c->connect.data = NULL; /* Set upon starting a connect request */ c->stream = NULL; /* Set upon successful connection */ c->old_stream = NULL; /* Set after closing the current connection */ c->n_connect_attempt = 0; c->id = id; c->address = HeapMalloc(strlen(address) + 1); if (c->address == NULL) { return RAFT_NOMEM; } rv = uv_timer_init(c->uv->loop, &c->timer); assert(rv == 0); strcpy(c->address, address); QUEUE_INIT(&c->pending); c->closing = false; QUEUE_PUSH(&uv->clients, &c->queue); return 0; } /* If there's no more pending cleanup, remove the client from the abort queue * and destroy it. */ static void uvClientMaybeDestroy(struct uvClient *c) { struct uv *uv = c->uv; assert(c->stream == NULL); if (c->connect.data != NULL) { return; } if (c->timer.data != NULL) { return; } if (c->old_stream != NULL) { return; } while (!QUEUE_IS_EMPTY(&c->pending)) { queue *head; struct uvSend *send; struct raft_io_send *req; head = QUEUE_HEAD(&c->pending); send = QUEUE_DATA(head, struct uvSend, queue); QUEUE_REMOVE(head); req = send->req; uvSendDestroy(send); if (req->cb != NULL) { req->cb(req, RAFT_CANCELED); } } QUEUE_REMOVE(&c->queue); assert(c->address != NULL); HeapFree(c->address); HeapFree(c); uvMaybeFireCloseCb(uv); } /* Forward declaration. */ static void uvClientConnect(struct uvClient *c); static void uvClientDisconnectCloseCb(struct uv_handle_s *handle) { struct uvClient *c = handle->data; assert(c->old_stream != NULL); assert(c->stream == NULL); assert(handle == (struct uv_handle_s *)c->old_stream); HeapFree(c->old_stream); c->old_stream = NULL; if (c->closing) { uvClientMaybeDestroy(c); } else { uvClientConnect(c); /* Trigger a new connection attempt. */ } } /* Close the current connection. 
*/ static void uvClientDisconnect(struct uvClient *c) { assert(c->stream != NULL); assert(c->old_stream == NULL); c->old_stream = c->stream; c->stream = NULL; uv_close((struct uv_handle_s *)c->old_stream, uvClientDisconnectCloseCb); } /* Invoked once an encoded RPC message has been written out. */ static void uvSendWriteCb(struct uv_write_s *write, const int status) { struct uvSend *send = write->data; struct uvClient *c = send->client; struct raft_io_send *req = send->req; int cb_status = 0; /* If the write failed and we're not currently closing, let's consider the * current stream handle as busted and start disconnecting (unless we're * already doing so). We'll trigger a new connection attempt once the handle * is closed. */ if (status != 0) { cb_status = RAFT_IOERR; if (!c->closing) { if (c->stream != NULL) { uvClientDisconnect(c); } } else if (status == UV_ECANCELED) { cb_status = RAFT_CANCELED; } } uvSendDestroy(send); if (req->cb != NULL) { req->cb(req, cb_status); } } static int uvClientSend(struct uvClient *c, struct uvSend *send) { int rv; assert(!c->closing); send->client = c; /* If there's no connection available, let's queue the request. */ if (c->stream == NULL) { tracef("no connection available -> enqueue message"); QUEUE_PUSH(&c->pending, &send->queue); return 0; } tracef("connection available -> write message"); send->write.data = send; rv = uv_write(&send->write, c->stream, send->bufs, send->n_bufs, uvSendWriteCb); if (rv != 0) { tracef("write message failed -> rv %d", rv); /* UNTESTED: what are the error conditions? perhaps ENOMEM */ return RAFT_IOERR; } return 0; } /* Try to execute all send requests that were blocked in the queue waiting for a * connection. 
*/ static void uvClientSendPending(struct uvClient *c) { int rv; assert(c->stream != NULL); tracef("send pending messages"); while (!QUEUE_IS_EMPTY(&c->pending)) { queue *head; struct uvSend *send; head = QUEUE_HEAD(&c->pending); send = QUEUE_DATA(head, struct uvSend, queue); QUEUE_REMOVE(head); rv = uvClientSend(c, send); if (rv != 0) { if (send->req->cb != NULL) { send->req->cb(send->req, rv); } uvSendDestroy(send); } } } static void uvClientTimerCb(uv_timer_t *timer) { struct uvClient *c = timer->data; tracef("timer expired -> attempt to reconnect"); uvClientConnect(c); /* Retry to connect. */ } /* Return the number of send requests that we have been parked in the send queue * because no connection is available yet. */ static unsigned uvClientPendingCount(struct uvClient *c) { queue *head; unsigned n = 0; QUEUE_FOREACH(head, &c->pending) { n++; } return n; } static void uvClientConnectCb(struct raft_uv_connect *req, struct uv_stream_s *stream, int status) { struct uvClient *c = req->data; unsigned n_pending; int rv; tracef("connect attempt completed -> status %s", errCodeToString(status)); assert(c->connect.data != NULL); assert(c->stream == NULL); assert(c->old_stream == NULL); assert(!uv_is_active((struct uv_handle_s *)&c->timer)); c->connect.data = NULL; /* If we are closing, bail out, possibly discarding the new connection. */ if (c->closing) { if (status == 0) { assert(stream != NULL); c->stream = stream; c->stream->data = c; uvClientDisconnect(c); } else { uvClientMaybeDestroy(c); } return; } /* If, the connection attempt was successful, we're good. If we have pending * requests, let's try to execute them. 
*/ if (status == 0) { assert(stream != NULL); c->stream = stream; c->n_connect_attempt = 0; c->stream->data = c; uvClientSendPending(c); return; } /* Shrink the queue of pending requests, by failing the oldest ones */ n_pending = uvClientPendingCount(c); if (n_pending > UV__CLIENT_MAX_PENDING) { unsigned i; for (i = 0; i < n_pending - UV__CLIENT_MAX_PENDING; i++) { tracef("queue full -> evict oldest message"); queue *head; struct uvSend *old_send; struct raft_io_send *old_req; head = QUEUE_HEAD(&c->pending); old_send = QUEUE_DATA(head, struct uvSend, queue); QUEUE_REMOVE(head); old_req = old_send->req; uvSendDestroy(old_send); if (old_req->cb != NULL) { old_req->cb(old_req, RAFT_NOCONNECTION); } } } /* Let's schedule another attempt. */ rv = uv_timer_start(&c->timer, uvClientTimerCb, c->uv->connect_retry_delay, 0); assert(rv == 0); } /* Perform a single connection attempt, scheduling a retry if it fails. */ static void uvClientConnect(struct uvClient *c) { int rv; assert(!c->closing); assert(c->stream == NULL); assert(c->old_stream == NULL); assert(!uv_is_active((struct uv_handle_s *)&c->timer)); assert(c->connect.data == NULL); c->n_connect_attempt++; c->connect.data = c; rv = c->uv->transport->connect(c->uv->transport, &c->connect, c->id, c->address, uvClientConnectCb); if (rv != 0) { /* Restart the timer, so we can retry. */ c->connect.data = NULL; rv = uv_timer_start(&c->timer, uvClientTimerCb, c->uv->connect_retry_delay, 0); assert(rv == 0); } } /* Final callback in the close chain of an io_uv__client object */ static void uvClientTimerCloseCb(struct uv_handle_s *handle) { struct uvClient *c = handle->data; assert(handle == (struct uv_handle_s *)&c->timer); c->timer.data = NULL; uvClientMaybeDestroy(c); } /* Start shutting down a client. This happens when the `raft_io` instance * has been closed or when the address of the client has changed. 
*/ static void uvClientAbort(struct uvClient *c) { struct uv *uv = c->uv; int rv; assert(c->stream != NULL || c->old_stream != NULL || uv_is_active((struct uv_handle_s *)&c->timer) || c->connect.data != NULL); QUEUE_REMOVE(&c->queue); QUEUE_PUSH(&uv->aborting, &c->queue); rv = uv_timer_stop(&c->timer); assert(rv == 0); /* If we are connected, let's close the outbound stream handle. This will * eventually complete all inflight write requests, possibly with failing * them with UV_ECANCELED. */ if (c->stream != NULL) { uvClientDisconnect(c); } /* Closing the timer implicitly stop it, so the timeout callback won't be * fired. */ uv_close((struct uv_handle_s *)&c->timer, uvClientTimerCloseCb); c->closing = true; } /* Find the client object associated with the given server, or create one if * there's none yet. */ static int uvGetClient(struct uv *uv, const raft_id id, const char *address, struct uvClient **client) { queue *head; int rv; /* Check if we already have a client object for this peer server. */ QUEUE_FOREACH(head, &uv->clients) { *client = QUEUE_DATA(head, struct uvClient, queue); if ((*client)->id != id) { continue; } /* Client address has changed, abort connection and create a new one. */ if (strcmp((*client)->address, address) != 0) { uvClientAbort(*client); break; } return 0; } /* Initialize the new connection */ *client = HeapMalloc(sizeof **client); if (*client == NULL) { rv = RAFT_NOMEM; goto err; } rv = uvClientInit(*client, uv, id, address); if (rv != 0) { goto err_after_client_alloc; } /* Make a first connection attempt right away.. */ uvClientConnect(*client); return 0; err_after_client_alloc: HeapFree(*client); err: assert(rv != 0); return rv; } int UvSend(struct raft_io *io, struct raft_io_send *req, const struct raft_message *message, raft_io_send_cb cb) { struct uv *uv = io->impl; struct uvSend *send; struct uvClient *client; int rv; assert(!uv->closing); /* Allocate a new request object. 
*/ send = HeapMalloc(sizeof *send); if (send == NULL) { rv = RAFT_NOMEM; goto err; } send->req = req; req->cb = cb; rv = uvEncodeMessage(message, &send->bufs, &send->n_bufs); if (rv != 0) { send->bufs = NULL; goto err_after_send_alloc; } /* Get a client object connected to the target server, creating it if it * doesn't exist yet. */ rv = uvGetClient(uv, message->server_id, message->server_address, &client); if (rv != 0) { goto err_after_send_alloc; } rv = uvClientSend(client, send); if (rv != 0) { goto err_after_send_alloc; } return 0; err_after_send_alloc: uvSendDestroy(send); err: assert(rv != 0); return rv; } void UvSendClose(struct uv *uv) { assert(uv->closing); while (!QUEUE_IS_EMPTY(&uv->clients)) { queue *head; struct uvClient *client; head = QUEUE_HEAD(&uv->clients); client = QUEUE_DATA(head, struct uvClient, queue); uvClientAbort(client); } } #undef tracef raft-0.11.3/src/uv_snapshot.c000066400000000000000000000536571415614527300160640ustar00rootroot00000000000000#include #include #include #include "array.h" #include "assert.h" #include "byte.h" #include "compress.h" #include "configuration.h" #include "heap.h" #include "uv.h" #include "uv_encoding.h" #include "uv_os.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) /* Arbitrary maximum configuration size. Should be practically be enough */ #define UV__META_MAX_CONFIGURATION_SIZE 1024 * 1024 /* Returns true if the filename is a valid snapshot file or snapshot meta * filename depending on the `meta` switch. If the parse is successful, the * arguments will contain the parsed values. 
*/ static bool uvSnapshotParseFilename(const char *filename, bool meta, raft_term *term, raft_index *index, raft_time *timestamp) { /* Check if it's a well-formed snapshot filename */ int consumed = 0; int matched; size_t filename_len = strlen(filename); assert(filename_len < UV__FILENAME_LEN); if (meta) { matched = sscanf(filename, UV__SNAPSHOT_META_TEMPLATE "%n", term, index, timestamp, &consumed); } else { matched = sscanf(filename, UV__SNAPSHOT_TEMPLATE "%n", term, index, timestamp, &consumed); } if (matched != 3 || consumed != (int)filename_len) { return false; } return true; } /* Check if the given filename matches the pattern of a snapshot metadata * filename (snapshot-xxx-yyy-zzz.meta), and fill the given info structure if * so. * * Return true if the filename matched, false otherwise. */ static bool uvSnapshotInfoMatch(const char *filename, struct uvSnapshotInfo *info) { if (!uvSnapshotParseFilename(filename, true, &info->term, &info->index, &info->timestamp)) { return false; } /* Allow room for '\0' terminator */ size_t n = sizeof(info->filename) - 1; strncpy(info->filename, filename, n); info->filename[n] = '\0'; return true; } void uvSnapshotFilenameOf(struct uvSnapshotInfo *info, char *filename) { size_t len = strlen(info->filename) - strlen(".meta"); assert(len < UV__FILENAME_LEN); strcpy(filename, info->filename); filename[len] = 0; } int UvSnapshotInfoAppendIfMatch(struct uv *uv, const char *filename, struct uvSnapshotInfo *infos[], size_t *n_infos, bool *appended) { struct uvSnapshotInfo info; bool matched; char snapshot_filename[UV__FILENAME_LEN]; bool exists; bool is_empty; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; /* Check if it's a snapshot metadata filename */ matched = uvSnapshotInfoMatch(filename, &info); if (!matched) { *appended = false; return 0; } /* Check if there's actually a valid snapshot file for this snapshot * metadata. 
If there's none or it's empty, it means that we aborted before * finishing the snapshot, or that another thread is still busy writing the * snapshot. */ uvSnapshotFilenameOf(&info, snapshot_filename); rv = UvFsFileExists(uv->dir, snapshot_filename, &exists, errmsg); if (rv != 0) { tracef("stat %s: %s", snapshot_filename, errmsg); rv = RAFT_IOERR; return rv; } if (!exists) { *appended = false; return 0; } /* TODO This check is strictly not needed, snapshot files are created by * renaming fully written and synced tmp-files. Leaving it here, just to be * extra-safe. Can probably be removed once more data integrity checks are * performed at startup. */ rv = UvFsFileIsEmpty(uv->dir, snapshot_filename, &is_empty, errmsg); if (rv != 0) { tracef("is_empty %s: %s", snapshot_filename, errmsg); rv = RAFT_IOERR; return rv; } if (is_empty) { *appended = false; return 0; } ARRAY__APPEND(struct uvSnapshotInfo, info, infos, n_infos, rv); if (rv == -1) { return RAFT_NOMEM; } *appended = true; return 0; } static int uvSnapshotIsOrphanInternal(const char *dir, const char *filename, bool meta, bool *orphan) { int rv; *orphan = false; raft_term term; raft_index index; raft_time timestamp; if (!uvSnapshotParseFilename(filename, meta, &term, &index, ×tamp)) { return 0; } /* filename is a well-formed snapshot filename, check if the sibling file exists. 
*/ char sibling_filename[UV__FILENAME_LEN]; if (meta) { rv = snprintf(sibling_filename, UV__FILENAME_LEN, UV__SNAPSHOT_TEMPLATE, term, index, timestamp); } else { rv = snprintf(sibling_filename, UV__FILENAME_LEN, UV__SNAPSHOT_META_TEMPLATE, term, index, timestamp); } if (rv >= UV__FILENAME_LEN) { /* Output truncated */ return -1; } bool sibling_exists = false; char ignored[RAFT_ERRMSG_BUF_SIZE]; rv = UvFsFileExists(dir, sibling_filename, &sibling_exists, ignored); if (rv != 0) { return rv; } *orphan = !sibling_exists; return 0; } int UvSnapshotIsOrphan(const char *dir, const char *filename, bool *orphan) { return uvSnapshotIsOrphanInternal(dir, filename, false, orphan); } int UvSnapshotMetaIsOrphan(const char *dir, const char *filename, bool *orphan) { return uvSnapshotIsOrphanInternal(dir, filename, true, orphan); } /* Compare two snapshots to decide which one is more recent. */ static int uvSnapshotCompare(const void *p1, const void *p2) { struct uvSnapshotInfo *s1 = (struct uvSnapshotInfo *)p1; struct uvSnapshotInfo *s2 = (struct uvSnapshotInfo *)p2; /* If terms are different, the snapshot with the highest term is the most * recent. */ if (s1->term != s2->term) { return s1->term < s2->term ? -1 : 1; } /* If the term are identical and the index differ, the snapshot with the * highest index is the most recent */ if (s1->index != s2->index) { return s1->index < s2->index ? -1 : 1; } /* If term and index are identical, compare the timestamp. */ return s1->timestamp < s2->timestamp ? -1 : 1; } /* Sort the given snapshots. */ void UvSnapshotSort(struct uvSnapshotInfo *infos, size_t n_infos) { qsort(infos, n_infos, sizeof *infos, uvSnapshotCompare); } /* Parse the metadata file of a snapshot and populate the metadata portion of * the given snapshot object accordingly. 
*/ static int uvSnapshotLoadMeta(struct uv *uv, struct uvSnapshotInfo *info, struct raft_snapshot *snapshot, char *errmsg) { uint64_t header[1 + /* Format version */ 1 + /* CRC checksum */ 1 + /* Configuration index */ 1 /* Configuration length */]; struct raft_buffer buf; uint64_t format; uint32_t crc1; uint32_t crc2; uv_file fd; int rv; snapshot->term = info->term; snapshot->index = info->index; rv = UvFsOpenFileForReading(uv->dir, info->filename, &fd, errmsg); if (rv != 0) { tracef("open %s: %s", info->filename, errmsg); rv = RAFT_IOERR; goto err; } buf.base = header; buf.len = sizeof header; rv = UvFsReadInto(fd, &buf, errmsg); if (rv != 0) { tracef("read %s: %s", info->filename, errmsg); rv = RAFT_IOERR; goto err_after_open; } format = byteFlip64(header[0]); if (format != UV__DISK_FORMAT) { tracef("load %s: unsupported format %ju", info->filename, format); rv = RAFT_MALFORMED; goto err_after_open; } crc1 = (uint32_t)byteFlip64(header[1]); snapshot->configuration_index = byteFlip64(header[2]); buf.len = (size_t)byteFlip64(header[3]); if (buf.len > UV__META_MAX_CONFIGURATION_SIZE) { tracef("load %s: configuration data too big (%zd)", info->filename, buf.len); rv = RAFT_CORRUPT; goto err_after_open; } if (buf.len == 0) { tracef("load %s: no configuration data", info->filename); rv = RAFT_CORRUPT; goto err_after_open; } buf.base = HeapMalloc(buf.len); if (buf.base == NULL) { rv = RAFT_NOMEM; goto err_after_open; } rv = UvFsReadInto(fd, &buf, errmsg); if (rv != 0) { tracef("read %s: %s", info->filename, errmsg); rv = RAFT_IOERR; goto err_after_buf_malloc; } crc2 = byteCrc32(header + 2, sizeof header - sizeof(uint64_t) * 2, 0); crc2 = byteCrc32(buf.base, buf.len, crc2); if (crc1 != crc2) { ErrMsgPrintf(errmsg, "read %s: checksum mismatch", info->filename); rv = RAFT_CORRUPT; goto err_after_buf_malloc; } configurationInit(&snapshot->configuration); rv = configurationDecode(&buf, &snapshot->configuration); if (rv != 0) { goto err_after_buf_malloc; } 
HeapFree(buf.base); UvOsClose(fd); return 0; err_after_buf_malloc: HeapFree(buf.base); err_after_open: close(fd); err: assert(rv != 0); return rv; } /* Load the snapshot data file and populate the data portion of the given * snapshot object accordingly. */ static int uvSnapshotLoadData(struct uv *uv, struct uvSnapshotInfo *info, struct raft_snapshot *snapshot, char *errmsg) { char filename[UV__FILENAME_LEN]; struct raft_buffer buf; int rv; uvSnapshotFilenameOf(info, filename); rv = UvFsReadFile(uv->dir, filename, &buf, errmsg); if (rv != 0) { tracef("stat %s: %s", filename, errmsg); goto err; } if (IsCompressed(buf.base, buf.len)) { struct raft_buffer decompressed = {0}; tracef("snapshot decompress start"); rv = Decompress(buf, &decompressed, errmsg); tracef("snapshot decompress end %d", rv); if (rv != 0) { tracef("decompress failed rv:%d", rv); goto err_after_read_file; } HeapFree(buf.base); buf = decompressed; } snapshot->bufs = HeapMalloc(sizeof *snapshot->bufs); snapshot->n_bufs = 1; if (snapshot->bufs == NULL) { rv = RAFT_NOMEM; goto err_after_read_file; } snapshot->bufs[0] = buf; return 0; err_after_read_file: HeapFree(buf.base); err: assert(rv != 0); return rv; } int UvSnapshotLoad(struct uv *uv, struct uvSnapshotInfo *meta, struct raft_snapshot *snapshot, char *errmsg) { int rv; rv = uvSnapshotLoadMeta(uv, meta, snapshot, errmsg); if (rv != 0) { return rv; } rv = uvSnapshotLoadData(uv, meta, snapshot, errmsg); if (rv != 0) { return rv; } return 0; } struct uvSnapshotPut { struct uv *uv; size_t trailing; struct raft_io_snapshot_put *req; const struct raft_snapshot *snapshot; struct { unsigned long long timestamp; uint64_t header[4]; /* Format, CRC, configuration index/len */ struct raft_buffer bufs[2]; /* Preamble and configuration */ } meta; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int status; struct UvBarrier barrier; }; struct uvSnapshotGet { struct uv *uv; struct raft_io_snapshot_get *req; struct raft_snapshot *snapshot; struct uv_work_s work; char 
errmsg[RAFT_ERRMSG_BUF_SIZE]; int status; queue queue; }; static int uvSnapshotKeepLastTwo(struct uv *uv, struct uvSnapshotInfo *snapshots, size_t n) { size_t i; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; /* Leave at least two snapshots, for safety. */ if (n <= 2) { return 0; } for (i = 0; i < n - 2; i++) { struct uvSnapshotInfo *snapshot = &snapshots[i]; char filename[UV__FILENAME_LEN]; rv = UvFsRemoveFile(uv->dir, snapshot->filename, errmsg); if (rv != 0) { tracef("unlink %s: %s", snapshot->filename, errmsg); return RAFT_IOERR; } uvSnapshotFilenameOf(snapshot, filename); rv = UvFsRemoveFile(uv->dir, filename, errmsg); if (rv != 0) { tracef("unlink %s: %s", filename, errmsg); return RAFT_IOERR; } } return 0; } /* Remove all segments and snapshots that are not needed anymore, because their past the trailing amount. */ static int uvRemoveOldSegmentsAndSnapshots(struct uv *uv, raft_index last_index, size_t trailing, char *errmsg) { struct uvSnapshotInfo *snapshots; struct uvSegmentInfo *segments; size_t n_snapshots; size_t n_segments; int rv = 0; rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments, errmsg); if (rv != 0) { goto out; } rv = uvSnapshotKeepLastTwo(uv, snapshots, n_snapshots); if (rv != 0) { goto out; } if (segments != NULL) { rv = uvSegmentKeepTrailing(uv, segments, n_segments, last_index, trailing, errmsg); if (rv != 0) { goto out; } } rv = UvFsSyncDir(uv->dir, errmsg); out: if (snapshots != NULL) { HeapFree(snapshots); } if (segments != NULL) { HeapFree(segments); } return rv; } static int makeFileCompressed(const char *dir, const char *filename, struct raft_buffer *bufs, unsigned n_bufs, char *errmsg) { int rv; struct raft_buffer compressed = {0}; rv = Compress(bufs, n_bufs, &compressed, errmsg); if (rv != 0) { ErrMsgWrapf(errmsg, "compress %s", filename); return RAFT_IOERR; } rv = UvFsMakeFile(dir, filename, &compressed, 1, errmsg); raft_free(compressed.base); return rv; } static void uvSnapshotPutWorkCb(uv_work_t *work) { struct 
uvSnapshotPut *put = work->data; struct uv *uv = put->uv; char metadata[UV__FILENAME_LEN]; char snapshot[UV__FILENAME_LEN]; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; sprintf(metadata, UV__SNAPSHOT_META_TEMPLATE, put->snapshot->term, put->snapshot->index, put->meta.timestamp); rv = UvFsMakeFile(uv->dir, metadata, put->meta.bufs, 2, put->errmsg); if (rv != 0) { tracef("snapshot.meta creation failed %d", rv); ErrMsgWrapf(put->errmsg, "write %s", metadata); put->status = RAFT_IOERR; return; } sprintf(snapshot, UV__SNAPSHOT_TEMPLATE, put->snapshot->term, put->snapshot->index, put->meta.timestamp); tracef("snapshot write start"); if (uv->snapshot_compression) { rv = makeFileCompressed(uv->dir, snapshot, put->snapshot->bufs, put->snapshot->n_bufs, put->errmsg); } else { rv = UvFsMakeFile(uv->dir, snapshot, put->snapshot->bufs, put->snapshot->n_bufs, put->errmsg); } tracef("snapshot write end %d", rv); if (rv != 0) { tracef("snapshot creation failed %d", rv); ErrMsgWrapf(put->errmsg, "write %s", snapshot); UvFsRemoveFile(uv->dir, metadata, errmsg); UvFsRemoveFile(uv->dir, snapshot, errmsg); put->status = RAFT_IOERR; return; } rv = UvFsSyncDir(uv->dir, put->errmsg); if (rv != 0) { put->status = RAFT_IOERR; return; } rv = uvRemoveOldSegmentsAndSnapshots(uv, put->snapshot->index, put->trailing, put->errmsg); if (rv != 0) { put->status = rv; return; } put->status = 0; return; } /* Finish the put request, releasing all associated memory and invoking its * callback. 
*/ static void uvSnapshotPutFinish(struct uvSnapshotPut *put) { struct raft_io_snapshot_put *req = put->req; int status = put->status; struct uv *uv = put->uv; assert(uv->snapshot_put_work.data == NULL); HeapFree(put->meta.bufs[1].base); HeapFree(put); req->cb(req, status); } static void uvSnapshotPutAfterWorkCb(uv_work_t *work, int status) { struct uvSnapshotPut *put = work->data; struct uv *uv = put->uv; bool is_install = put->trailing == 0; assert(status == 0); uv->snapshot_put_work.data = NULL; uvSnapshotPutFinish(put); if (is_install) { UvUnblock(uv); } uvMaybeFireCloseCb(uv); } /* Start processing the given put request. */ static void uvSnapshotPutStart(struct uvSnapshotPut *put) { struct uv *uv = put->uv; int rv; /* If this is an install request, the barrier callback must have fired. */ if (put->trailing == 0) { assert(put->barrier.data == NULL); } uv->snapshot_put_work.data = put; rv = uv_queue_work(uv->loop, &uv->snapshot_put_work, uvSnapshotPutWorkCb, uvSnapshotPutAfterWorkCb); if (rv != 0) { tracef("store snapshot %lld: %s", put->snapshot->index, uv_strerror(rv)); uv->errored = true; } } static void uvSnapshotPutBarrierCb(struct UvBarrier *barrier) { struct uvSnapshotPut *put = barrier->data; if (put == NULL) { return; } struct uv *uv = put->uv; assert(put->trailing == 0); put->barrier.data = NULL; /* If we're closing, abort the request. 
*/ if (uv->closing) { put->status = RAFT_CANCELED; uvSnapshotPutFinish(put); uvMaybeFireCloseCb(uv); return; } uvSnapshotPutStart(put); } int UvSnapshotPut(struct raft_io *io, unsigned trailing, struct raft_io_snapshot_put *req, const struct raft_snapshot *snapshot, raft_io_snapshot_put_cb cb) { struct uv *uv; struct uvSnapshotPut *put; void *cursor; unsigned crc; int rv; uv = io->impl; assert(!uv->closing); assert(uv->snapshot_put_work.data == NULL); tracef("put snapshot at %lld, keeping %d", snapshot->index, trailing); put = HeapMalloc(sizeof *put); if (put == NULL) { rv = RAFT_NOMEM; goto err; } put->uv = uv; put->req = req; put->snapshot = snapshot; put->meta.timestamp = uv_now(uv->loop); put->trailing = trailing; put->barrier.data = put; req->cb = cb; /* Prepare the buffers for the metadata file. */ put->meta.bufs[0].base = put->meta.header; put->meta.bufs[0].len = sizeof put->meta.header; rv = configurationEncode(&snapshot->configuration, &put->meta.bufs[1]); if (rv != 0) { goto err_after_req_alloc; } cursor = put->meta.header; bytePut64(&cursor, UV__DISK_FORMAT); bytePut64(&cursor, 0); bytePut64(&cursor, snapshot->configuration_index); bytePut64(&cursor, put->meta.bufs[1].len); crc = byteCrc32(&put->meta.header[2], sizeof(uint64_t) * 2, 0); crc = byteCrc32(put->meta.bufs[1].base, put->meta.bufs[1].len, crc); cursor = &put->meta.header[1]; bytePut64(&cursor, crc); /* If the trailing parameter is set to 0, it means that we're restoring a * snapshot. Submit a barrier request setting the next append index to the * snapshot's last index + 1. 
*/ if (trailing == 0) { rv = UvBarrier(uv, snapshot->index + 1, &put->barrier, uvSnapshotPutBarrierCb); if (rv != 0) { goto err_after_configuration_encode; } } else { uvSnapshotPutStart(put); } return 0; err_after_configuration_encode: HeapFree(put->meta.bufs[1].base); err_after_req_alloc: HeapFree(put); err: assert(rv != 0); return rv; } static void uvSnapshotGetWorkCb(uv_work_t *work) { struct uvSnapshotGet *get = work->data; struct uv *uv = get->uv; struct uvSnapshotInfo *snapshots; size_t n_snapshots; struct uvSegmentInfo *segments; size_t n_segments; int rv; get->status = 0; rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments, get->errmsg); if (rv != 0) { get->status = rv; goto out; } if (snapshots != NULL) { rv = UvSnapshotLoad(uv, &snapshots[n_snapshots - 1], get->snapshot, get->errmsg); if (rv != 0) { get->status = rv; } HeapFree(snapshots); } if (segments != NULL) { HeapFree(segments); } out: return; } static void uvSnapshotGetAfterWorkCb(uv_work_t *work, int status) { struct uvSnapshotGet *get = work->data; struct raft_io_snapshot_get *req = get->req; struct raft_snapshot *snapshot = get->snapshot; int req_status = get->status; struct uv *uv = get->uv; assert(status == 0); QUEUE_REMOVE(&get->queue); HeapFree(get); req->cb(req, snapshot, req_status); uvMaybeFireCloseCb(uv); } int UvSnapshotGet(struct raft_io *io, struct raft_io_snapshot_get *req, raft_io_snapshot_get_cb cb) { struct uv *uv; struct uvSnapshotGet *get; int rv; uv = io->impl; assert(!uv->closing); get = HeapMalloc(sizeof *get); if (get == NULL) { rv = RAFT_NOMEM; goto err; } get->uv = uv; get->req = req; req->cb = cb; get->snapshot = HeapMalloc(sizeof *get->snapshot); if (get->snapshot == NULL) { rv = RAFT_NOMEM; goto err_after_req_alloc; } get->work.data = get; QUEUE_PUSH(&uv->snapshot_get_reqs, &get->queue); rv = uv_queue_work(uv->loop, &get->work, uvSnapshotGetWorkCb, uvSnapshotGetAfterWorkCb); if (rv != 0) { QUEUE_REMOVE(&get->queue); tracef("get last snapshot: %s", 
uv_strerror(rv)); rv = RAFT_IOERR; goto err_after_snapshot_alloc; } return 0; err_after_snapshot_alloc: HeapFree(get->snapshot); err_after_req_alloc: HeapFree(get); err: assert(rv != 0); return rv; } #undef tracef raft-0.11.3/src/uv_tcp.c000066400000000000000000000043161415614527300147770ustar00rootroot00000000000000#include "uv_tcp.h" #include #include "../include/raft.h" #include "../include/raft/uv.h" #include "assert.h" #include "err.h" /* Implementation of raft_uv_transport->init. */ static int uvTcpInit(struct raft_uv_transport *transport, raft_id id, const char *address) { struct UvTcp *t = transport->impl; int rv; assert(id > 0); assert(address != NULL); t->id = id; t->address = address; rv = uv_tcp_init(t->loop, &t->listener); if (rv != 0) { return rv; } t->listener.data = t; return 0; } /* Implementation of raft_uv_transport->close. */ static void uvTcpClose(struct raft_uv_transport *transport, raft_uv_transport_close_cb cb) { struct UvTcp *t = transport->impl; assert(!t->closing); t->closing = true; t->close_cb = cb; UvTcpListenClose(t); UvTcpConnectClose(t); } void UvTcpMaybeFireCloseCb(struct UvTcp *t) { if (!t->closing) { return; } assert(QUEUE_IS_EMPTY(&t->accepting)); assert(QUEUE_IS_EMPTY(&t->connecting)); if (t->listener.data != NULL) { return; } if (!QUEUE_IS_EMPTY(&t->aborting)) { return; } if (t->close_cb != NULL) { t->close_cb(t->transport); } } int raft_uv_tcp_init(struct raft_uv_transport *transport, struct uv_loop_s *loop) { struct UvTcp *t; void *data = transport->data; memset(transport, 0, sizeof *transport); transport->data = data; t = raft_malloc(sizeof *t); if (t == NULL) { ErrMsgOom(transport->errmsg); return RAFT_NOMEM; } t->transport = transport; t->loop = loop; t->id = 0; t->address = NULL; t->listener.data = NULL; t->accept_cb = NULL; QUEUE_INIT(&t->accepting); QUEUE_INIT(&t->connecting); QUEUE_INIT(&t->aborting); t->closing = false; t->close_cb = NULL; transport->impl = t; transport->init = uvTcpInit; transport->close = 
uvTcpClose; transport->listen = UvTcpListen; transport->connect = UvTcpConnect; return 0; } void raft_uv_tcp_close(struct raft_uv_transport *transport) { struct UvTcp *t = transport->impl; raft_free(t); } raft-0.11.3/src/uv_tcp.h000066400000000000000000000033331415614527300150020ustar00rootroot00000000000000#ifndef UV_TCP_H_ #define UV_TCP_H_ #include "../include/raft/uv.h" #include "queue.h" /* Protocol version. */ #define UV__TCP_HANDSHAKE_PROTOCOL 1 struct UvTcp { struct raft_uv_transport *transport; /* Interface object we implement */ struct uv_loop_s *loop; /* Event loop */ raft_id id; /* ID of this raft server */ const char *address; /* Address of this raft server */ struct uv_tcp_s listener; /* Listening TCP socket handle */ raft_uv_accept_cb accept_cb; /* Call after accepting a connection */ queue accepting; /* Connections being accepted */ queue connecting; /* Pending connection requests */ queue aborting; /* Connections being aborted */ bool closing; /* True after close() is called */ raft_uv_transport_close_cb close_cb; /* Call when it's safe to free us */ }; /* Implementation of raft_uv_transport->listen. */ int UvTcpListen(struct raft_uv_transport *transport, raft_uv_accept_cb cb); /* Stop accepting new connection and close all connections being accepted. */ void UvTcpListenClose(struct UvTcp *t); /* Implementation of raft_uv_transport->connect. */ int UvTcpConnect(struct raft_uv_transport *transport, struct raft_uv_connect *req, raft_id id, const char *address, raft_uv_connect_cb cb); /* Abort all pending connection requests. */ void UvTcpConnectClose(struct UvTcp *t); /* Fire the transport close callback if the transport is closing and there's no * more pending callback. 
*/ void UvTcpMaybeFireCloseCb(struct UvTcp *t); #endif /* UV_TCP_H_ */ raft-0.11.3/src/uv_tcp_connect.c000066400000000000000000000162171415614527300165130ustar00rootroot00000000000000#include #include "assert.h" #include "byte.h" #include "err.h" #include "heap.h" #include "uv_ip.h" #include "uv_tcp.h" /* The happy path of a connection request is: * * - Create a TCP handle and submit a TCP connect request. * - Once connected over TCP, submit a write request for the handshake. * - Once the write completes, fire the connection request callback. * * Possible failure modes are: * * - The transport get closed, close the TCP handle and and fire the request * callback with RAFT_CANCELED. * * - Either the TCP connect or the write request fails: close the TCP handle and * fire the request callback with RAFT_NOCONNECTION. */ /* Hold state for a single connection request. */ struct uvTcpConnect { struct UvTcp *t; /* Transport implementation */ struct raft_uv_connect *req; /* User request */ uv_buf_t handshake; /* Handshake data */ struct uv_tcp_s *tcp; /* TCP connection socket handle */ struct uv_connect_s connect; /* TCP connection request */ struct uv_write_s write; /* TCP handshake request */ int status; /* Returned to the request callback */ queue queue; /* Pending connect queue */ }; /* Encode an handshake message into the given buffer. */ static int uvTcpEncodeHandshake(raft_id id, const char *address, uv_buf_t *buf) { void *cursor; size_t address_len = bytePad64(strlen(address) + 1); buf->len = sizeof(uint64_t) + /* Protocol version. */ sizeof(uint64_t) + /* Server ID. */ sizeof(uint64_t) /* Size of the address buffer */; buf->len += address_len; buf->base = HeapMalloc(buf->len); if (buf->base == NULL) { return RAFT_NOMEM; } cursor = buf->base; bytePut64(&cursor, UV__TCP_HANDSHAKE_PROTOCOL); bytePut64(&cursor, id); bytePut64(&cursor, address_len); strcpy(cursor, address); return 0; } /* Finish the connect request, releasing its memory and firing the connect * callback. 
*/ static void uvTcpConnectFinish(struct uvTcpConnect *connect) { struct uv_stream_s *stream = (struct uv_stream_s *)connect->tcp; struct raft_uv_connect *req = connect->req; int status = connect->status; QUEUE_REMOVE(&connect->queue); HeapFree(connect->handshake.base); raft_free(connect); req->cb(req, stream, status); } /* The TCP connection handle has been closed in consequence of an error or * because the transport is closing. */ static void uvTcpConnectUvCloseCb(struct uv_handle_s *handle) { struct uvTcpConnect *connect = handle->data; struct UvTcp *t = connect->t; assert(connect->status != 0); assert(handle == (struct uv_handle_s *)connect->tcp); HeapFree(connect->tcp); connect->tcp = NULL; uvTcpConnectFinish(connect); UvTcpMaybeFireCloseCb(t); } /* Abort a connection request. */ static void uvTcpConnectAbort(struct uvTcpConnect *connect) { QUEUE_REMOVE(&connect->queue); QUEUE_PUSH(&connect->t->aborting, &connect->queue); uv_close((struct uv_handle_s *)connect->tcp, uvTcpConnectUvCloseCb); } /* The handshake TCP write completes. Fire the connect callback. */ static void uvTcpConnectUvWriteCb(struct uv_write_s *write, int status) { struct uvTcpConnect *connect = write->data; struct UvTcp *t = connect->t; if (t->closing) { connect->status = RAFT_CANCELED; return; } if (status != 0) { assert(status != UV_ECANCELED); /* t->closing would have been true */ connect->status = RAFT_NOCONNECTION; uvTcpConnectAbort(connect); return; } uvTcpConnectFinish(connect); } /* The TCP connection is established. Write the handshake data. 
*/ static void uvTcpConnectUvConnectCb(struct uv_connect_s *req, int status) { struct uvTcpConnect *connect = req->data; struct UvTcp *t = connect->t; int rv; if (t->closing) { connect->status = RAFT_CANCELED; return; } if (status != 0) { assert(status != UV_ECANCELED); /* t->closing would have been true */ connect->status = RAFT_NOCONNECTION; ErrMsgPrintf(t->transport->errmsg, "uv_tcp_connect(): %s", uv_strerror(status)); goto err; } rv = uv_write(&connect->write, (struct uv_stream_s *)connect->tcp, &connect->handshake, 1, uvTcpConnectUvWriteCb); if (rv != 0) { /* UNTESTED: what are the error conditions? perhaps ENOMEM */ connect->status = RAFT_NOCONNECTION; goto err; } return; err: uvTcpConnectAbort(connect); } /* Create a new TCP handle and submit a connection request to the event loop. */ static int uvTcpConnectStart(struct uvTcpConnect *r, const char *address) { struct UvTcp *t = r->t; struct sockaddr_in addr; int rv; rv = uvIpParse(address, &addr); if (rv != 0) { goto err; } /* Initialize the handshake buffer. 
*/ rv = uvTcpEncodeHandshake(t->id, t->address, &r->handshake); if (rv != 0) { assert(rv == RAFT_NOMEM); ErrMsgOom(r->t->transport->errmsg); goto err; } r->tcp = HeapMalloc(sizeof *r->tcp); if (r->tcp == NULL) { ErrMsgOom(t->transport->errmsg); rv = RAFT_NOMEM; goto err_after_encode_handshake; } rv = uv_tcp_init(r->t->loop, r->tcp); assert(rv == 0); r->tcp->data = r; rv = uv_tcp_connect(&r->connect, r->tcp, (struct sockaddr *)&addr, uvTcpConnectUvConnectCb); if (rv != 0) { /* UNTESTED: since parsing succeed, this should fail only because of * lack of system resources */ ErrMsgPrintf(t->transport->errmsg, "uv_tcp_connect(): %s", uv_strerror(rv)); rv = RAFT_NOCONNECTION; goto err_after_tcp_init; } return 0; err_after_tcp_init: uv_close((uv_handle_t *)r->tcp, (uv_close_cb)HeapFree); err_after_encode_handshake: HeapFree(r->handshake.base); err: return rv; } int UvTcpConnect(struct raft_uv_transport *transport, struct raft_uv_connect *req, raft_id id, const char *address, raft_uv_connect_cb cb) { struct UvTcp *t = transport->impl; struct uvTcpConnect *r; int rv; (void)id; assert(!t->closing); /* Create and initialize a new TCP connection request object */ r = HeapMalloc(sizeof *r); if (r == NULL) { rv = RAFT_NOMEM; ErrMsgOom(transport->errmsg); goto err; } r->t = t; r->req = req; r->status = 0; r->write.data = r; r->connect.data = r; req->cb = cb; /* Keep track of the pending request */ QUEUE_PUSH(&t->connecting, &r->queue); /* Start connecting */ rv = uvTcpConnectStart(r, address); if (rv != 0) { goto err_after_alloc; } return 0; err_after_alloc: QUEUE_REMOVE(&r->queue); HeapFree(r); err: return rv; } void UvTcpConnectClose(struct UvTcp *t) { while (!QUEUE_IS_EMPTY(&t->connecting)) { struct uvTcpConnect *connect; queue *head; head = QUEUE_HEAD(&t->connecting); connect = QUEUE_DATA(head, struct uvTcpConnect, queue); uvTcpConnectAbort(connect); } } raft-0.11.3/src/uv_tcp_listen.c000066400000000000000000000233541415614527300163600ustar00rootroot00000000000000#include 
#include "assert.h" #include "byte.h" #include "heap.h" #include "uv_ip.h" #include "uv_tcp.h" /* The happy path of an incoming connection is: * * - The connection callback is fired on the listener TCP handle, and the * incoming connection is uv_accept()'ed. We call uv_read_start() to get * notified about received handshake data. * * - Once the preamble is received, we start waiting for the server address. * * - Once the server address is received, we fire the receive callback. * * Possible failure modes are: * * - The accept process gets canceled in the transport->close() implementation, * by calling tcp_accept_stop(): the incoming TCP connection handle gets * closed, preventing any further handshake data notification, and all * allocated memory gets released in the handle close callback. */ /* Hold state for a connection being accepted. */ struct uvTcpHandshake { uint64_t preamble[3]; /* Preamble buffer */ uv_buf_t address; /* Address buffer */ size_t nread; /* Number of bytes read */ }; /* Hold handshake data for a new connection being established. */ struct uvTcpIncoming { struct UvTcp *t; /* Transport implementation */ struct uv_tcp_s *tcp; /* TCP connection socket handle */ struct uvTcpHandshake handshake; /* Handshake data */ queue queue; /* Pending accept queue */ }; /* Decode the handshake preamble, containing the protocol version, the ID of the * connecting server and the length of its address. Also, allocate the buffer to * start reading the server address. */ static int uvTcpDecodePreamble(struct uvTcpHandshake *h) { uint64_t protocol; protocol = byteFlip64(h->preamble[0]); if (protocol != UV__TCP_HANDSHAKE_PROTOCOL) { return RAFT_MALFORMED; } h->address.len = (size_t)byteFlip64(h->preamble[2]); h->address.base = HeapMalloc(h->address.len); if (h->address.base == NULL) { return RAFT_NOMEM; } h->nread = 0; return 0; } /* The accepted TCP client connection has been closed, release all memory * associated with accept object. 
We can get here only if an error occurrent * during the handshake or if raft_uv_transport->close() has been invoked. */ static void uvTcpIncomingCloseCb(struct uv_handle_s *handle) { struct uvTcpIncoming *incoming = handle->data; struct UvTcp *t = incoming->t; QUEUE_REMOVE(&incoming->queue); if (incoming->handshake.address.base != NULL) { HeapFree(incoming->handshake.address.base); } HeapFree(incoming->tcp); HeapFree(incoming); UvTcpMaybeFireCloseCb(t); } /* Close an incoming TCP connection which hasn't complete the handshake yet. */ static void uvTcpIncomingAbort(struct uvTcpIncoming *incoming) { struct UvTcp *t = incoming->t; /* After uv_close() returns we are guaranteed that no more alloc_cb or * read_cb will be called. */ QUEUE_REMOVE(&incoming->queue); QUEUE_PUSH(&t->aborting, &incoming->queue); uv_close((struct uv_handle_s *)incoming->tcp, uvTcpIncomingCloseCb); } /* Read the address part of the handshake. */ static void uvTcpIncomingAllocCbAddress(struct uv_handle_s *handle, size_t suggested_size, uv_buf_t *buf) { struct uvTcpIncoming *incoming = handle->data; (void)suggested_size; assert(!incoming->t->closing); buf->base = incoming->handshake.address.base + incoming->handshake.nread; buf->len = incoming->handshake.address.len - incoming->handshake.nread; } static void uvTcpIncomingReadCbAddress(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { struct uvTcpIncoming *incoming = stream->data; char *address; raft_id id; size_t n; int rv; (void)buf; assert(!incoming->t->closing); if (nread == 0) { /* Empty read just ignore it. */ return; } if (nread < 0) { uvTcpIncomingAbort(incoming); return; } /* We shouldn't have read more data than the pending amount. */ n = (size_t)nread; assert(n <= incoming->handshake.address.len - incoming->handshake.nread); /* Advance the read window */ incoming->handshake.nread += n; /* If there's more data to read in order to fill the current * read buffer, just return, we'll be invoked again. 
*/ if (incoming->handshake.nread < incoming->handshake.address.len) { return; } /* If we have completed reading the address, let's fire the callback. */ rv = uv_read_stop(stream); assert(rv == 0); id = byteFlip64(incoming->handshake.preamble[1]); address = incoming->handshake.address.base; QUEUE_REMOVE(&incoming->queue); incoming->t->accept_cb(incoming->t->transport, id, address, (struct uv_stream_s *)incoming->tcp); HeapFree(incoming->handshake.address.base); HeapFree(incoming); } /* Read the preamble of the handshake. */ static void uvTcpIncomingAllocCbPreamble(struct uv_handle_s *handle, size_t suggested_size, uv_buf_t *buf) { struct uvTcpIncoming *incoming = handle->data; (void)suggested_size; buf->base = (char *)incoming->handshake.preamble + incoming->handshake.nread; buf->len = sizeof incoming->handshake.preamble - incoming->handshake.nread; } static void uvTcpIncomingReadCbPreamble(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { struct uvTcpIncoming *incoming = stream->data; size_t n; int rv; (void)buf; if (nread == 0) { /* Empty read just ignore it. */ return; } if (nread < 0) { uvTcpIncomingAbort(incoming); return; } /* We shouldn't have read more data than the pending amount. */ n = (size_t)nread; assert(n <= sizeof incoming->handshake.preamble - incoming->handshake.nread); /* Advance the read window */ incoming->handshake.nread += n; /* If there's more data to read in order to fill the current * read buffer, just return, we'll be invoked again. */ if (incoming->handshake.nread < sizeof incoming->handshake.preamble) { return; } /* If we have completed reading the preamble, let's parse it. */ rv = uvTcpDecodePreamble(&incoming->handshake); if (rv != 0) { uvTcpIncomingAbort(incoming); return; } rv = uv_read_stop(stream); assert(rv == 0); rv = uv_read_start((uv_stream_t *)incoming->tcp, uvTcpIncomingAllocCbAddress, uvTcpIncomingReadCbAddress); assert(rv == 0); } /* Start reading handshake data for a new incoming connection. 
*/ static int uvTcpIncomingStart(struct uvTcpIncoming *incoming) { int rv; memset(&incoming->handshake, 0, sizeof incoming->handshake); incoming->tcp = HeapMalloc(sizeof *incoming->tcp); if (incoming->tcp == NULL) { return RAFT_NOMEM; } incoming->tcp->data = incoming; rv = uv_tcp_init(incoming->t->loop, incoming->tcp); assert(rv == 0); rv = uv_accept((struct uv_stream_s *)&incoming->t->listener, (struct uv_stream_s *)incoming->tcp); if (rv != 0) { rv = RAFT_IOERR; goto err_after_tcp_init; } rv = uv_read_start((uv_stream_t *)incoming->tcp, uvTcpIncomingAllocCbPreamble, uvTcpIncomingReadCbPreamble); assert(rv == 0); return 0; err_after_tcp_init: uv_close((uv_handle_t *)incoming->tcp, (uv_close_cb)HeapFree); return rv; } /* Called when there's a new incoming connection: create a new tcp_accept object * and start receiving handshake data. */ static void uvTcpListenCb(struct uv_stream_s *stream, int status) { struct UvTcp *t = stream->data; struct uvTcpIncoming *incoming; int rv; assert(stream == (struct uv_stream_s *)&t->listener); if (status != 0) { rv = RAFT_IOERR; goto err; } incoming = HeapMalloc(sizeof *incoming); if (incoming == NULL) { rv = RAFT_NOMEM; goto err; } incoming->t = t; QUEUE_PUSH(&t->accepting, &incoming->queue); rv = uvTcpIncomingStart(incoming); if (rv != 0) { goto err_after_accept_alloc; } return; err_after_accept_alloc: QUEUE_REMOVE(&incoming->queue); HeapFree(incoming); err: assert(rv != 0); } int UvTcpListen(struct raft_uv_transport *transport, raft_uv_accept_cb cb) { struct UvTcp *t; struct sockaddr_in addr; int rv; t = transport->impl; t->accept_cb = cb; rv = uvIpParse(t->address, &addr); if (rv != 0) { return rv; } rv = uv_tcp_bind(&t->listener, (const struct sockaddr *)&addr, 0); if (rv != 0) { /* UNTESTED: what are the error conditions? */ return RAFT_IOERR; } rv = uv_listen((uv_stream_t *)&t->listener, 1, uvTcpListenCb); if (rv != 0) { /* UNTESTED: what are the error conditions? 
*/ return RAFT_IOERR; } return 0; } /* Close callback for uvTcp->listener. */ static void uvTcpListenCloseCbListener(struct uv_handle_s *handle) { struct UvTcp *t = handle->data; assert(t->closing); t->listener.data = NULL; UvTcpMaybeFireCloseCb(t); } void UvTcpListenClose(struct UvTcp *t) { queue *head; assert(t->closing); assert(t->listener.data != NULL); while (!QUEUE_IS_EMPTY(&t->accepting)) { struct uvTcpIncoming *incoming; head = QUEUE_HEAD(&t->accepting); incoming = QUEUE_DATA(head, struct uvTcpIncoming, queue); uvTcpIncomingAbort(incoming); } uv_close((struct uv_handle_s *)&t->listener, uvTcpListenCloseCbListener); } raft-0.11.3/src/uv_truncate.c000066400000000000000000000106631415614527300160400ustar00rootroot00000000000000#include #include #include "assert.h" #include "byte.h" #include "heap.h" #include "uv.h" #include "uv_encoding.h" #define tracef(...) Tracef(uv->tracer, __VA_ARGS__) /* Track a truncate request. */ struct uvTruncate { struct uv *uv; struct UvBarrier barrier; raft_index index; int status; }; /* Execute a truncate request in a thread. */ static void uvTruncateWorkCb(uv_work_t *work) { struct uvTruncate *truncate = work->data; struct uv *uv = truncate->uv; struct uvSnapshotInfo *snapshots; struct uvSegmentInfo *segments; struct uvSegmentInfo *segment; size_t n_snapshots; size_t n_segments; size_t i; size_t j; char errmsg[RAFT_ERRMSG_BUF_SIZE]; int rv; /* Load all segments on disk. */ rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments, errmsg); if (rv != 0) { goto err; } if (snapshots != NULL) { HeapFree(snapshots); } assert(segments != NULL); /* Find the segment that contains the truncate point. */ segment = NULL; /* Suppress warnings. 
*/ for (i = 0; i < n_segments; i++) { segment = &segments[i]; if (segment->is_open) { continue; } if (truncate->index >= segment->first_index && truncate->index <= segment->end_index) { break; } } assert(i < n_segments); /* If the truncate index is not the first of the segment, we need to * truncate it. */ if (truncate->index > segment->first_index) { rv = uvSegmentTruncate(uv, segment, truncate->index); if (rv != 0) { goto err_after_list; } } /* Remove all closed segments past the one containing the truncate index. */ for (j = i; j < n_segments; j++) { segment = &segments[j]; if (segment->is_open) { continue; } rv = UvFsRemoveFile(uv->dir, segment->filename, errmsg); if (rv != 0) { tracef("unlink segment %s: %s", segment->filename, errmsg); rv = RAFT_IOERR; goto err_after_list; } } rv = UvFsSyncDir(uv->dir, errmsg); if (rv != 0) { tracef("sync data directory: %s", errmsg); rv = RAFT_IOERR; goto err_after_list; } HeapFree(segments); truncate->status = 0; return; err_after_list: HeapFree(segments); err: assert(rv != 0); truncate->status = rv; } static void uvTruncateAfterWorkCb(uv_work_t *work, int status) { struct uvTruncate *truncate = work->data; struct uv *uv = truncate->uv; assert(status == 0); if (truncate->status != 0) { uv->errored = true; } uv->truncate_work.data = NULL; HeapFree(truncate); UvUnblock(uv); } static void uvTruncateBarrierCb(struct UvBarrier *barrier) { struct uvTruncate *truncate = barrier->data; struct uv *uv = truncate->uv; int rv; /* If we're closing, don't perform truncation at all and abort here. 
*/ if (uv->closing) { HeapFree(truncate); return; } assert(QUEUE_IS_EMPTY(&uv->append_writing_reqs)); assert(QUEUE_IS_EMPTY(&uv->finalize_reqs)); assert(uv->finalize_work.data == NULL); assert(uv->truncate_work.data == NULL); uv->truncate_work.data = truncate; rv = uv_queue_work(uv->loop, &uv->truncate_work, uvTruncateWorkCb, uvTruncateAfterWorkCb); if (rv != 0) { tracef("truncate index %lld: %s", truncate->index, uv_strerror(rv)); uv->truncate_work.data = NULL; uv->errored = true; } } int UvTruncate(struct raft_io *io, raft_index index) { struct uv *uv; struct uvTruncate *truncate; int rv; uv = io->impl; assert(!uv->closing); /* We should truncate only entries that we were requested to append in the * first place. */ assert(index > 0); assert(index < uv->append_next_index); truncate = HeapMalloc(sizeof *truncate); if (truncate == NULL) { rv = RAFT_NOMEM; goto err; } truncate->uv = uv; truncate->index = index; truncate->barrier.data = truncate; /* Make sure that we wait for any inflight writes to finish and then close * the current segment. */ rv = UvBarrier(uv, index, &truncate->barrier, uvTruncateBarrierCb); if (rv != 0) { goto err_after_req_alloc; } return 0; err_after_req_alloc: HeapFree(truncate); err: assert(rv != 0); return rv; } #undef tracef raft-0.11.3/src/uv_writer.c000066400000000000000000000373571415614527300155400ustar00rootroot00000000000000#include "uv_writer.h" #include #include #include "../include/raft.h" #include "assert.h" #include "heap.h" /* Copy the error message from the request object to the writer object. */ static void uvWriterReqTransferErrMsg(struct UvWriterReq *req) { ErrMsgPrintf(req->writer->errmsg, "%s", req->errmsg); } /* Set the request status according the given result code. 
*/ static void uvWriterReqSetStatus(struct UvWriterReq *req, int result) { if (result < 0) { ErrMsgPrintf(req->errmsg, "write failed: %d", result); req->status = RAFT_IOERR; } else if ((size_t)result < req->len) { ErrMsgPrintf(req->errmsg, "short write: %d bytes instead of %zu", result, req->len); req->status = RAFT_NOSPACE; } else { req->status = 0; } } /* Remove the request from the queue of inflight writes and invoke the request * callback if set. */ static void uvWriterReqFinish(struct UvWriterReq *req) { QUEUE_REMOVE(&req->queue); if (req->status != 0) { uvWriterReqTransferErrMsg(req); } req->cb(req, req->status); } /* Wrapper around the low-level OS syscall, providing a better error message. */ static int uvWriterIoSetup(unsigned n, aio_context_t *ctx, char *errmsg) { int rv; rv = UvOsIoSetup(n, ctx); if (rv != 0) { switch (rv) { case UV_EAGAIN: ErrMsgPrintf(errmsg, "AIO events user limit exceeded"); rv = RAFT_TOOMANY; break; default: UvOsErrMsg(errmsg, "io_setup", rv); rv = RAFT_IOERR; break; } return rv; } return 0; } /* Run blocking syscalls involved in a file write request. * * Perform a KAIO write request and synchronously wait for it to complete. */ static void uvWriterWorkCb(uv_work_t *work) { struct UvWriterReq *req; /* Writer request object */ struct UvWriter *w; /* Writer object */ aio_context_t ctx; /* KAIO handle */ struct iocb *iocbs; /* Pointer to KAIO request object */ struct io_event event; /* KAIO response object */ int n_events; int rv; req = work->data; w = req->writer; iocbs = &req->iocb; /* If more than one write in parallel is allowed, submit the AIO request * using a dedicated context, to avoid synchronization issues between * threads when multiple writes are submitted in parallel. This is * suboptimal but in real-world users should use file systems and kernels * with proper async write support. 
*/ if (w->n_events > 1) { ctx = 0; rv = uvWriterIoSetup(1 /* Maximum concurrent requests */, &ctx, req->errmsg); if (rv != 0) { goto out; } } else { ctx = w->ctx; } /* Submit the request */ rv = UvOsIoSubmit(ctx, 1, &iocbs); if (rv != 0) { /* UNTESTED: since we're not using NOWAIT and the parameters are valid, * this shouldn't fail. */ UvOsErrMsg(req->errmsg, "io_submit", rv); rv = RAFT_IOERR; goto out_after_io_setup; } /* Wait for the request to complete */ n_events = UvOsIoGetevents(ctx, 1, 1, &event, NULL); assert(n_events == 1); if (n_events != 1) { /* UNTESTED */ rv = n_events >= 0 ? -1 : n_events; } out_after_io_setup: if (w->n_events > 1) { UvOsIoDestroy(ctx); } out: if (rv != 0) { req->status = rv; } else { uvWriterReqSetStatus(req, (int)event.res); } return; } /* Callback run after writeWorkCb has returned. It normally invokes the write * request callback. */ static void uvWriterAfterWorkCb(uv_work_t *work, int status) { struct UvWriterReq *req = work->data; /* Write file request object */ assert(status == 0); /* We don't cancel worker requests */ uvWriterReqFinish(req); } /* Callback fired when the event fd associated with AIO write requests should be * ready for reading (i.e. when a write has completed). */ static void uvWriterPollCb(uv_poll_t *poller, int status, int events) { struct UvWriter *w = poller->data; uint64_t completed; /* True if the write is complete */ unsigned i; int n_events; int rv; assert(w->event_fd >= 0); assert(status == 0); if (status != 0) { /* UNTESTED libuv docs: If an error happens while polling, status will be < 0 and * corresponds with one of the UV_E* error codes. */ goto fail_requests; } assert(events & UV_READABLE); /* Read the event file descriptor */ rv = (int)read(w->event_fd, &completed, sizeof completed); if (rv != sizeof completed) { /* UNTESTED: According to eventfd(2) this is the only possible failure * mode, meaning that epoll has indicated that the event FD is not yet * ready. 
*/ assert(errno == EAGAIN); return; } /* TODO: this assertion fails in unit tests */ /* assert(completed == 1); */ /* Try to fetch the write responses. * * If we got here at least one write should have completed and io_events * should return immediately without blocking. */ n_events = UvOsIoGetevents(w->ctx, 1, (long int)w->n_events, w->events, NULL); assert(n_events >= 1); if (n_events < 1) { /* UNTESTED */ status = n_events == 0 ? -1 : n_events; goto fail_requests; } for (i = 0; i < (unsigned)n_events; i++) { struct io_event *event = &w->events[i]; struct UvWriterReq *req = *((void **)&event->data); #if defined(RWF_NOWAIT) /* If we got EAGAIN, it means it was not possible to perform the write * asynchronously, so let's fall back to the threadpool. */ if (event->res == -EAGAIN) { req->iocb.aio_flags &= (unsigned)~IOCB_FLAG_RESFD; req->iocb.aio_resfd = 0; req->iocb.aio_rw_flags &= ~RWF_NOWAIT; assert(req->work.data == NULL); req->work.data = req; rv = uv_queue_work(w->loop, &req->work, uvWriterWorkCb, uvWriterAfterWorkCb); if (rv != 0) { /* UNTESTED: with the current libuv implementation this should * never fail. 
*/ UvOsErrMsg(req->errmsg, "uv_queue_work", rv); req->status = RAFT_IOERR; goto finish; } return; } #endif /* RWF_NOWAIT */ uvWriterReqSetStatus(req, (int)event->res); #if defined(RWF_NOWAIT) finish: #endif /* RWF_NOWAIT */ uvWriterReqFinish(req); } return; fail_requests: while (!QUEUE_IS_EMPTY(&w->poll_queue)) { queue *head; struct UvWriterReq *req; head = QUEUE_HEAD(&w->poll_queue); req = QUEUE_DATA(head, struct UvWriterReq, queue); uvWriterReqSetStatus(req, status); uvWriterReqFinish(req); } } int UvWriterInit(struct UvWriter *w, struct uv_loop_s *loop, uv_file fd, bool direct /* Whether to use direct I/O */, bool async /* Whether async I/O is available */, unsigned max_concurrent_writes, char *errmsg) { void *data = w->data; int rv = 0; memset(w, 0, sizeof *w); w->data = data; w->loop = loop; w->fd = fd; w->async = async; w->ctx = 0; w->events = NULL; w->n_events = max_concurrent_writes; w->event_fd = -1; w->event_poller.data = NULL; w->check.data = NULL; w->close_cb = NULL; QUEUE_INIT(&w->poll_queue); QUEUE_INIT(&w->work_queue); w->closing = false; w->errmsg = errmsg; /* Set direct I/O if available. */ if (direct) { rv = UvOsSetDirectIo(w->fd); if (rv != 0) { UvOsErrMsg(errmsg, "fcntl", rv); goto err; } } /* Setup the AIO context. */ rv = uvWriterIoSetup(w->n_events, &w->ctx, errmsg); if (rv != 0) { goto err; } /* Initialize the array of re-usable event objects. */ w->events = HeapCalloc(w->n_events, sizeof *w->events); if (w->events == NULL) { /* UNTESTED: todo */ ErrMsgOom(errmsg); rv = RAFT_NOMEM; goto err_after_io_setup; } /* Create an event file descriptor to get notified when a write has * completed. */ rv = UvOsEventfd(0, UV_FS_O_NONBLOCK); if (rv < 0) { /* UNTESTED: should fail only with ENOMEM */ UvOsErrMsg(errmsg, "eventfd", rv); rv = RAFT_IOERR; goto err_after_events_alloc; } w->event_fd = rv; rv = uv_poll_init(loop, &w->event_poller, w->event_fd); if (rv != 0) { /* UNTESTED: with the current libuv implementation this should never * fail. 
*/ UvOsErrMsg(errmsg, "uv_poll_init", rv); rv = RAFT_IOERR; goto err_after_event_fd; } w->event_poller.data = w; rv = uv_check_init(loop, &w->check); if (rv != 0) { /* UNTESTED: with the current libuv implementation this should never * fail. */ UvOsErrMsg(errmsg, "uv_check_init", rv); rv = RAFT_IOERR; goto err_after_event_fd; } w->check.data = w; rv = uv_poll_start(&w->event_poller, UV_READABLE, uvWriterPollCb); if (rv != 0) { /* UNTESTED: with the current libuv implementation this should never * fail. */ UvOsErrMsg(errmsg, "uv_poll_start", rv); rv = RAFT_IOERR; goto err_after_event_fd; } return 0; err_after_event_fd: UvOsClose(w->event_fd); err_after_events_alloc: HeapFree(w->events); err_after_io_setup: UvOsIoDestroy(w->ctx); err: assert(rv != 0); return rv; } static void uvWriterCleanUpAndFireCloseCb(struct UvWriter *w) { assert(w->closing); UvOsClose(w->fd); HeapFree(w->events); UvOsIoDestroy(w->ctx); if (w->close_cb != NULL) { w->close_cb(w); } } static void uvWriterPollerCloseCb(struct uv_handle_s *handle) { struct UvWriter *w = handle->data; w->event_poller.data = NULL; /* Cancel all pending requests. 
*/ while (!QUEUE_IS_EMPTY(&w->poll_queue)) { queue *head; struct UvWriterReq *req; head = QUEUE_HEAD(&w->poll_queue); req = QUEUE_DATA(head, struct UvWriterReq, queue); assert(req->work.data == NULL); req->status = RAFT_CANCELED; uvWriterReqFinish(req); } if (w->check.data != NULL) { return; } uvWriterCleanUpAndFireCloseCb(w); } static void uvWriterCheckCloseCb(struct uv_handle_s *handle) { struct UvWriter *w = handle->data; w->check.data = NULL; if (w->event_poller.data != NULL) { return; } uvWriterCleanUpAndFireCloseCb(w); } static void uvWriterCheckCb(struct uv_check_s *check) { struct UvWriter *w = check->data; if (!QUEUE_IS_EMPTY(&w->work_queue)) { return; } uv_close((struct uv_handle_s *)&w->check, uvWriterCheckCloseCb); } void UvWriterClose(struct UvWriter *w, UvWriterCloseCb cb) { int rv; assert(!w->closing); w->closing = true; w->close_cb = cb; /* We can close the event file descriptor right away, but we shouldn't close * the main file descriptor or destroy the AIO context since there might be * threadpool requests in flight. */ UvOsClose(w->event_fd); rv = uv_poll_stop(&w->event_poller); assert(rv == 0); /* Can this ever fail? */ uv_close((struct uv_handle_s *)&w->event_poller, uvWriterPollerCloseCb); /* If we have requests executing in the threadpool, we need to wait for * them. That's done in the check callback. */ if (!QUEUE_IS_EMPTY(&w->work_queue)) { uv_check_start(&w->check, uvWriterCheckCb); } else { uv_close((struct uv_handle_s *)&w->check, uvWriterCheckCloseCb); } } /* Return the total lengths of the given buffers. 
*/ static size_t lenOfBufs(const uv_buf_t bufs[], unsigned n) { size_t len = 0; unsigned i; for (i = 0; i < n; i++) { len += bufs[i].len; } return len; } int UvWriterSubmit(struct UvWriter *w, struct UvWriterReq *req, const uv_buf_t bufs[], unsigned n, size_t offset, UvWriterReqCb cb) { int rv = 0; #if defined(RWF_NOWAIT) struct iocb *iocbs = &req->iocb; #endif /* RWF_NOWAIT */ assert(!w->closing); /* TODO: at the moment we are not leveraging the support for concurrent * writes, so ensure that we're getting write requests * sequentially. */ if (w->n_events == 1) { assert(QUEUE_IS_EMPTY(&w->poll_queue)); assert(QUEUE_IS_EMPTY(&w->work_queue)); } assert(w->fd >= 0); assert(w->event_fd >= 0); assert(w->ctx != 0); assert(req != NULL); assert(bufs != NULL); assert(n > 0); req->writer = w; req->len = lenOfBufs(bufs, n); req->status = -1; req->work.data = NULL; req->cb = cb; memset(&req->iocb, 0, sizeof req->iocb); memset(req->errmsg, 0, sizeof req->errmsg); req->iocb.aio_fildes = (uint32_t)w->fd; req->iocb.aio_lio_opcode = IOCB_CMD_PWRITEV; req->iocb.aio_reqprio = 0; *((void **)(&req->iocb.aio_buf)) = (void *)bufs; req->iocb.aio_nbytes = n; req->iocb.aio_offset = (int64_t)offset; *((void **)(&req->iocb.aio_data)) = (void *)req; #if defined(RWF_HIPRI) /* High priority request, if possible */ /* TODO: do proper kernel feature detection for this one. */ /* req->iocb.aio_rw_flags |= RWF_HIPRI; */ #endif #if defined(RWF_DSYNC) /* Use per-request synchronous I/O if available. Otherwise, we have opened * the file with O_DSYNC. */ /* TODO: do proper kernel feature detection for this one. */ /* req->iocb.aio_rw_flags |= RWF_DSYNC; */ #endif #if defined(RWF_NOWAIT) /* If io_submit can be run in a 100% non-blocking way, we'll try to write * without using the threadpool. 
*/ if (w->async) { req->iocb.aio_flags |= IOCB_FLAG_RESFD; req->iocb.aio_resfd = (uint32_t)w->event_fd; req->iocb.aio_rw_flags |= RWF_NOWAIT; } #else /* Since there's no support for NOWAIT, io_submit might occasionally block * and we need to run it in the threadpool. */ assert(w->async == false); #endif /* RWF_NOWAIT */ #if defined(RWF_NOWAIT) /* Try to submit the write request asynchronously */ if (w->async) { QUEUE_PUSH(&w->poll_queue, &req->queue); rv = UvOsIoSubmit(w->ctx, 1, &iocbs); /* If no error occurred, we're done, the write request was * submitted. */ if (rv == 0) { goto done; } QUEUE_REMOVE(&req->queue); /* Check the reason of the error. */ switch (rv) { case UV_EAGAIN: break; default: /* Unexpected error */ UvOsErrMsg(w->errmsg, "io_submit", rv); rv = RAFT_IOERR; goto err; } /* Submitting the write would block, or NOWAIT is not * supported. Let's run this request in the threadpool. */ req->iocb.aio_flags &= (unsigned)~IOCB_FLAG_RESFD; req->iocb.aio_resfd = 0; req->iocb.aio_rw_flags &= ~RWF_NOWAIT; } #endif /* RWF_NOWAIT */ /* If we got here it means we need to run io_submit in the threadpool. */ QUEUE_PUSH(&w->work_queue, &req->queue); req->work.data = req; rv = uv_queue_work(w->loop, &req->work, uvWriterWorkCb, uvWriterAfterWorkCb); if (rv != 0) { /* UNTESTED: with the current libuv implementation this can't fail. */ req->work.data = NULL; QUEUE_REMOVE(&req->queue); UvOsErrMsg(w->errmsg, "uv_queue_work", rv); rv = RAFT_IOERR; goto err; } #if defined(RWF_NOWAIT) done: #endif /* RWF_NOWAIT */ return 0; err: assert(rv != 0); return rv; } raft-0.11.3/src/uv_writer.h000066400000000000000000000056151415614527300155350ustar00rootroot00000000000000/* Asynchronous API to write a file. */ #ifndef UV_WRITER_H_ #define UV_WRITER_H_ #include #include "err.h" #include "queue.h" #include "uv_os.h" /* Perform asynchronous writes to a single file. */ struct UvWriter; /* Callback called after the memory associated with a file handle can be * released. 
*/ typedef void (*UvWriterCloseCb)(struct UvWriter *w); struct UvWriter { void *data; /* User data */ struct uv_loop_s *loop; /* Event loop */ uv_file fd; /* File handle */ bool async; /* Whether fully async I/O is supported */ aio_context_t ctx; /* KAIO handle */ struct io_event *events; /* Array of KAIO response objects */ unsigned n_events; /* Length of the events array */ int event_fd; /* Poll'ed to check if write is finished */ struct uv_poll_s event_poller; /* Poll event_fd for completed poll requests */ struct uv_check_s check; /* Check for completed threadpool requests */ UvWriterCloseCb close_cb; /* Close callback */ queue poll_queue; /* Pollable write requests */ queue work_queue; /* Threadpool write requests */ bool closing; /* Whether we're closing or closed */ char *errmsg; /* Description of last error */ }; /* Initialize a file writer. */ int UvWriterInit(struct UvWriter *w, struct uv_loop_s *loop, uv_file fd, bool direct /* Whether to use direct I/O */, bool async /* Whether async I/O is available */, unsigned max_concurrent_writes, char *errmsg); /* Close the given file and release all associated resources. */ void UvWriterClose(struct UvWriter *w, UvWriterCloseCb cb); /* Write request. */ struct UvWriterReq; /* Callback called after a write request has been completed. */ typedef void (*UvWriterReqCb)(struct UvWriterReq *req, int status); struct UvWriterReq { void *data; /* User data */ struct UvWriter *writer; /* Originating writer */ size_t len; /* Total number of bytes to write */ int status; /* Request result code */ struct uv_work_s work; /* To execute logic in the threadpool */ UvWriterReqCb cb; /* Callback to invoke upon request completion */ struct iocb iocb; /* KAIO request (for writing) */ char errmsg[256]; /* Error description (for thread-safety) */ queue queue; /* Prev/next links in the inflight queue */ }; /* Asynchronously write data to the underlying file. 
*/ int UvWriterSubmit(struct UvWriter *w, struct UvWriterReq *req, const uv_buf_t bufs[], unsigned n, size_t offset, UvWriterReqCb cb); #endif /* UV_WRITER_H_ */ raft-0.11.3/test/000077500000000000000000000000001415614527300135175ustar00rootroot00000000000000raft-0.11.3/test/fuzzy/000077500000000000000000000000001415614527300147065ustar00rootroot00000000000000raft-0.11.3/test/fuzzy/main_core.c000066400000000000000000000004531415614527300170100ustar00rootroot00000000000000#include "../lib/runner.h" MunitSuite _main_suites[64]; int _main_suites_n = 0; /* Test runner executable */ int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc)]) { MunitSuite suite = {(char *)"", NULL, _main_suites, 1, 0}; return munit_suite_main(&suite, (void *)"unit", argc, argv); } raft-0.11.3/test/fuzzy/test_election.c000066400000000000000000000036401415614527300177160ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static char *cluster_n[] = {"3", "5", "7", NULL}; static char *cluster_pre_vote[] = {"0", "1", NULL}; static MunitParameterEnum _params[] = { {CLUSTER_N_PARAM, cluster_n}, {CLUSTER_PRE_VOTE_PARAM, cluster_pre_vote}, {NULL, NULL}, }; static void *setup(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); unsigned i; SETUP_CLUSTER(0); for (i = 0; i < CLUSTER_N; i++) { raft_set_pre_vote(CLUSTER_RAFT(i), true); } CLUSTER_BOOTSTRAP; CLUSTER_RANDOMIZE; CLUSTER_START; return f; } static void tear_down(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Tests * *****************************************************************************/ SUITE(election) /* A leader is eventually elected */ 
TEST(election, win, setup, tear_down, 0, _params) { struct fixture *f = data; CLUSTER_STEP_UNTIL_HAS_LEADER(10000); return MUNIT_OK; } /* A new leader is elected if the current one dies. */ TEST(election, change, setup, tear_down, 0, _params) { struct fixture *f = data; CLUSTER_STEP_UNTIL_HAS_LEADER(10000); CLUSTER_KILL_LEADER; CLUSTER_STEP_UNTIL_HAS_NO_LEADER(10000); CLUSTER_STEP_UNTIL_HAS_LEADER(20000); return MUNIT_OK; } /* If no majority of servers is online, no leader is elected. */ TEST(election, noQuorum, setup, tear_down, 0, _params) { struct fixture *f = data; CLUSTER_KILL_MAJORITY; CLUSTER_STEP_UNTIL_ELAPSED(30000); munit_assert_false(CLUSTER_HAS_LEADER); return MUNIT_OK; } raft-0.11.3/test/fuzzy/test_liveness.c000066400000000000000000000100741415614527300177430ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ /* Maximum number of cluster loop iterations each test should perform. */ #define MAX_ITERATIONS 25000 /* Maximum number of cluster loop iterations a pair of servers should stay * disconnected. */ #define MAX_DISCONNECT 150 struct disconnection { unsigned id1; unsigned id2; int start; int duration; }; struct fixture { FIXTURE_CLUSTER; struct disconnection *disconnections; }; static char *cluster_n[] = {"3", NULL}; static char *cluster_pre_vote[] = {"0", "1", NULL}; static MunitParameterEnum _params[] = { {CLUSTER_N_PARAM, cluster_n}, {CLUSTER_PRE_VOTE_PARAM, cluster_pre_vote}, {NULL, NULL}, }; /* Return the number of distinct server pairs in the cluster. */ static int __server_pairs(struct fixture *f) { return CLUSTER_N * (CLUSTER_N - 1) / 2; } /* Update the cluster connectivity for the given iteration. 
*/ static void __update_connectivity(struct fixture *f, int i) { int p; int pairs = __server_pairs(f); for (p = 0; p < pairs; p++) { struct disconnection *disconnection = &f->disconnections[p]; unsigned id1 = disconnection->id1; unsigned id2 = disconnection->id2; if (disconnection->start == 0) { /* Decide whether to disconnect this pair. */ if (munit_rand_int_range(1, 10) <= 1) { disconnection->start = i; disconnection->duration = munit_rand_int_range(50, MAX_DISCONNECT); raft_fixture_saturate(&f->cluster, id1 - 1, id2 - 1); raft_fixture_saturate(&f->cluster, id2 - 1, id1 - 1); } } else { /* Decide whether to reconnect this pair. */ if (i - disconnection->start > disconnection->duration) { raft_fixture_desaturate(&f->cluster, id1 - 1, id2 - 1); raft_fixture_desaturate(&f->cluster, id2 - 1, id1 - 1); disconnection->start = 0; } } } } static void *setup(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); int pairs; size_t i, j, k; SETUP_CLUSTER(0); CLUSTER_BOOTSTRAP; CLUSTER_RANDOMIZE; CLUSTER_START; /* Number of distinct pairs of servers. */ pairs = __server_pairs(f); f->disconnections = munit_malloc(pairs * sizeof *f->disconnections); k = 0; for (i = 0; i < CLUSTER_N; i++) { for (j = i + 1; j < CLUSTER_N; j++) { struct disconnection *disconnection = &f->disconnections[k]; disconnection->id1 = i + 1; disconnection->id2 = j + 1; disconnection->start = 0; disconnection->duration = 0; k++; } } return f; } static void tear_down(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f->disconnections); free(f); } /****************************************************************************** * * Tests * *****************************************************************************/ SUITE(liveness) static void apply_cb(struct raft_apply *req, int status, void *result) { (void)status; (void)result; free(req); } /* The system makes progress even in case of network disruptions. 
*/ TEST(liveness, networkDisconnect, setup, tear_down, 0, _params) { struct fixture *f = data; int i = 0; (void)params; for (i = 0; i < MAX_ITERATIONS; i++) { __update_connectivity(f, i); raft_fixture_step(&f->cluster); if (CLUSTER_LEADER != CLUSTER_N) { struct raft_apply *req = munit_malloc(sizeof *req); CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req, 1, apply_cb); if (CLUSTER_LAST_APPLIED(CLUSTER_LEADER) >= 2) { break; } } } // munit_assert_int(CLUSTER_LAST_APPLIED(CLUSTER_LEADER), >=, 2); return MUNIT_OK; } raft-0.11.3/test/fuzzy/test_membership.c000066400000000000000000000047751415614527300202610ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; struct raft_change req; }; static char *cluster_n[] = {"3", "4", "5", NULL}; static MunitParameterEnum _params[] = { {CLUSTER_N_PARAM, cluster_n}, {NULL, NULL}, }; static void *setup(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(0); CLUSTER_BOOTSTRAP; CLUSTER_RANDOMIZE; CLUSTER_START; CLUSTER_STEP_UNTIL_HAS_LEADER(10000); return f; } static void tear_down(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Tests * *****************************************************************************/ SUITE(membership) TEST(membership, addNonVoting, setup, tear_down, 0, _params) { struct fixture *f = data; const struct raft_server *server; struct raft *raft; CLUSTER_ADD(&f->req); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, 2, 2000); /* Then promote it. 
*/ CLUSTER_ASSIGN(&f->req, RAFT_STANDBY); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_N, 3, 2000); raft = CLUSTER_RAFT(CLUSTER_LEADER); server = &raft->configuration.servers[CLUSTER_N - 1]; munit_assert_int(server->id, ==, CLUSTER_N); return MUNIT_OK; } TEST(membership, addVoting, setup, tear_down, 0, _params) { struct fixture *f = data; const struct raft_server *server; struct raft *raft; (void)params; CLUSTER_ADD(&f->req); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, 2, 2000); /* Then promote it. */ CLUSTER_ASSIGN(&f->req, RAFT_VOTER); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_N, 3, 2000); raft = CLUSTER_RAFT(CLUSTER_LEADER); server = &raft->configuration.servers[CLUSTER_N - 1]; munit_assert_int(server->role, ==, RAFT_VOTER); return MUNIT_OK; } TEST(membership, removeVoting, setup, tear_down, 0, _params) { struct fixture *f = data; struct raft *raft; int rv; (void)params; raft = CLUSTER_RAFT(CLUSTER_LEADER); rv = raft_remove(raft, &f->req, CLUSTER_LEADER % CLUSTER_N + 1, NULL); munit_assert_int(rv, ==, 0); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, 2, 2000); munit_assert_int(raft->configuration.n, ==, CLUSTER_N - 1); return 0; } raft-0.11.3/test/fuzzy/test_replication.c000066400000000000000000000105751415614527300204320ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static char *cluster_n[] = {"3", "5", "7", NULL}; static MunitParameterEnum _params[] = { {CLUSTER_N_PARAM, cluster_n}, {NULL, NULL}, }; static void *setup(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(0); CLUSTER_BOOTSTRAP; CLUSTER_RANDOMIZE; CLUSTER_START; CLUSTER_STEP_UNTIL_HAS_LEADER(10000); return f; } static void tear_down(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } 
/****************************************************************************** * * Helper macros * *****************************************************************************/ #define APPLY_ADD_ONE(REQ) CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, REQ, 1, NULL) /****************************************************************************** * * Tests * *****************************************************************************/ SUITE(replication) /* New entries on the leader are eventually replicated to followers. */ TEST(replication, appendEntries, setup, tear_down, 0, _params) { struct fixture *f = data; struct raft_apply *req = munit_malloc(sizeof *req); (void)params; APPLY_ADD_ONE(req); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_N, 2, 2000); free(req); return MUNIT_OK; } /* The cluster remains available even if the current leader dies and a new * leader gets elected. */ TEST(replication, availability, setup, tear_down, 0, _params) { struct fixture *f = data; struct raft_apply *req1 = munit_malloc(sizeof *req1); struct raft_apply *req2 = munit_malloc(sizeof *req2); (void)params; APPLY_ADD_ONE(req1); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_N, 2, 2000); CLUSTER_KILL_LEADER; CLUSTER_STEP_UNTIL_HAS_NO_LEADER(10000); CLUSTER_STEP_UNTIL_HAS_LEADER(10000); APPLY_ADD_ONE(req2); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, 3, 2000); free(req1); free(req2); return MUNIT_OK; } static void apply_cb(struct raft_apply *req, int status, void *result) { (void)status; (void)result; free(req); } /* If no quorum is available, entries don't get committed. */ TEST(replication, noQuorum, setup, tear_down, 0, _params) { struct fixture *f = data; struct raft_apply *req = munit_malloc(sizeof *req); unsigned i; (void)params; CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req, 1, apply_cb); CLUSTER_KILL_MAJORITY; CLUSTER_STEP_UNTIL_ELAPSED(10000); for (i = 0; i < CLUSTER_N; i++) { munit_assert_int(CLUSTER_LAST_APPLIED(i), ==, 1); } return MUNIT_OK; } /* If the cluster is partitioned, entries don't get committed. 
*/ TEST(replication, partitioned, setup, tear_down, 0, _params) { struct fixture *f = data; struct raft_apply *req1 = munit_malloc(sizeof *req1); struct raft_apply *req2 = munit_malloc(sizeof *req2); unsigned leader_id; size_t i; size_t n; (void)params; leader_id = CLUSTER_LEADER + 1; /* Disconnect the leader from a majority of servers */ n = 0; for (i = 0; n < (CLUSTER_N / 2) + 1; i++) { struct raft *raft = CLUSTER_RAFT(i); if (raft->id == leader_id) { continue; } raft_fixture_saturate(&f->cluster, leader_id - 1, raft->id - 1); raft_fixture_saturate(&f->cluster, raft->id - 1, leader_id - 1); n++; } /* Try to append a new entry using the disconnected leader. */ CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req1, 1, apply_cb); /* The leader gets deposed. */ CLUSTER_STEP_UNTIL_HAS_NO_LEADER(10000); /* The entry does not get committed. */ CLUSTER_STEP_UNTIL_ELAPSED(5000); /* Reconnect the old leader */ for (i = 0; i < CLUSTER_N; i++) { struct raft *raft = CLUSTER_RAFT(i); if (raft->id == leader_id) { continue; } raft_fixture_desaturate(&f->cluster, leader_id - 1, raft->id - 1); } CLUSTER_STEP_UNTIL_HAS_LEADER(30000); /* Re-try now to append the entry. 
*/ CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req2, 1, apply_cb); CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, 2, 10000); return MUNIT_OK; } raft-0.11.3/test/integration/000077500000000000000000000000001415614527300160425ustar00rootroot00000000000000raft-0.11.3/test/integration/append_helpers.h000066400000000000000000000112141415614527300212030ustar00rootroot00000000000000#include "../../src/uv.h" #include "../lib/runner.h" /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; void *data; }; static void appendCbAssertResult(struct raft_io_append *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } /* Declare and fill the entries array for the append request identified by * I. The array will have N entries, and each entry will have a data buffer of * SIZE bytes.*/ #define ENTRIES(I, N, SIZE) \ struct raft_entry _entries##I[N]; \ uint8_t _entries_data##I[N * SIZE]; \ { \ int _i; \ for (_i = 0; _i < N; _i++) { \ struct raft_entry *entry = &_entries##I[_i]; \ entry->term = 1; \ entry->type = RAFT_COMMAND; \ entry->buf.base = &_entries_data##I[_i * SIZE]; \ entry->buf.len = SIZE; \ entry->batch = NULL; \ munit_assert_ptr_not_null(entry->buf.base); \ memset(entry->buf.base, 0, entry->buf.len); \ *(uint64_t *)entry->buf.base = f->count; \ f->count++; \ } \ } /* Submit an append request identified by I, with N_ENTRIES entries, each one of * size ENTRY_SIZE. When the append request completes, CB will be called * and DATA will be available in result->data. 
*/ #define APPEND_SUBMIT_CB_DATA(I, N_ENTRIES, ENTRY_SIZE, CB, DATA) \ struct raft_io_append _req##I; \ struct result _result##I = {0, false, DATA}; \ int _rv##I; \ ENTRIES(I, N_ENTRIES, ENTRY_SIZE); \ _req##I.data = &_result##I; \ _rv##I = f->io.append(&f->io, &_req##I, _entries##I, N_ENTRIES, \ CB); \ munit_assert_int(_rv##I, ==, 0) /* Submit an append request identified by I, with N_ENTRIES entries, each one of * size ENTRY_SIZE. The default expectation is for the operation to succeed. A * custom STATUS can be set with APPEND_EXPECT. */ #define APPEND_SUBMIT(I, N_ENTRIES, ENTRY_SIZE) \ APPEND_SUBMIT_CB_DATA(I, N_ENTRIES, ENTRY_SIZE, \ appendCbAssertResult, NULL) \ /* Try to submit an append request and assert that the given error code and * message are returned. */ #define APPEND_ERROR(N_ENTRIES, ENTRY_SIZE, RV, ERRMSG) \ do { \ struct raft_io_append _req; \ int _rv; \ ENTRIES(0, N_ENTRIES, ENTRY_SIZE); \ _rv = f->io.append(&f->io, &_req, _entries0, N_ENTRIES, NULL); \ munit_assert_int(_rv, ==, RV); \ /* munit_assert_string_equal(f->io.errmsg, ERRMSG);*/ \ } while (0) #define APPEND_EXPECT(I, STATUS) _result##I.status = STATUS /* Wait for the append request identified by I to complete. */ #define APPEND_WAIT(I) LOOP_RUN_UNTIL(&_result##I.done) /* Submit an append request with an entries array with N_ENTRIES entries, each * one of size ENTRY_SIZE, and wait for the operation to successfully * complete. */ #define APPEND(N_ENTRIES, ENTRY_SIZE) \ do { \ APPEND_SUBMIT(0, N_ENTRIES, ENTRY_SIZE); \ APPEND_WAIT(0); \ } while (0) /* Submit an append request with the given parameters and wait for the operation * to fail with the given code and message. 
*/ #define APPEND_FAILURE(N_ENTRIES, ENTRY_SIZE, STATUS, ERRMSG) \ { \ APPEND_SUBMIT(0, N_ENTRIES, ENTRY_SIZE); \ APPEND_EXPECT(0, STATUS); \ APPEND_WAIT(0); \ munit_assert_string_equal(f->io.errmsg, ERRMSG); \ } raft-0.11.3/test/integration/main_core.c000066400000000000000000000000531415614527300201400ustar00rootroot00000000000000#include "../lib/runner.h" RUNNER("core") raft-0.11.3/test/integration/main_uv.c000066400000000000000000000000511415614527300176400ustar00rootroot00000000000000#include "../lib/runner.h" RUNNER("uv") raft-0.11.3/test/integration/test_apply.c000066400000000000000000000104141415614527300203720ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(2); CLUSTER_BOOTSTRAP; CLUSTER_START; CLUSTER_ELECT(0); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; static void applyCbAssertResult(struct raft_apply *req, int status, void *_) { struct result *result = req->data; (void)_; munit_assert_int(status, ==, result->status); result->done = true; } static bool applyCbHasFired(struct raft_fixture *f, void *arg) { struct result *result = arg; (void)f; return result->done; } /* Submit an apply request. 
*/ #define APPLY_SUBMIT(I) \ struct raft_buffer _buf; \ struct raft_apply _req; \ struct result _result = {0, false}; \ int _rv; \ FsmEncodeSetX(123, &_buf); \ _req.data = &_result; \ _rv = raft_apply(CLUSTER_RAFT(I), &_req, &_buf, 1, applyCbAssertResult); \ munit_assert_int(_rv, ==, 0); /* Expect the apply callback to fire with the given status. */ #define APPLY_EXPECT(STATUS) _result.status = STATUS /* Wait until an apply request completes. */ #define APPLY_WAIT CLUSTER_STEP_UNTIL(applyCbHasFired, &_result, 2000) /* Submit to the I'th server a request to apply a new RAFT_COMMAND entry and * wait for the operation to succeed. */ #define APPLY(I) \ do { \ APPLY_SUBMIT(I); \ APPLY_WAIT; \ } while (0) /* Submit to the I'th server a request to apply a new RAFT_COMMAND entry and * assert that the given error is returned. */ #define APPLY_ERROR(I, RV, ERRMSG) \ do { \ struct raft_buffer _buf; \ struct raft_apply _req; \ int _rv; \ FsmEncodeSetX(123, &_buf); \ _rv = raft_apply(CLUSTER_RAFT(I), &_req, &_buf, 1, NULL); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(CLUSTER_ERRMSG(I), ERRMSG); \ raft_free(_buf.base); \ } while (0) /****************************************************************************** * * Success scenarios * *****************************************************************************/ SUITE(raft_apply) /* Append the very first command entry. */ TEST(raft_apply, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPLY(0); munit_assert_int(FsmGetX(CLUSTER_FSM(0)), ==, 123); return MUNIT_OK; } /****************************************************************************** * * Failure scenarios * *****************************************************************************/ /* If the raft instance is not in leader state, an error is returned. 
*/ TEST(raft_apply, notLeader, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPLY_ERROR(1, RAFT_NOTLEADER, "server is not the leader"); return MUNIT_OK; } /* If the raft instance steps down from leader state, the apply callback fires * with an error. */ TEST(raft_apply, leadershipLost, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPLY_SUBMIT(0); APPLY_EXPECT(RAFT_LEADERSHIPLOST); CLUSTER_DEPOSE; APPLY_WAIT; return MUNIT_OK; } raft-0.11.3/test/integration/test_assign.c000066400000000000000000000363121415614527300205360ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; /* Add a an empty server to the cluster and start it. */ #define GROW \ { \ int rv__; \ CLUSTER_GROW; \ rv__ = raft_start(CLUSTER_RAFT(2)); \ munit_assert_int(rv__, ==, 0); \ } static void changeCbAssertResult(struct raft_change *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } static bool changeCbHasFired(struct raft_fixture *f, void *arg) { struct result *result = arg; (void)f; return result->done; } /* Submit an add request. 
*/ #define ADD_SUBMIT(I, ID) \ struct raft_change _req; \ char _address[16]; \ struct result _result = {0, false}; \ int _rv; \ _req.data = &_result; \ sprintf(_address, "%d", ID); \ _rv = \ raft_add(CLUSTER_RAFT(I), &_req, ID, _address, changeCbAssertResult); \ munit_assert_int(_rv, ==, 0); #define ADD(I, ID) \ do { \ ADD_SUBMIT(I, ID); \ CLUSTER_STEP_UNTIL(changeCbHasFired, &_result, 2000); \ } while (0) /* Submit an assign role request. */ #define ASSIGN_SUBMIT(I, ID, ROLE) \ struct raft_change _req; \ struct result _result = {0, false}; \ int _rv; \ _req.data = &_result; \ _rv = raft_assign(CLUSTER_RAFT(I), &_req, ID, ROLE, changeCbAssertResult); \ munit_assert_int(_rv, ==, 0); /* Expect the request callback to fire with the given status. */ #define ASSIGN_EXPECT(STATUS) _result.status = STATUS; /* Wait until a promote request completes. */ #define ASSIGN_WAIT CLUSTER_STEP_UNTIL(changeCbHasFired, &_result, 10000) /* Submit a request to assign the I'th server to the given role and wait for the * operation to succeed. */ #define ASSIGN(I, ID, ROLE) \ do { \ ASSIGN_SUBMIT(I, ID, ROLE); \ ASSIGN_WAIT; \ } while (0) /* Invoke raft_assign() against the I'th server and assert it the given error * code. */ #define ASSIGN_ERROR(I, ID, ROLE, RV, ERRMSG) \ { \ struct raft_change __req; \ int __rv; \ __rv = raft_assign(CLUSTER_RAFT(I), &__req, ID, ROLE, NULL); \ munit_assert_int(__rv, ==, RV); \ munit_assert_string_equal(ERRMSG, CLUSTER_ERRMSG(I)); \ } /****************************************************************************** * * Set up a cluster of 2 servers, with the first as leader. 
* *****************************************************************************/ static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(2); CLUSTER_BOOTSTRAP; CLUSTER_START; CLUSTER_ELECT(0); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert the values of the committed and uncommitted configuration indexes on * the raft instance with the given index. */ #define ASSERT_CONFIGURATION_INDEXES(I, COMMITTED, UNCOMMITTED) \ { \ struct raft *raft_ = CLUSTER_RAFT(I); \ munit_assert_int(raft_->configuration_index, ==, COMMITTED); \ munit_assert_int(raft_->configuration_uncommitted_index, ==, \ UNCOMMITTED); \ } /* Assert that the state of the current catch up round matches the given * values. */ #define ASSERT_CATCH_UP_ROUND(I, PROMOTEED_ID, NUMBER, DURATION) \ { \ struct raft *raft_ = CLUSTER_RAFT(I); \ munit_assert_int(raft_->leader_state.promotee_id, ==, PROMOTEED_ID); \ munit_assert_int(raft_->leader_state.round_number, ==, NUMBER); \ munit_assert_int( \ raft_->io->time(raft_->io) - raft_->leader_state.round_start, >=, \ DURATION); \ } /****************************************************************************** * * raft_assign * *****************************************************************************/ SUITE(raft_assign) /* Assigning the voter role to a spare server whose log is already up-to-date * results in the relevant configuration change to be submitted immediately. 
*/ TEST(raft_assign, promoteUpToDate, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; const struct raft_server *server; GROW; ADD(0, 3); CLUSTER_STEP_N(3); ASSIGN(0, 3, RAFT_VOTER); /* Server 3 is being considered as voting, even though the configuration * change is not committed yet. */ raft = CLUSTER_RAFT(0); server = &raft->configuration.servers[2]; munit_assert_int(server->role, ==, RAFT_VOTER); /* The configuration change request eventually succeeds. */ CLUSTER_STEP_UNTIL_APPLIED(0, 3, 2000); return MUNIT_OK; } static bool thirdServerHasCaughtUp(struct raft_fixture *f, void *arg) { struct raft *raft = raft_fixture_get(f, 0); (void)arg; return raft->leader_state.promotee_id == 0; } /* Assigning the voter role to a spare server whose log is not up-to-date * results in catch-up rounds to start. When the server has caught up, the * configuration change request gets submitted. */ TEST(raft_assign, promoteCatchUp, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; const struct raft_server *server; CLUSTER_MAKE_PROGRESS; GROW; ADD(0, 3); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); /* Server 3 is not being considered as voting, since its log is behind. */ raft = CLUSTER_RAFT(0); server = &raft->configuration.servers[2]; munit_assert_int(server->role, ==, RAFT_SPARE); /* Advance the match index of server 3, by acknowledging the AppendEntries * request that the leader has sent to it. */ CLUSTER_STEP_UNTIL_APPLIED(2, 2, 2000); /* Disconnect the second server, so it doesn't participate in the quorum */ CLUSTER_SATURATE_BOTHWAYS(0, 1); /* Eventually the leader notices that the third server has caught. */ CLUSTER_STEP_UNTIL(thirdServerHasCaughtUp, NULL, 2000); /* The leader has submitted a configuration change request, but it's * uncommitted. */ ASSERT_CONFIGURATION_INDEXES(0, 3, 4); /* The third server notifies that it has appended the new * configuration. 
Since it's considered voting already, it counts for the * majority and the entry gets committed. */ CLUSTER_STEP_UNTIL_APPLIED(0, 4, 2000); CLUSTER_STEP_UNTIL_APPLIED(2, 4, 2000); /* The promotion is completed. */ ASSERT_CONFIGURATION_INDEXES(0, 4, 0); return MUNIT_OK; } static bool thirdServerHasCompletedFirstRound(struct raft_fixture *f, void *arg) { struct raft *raft = raft_fixture_get(f, 0); (void)arg; return raft->leader_state.round_number != 1; } /* Assigning the voter role to a spare a server whose log is not up-to-date * results in catch-up rounds to start. If new entries are appended after a * round is started, a new round is initiated once the former one completes. */ TEST(raft_assign, promoteNewRound, setUp, tearDown, 0, NULL) { struct fixture *f = data; unsigned election_timeout = CLUSTER_RAFT(0)->election_timeout; struct raft_apply *req = munit_malloc(sizeof *req); CLUSTER_MAKE_PROGRESS; GROW; ADD(0, 3); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); ASSERT_CATCH_UP_ROUND(0, 3, 1, 0); /* Now that the catch-up round started, submit a new entry and set a very * high latency on the server being promoted, so it won't deliver * AppendEntry results within the round duration. */ CLUSTER_APPLY_ADD_X(0, req, 1, NULL); CLUSTER_STEP_UNTIL_ELAPSED(election_timeout + 100); // FIXME: unstable with 0xcf1f25b6 // ASSERT_CATCH_UP_ROUND(0, 3, 1, election_timeout + 100); /* The leader eventually receives the AppendEntries result from the * promotee, acknowledging all entries except the last one. The first round * has completes and a new one has starts. */ CLUSTER_STEP_UNTIL(thirdServerHasCompletedFirstRound, NULL, 2000); /* Eventually the server is promoted and everyone applies the entry. */ CLUSTER_STEP_UNTIL_APPLIED(0, req->index, 5000); /* The promotion is eventually completed. 
*/ CLUSTER_STEP_UNTIL_APPLIED(0, req->index + 1, 5000); ASSERT_CONFIGURATION_INDEXES(0, 5, 0); free(req); return MUNIT_SKIP; } static bool secondServerHasNewConfiguration(struct raft_fixture *f, void *arg) { struct raft *raft = raft_fixture_get(f, 1); (void)arg; return raft->configuration.servers[2].role == RAFT_VOTER; } /* If a follower receives an AppendEntries RPC containing a RAFT_CHANGE entry * which changes the role of a server, the configuration change is immediately * applied locally, even if the entry is not yet committed. Once the entry is * committed, the change becomes permanent.*/ TEST(raft_assign, changeIsImmediate, setUp, tearDown, 0, NULL) { struct fixture *f = data; GROW; CLUSTER_MAKE_PROGRESS; ADD(0, 3); CLUSTER_STEP_UNTIL_APPLIED(1, 3, 2000); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); CLUSTER_STEP_UNTIL(secondServerHasNewConfiguration, NULL, 3000); ASSERT_CONFIGURATION_INDEXES(1, 3, 4); ASSIGN_WAIT; return MUNIT_OK; } /* Assign the stand-by role to an idle server. */ TEST(raft_assign, promoteToStandBy, setUp, tearDown, 0, NULL) { struct fixture *f = data; GROW; ADD(0, 3); ASSIGN(0, 3, RAFT_STANDBY); return MUNIT_OK; } /* Trying to promote a server on a raft instance which is not the leader results * in an error. */ TEST(raft_assign, notLeader, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_ERROR(1, 3, RAFT_VOTER, RAFT_NOTLEADER, "server is not the leader"); return MUNIT_OK; } /* Trying to change the role of a server whose ID is unknown results in an * error. */ TEST(raft_assign, unknownId, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_ERROR(0, 3, RAFT_VOTER, RAFT_NOTFOUND, "no server has ID 3"); return MUNIT_OK; } /* Trying to promote a server to an unknown role in an. */ TEST(raft_assign, badRole, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_ERROR(0, 3, 999, RAFT_BADROLE, "server role is not valid"); return MUNIT_OK; } /* Trying to assign the voter role to a server which has already it results in * an error. 
*/ TEST(raft_assign, alreadyHasRole, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_ERROR(0, 1, RAFT_VOTER, RAFT_BADROLE, "server is already voter"); return MUNIT_OK; } /* Trying to assign a new role to a server while a configuration change is in * progress results in an error. */ TEST(raft_assign, changeRequestAlreadyInProgress, setUp, tearDown, 0, NULL) { struct fixture *f = data; GROW; ADD(0, 3); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); ASSIGN_ERROR(0, 3, RAFT_VOTER, RAFT_CANTCHANGE, "a configuration change is already in progress"); ASSIGN_WAIT; return MUNIT_OK; } /* If leadership is lost before the configuration change log entry for setting * the new server role is committed, the leader configuration gets rolled back * and the role of server being changed is reverted. */ TEST(raft_assign, leadershipLost, setUp, tearDown, 0, NULL) { struct fixture *f = data; const struct raft_server *server; /* TODO: fix */ return MUNIT_SKIP; GROW; ADD(0, 3); CLUSTER_STEP_N(2); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); /* Server 3 is being considered as voting, even though the configuration * change is not committed yet. */ ASSERT_CATCH_UP_ROUND(0, 0, 0, 0); ASSERT_CONFIGURATION_INDEXES(0, 2, 3); server = configurationGet(&CLUSTER_RAFT(0)->configuration, 3); munit_assert_int(server->role, ==, RAFT_VOTER); /* Lose leadership. */ CLUSTER_DEPOSE; /* A new leader gets elected */ CLUSTER_ELECT(1); CLUSTER_STEP_N(5); /* Server 3 is not being considered voting anymore. */ server = configurationGet(&CLUSTER_RAFT(0)->configuration, 3); munit_assert_int(server->role, ==, RAFT_STANDBY); return MUNIT_OK; } /* Trying to assign the voter role to an unresponsive server eventually * fails. */ TEST(raft_assign, promoteUnresponsive, setUp, tearDown, 0, NULL) { struct fixture *f = data; CLUSTER_MAKE_PROGRESS; GROW; ADD(0, 3); ASSIGN_SUBMIT(0, 3, RAFT_VOTER); CLUSTER_KILL(2); ASSIGN_EXPECT(RAFT_NOCONNECTION); ASSIGN_WAIT; return MUNIT_OK; } /* Demote a voter node to stand-by. 
*/ TEST(raft_assign, demoteToStandBy, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN(0, 2, RAFT_STANDBY); return MUNIT_OK; } /* The leader can be demoted to stand-by and will no longer act as leader */ TEST(raft_assign, demoteLeader, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_SUBMIT(0, 1, RAFT_STANDBY); munit_assert_int(CLUSTER_LEADER, ==, 0); ASSIGN_WAIT; CLUSTER_STEP_UNTIL_HAS_LEADER(5000); munit_assert_int(CLUSTER_LEADER, =!, 0); return MUNIT_OK; } /* The leader can be demoted to spare and will no longer act as leader */ TEST(raft_assign, demoteLeaderToSpare, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSIGN_SUBMIT(0, 1, RAFT_SPARE); munit_assert_int(CLUSTER_LEADER, ==, 0); ASSIGN_WAIT; CLUSTER_STEP_UNTIL_HAS_LEADER(5000); munit_assert_int(CLUSTER_LEADER, =!, 0); return MUNIT_OK; } raft-0.11.3/test/integration/test_barrier.c000066400000000000000000000046651415614527300207060ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(2); CLUSTER_BOOTSTRAP; CLUSTER_START; CLUSTER_ELECT(0); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; static void barrierCbAssertResult(struct raft_barrier *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } static bool barrierCbHasFired(struct raft_fixture *f, void *arg) { struct 
result *result = arg; (void)f; return result->done; } /* Submit a barrier request. */ #define BARRIER_SUBMIT(I) \ struct raft_barrier _req; \ struct result _result = {0, false}; \ int _rv; \ _req.data = &_result; \ _rv = raft_barrier(CLUSTER_RAFT(I), &_req, barrierCbAssertResult); \ munit_assert_int(_rv, ==, 0); /* Expect the barrier callback to fire with the given status. */ #define BARRIER_EXPECT(STATUS) _result.status = STATUS /* Wait until the barrier request completes. */ #define BARRIER_WAIT CLUSTER_STEP_UNTIL(barrierCbHasFired, &_result, 2000) /* Submit to the I'th server a barrier request and wait for the operation to * succeed. */ #define BARRIER(I) \ do { \ BARRIER_SUBMIT(I); \ BARRIER_WAIT; \ } while (0) /****************************************************************************** * * Success scenarios * *****************************************************************************/ SUITE(raft_barrier) TEST(raft_barrier, cb, setUp, tearDown, 0, NULL) { struct fixture *f = data; BARRIER(0); return MUNIT_OK; } raft-0.11.3/test/integration/test_bootstrap.c000066400000000000000000000025401415614527300212630ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture holding a pristine raft instance. * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(1); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Bootstrap tests. * *****************************************************************************/ SUITE(raft_bootstrap) /* Attempting to bootstrap an instance that's already started results in * RAFT_BUSY. 
*/ TEST(raft_bootstrap, busy, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; struct raft_configuration configuration; int rv; /* Bootstrap and the first server. */ CLUSTER_BOOTSTRAP_N_VOTING(1); CLUSTER_START; raft = CLUSTER_RAFT(0); CLUSTER_CONFIGURATION(&configuration); rv = raft_bootstrap(raft, &configuration); munit_assert_int(rv, ==, RAFT_BUSY); raft_configuration_close(&configuration); return MUNIT_OK; } raft-0.11.3/test/integration/test_digest.c000066400000000000000000000005571415614527300205330ustar00rootroot00000000000000#include "../../include/raft.h" #include "../lib/runner.h" SUITE(raft_digest) /* Generation of the ID of the bootstrap dqlite node. */ TEST(raft_digest, bootstrapServerId, NULL, NULL, 0, NULL) { const char *address = "127.0.0.1:65536"; unsigned long long id; id = raft_digest(address, 0); munit_assert_int(id, ==, 138882483); return MUNIT_OK; } raft-0.11.3/test/integration/test_election.c000066400000000000000000000554361415614527300210640ustar00rootroot00000000000000#include "../../src/configuration.h" #include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); unsigned i; SETUP_CLUSTER(2); CLUSTER_BOOTSTRAP; for (i = 0; i < CLUSTER_N; i++) { struct raft *raft = CLUSTER_RAFT(i); raft->data = f; } return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Parameters * *****************************************************************************/ static char *cluster_5[] = {"5", NULL}; static MunitParameterEnum cluster_5_params[] = { {CLUSTER_N_PARAM, cluster_5}, 
{NULL, NULL}, }; static char *cluster_3[] = {"3", NULL}; static MunitParameterEnum cluster_3_params[] = { {CLUSTER_N_PARAM, cluster_3}, {NULL, NULL}, }; /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Wait until the I'th server becomes candidate. */ #define STEP_UNTIL_CANDIDATE(I) \ CLUSTER_STEP_UNTIL_STATE_IS(I, RAFT_CANDIDATE, 2000) /* Wait until the I'th server becomes leader. */ #define STEP_UNTIL_LEADER(I) CLUSTER_STEP_UNTIL_STATE_IS(I, RAFT_LEADER, 2000) /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert that the I'th server is in follower state. */ #define ASSERT_FOLLOWER(I) munit_assert_int(CLUSTER_STATE(I), ==, RAFT_FOLLOWER) /* Assert that the I'th server is in candidate state. */ #define ASSERT_CANDIDATE(I) \ munit_assert_int(CLUSTER_STATE(I), ==, RAFT_CANDIDATE) /* Assert that the I'th server is in leader state. */ #define ASSERT_LEADER(I) munit_assert_int(CLUSTER_STATE(I), ==, RAFT_LEADER) /* Assert that the I'th server is unavailable. */ #define ASSERT_UNAVAILABLE(I) \ munit_assert_int(CLUSTER_STATE(I), ==, RAFT_UNAVAILABLE) /* Assert that the I'th server has voted for the server with the given ID. */ #define ASSERT_VOTED_FOR(I, ID) munit_assert_int(CLUSTER_VOTED_FOR(I), ==, ID) /* Assert that the I'th server has the given current term. 
*/ #define ASSERT_TERM(I, TERM) \ { \ struct raft *raft_ = CLUSTER_RAFT(I); \ munit_assert_int(raft_->current_term, ==, TERM); \ } /* Assert that the fixture time matches the given value */ #define ASSERT_TIME(TIME) munit_assert_int(CLUSTER_TIME, ==, TIME) /****************************************************************************** * * Successful election round * *****************************************************************************/ SUITE(election) /* Test an election round with two voters. */ TEST(election, twoVoters, setUp, tearDown, 0, NULL) { struct fixture *f = data; (void)params; CLUSTER_START; /* The first server eventually times out and converts to candidate. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); CLUSTER_STEP; /* Server 1 tick */ ASSERT_FOLLOWER(1); CLUSTER_STEP; /* Server 0 completes sending a RequestVote RPC */ CLUSTER_STEP; /* Server 1 receives RequestVote RPC */ ASSERT_VOTED_FOR(1, 1); ASSERT_TIME(1015); CLUSTER_STEP; /* Server 1 completes sending RequestVote RPC */ CLUSTER_STEP; /* Server 1 receives RequestVote RPC result */ ASSERT_LEADER(0); ASSERT_TIME(1030); return MUNIT_OK; } /* If we have already voted and the same candidate requests the vote again, the * vote is granted. */ TEST(election, grantAgain, setUp, tearDown, 0, NULL) { struct fixture *f = data; (void)params; raft_fixture_set_randomized_election_timeout(&f->cluster, 1, 10000); raft_set_election_timeout(CLUSTER_RAFT(1), 10000); CLUSTER_START; /* The first server converts to candidate. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); CLUSTER_STEP; /* Server 1 tick */ ASSERT_FOLLOWER(1); /* Disconnect the second server, so the first server does not receive the * result and eventually starts a new election round. */ CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_STEP_UNTIL_TERM_IS(0, 3, 2000); ASSERT_CANDIDATE(0); ASSERT_TIME(2000); /* Reconnecting the two servers eventually makes the first server win the * election. 
*/ CLUSTER_DESATURATE_BOTHWAYS(0, 1); STEP_UNTIL_LEADER(0); ASSERT_TIME(2030); return MUNIT_OK; } /* If the requester last log entry index is the same, the vote is granted. */ TEST(election, grantIfLastIndexIsSame, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entry1; struct raft_entry entry2; (void)params; entry1.type = RAFT_COMMAND; entry1.term = 1; FsmEncodeSetX(1, &entry1.buf); entry2.type = RAFT_COMMAND; entry2.term = 1; FsmEncodeSetX(1, &entry2.buf); CLUSTER_ADD_ENTRY(0, &entry1); CLUSTER_ADD_ENTRY(1, &entry2); CLUSTER_SET_TERM(1, 2); CLUSTER_START; /* The first server converts to candidate. */ STEP_UNTIL_CANDIDATE(0); /* The first server eventually receives a RequestVote result RPC and * converts to leader */ STEP_UNTIL_LEADER(0); ASSERT_TIME(1030); return MUNIT_OK; } /* If the requester last log entry index is higher, the vote is granted. */ TEST(election, grantIfLastIndexIsHigher, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entry; (void)params; entry.type = RAFT_COMMAND; entry.term = 1; FsmEncodeSetX(1, &entry.buf); CLUSTER_ADD_ENTRY(0, &entry); CLUSTER_SET_TERM(1, 2); CLUSTER_START; /* The first server converts to candidate. */ STEP_UNTIL_CANDIDATE(0); /* The second server grants its vote. */ CLUSTER_STEP_UNTIL_VOTED_FOR(1, 0, 2000); /* The first server receives a RequestVote result RPC and converts to * leader */ CLUSTER_STEP_N(2); ASSERT_LEADER(0); return MUNIT_OK; } /* If a candidate receives a vote request response granting the vote but the * quorum is not reached, it stays candidate. */ TEST(election, waitQuorum, setUp, tearDown, 0, cluster_5_params) { struct fixture *f = data; (void)params; CLUSTER_START; /* The first server converts to candidate. */ STEP_UNTIL_CANDIDATE(0); /* All servers grant their vote. 
*/ CLUSTER_STEP_UNTIL_VOTED_FOR(1, 0, 2000); CLUSTER_STEP_UNTIL_VOTED_FOR(2, 0, 2000); CLUSTER_STEP_UNTIL_VOTED_FOR(3, 0, 2000); CLUSTER_STEP_UNTIL_VOTED_FOR(4, 0, 2000); ASSERT_TIME(1015); /* The first server receives the first RequestVote result RPC but stays * candidate since it has only 2 votes, and 3 are required. */ CLUSTER_STEP_N(4); /* Send completes on all other servers */ CLUSTER_STEP; /* First message is delivered */ ASSERT_TIME(1030); ASSERT_CANDIDATE(0); /* Eventually we are elected */ CLUSTER_STEP; /* Second message is delivered */ ASSERT_LEADER(0); /* Server 0 reaches the quorum */ ASSERT_TIME(1030); return MUNIT_OK; } /* The vote request gets rejected if our term is higher. */ TEST(election, rejectIfHigherTerm, setUp, tearDown, 0, NULL) { struct fixture *f = data; (void)params; CLUSTER_SET_TERM(1, 3); CLUSTER_START; /* The first server converts to candidate. */ STEP_UNTIL_CANDIDATE(0); CLUSTER_STEP_N(3); /* Server 1 tick and RequestVote send/delivery */ /* The second server receives a RequestVote RPC and rejects the vote for the * first server. */ ASSERT_VOTED_FOR(1, 0); CLUSTER_STEP_N(2); /* RequestVote result send/delivery */ /* The first server receives the RequestVote result RPC and converts to * follower because it discovers the newer term. */ ASSERT_FOLLOWER(0); return 0; } /* If the server already has a leader, the vote is not granted (even if the * request has a higher term). */ TEST(election, rejectIfHasLeader, setUp, tearDown, 0, cluster_3_params) { struct fixture *f = data; (void)params; CLUSTER_START; /* Server 0 wins the elections. */ STEP_UNTIL_LEADER(0); /* Server 2 gets disconnected and becomes candidate. */ CLUSTER_SATURATE_BOTHWAYS(0, 2); STEP_UNTIL_CANDIDATE(2); /* Server 2 stays candidate since its requests get rejected. */ CLUSTER_STEP_N(20); ASSERT_CANDIDATE(2); return MUNIT_OK; } /* If a server has already voted, vote is not granted. 
*/ TEST(election, rejectIfAlreadyVoted, setUp, tearDown, 0, cluster_3_params) { struct fixture *f = data; (void)params; /* Disconnect server 1 from server 0 and change its randomized election * timeout to match the one of server 0. This way server 1 will convert to * candidate but not receive vote requests. */ raft_fixture_set_randomized_election_timeout(&f->cluster, 1, 1000); CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_START; /* Server 0 and server 1 both become candidates. */ STEP_UNTIL_CANDIDATE(0); STEP_UNTIL_CANDIDATE(1); ASSERT_TIME(1000); /* Server 2 receives the vote request from server 0 and grants it. */ CLUSTER_STEP_UNTIL_VOTED_FOR(2, 0, 2000); ASSERT_TIME(1015); /* Server 0 receives the vote result from server 2 and becomes leader. */ STEP_UNTIL_LEADER(0); ASSERT_TIME(1030); /* Server 1 is still candidate because its vote request got rejected. */ ASSERT_CANDIDATE(1); return MUNIT_OK; } /* If the requester last log entry term is lower than ours, the vote is not * granted. */ TEST(election, rejectIfLastTermIsLower, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entry1; struct raft_entry entry2; (void)params; entry1.type = RAFT_COMMAND; entry1.term = 1; FsmEncodeSetX(123, &entry1.buf); entry2.type = RAFT_COMMAND; entry2.term = 2; FsmEncodeSetX(456, &entry2.buf); CLUSTER_ADD_ENTRY(0, &entry1); CLUSTER_ADD_ENTRY(1, &entry2); CLUSTER_START; /* The first server becomes candidate. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); /* The second server receives a RequestVote RPC and rejects the vote for the * first server. */ CLUSTER_STEP_UNTIL_DELIVERED(0, 1, 100); ASSERT_VOTED_FOR(1, 0); ASSERT_TIME(1015); /* The first server receives the response and stays candidate. */ CLUSTER_STEP_UNTIL_DELIVERED(1, 0, 100); ASSERT_CANDIDATE(0); ASSERT_TIME(1030); /* Eventually the second server becomes leader because it has a longer * log. 
*/ STEP_UNTIL_LEADER(1); ASSERT_TIME(1130); return MUNIT_OK; } /* If the requester last log entry index is the lower, the vote is not * granted. */ TEST(election, rejectIfLastIndexIsLower, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entry; (void)params; entry.type = RAFT_COMMAND; entry.term = 2; FsmEncodeSetX(123, &entry.buf); CLUSTER_ADD_ENTRY(1, &entry); CLUSTER_START; /* The first server becomes candidate. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); /* The second server receives a RequestVote RPC and rejects the vote for the * first server. */ CLUSTER_STEP_UNTIL_DELIVERED(0, 1, 100); ASSERT_VOTED_FOR(1, 0); ASSERT_TIME(1015); /* The first server receives the response and stays candidate. */ CLUSTER_STEP_UNTIL_DELIVERED(1, 0, 100); ASSERT_CANDIDATE(0); ASSERT_TIME(1030); /* Eventually the second server becomes leader because it has a longer * log. */ STEP_UNTIL_LEADER(1); ASSERT_TIME(1130); return MUNIT_OK; } static char *reject_not_voting_n[] = {"3", NULL}; static char *reject_not_voting_n_voting[] = {"2", NULL}; static MunitParameterEnum reject_not_voting_params[] = { {CLUSTER_N_PARAM, reject_not_voting_n}, {CLUSTER_N_VOTING_PARAM, reject_not_voting_n_voting}, {NULL, NULL}, }; /* If we are not a voting server, the vote is not granted. */ TEST(election, rejectIfNotVoter, setUp, tearDown, 0, reject_not_voting_params) { struct fixture *f = data; /* Disconnect server 0 from server 1, so server 0 can't win the elections * (since there are only 2 voting servers). */ CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_START; /* Server 0 becomes candidate. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); /* Server 0 stays candidate because it can't reach a quorum. */ CLUSTER_STEP_UNTIL_TERM_IS(0, 3, 2000); ASSERT_CANDIDATE(0); ASSERT_TIME(2000); return MUNIT_OK; } /* If a candidate server receives a response indicating that the vote was not * granted, nothing happens (e.g. the server has already voted for someone * else). 
*/ TEST(election, receiveRejectResult, setUp, tearDown, 0, cluster_5_params) { struct fixture *f = data; (void)params; /* Lower the randomized election timeout of server 4, so it becomes * candidate just after server 0 */ raft_fixture_set_randomized_election_timeout(&f->cluster, 4, 1020); /* Disconnect server 0 from all others except server 1. */ CLUSTER_SATURATE_BOTHWAYS(0, 2); CLUSTER_SATURATE_BOTHWAYS(0, 3); CLUSTER_SATURATE_BOTHWAYS(0, 4); /* Disconnect server 4 from all others except the server 1. */ CLUSTER_SATURATE_BOTHWAYS(4, 0); CLUSTER_SATURATE_BOTHWAYS(4, 2); CLUSTER_SATURATE_BOTHWAYS(4, 3); CLUSTER_START; /* The server 0 becomes candidate, server 4 one is still follower. */ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); ASSERT_FOLLOWER(4); /* Server 1 receives a RequestVote RPC and grants its vote. */ CLUSTER_STEP_UNTIL_DELIVERED(0, 1, 100); ASSERT_TIME(1015); ASSERT_VOTED_FOR(1, 1); ASSERT_CANDIDATE(0); ASSERT_FOLLOWER(4); /* Disconnect server 0 from server 1, so it doesn't receive further * messages. */ CLUSTER_SATURATE_BOTHWAYS(0, 1); /* Server 4 server eventually becomes candidate */ STEP_UNTIL_CANDIDATE(4); ASSERT_TIME(1100); ASSERT_CANDIDATE(0); /* The second server receives a RequestVote RPC but rejects its vote since * it has already voted. */ CLUSTER_STEP_UNTIL_DELIVERED(4, 0, 100); ASSERT_VOTED_FOR(1, 1); ASSERT_CANDIDATE(0); ASSERT_CANDIDATE(4); return MUNIT_OK; } static char *ioErrorConvertDelay[] = {"0", "1", NULL}; static MunitParameterEnum ioErrorConvert[] = { {"delay", ioErrorConvertDelay}, {NULL, NULL}, }; /* An I/O error occurs when converting to candidate. */ TEST(election, ioErrorConvert, setUp, tearDown, 0, ioErrorConvert) { struct fixture *f = data; const char *delay = munit_parameters_get(params, "delay"); return MUNIT_SKIP; CLUSTER_START; /* The first server fails to convert to candidate. 
*/ CLUSTER_IO_FAULT(0, atoi(delay), 1); CLUSTER_STEP; ASSERT_UNAVAILABLE(0); return MUNIT_OK; } /* The I/O error occurs when sending a vote request, and gets ignored. */ TEST(election, ioErrorSendVoteRequest, setUp, tearDown, 0, NULL) { struct fixture *f = data; return MUNIT_SKIP; CLUSTER_START; /* The first server fails to send a RequestVote RPC. */ CLUSTER_IO_FAULT(0, 2, 1); CLUSTER_STEP; /* The first server is still candidate. */ CLUSTER_STEP; ASSERT_CANDIDATE(0); return MUNIT_OK; } /* The I/O error occurs when the second node tries to persist its vote. */ TEST(election, ioErrorPersistVote, setUp, tearDown, 0, NULL) { struct fixture *f = data; return MUNIT_SKIP; CLUSTER_START; /* The first server becomes candidate. */ CLUSTER_STEP; ASSERT_CANDIDATE(0); /* The second server receives a RequestVote RPC but fails to persist its * vote. */ CLUSTER_IO_FAULT(1, 0, 1); CLUSTER_STEP; ASSERT_UNAVAILABLE(1); return MUNIT_OK; } /* Test an election round with two voters and pre-vote. */ TEST(election, preVote, setUp, tearDown, 0, NULL) { struct fixture *f = data; raft_set_pre_vote(CLUSTER_RAFT(0), true); raft_set_pre_vote(CLUSTER_RAFT(1), true); CLUSTER_START; /* The first server eventually times out and converts to candidate, but it * does not increment its term yet.*/ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); ASSERT_TERM(0, 1); CLUSTER_STEP; /* Server 1 tick */ ASSERT_FOLLOWER(1); CLUSTER_STEP; /* Server 0 completes sending a pre-vote RequestVote RPC */ CLUSTER_STEP; /* Server 1 receives the pre-vote RequestVote RPC */ ASSERT_TERM(1, 1); /* Server 1 does increment its term */ ASSERT_VOTED_FOR(1, 0); /* Server 1 does not persist its vote */ ASSERT_TIME(1015); CLUSTER_STEP; /* Server 1 completes sending pre-vote RequestVote result */ CLUSTER_STEP; /* Server 0 receives the pre-vote RequestVote result */ ASSERT_CANDIDATE(0); ASSERT_TERM(0, 2); /* Server 0 has now incremented its term. 
*/ ASSERT_TIME(1030); CLUSTER_STEP; /* Server 1 completes sending an actual RequestVote RPC */ CLUSTER_STEP; /* Server 1 receives the actual RequestVote RPC */ ASSERT_TERM(1, 2); /* Server 1 does increment its term. */ ASSERT_VOTED_FOR(1, 1); /* Server 1 does persists its vote */ CLUSTER_STEP; /* Server 1 completes sending actual RequestVote result */ CLUSTER_STEP; /* Server 0 receives the actual RequestVote result */ ASSERT_LEADER(0); return MUNIT_OK; } /* A candidate receives votes then crashes. */ TEST(election, preVoteWithcandidateCrash, setUp, tearDown, 0, cluster_3_params) { struct fixture *f = data; raft_set_pre_vote(CLUSTER_RAFT(0), true); raft_set_pre_vote(CLUSTER_RAFT(1), true); raft_set_pre_vote(CLUSTER_RAFT(2), true); CLUSTER_START; /* The first server eventually times out and converts to candidate, but it * does not increment its term yet.*/ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); ASSERT_TERM(0, 1); /* Server 1 and 2 ticks */ CLUSTER_STEP_N(2); ASSERT_FOLLOWER(1); ASSERT_FOLLOWER(2); /* Server 0 completes sending a pre-vote RequestVote RPCs */ CLUSTER_STEP_N(2); CLUSTER_STEP; /* Server 1 receives the pre-vote RequestVote RPC */ ASSERT_TERM(1, 1); /* Server 1 does not increment its term */ ASSERT_VOTED_FOR(1, 0); /* Server 1 does not persist its vote */ ASSERT_TIME(1015); CLUSTER_STEP; /* Server 2 receives the pre-vote RequestVote RPC */ ASSERT_TERM(2, 1); /* Server 2 does not increment its term */ ASSERT_VOTED_FOR(2, 0); /* Server 1 does not persist its vote */ ASSERT_TIME(1015); /* Server 1 and 2 complete sending pre-vote RequestVote results */ CLUSTER_STEP_N(2); /* Server 0 receives the pre-vote RequestVote results */ CLUSTER_STEP_N(2); ASSERT_CANDIDATE(0); ASSERT_TERM(0, 2); /* Server 0 has now incremented its term. */ ASSERT_TIME(1030); /* Server 0 completes sending actual RequestVote RPCs */ CLUSTER_STEP_N(2); CLUSTER_STEP; /* Server 1 receives the actual RequestVote RPC */ ASSERT_TERM(1, 2); /* Server 1 does increment its term. 
*/ ASSERT_VOTED_FOR(1, 1); /* Server 1 does persists its vote */ CLUSTER_STEP; /* Server 2 receives the actual RequestVote RPC */ ASSERT_TERM(2, 2); /* Server 2 does increment its term. */ ASSERT_VOTED_FOR(2, 1); /* Server 2 does persists its vote */ /* Server 0 crashes. */ CLUSTER_KILL(0); /* Server 1 times out and starts an election. * It doesn't increment its term */ STEP_UNTIL_CANDIDATE(1); ASSERT_TIME(2200); ASSERT_TERM(1, 2); /* Server 1 completes sending the pre-vote RequestVote RPCs and server 2 has * received those RPCs. * Since server 2 has no current leader (the leader crashed before sending a * HeartBeat), it will grant its vote to server 1, but will not persist it * due to pre-vote, it's persisted vote is still for Server 0 (id 1) */ CLUSTER_STEP_N(5); ASSERT_TERM(2, 2); /* Server 2 does not increment its term */ ASSERT_VOTED_FOR(2, 1); /* Server 1 receives the pre-vote RequestVote Result */ CLUSTER_STEP_N(2); /* Server 1 increments it's term to start a non pre-vote election */ ASSERT_TERM(1, 3); /* Server 1 has now incremented its term. */ ASSERT_VOTED_FOR(1, 2); /* Server 1 has persisted its vote */ ASSERT_TIME(2230); /* Server 1 completes sending actual RequestVote RPCs */ CLUSTER_STEP_N(2); /* Server 2 receives the actual RequestVote RPCs */ CLUSTER_STEP_N(2); ASSERT_VOTED_FOR(2, 2); /* Server 2 persists its vote */ /* Server 1 receives RequestVote RPCs results and becomes leader */ CLUSTER_STEP_N(2); ASSERT_LEADER(1); return MUNIT_OK; } /* Ensure delayed pre-vote responses are not counted towards the real election * quorum. */ TEST(election, preVoteNoStaleVotes, setUp, tearDown, 0, cluster_3_params) { struct fixture *f = data; raft_set_pre_vote(CLUSTER_RAFT(0), true); raft_set_pre_vote(CLUSTER_RAFT(1), true); raft_set_pre_vote(CLUSTER_RAFT(2), true); /* Server 2 is 1 term ahead of the other servers, this will allow it to send stale * pre-vote responses that pass the term checks. 
*/ CLUSTER_SET_TERM(2, 2); CLUSTER_START; /* The first server eventually times out and converts to candidate, but it * does not increment its term yet.*/ STEP_UNTIL_CANDIDATE(0); ASSERT_TIME(1000); ASSERT_TERM(0, 1); /* Server 1 and 2 ticks */ CLUSTER_STEP_N(2); ASSERT_FOLLOWER(1); ASSERT_FOLLOWER(2); /* Server 0 completes sending a pre-vote RequestVote RPCs */ CLUSTER_STEP_N(2); CLUSTER_STEP; /* Server 1 receives the pre-vote RequestVote RPC */ ASSERT_TERM(1, 1); /* Server 1 does not increment its term */ ASSERT_VOTED_FOR(1, 0); /* Server 1 does not persist its vote */ ASSERT_TIME(1015); CLUSTER_STEP; /* Server 2 receives the pre-vote RequestVote RPC */ ASSERT_TERM(2, 2); /* Server 2 does not increment its term */ ASSERT_VOTED_FOR(2, 0); /* Server 1 does not persist its vote */ ASSERT_TIME(1015); /* Slow down responses of Server 2 */ CLUSTER_SET_NETWORK_LATENCY(2, 100); /* Server 1 completes sending pre-vote RequestVote results */ CLUSTER_STEP_N(2); /* Server 0 receives the pre-vote RequestVote results */ CLUSTER_STEP_N(2); ASSERT_CANDIDATE(0); ASSERT_TERM(0, 2); /* Server 0 has now incremented its term. 
*/ ASSERT_TIME(1030); /* Don't send messages from 0, this ensures no real RequestVote RPCs are sent */ CLUSTER_SATURATE(0, 1); CLUSTER_SATURATE(0, 2); /* Wait until all messages from 2 to 0 are delivered */ CLUSTER_STEP_UNTIL_DELIVERED(2, 0, 100); /* Make sure we haven't counted the pre-vote result as a real vote */ ASSERT_CANDIDATE(0); return MUNIT_OK; } raft-0.11.3/test/integration/test_fixture.c000066400000000000000000000214111415614527300207320ustar00rootroot00000000000000#include "../../include/raft/fixture.h" #include "../lib/fsm.h" #include "../lib/heap.h" #include "../lib/runner.h" #define N_SERVERS 3 /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_HEAP; struct raft_fsm fsms[N_SERVERS]; struct raft_fixture fixture; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); struct raft_configuration configuration; unsigned i; int rc; SET_UP_HEAP; for (i = 0; i < N_SERVERS; i++) { FsmInit(&f->fsms[i]); } rc = raft_fixture_init(&f->fixture, N_SERVERS, f->fsms); munit_assert_int(rc, ==, 0); rc = raft_fixture_configuration(&f->fixture, N_SERVERS, &configuration); munit_assert_int(rc, ==, 0); rc = raft_fixture_bootstrap(&f->fixture, &configuration); munit_assert_int(rc, ==, 0); raft_configuration_close(&configuration); rc = raft_fixture_start(&f->fixture); munit_assert_int(rc, ==, 0); return f; } static void tearDown(void *data) { struct fixture *f = data; unsigned i; raft_fixture_close(&f->fixture); for (i = 0; i < N_SERVERS; i++) { FsmClose(&f->fsms[i]); } TEAR_DOWN_HEAP; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ #define GET(I) raft_fixture_get(&f->fixture, I) #define STEP 
raft_fixture_step(&f->fixture) #define STEP_N(N) raft_fixture_step_n(&f->fixture, N) #define STEP_UNTIL_STATE_IS(I, STATE) \ { \ bool done_; \ done_ = raft_fixture_step_until_state_is(&f->fixture, I, STATE, 2000); \ munit_assert_true(done_); \ } #define STATE(I) raft_state(GET(I)) #define ELECT(I) raft_fixture_elect(&f->fixture, I) #define DEPOSE raft_fixture_depose(&f->fixture) #define APPLY(I, REQ) \ { \ struct raft_buffer buf; \ int rc; \ FsmEncodeAddX(1, &buf); \ rc = raft_apply(GET(I), REQ, &buf, 1, NULL); \ munit_assert_int(rc, ==, 0); \ } #define STEP_UNTIL_APPLIED(INDEX) \ raft_fixture_step_until_applied(&f->fixture, N_SERVERS, INDEX, INDEX * 1000) /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert that the fixture time matches the given value */ #define ASSERT_TIME(TIME) \ munit_assert_int(raft_fixture_time(&f->fixture), ==, TIME) /* Assert that the I'th server is in the given state. */ #define ASSERT_STATE(I, S) munit_assert_int(STATE(I), ==, S) /* Assert that the x field of the FSM with the given index matches the given * value. */ #define ASSERT_FSM_X(I, VALUE) \ munit_assert_int(FsmGetX(&f->fsms[I]), ==, VALUE) /****************************************************************************** * * raft_fixture_step * *****************************************************************************/ SUITE(raft_fixture_step) /* If there is no disk I/O in progress or network messages in flight, the tick * callbacks are called. 
*/ TEST(raft_fixture_step, tick, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_fixture_event *event; (void)params; ASSERT_TIME(0); event = STEP; munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_TICK); ASSERT_TIME(100); event = STEP; munit_assert_int(event->server_index, ==, 1); munit_assert_int(event->type, ==, RAFT_FIXTURE_TICK); ASSERT_TIME(100); event = STEP; munit_assert_int(event->server_index, ==, 2); munit_assert_int(event->type, ==, RAFT_FIXTURE_TICK); ASSERT_TIME(100); event = STEP; munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_TICK); ASSERT_TIME(200); return MUNIT_OK; } /* By default the election timeout of server 0 is the first to expire . */ TEST(raft_fixture_step, electionTimeout, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_fixture_event *event; (void)params; event = STEP_N(28); munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_TICK); ASSERT_TIME(1000); ASSERT_STATE(0, RAFT_CANDIDATE); ASSERT_STATE(1, RAFT_FOLLOWER); ASSERT_STATE(2, RAFT_FOLLOWER); munit_log(MUNIT_LOG_INFO, "done"); return MUNIT_OK; } /* Send requests are flushed immediately. */ TEST(raft_fixture_step, flushSend, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_fixture_event *event; (void)params; STEP_UNTIL_STATE_IS(0, RAFT_CANDIDATE); event = STEP; munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_NETWORK); ASSERT_TIME(1000); event = STEP; munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_NETWORK); ASSERT_TIME(1000); return MUNIT_OK; } /* Messages are delivered according to the current network latency. 
*/ TEST(raft_fixture_step, deliver, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_fixture_event *event; (void)params; STEP_UNTIL_STATE_IS(0, RAFT_CANDIDATE); /* Server 0 starts election */ STEP_N(2); /* Server 0 sends 2 RequestVote */ STEP_N(2); /* Ticks for server 1 and 2 */ ASSERT_TIME(1000); event = STEP; munit_assert_int(event->server_index, ==, 0); munit_assert_int(event->type, ==, RAFT_FIXTURE_NETWORK); ASSERT_TIME(1015); return MUNIT_OK; } /****************************************************************************** * * raft_fixture_elect * *****************************************************************************/ SUITE(raft_fixture_elect) /* Trigger the election of the first server. */ TEST(raft_fixture_elect, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; ELECT(0); ASSERT_STATE(0, RAFT_LEADER); ASSERT_STATE(1, RAFT_FOLLOWER); ASSERT_STATE(2, RAFT_FOLLOWER); return MUNIT_OK; } /* Trigger the election of the second server. */ TEST(raft_fixture_elect, second, setUp, tearDown, 0, NULL) { struct fixture *f = data; ELECT(1); ASSERT_STATE(0, RAFT_FOLLOWER); ASSERT_STATE(1, RAFT_LEADER); ASSERT_STATE(2, RAFT_FOLLOWER); return MUNIT_OK; } /* Trigger an election change. */ TEST(raft_fixture_elect, change, setUp, tearDown, 0, NULL) { struct fixture *f = data; ELECT(0); DEPOSE; ASSERT_STATE(0, RAFT_FOLLOWER); ASSERT_STATE(1, RAFT_FOLLOWER); ASSERT_STATE(2, RAFT_FOLLOWER); ELECT(1); ASSERT_STATE(0, RAFT_FOLLOWER); ASSERT_STATE(1, RAFT_LEADER); ASSERT_STATE(2, RAFT_FOLLOWER); return MUNIT_OK; } /* Trigger an election that re-elects the same node. 
*/ TEST(raft_fixture_elect, again, setUp, tearDown, 0, NULL) { struct fixture *f = data; ELECT(0); DEPOSE; ASSERT_STATE(0, RAFT_FOLLOWER); ASSERT_STATE(1, RAFT_FOLLOWER); ASSERT_STATE(2, RAFT_FOLLOWER); ELECT(0); ASSERT_STATE(0, RAFT_LEADER); ASSERT_STATE(1, RAFT_FOLLOWER); ASSERT_STATE(2, RAFT_FOLLOWER); return MUNIT_OK; } /****************************************************************************** * * raft_fixture_step_until_applied * *****************************************************************************/ SUITE(raft_fixture_step_until_applied) /* Wait for one entry to be applied. */ TEST(raft_fixture_step_until_applied, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply *req = munit_malloc(sizeof *req); ELECT(0); APPLY(0, req); STEP_UNTIL_APPLIED(2); ASSERT_FSM_X(0, 1); ASSERT_FSM_X(1, 1); ASSERT_FSM_X(2, 1); free(req); return MUNIT_OK; } /* Wait for two entries to be applied. */ TEST(raft_fixture_step_until_applied, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply *req1 = munit_malloc(sizeof *req1); struct raft_apply *req2 = munit_malloc(sizeof *req2); ELECT(0); APPLY(0, req1); APPLY(0, req2); STEP_UNTIL_APPLIED(3); ASSERT_FSM_X(0, 2); ASSERT_FSM_X(1, 2); ASSERT_FSM_X(2, 2); free(req1); free(req2); return MUNIT_OK; } raft-0.11.3/test/integration/test_heap.c000066400000000000000000000022211415614527300201570ustar00rootroot00000000000000#include "../../include/raft.h" #include "../lib/runner.h" /****************************************************************************** * * Default heap functions * *****************************************************************************/ SUITE(raft_heap) TEST(raft_heap, malloc, NULL, NULL, 0, NULL) { void *p; p = raft_malloc(8); munit_assert_ptr_not_null(p); raft_free(p); return MUNIT_OK; } TEST(raft_heap, calloc, NULL, NULL, 0, NULL) { void *p; p = raft_calloc(1, 8); munit_assert_ptr_not_null(p); munit_assert_int(*(uint64_t*)p, ==, 0); raft_free(p); return 
MUNIT_OK; } TEST(raft_heap, realloc, NULL, NULL, 0, NULL) { void *p; p = raft_realloc(NULL, 8); munit_assert_ptr_not_null(p); *(uint64_t*)p = 1; p = raft_realloc(p, 16); munit_assert_ptr_not_null(p); munit_assert_int(*(uint64_t*)p, ==, 1); raft_free(p); return MUNIT_OK; } TEST(raft_heap, aligned_alloc, NULL, NULL, 0, NULL) { void *p; p = raft_aligned_alloc(1024, 2048); munit_assert_ptr_not_null(p); munit_assert_int((uintptr_t)p % 1024, ==, 0); raft_free(p); return MUNIT_OK; } raft-0.11.3/test/integration/test_membership.c000066400000000000000000000253331415614527300214060ustar00rootroot00000000000000#include "../../src/configuration.h" #include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; struct raft_change req; }; /* Set up a cluster of 2 servers, with the first as leader. */ static void *setup(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(2); CLUSTER_BOOTSTRAP; CLUSTER_START; CLUSTER_ELECT(0); return f; } static void tear_down(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Add a an empty server to the cluster and start it. */ #define GROW \ { \ int rv__; \ CLUSTER_GROW; \ rv__ = raft_start(CLUSTER_RAFT(2)); \ munit_assert_int(rv__, ==, 0); \ } /* Invoke raft_add against the I'th node and assert it returns the given * value. 
*/ #define ADD(I, ID, RV) \ { \ int rv_; \ char address_[16]; \ sprintf(address_, "%d", ID); \ rv_ = raft_add(CLUSTER_RAFT(I), &f->req, ID, address_, NULL); \ munit_assert_int(rv_, ==, RV); \ } /* Submit a request to assign the given ROLE to the server with the given ID. */ #define ASSIGN(I, ID, ROLE) \ { \ int _rv; \ _rv = raft_assign(CLUSTER_RAFT(I), &f->req, ID, ROLE, NULL); \ munit_assert_int(_rv, ==, 0); \ } /* Invoke raft_remove against the I'th node and assert it returns the given * value. */ #define REMOVE(I, ID, RV) \ { \ int rv_; \ rv_ = raft_remove(CLUSTER_RAFT(I), &f->req, ID, NULL); \ munit_assert_int(rv_, ==, RV); \ } struct result { int status; bool done; }; /* Submit an apply request. */ #define APPLY_SUBMIT(I) \ struct raft_buffer _buf; \ struct raft_apply _req; \ struct result _result = {0, false}; \ int _rv; \ FsmEncodeSetX(123, &_buf); \ _req.data = &_result; \ _rv = raft_apply(CLUSTER_RAFT(I), &_req, &_buf, 1, NULL); \ munit_assert_int(_rv, ==, 0); /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert the values of the committed and uncommitted configuration indexes on * the raft instance with the given index. 
*/ #define ASSERT_CONFIGURATION_INDEXES(I, COMMITTED, UNCOMMITTED) \ { \ struct raft *raft_ = CLUSTER_RAFT(I); \ munit_assert_int(raft_->configuration_index, ==, COMMITTED); \ munit_assert_int(raft_->configuration_uncommitted_index, ==, \ UNCOMMITTED); \ } /****************************************************************************** * * raft_add * *****************************************************************************/ SUITE(raft_add) /* After a request to add a new non-voting server is committed, the new * configuration is not marked as uncommitted anymore */ TEST(raft_add, committed, setup, tear_down, 0, NULL) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); const struct raft_server *server; ADD(0 /* I */, 3 /* ID */, 0); /* The new configuration is already effective. */ munit_assert_int(raft->configuration.n, ==, 3); server = &raft->configuration.servers[2]; munit_assert_int(server->id, ==, 3); munit_assert_string_equal(server->address, "3"); munit_assert_int(server->role, ==, RAFT_SPARE); /* The new configuration is marked as uncommitted. */ ASSERT_CONFIGURATION_INDEXES(0, 1, 2); /* The next/match indexes now include an entry for the new server. */ munit_assert_int(raft->leader_state.progress[2].next_index, ==, 3); munit_assert_int(raft->leader_state.progress[2].match_index, ==, 0); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 2000); ASSERT_CONFIGURATION_INDEXES(0, 2, 0); /* The new configuration is marked as committed. */ return MUNIT_OK; } /* Trying to add a server on a node which is not the leader results in an * error. */ TEST(raft_add, notLeader, setup, tear_down, 0, NULL) { struct fixture *f = data; ADD(1 /* I */, 3 /* ID */, RAFT_NOTLEADER); return MUNIT_OK; } /* Trying to add a server while a configuration change is already in progress * results in an error. 
*/ TEST(raft_add, busy, setup, tear_down, 0, NULL) { struct fixture *f = data; ADD(0 /* I */, 3 /* ID */, 0); ADD(0 /* I */, 4 /* ID */, RAFT_CANTCHANGE); munit_log(MUNIT_LOG_INFO, "done"); return MUNIT_OK; } /* Trying to add a server with an ID which is already in use results in an * error. */ TEST(raft_add, duplicateId, setup, tear_down, 0, NULL) { struct fixture *f = data; ADD(0 /* I */, 2 /* ID */, RAFT_DUPLICATEID); return MUNIT_OK; } /****************************************************************************** * * raft_remove * *****************************************************************************/ SUITE(raft_remove) /* After a request to remove server is committed, the new configuration is not * marked as uncommitted anymore */ TEST(raft_remove, committed, setup, tear_down, 0, NULL) { struct fixture *f = data; GROW; ADD(0, 3, 0); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 2000); ASSIGN(0, 3, RAFT_STANDBY); CLUSTER_STEP_UNTIL_APPLIED(2, 1, 2000); CLUSTER_STEP_N(2); REMOVE(0, 3, 0); ASSERT_CONFIGURATION_INDEXES(0, 3, 4); CLUSTER_STEP_UNTIL_APPLIED(0, 4, 2000); ASSERT_CONFIGURATION_INDEXES(0, 4, 0); munit_assert_int(CLUSTER_RAFT(0)->configuration.n, ==, 2); return MUNIT_OK; } /* A leader gets a request to remove itself. 
*/ TEST(raft_remove, self, setup, tear_down, 0, NULL) { struct fixture *f = data; REMOVE(0, 1, 0); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 2000); CLUSTER_STEP_UNTIL_APPLIED(1, 2, 10000); return MUNIT_OK; } /* A leader gets a request to remove itself from a 3-node cluster */ TEST(raft_remove, selfThreeNodeClusterReplicate, setup, tear_down, 0, NULL) { struct fixture *f = data; /* Add a third node */ GROW; ADD(0, 3, 0); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 2000); ASSIGN(0, 3, RAFT_VOTER); CLUSTER_STEP_UNTIL_APPLIED(0, 3, 2000); /* Verify node with id 1 is the leader */ raft_id leader_id = 0xDEADBEEF; const char *leader_address = NULL; raft_leader(CLUSTER_RAFT(0), &leader_id, &leader_address); munit_assert_ulong(leader_id, ==, 1); munit_assert_ptr_not_null(leader_address); /* The leader is requested to remove itself from the configuration */ REMOVE(0, 1, 0); /* The - removed - leader should still replicate entries. * * Raft dissertation 4.2.2 * `First, there will be a period of time (while it is committing Cnew) when * a leader can manage a cluster that does not include itself; it replicates * log entries but does not count itself in majorities.` * * */ APPLY_SUBMIT(0) /* The removed leader eventually steps down */ CLUSTER_STEP_UNTIL_HAS_NO_LEADER(5000); raft_leader(CLUSTER_RAFT(0), &leader_id, &leader_address); munit_assert_ulong(leader_id, ==, 0); munit_assert_ptr_null(leader_address); /* Every node should have all entries */ CLUSTER_STEP_UNTIL_APPLIED(0, 4, 10000); CLUSTER_STEP_UNTIL_APPLIED(1, 4, 10000); CLUSTER_STEP_UNTIL_APPLIED(2, 4, 10000); /* The removed leader eventually steps down */ CLUSTER_STEP_UNTIL_HAS_LEADER(5000); /* The removed leader doesn't know who the leader is */ raft_leader(CLUSTER_RAFT(0), &leader_id, &leader_address); munit_assert_ulong(leader_id, ==, 0); munit_assert_ptr_null(leader_address); /* The new configuration has a leader */ raft_leader(CLUSTER_RAFT(1), &leader_id, &leader_address); munit_assert_ulong(leader_id, !=, 0); 
munit_assert_ulong(leader_id, !=, 1); munit_assert_ptr_not_null(leader_address); return MUNIT_OK; } /* Trying to remove a server on a node which is not the leader results in an * error. */ TEST(raft_remove, notLeader, setup, tear_down, 0, NULL) { struct fixture *f = data; REMOVE(1 /* I */, 3 /* ID */, RAFT_NOTLEADER); return MUNIT_OK; } /* Trying to remove a server while a configuration change is already in progress * results in an error. */ TEST(raft_remove, inProgress, setup, tear_down, 0, NULL) { struct fixture *f = data; ADD(0, 3, 0); REMOVE(0, 3, RAFT_CANTCHANGE); return MUNIT_OK; } /* Trying to remove a server with an unknown ID results in an error. */ TEST(raft_remove, badId, setup, tear_down, 0, NULL) { struct fixture *f = data; REMOVE(0, 3, RAFT_BADID); return MUNIT_OK; } raft-0.11.3/test/integration/test_recover.c000066400000000000000000000024551415614527300207200ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture holding a bootstrapped raft cluster. * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(3); CLUSTER_BOOTSTRAP; return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Recover tests. * *****************************************************************************/ SUITE(raft_recover) /* Attempting to recover a running instance results in RAFT_BUSY. */ TEST(raft_recover, busy, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; struct raft_configuration configuration; int rv; /* Start all servers. 
*/ CLUSTER_START; raft = CLUSTER_RAFT(0); CLUSTER_CONFIGURATION(&configuration); rv = raft_recover(raft, &configuration); munit_assert_int(rv, ==, RAFT_BUSY); raft_configuration_close(&configuration); return MUNIT_OK; } raft-0.11.3/test/integration/test_replication.c000066400000000000000000000652301415614527300215640ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Standard startup sequence, bootstrapping the cluster and electing server 0 */ #define BOOTSTRAP_START_AND_ELECT \ CLUSTER_BOOTSTRAP; \ CLUSTER_START; \ CLUSTER_ELECT(0); \ ASSERT_TIME(1045) /****************************************************************************** * * Set up a cluster with a two servers. * *****************************************************************************/ static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(2); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert that the I'th server is in follower state. */ #define ASSERT_FOLLOWER(I) munit_assert_int(CLUSTER_STATE(I), ==, RAFT_FOLLOWER) /* Assert that the I'th server is in candidate state. */ #define ASSERT_CANDIDATE(I) \ munit_assert_int(CLUSTER_STATE(I), ==, RAFT_CANDIDATE) /* Assert that the I'th server is in leader state. 
*/ #define ASSERT_LEADER(I) munit_assert_int(CLUSTER_STATE(I), ==, RAFT_LEADER) /* Assert that the fixture time matches the given value */ #define ASSERT_TIME(TIME) munit_assert_int(CLUSTER_TIME, ==, TIME) /****************************************************************************** * * Log replication. * *****************************************************************************/ SUITE(replication) /* A leader sends a heartbeat message as soon as it gets elected. */ TEST(replication, sendInitialHeartbeat, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; CLUSTER_BOOTSTRAP; CLUSTER_START; /* Server 0 becomes candidate and sends vote requests after the election * timeout. */ CLUSTER_STEP_N(19); ASSERT_TIME(1000); ASSERT_CANDIDATE(0); /* Server 0 receives the vote result, becomes leader and sends * heartbeats. */ CLUSTER_STEP_N(6); ASSERT_LEADER(0); ASSERT_TIME(1030); raft = CLUSTER_RAFT(0); munit_assert_int(raft->leader_state.progress[1].last_send, ==, 1030); /* Server 1 receives the heartbeat from server 0 and resets its election * timer. */ raft = CLUSTER_RAFT(1); munit_assert_int(raft->election_timer_start, ==, 1015); CLUSTER_STEP_N(2); munit_assert_int(raft->election_timer_start, ==, 1045); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 1); munit_assert_int(CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES), ==, 1); return MUNIT_OK; } /* A leader keeps sending heartbeat messages at regular intervals to * maintain leadership. */ TEST(replication, sendFollowupHeartbeat, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; CLUSTER_BOOTSTRAP; CLUSTER_START; /* Server 0 becomes leader and sends the initial heartbeat. */ CLUSTER_STEP_N(24); ASSERT_LEADER(0); ASSERT_TIME(1030); raft = CLUSTER_RAFT(1); /* Server 1 receives the first heartbeat. */ CLUSTER_STEP_N(2); munit_assert_int(raft->election_timer_start, ==, 1045); /* Server 1 receives the second heartbeat. 
*/ CLUSTER_STEP_N(8); munit_assert_int(raft->election_timer_start, ==, 1215); /* Server 1 receives the third heartbeat. */ CLUSTER_STEP_N(7); munit_assert_int(raft->election_timer_start, ==, 1315); /* Server 1 receives the fourth heartbeat. */ CLUSTER_STEP_N(7); munit_assert_int(raft->election_timer_start, ==, 1415); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 4); munit_assert_int(CLUSTER_N_RECV(0, RAFT_IO_APPEND_ENTRIES_RESULT), ==, 4); munit_assert_int(CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES), ==, 4); munit_assert_int(CLUSTER_N_SEND(1, RAFT_IO_APPEND_ENTRIES_RESULT), ==, 4); return MUNIT_OK; } /* If a leader replicates some entries during a given heartbeat interval, it * skips sending the heartbeat for that interval. */ TEST(replication, sendSkipHeartbeat, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; struct raft_apply req; CLUSTER_BOOTSTRAP; CLUSTER_START; raft = CLUSTER_RAFT(0); /* Server 0 becomes leader and sends the first two heartbeats. */ CLUSTER_STEP_UNTIL_ELAPSED(1215); ASSERT_LEADER(0); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 2); munit_assert_int(CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES), ==, 2); /* Server 0 starts replicating a new entry after 15 milliseconds. */ CLUSTER_STEP_UNTIL_ELAPSED(15); ASSERT_TIME(1230); CLUSTER_APPLY_ADD_X(0, &req, 1, NULL); CLUSTER_STEP_N(1); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 3); munit_assert_int(raft->leader_state.progress[1].last_send, ==, 1230); /* When the heartbeat timeout expires, server 0 does not send an empty * append entries. */ CLUSTER_STEP_UNTIL_ELAPSED(70); ASSERT_TIME(1300); CLUSTER_STEP_N(1); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 3); munit_assert_int(raft->leader_state.progress[1].last_send, ==, 1230); return MUNIT_OK; } /* The leader doesn't send replication messages to idle servers. 
*/ TEST(replication, skipIdle, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_change req1; struct raft_apply req2; BOOTSTRAP_START_AND_ELECT; CLUSTER_ADD(&req1); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 1000); CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, &req2, 1, NULL); CLUSTER_STEP_UNTIL_ELAPSED(1000); munit_assert_int(CLUSTER_LAST_APPLIED(0), ==, 3); munit_assert_int(CLUSTER_LAST_APPLIED(1), ==, 3); munit_assert_int(CLUSTER_LAST_APPLIED(2), ==, 0); return MUNIT_OK; } /* A follower remains in probe mode until the leader receives a successful * AppendEntries response. */ TEST(replication, sendProbe, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply req1; struct raft_apply req2; CLUSTER_BOOTSTRAP; CLUSTER_START; /* Server 0 becomes leader and sends the initial heartbeat. */ CLUSTER_STEP_N(25); ASSERT_LEADER(0); ASSERT_TIME(1030); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 1); /* Set a very high network latency for server 1, so server 0 will send a * second probe AppendEntries without transitioning to pipeline mode. */ munit_assert_int(CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES), ==, 0); CLUSTER_SET_NETWORK_LATENCY(1, 250); /* Server 0 receives a new entry after 15 milliseconds. Since the follower * is still in probe mode and since an AppendEntries message was already * sent recently, it does not send the new entry immediately. */ CLUSTER_STEP_UNTIL_ELAPSED(15); CLUSTER_APPLY_ADD_X(0, &req1, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 1); /* A heartbeat timeout elapses without receiving a response, so server 0 * sends an new AppendEntries to server 1. */ CLUSTER_STEP_UNTIL_ELAPSED(85); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 2); /* Server 0 receives a second entry after 15 milliseconds. Since the * follower is still in probe mode and since an AppendEntries message was * already sent recently, it does not send the new entry immediately. 
*/ CLUSTER_STEP_UNTIL_ELAPSED(15); CLUSTER_APPLY_ADD_X(0, &req2, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 2); /* Eventually server 0 receives AppendEntries results for both entries. */ CLUSTER_STEP_UNTIL_APPLIED(0, 3, 1000); return MUNIT_OK; } /* A follower transitions to pipeline mode after the leader receives a * successful AppendEntries response from it. */ TEST(replication, sendPipeline, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft; struct raft_apply req1; struct raft_apply req2; CLUSTER_BOOTSTRAP; CLUSTER_START; raft = CLUSTER_RAFT(0); /* Server 0 becomes leader and sends the initial heartbeat, receiving a * successful response. */ CLUSTER_STEP_UNTIL_ELAPSED(1060); ASSERT_LEADER(0); ASSERT_TIME(1060); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 1); /* Server 0 receives a new entry after 15 milliseconds. Since the follower * has transitioned to pipeline mode the new entry is sent immediately and * the next index is optimistically increased. */ CLUSTER_STEP_UNTIL_ELAPSED(15); CLUSTER_APPLY_ADD_X(0, &req1, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 2); munit_assert_int(raft->leader_state.progress[1].next_index, ==, 3); /* After another 15 milliseconds server 0 receives a second apply request, * which is also sent out immediately */ CLUSTER_STEP_UNTIL_ELAPSED(15); CLUSTER_APPLY_ADD_X(0, &req2, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 3); munit_assert_int(raft->leader_state.progress[1].next_index, ==, 4); /* Eventually server 0 receives AppendEntries results for both entries. */ CLUSTER_STEP_UNTIL_APPLIED(0, 3, 1000); return MUNIT_OK; } /* A follower disconnects while in probe mode. 
*/ TEST(replication, sendDisconnect, setUp, tearDown, 0, NULL) { struct fixture *f = data; CLUSTER_BOOTSTRAP; CLUSTER_START; /* Server 0 becomes leader and sends the initial heartbeat, however they * fail because server 1 has disconnected. */ CLUSTER_STEP_N(24); ASSERT_LEADER(0); CLUSTER_DISCONNECT(0, 1); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 0); /* After the heartbeat timeout server 0 retries, but still fails. */ CLUSTER_STEP_UNTIL_ELAPSED(100); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 0); /* After another heartbeat timeout server 0 retries and this time * succeeds. */ CLUSTER_STEP_UNTIL_ELAPSED(100); CLUSTER_RECONNECT(0, 1); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 1); return MUNIT_OK; } /* A follower disconnects while in pipeline mode. */ TEST(replication, sendDisconnectPipeline, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply req1; struct raft_apply req2; CLUSTER_BOOTSTRAP; CLUSTER_START; /* Server 0 becomes leader and sends a couple of heartbeats. */ CLUSTER_STEP_UNTIL_ELAPSED(1215); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 2); /* It then starts to replicate a few entries, however the follower * disconnects before delivering results. */ CLUSTER_APPLY_ADD_X(0, &req1, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 3); CLUSTER_APPLY_ADD_X(0, &req2, 1, NULL); CLUSTER_STEP; munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 4); CLUSTER_DISCONNECT(0, 1); /* The next heartbeat fails, transitioning the follower back to probe * mode. */ CLUSTER_STEP_UNTIL_ELAPSED(115); munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_APPEND_ENTRIES), ==, 4); /* After reconnection the follower eventually replicates the entries and * reports back. 
*/ CLUSTER_RECONNECT(0, 1); CLUSTER_STEP_UNTIL_APPLIED(0, 3, 1000); return MUNIT_OK; } static char *send_oom_heap_fault_delay[] = {"5", NULL}; static char *send_oom_heap_fault_repeat[] = {"1", NULL}; static MunitParameterEnum send_oom_params[] = { {TEST_HEAP_FAULT_DELAY, send_oom_heap_fault_delay}, {TEST_HEAP_FAULT_REPEAT, send_oom_heap_fault_repeat}, {NULL, NULL}, }; /* Out of memory failures. */ TEST(replication, sendOom, setUp, tearDown, 0, send_oom_params) { struct fixture *f = data; return MUNIT_SKIP; struct raft_apply req; BOOTSTRAP_START_AND_ELECT; HEAP_FAULT_ENABLE; CLUSTER_APPLY_ADD_X(0, &req, 1, NULL); CLUSTER_STEP; return MUNIT_OK; } /* A failure occurs upon submitting the I/O request. */ TEST(replication, sendIoError, setUp, tearDown, 0, NULL) { struct fixture *f = data; return MUNIT_SKIP; struct raft_apply req; BOOTSTRAP_START_AND_ELECT; CLUSTER_IO_FAULT(0, 1, 1); CLUSTER_APPLY_ADD_X(0, &req, 1, NULL); CLUSTER_STEP; return MUNIT_OK; } /* Receive the same entry a second time, before the first has been persisted. */ TEST(replication, recvTwice, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply *req = munit_malloc(sizeof *req); BOOTSTRAP_START_AND_ELECT; CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req, 1, NULL); /* Set a high disk latency for server 1, so server 0 won't receive an * AppendEntries result within the heartbeat and will re-send the same * entries */ CLUSTER_SET_DISK_LATENCY(1, 300); CLUSTER_STEP_UNTIL_DELIVERED(0, 1, 100); /* First AppendEntries */ CLUSTER_STEP_UNTIL_ELAPSED(110); /* Heartbeat timeout */ CLUSTER_STEP_UNTIL_DELIVERED(0, 1, 100); /* Second AppendEntries */ CLUSTER_STEP_UNTIL_APPLIED(0, req->index, 500); free(req); return MUNIT_OK; } /* If the term in the request is stale, the server rejects it. 
 */
TEST(replication, recvStaleTerm, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    BOOTSTRAP_START_AND_ELECT;

    /* Set a very high election timeout and then disconnect the leader so it
     * will keep sending heartbeats. */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 5000);
    raft_set_election_timeout(CLUSTER_RAFT(0), 5000);
    CLUSTER_SATURATE_BOTHWAYS(0, 1);
    CLUSTER_SATURATE_BOTHWAYS(0, 2);

    /* Eventually a new leader gets elected. */
    CLUSTER_STEP_UNTIL_HAS_NO_LEADER(5000);
    CLUSTER_STEP_UNTIL_HAS_LEADER(10000);
    munit_assert_int(CLUSTER_LEADER, ==, 1);

    /* Reconnect the old leader to the current follower. */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Step a few times, so the old leader sends heartbeats to the follower,
     * which rejects them. */
    CLUSTER_STEP_UNTIL_ELAPSED(200);

    return MUNIT_OK;
}

/* If the server's log is shorter than prevLogIndex, the request is
 * rejected. */
TEST(replication, recvMissingEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entry;
    CLUSTER_BOOTSTRAP;

    /* Server 0 has an entry that server 1 doesn't have */
    entry.type = RAFT_COMMAND;
    entry.term = 1;
    FsmEncodeSetX(1, &entry.buf);
    CLUSTER_ADD_ENTRY(0, &entry);

    /* Server 0 wins the election because it has a longer log. */
    CLUSTER_START;
    CLUSTER_STEP_UNTIL_HAS_LEADER(5000);
    munit_assert_int(CLUSTER_LEADER, ==, 0);

    /* The first server replicates missing entries to the second. */
    CLUSTER_STEP_UNTIL_APPLIED(1, 2, 3000);

    return MUNIT_OK;
}

/* If the term of the last log entry on the server is different from the one
 * in prevLogTerm, and the value of prevLogIndex is greater than the server's
 * commit index (i.e. this is a normal inconsistency), we reject the
 * request. */
TEST(replication, recvPrevLogTermMismatch, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entry1;
    struct raft_entry entry2;
    CLUSTER_BOOTSTRAP;

    /* The servers have an entry with a conflicting term.
 */
    entry1.type = RAFT_COMMAND;
    entry1.term = 2;
    FsmEncodeSetX(1, &entry1.buf);
    CLUSTER_ADD_ENTRY(0, &entry1);
    entry2.type = RAFT_COMMAND;
    entry2.term = 1;
    FsmEncodeSetX(2, &entry2.buf);
    CLUSTER_ADD_ENTRY(1, &entry2);

    CLUSTER_START;
    CLUSTER_ELECT(0);

    /* The follower eventually replicates the entry */
    CLUSTER_STEP_UNTIL_APPLIED(1, 2, 3000);

    return MUNIT_OK;
}

/* If any of the new entries has the same index as an existing entry in our
 * log, but a different term, and that entry index is already committed, we
 * bail out with an error. */
TEST(replication, recvPrevIndexConflict, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entry1;
    struct raft_entry entry2;
    CLUSTER_BOOTSTRAP;

    /* The servers have an entry with a conflicting term. */
    entry1.type = RAFT_COMMAND;
    entry1.term = 2;
    FsmEncodeSetX(1, &entry1.buf);
    CLUSTER_ADD_ENTRY(0, &entry1);
    entry2.type = RAFT_COMMAND;
    entry2.term = 1;
    FsmEncodeSetX(2, &entry2.buf);
    CLUSTER_ADD_ENTRY(1, &entry2);

    CLUSTER_START;
    CLUSTER_ELECT(0);

    /* Artificially bump the commit index on the second server */
    CLUSTER_RAFT(1)->commit_index = 2;
    CLUSTER_STEP;
    CLUSTER_STEP;

    return MUNIT_OK;
}

/* A write log request is submitted for outstanding log entries. If some
 * entries are already existing in the log, they will be skipped. */
TEST(replication, recvSkip, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_apply *req = munit_malloc(sizeof *req);
    BOOTSTRAP_START_AND_ELECT;

    /* Submit an entry */
    CLUSTER_APPLY_ADD_X(0, req, 1, NULL);

    /* The leader replicates the entry to the follower however it does not get
     * notified about the result, so it sends the entry again. */
    CLUSTER_STEP;
    CLUSTER_SATURATE_BOTHWAYS(0, 1);
    CLUSTER_STEP_UNTIL_ELAPSED(150);

    /* The follower reconnects and receives again the same entry. This time the
     * leader receives the notification.
 */
    CLUSTER_DESATURATE_BOTHWAYS(0, 1);
    CLUSTER_STEP_UNTIL_APPLIED(0, req->index, 2000);
    free(req);
    return MUNIT_OK;
}

/* If the index and term of the last snapshot on the server match prevLogIndex
 * and prevLogTerm the request is accepted. */
TEST(replication, recvMatch_last_snapshot, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entry;
    struct raft_configuration configuration;
    int rv;
    CLUSTER_CONFIGURATION(&configuration);
    rv = raft_bootstrap(CLUSTER_RAFT(0), &configuration);
    munit_assert_int(rv, ==, 0);
    raft_configuration_close(&configuration);

    /* The first server has entry 2 */
    entry.type = RAFT_COMMAND;
    entry.term = 2;
    FsmEncodeSetX(5, &entry.buf);
    CLUSTER_ADD_ENTRY(0, &entry);

    /* The second server has a snapshot up to entry 2 */
    CLUSTER_SET_SNAPSHOT(1 /*                                               */,
                         2 /* last index                                    */,
                         2 /* last term                                     */,
                         1 /* conf index                                    */,
                         5 /* x                                             */,
                         0 /* y                                             */);
    CLUSTER_SET_TERM(1, 2);

    CLUSTER_START;
    CLUSTER_ELECT(0);

    /* Apply an additional entry and check that it gets replicated on the
     * follower. */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(1, 3, 3000);

    return MUNIT_OK;
}

/* If a candidate server receives a request containing the same term as its
 * own, it steps down to follower and accepts the request. */
TEST(replication, recvCandidateSameTerm, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    CLUSTER_BOOTSTRAP;

    /* Disconnect server 2 from the other two and set a low election timeout on
     * it, so it will immediately start an election. */
    CLUSTER_SATURATE_BOTHWAYS(2, 0);
    CLUSTER_SATURATE_BOTHWAYS(2, 1);
    raft_fixture_set_randomized_election_timeout(&f->cluster, 2, 800);
    raft_set_election_timeout(CLUSTER_RAFT(2), 800);

    /* Server 2 becomes candidate. */
    CLUSTER_START;
    CLUSTER_STEP_UNTIL_STATE_IS(2, RAFT_CANDIDATE, 1000);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);

    /* Server 0 wins the election and replicates an entry.
 */
    CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_LEADER, 2000);
    munit_assert_int(CLUSTER_TERM(0), ==, 2);
    munit_assert_int(CLUSTER_TERM(1), ==, 2);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);
    CLUSTER_MAKE_PROGRESS;

    /* Now reconnect the third server, which eventually steps down and
     * replicates the entry. */
    munit_assert_int(CLUSTER_STATE(2), ==, RAFT_CANDIDATE);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);
    CLUSTER_DESATURATE_BOTHWAYS(2, 0);
    CLUSTER_DESATURATE_BOTHWAYS(2, 1);
    CLUSTER_STEP_UNTIL_STATE_IS(2, RAFT_FOLLOWER, 2000);
    CLUSTER_STEP_UNTIL_APPLIED(2, 2, 2000);

    return MUNIT_OK;
}

/* If a candidate server receives a request containing a higher term than its
 * own, it steps down to follower and accepts the request. */
TEST(replication, recvCandidateHigherTerm, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    CLUSTER_BOOTSTRAP;

    /* Set a high election timeout on server 1, so it won't become candidate */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 1, 2000);
    raft_set_election_timeout(CLUSTER_RAFT(1), 2000);

    /* Disconnect server 2 from the other two. */
    CLUSTER_SATURATE_BOTHWAYS(2, 0);
    CLUSTER_SATURATE_BOTHWAYS(2, 1);

    /* Set a low election timeout on server 0, and disconnect it from server 1,
     * so by the time it wins the second round, server 2 will have turned
     * candidate */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 800);
    raft_set_election_timeout(CLUSTER_RAFT(0), 800);
    CLUSTER_SATURATE_BOTHWAYS(0, 1);

    CLUSTER_START;

    /* Server 2 becomes candidate, and server 0 already is candidate.
 */
    CLUSTER_STEP_UNTIL_STATE_IS(2, RAFT_CANDIDATE, 1500);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);
    munit_assert_int(CLUSTER_STATE(0), ==, RAFT_CANDIDATE);
    munit_assert_int(CLUSTER_TERM(0), ==, 2);

    /* Server 0 starts a new election, while server 2 is still candidate */
    CLUSTER_STEP_UNTIL_TERM_IS(0, 3, 2000);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);
    munit_assert_int(CLUSTER_STATE(2), ==, RAFT_CANDIDATE);

    /* Reconnect the first and second server and let the election succeed and
     * replicate an entry. */
    CLUSTER_DESATURATE_BOTHWAYS(0, 1);
    CLUSTER_STEP_UNTIL_HAS_LEADER(1000);
    CLUSTER_MAKE_PROGRESS;

    /* Now reconnect the third server, which eventually steps down and
     * replicates the entry. */
    munit_assert_int(CLUSTER_STATE(2), ==, RAFT_CANDIDATE);
    munit_assert_int(CLUSTER_TERM(2), ==, 2);
    CLUSTER_DESATURATE_BOTHWAYS(2, 0);
    CLUSTER_DESATURATE_BOTHWAYS(2, 1);
    CLUSTER_STEP_UNTIL_STATE_IS(2, RAFT_FOLLOWER, 2000);
    CLUSTER_STEP_UNTIL_APPLIED(2, 2, 2000);

    return MUNIT_OK;
}

/* If the server handling the response is not the leader, the result
 * is ignored. */
TEST(replication, resultNotLeader, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    BOOTSTRAP_START_AND_ELECT;

    /* Set a very high latency for the second server's outgoing messages, so
     * the first server won't get notified about the results for a while. */
    CLUSTER_SET_NETWORK_LATENCY(1, 400);

    /* Set a low election timeout on the first server so it will step down very
     * soon. */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 200);
    raft_set_election_timeout(CLUSTER_RAFT(0), 200);

    /* Eventually leader steps down and becomes candidate. */
    CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_CANDIDATE, 2000);

    /* The AppendEntries result eventually gets delivered, but the candidate
     * ignores it. */
    CLUSTER_STEP_UNTIL_ELAPSED(400);

    return MUNIT_OK;
}

/* If the response has a term which is lower than the server's one, it's
 * ignored.
 */
TEST(replication, resultLowerTerm, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    BOOTSTRAP_START_AND_ELECT;

    /* Set a very high latency for the second server's outgoing messages, so
     * the first server won't get notified about the results for a while. */
    CLUSTER_SET_NETWORK_LATENCY(1, 2000);

    /* Set a high election timeout on server 1, so it won't become candidate */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 1, 2000);
    raft_set_election_timeout(CLUSTER_RAFT(1), 2000);

    /* Disconnect server 0 and set a low election timeout on it so it will
     * step down very soon. */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 200);
    raft_set_election_timeout(CLUSTER_RAFT(0), 200);
    CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_FOLLOWER, 2000);

    /* Make server 0 become leader again. */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);
    CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_LEADER, 4000);

    /* Eventually deliver the result message. */
    CLUSTER_STEP_UNTIL_ELAPSED(2500);

    return MUNIT_OK;
}

/* If the response has a term which is higher than the server's one, step down
 * to follower. */
TEST(replication, resultHigherTerm, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    BOOTSTRAP_START_AND_ELECT;

    /* Set a very high election timeout for server 0 so it won't step down. */
    raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 5000);
    raft_set_election_timeout(CLUSTER_RAFT(0), 5000);

    /* Disconnect the server 0 from the rest of the cluster. */
    CLUSTER_SATURATE_BOTHWAYS(0, 1);
    CLUSTER_SATURATE_BOTHWAYS(0, 2);

    /* Eventually a new leader gets elected */
    CLUSTER_STEP_UNTIL_HAS_NO_LEADER(2000);
    CLUSTER_STEP_UNTIL_HAS_LEADER(4000);
    munit_assert_int(CLUSTER_LEADER, ==, 1);

    /* Reconnect the old leader to the current follower, which eventually
     * replies with an AppendEntries result containing a higher term.
 */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);
    CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_FOLLOWER, 2000);

    return MUNIT_OK;
}

/* If the response fails because of a log mismatch, the nextIndex for the
 * server is updated and the relevant older entries are resent. */
TEST(replication, resultRetry, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entry;
    CLUSTER_BOOTSTRAP;

    /* Add an additional entry to the first server that the second server does
     * not have. */
    entry.type = RAFT_COMMAND;
    entry.term = 1;
    FsmEncodeSetX(5, &entry.buf);
    CLUSTER_ADD_ENTRY(0, &entry);

    CLUSTER_START;
    CLUSTER_ELECT(0);

    /* The first server receives an AppendEntries result from the second
     * server indicating that its log does not have the entry at index 2, so
     * it will resend it. */
    CLUSTER_STEP_UNTIL_APPLIED(1, 2, 2000);

    return MUNIT_OK;
}
raft-0.11.3/test/integration/test_snapshot.c000066400000000000000000000421521415614527300211100ustar00rootroot00000000000000
#include "../lib/cluster.h"
#include "../lib/runner.h"

/******************************************************************************
 *
 * Fixture
 *
 *****************************************************************************/

struct fixture
{
    FIXTURE_CLUSTER;
};

static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data)
{
    struct fixture *f = munit_malloc(sizeof *f);
    SETUP_CLUSTER(3);
    CLUSTER_BOOTSTRAP;
    CLUSTER_START;
    CLUSTER_ELECT(0);
    return f;
}

static void tearDown(void *data)
{
    struct fixture *f = data;
    TEAR_DOWN_CLUSTER;
    free(f);
}

/******************************************************************************
 *
 * Helper macros
 *
 *****************************************************************************/

/* Set the snapshot threshold on all servers of the cluster */
#define SET_SNAPSHOT_THRESHOLD(VALUE)                                \
    {                                                                \
        unsigned i;                                                  \
        for (i = 0; i < CLUSTER_N; i++) {                            \
            raft_set_snapshot_threshold(CLUSTER_RAFT(i), VALUE);     \
        }                                                            \
    }

/* Set the snapshot trailing logs number on all servers of the cluster */
#define
SET_SNAPSHOT_TRAILING(VALUE)                                     \
    {                                                            \
        unsigned i;                                              \
        for (i = 0; i < CLUSTER_N; i++) {                        \
            raft_set_snapshot_trailing(CLUSTER_RAFT(i), VALUE);  \
        }                                                        \
    }

/* Set the snapshot timeout on all servers of the cluster */
#define SET_SNAPSHOT_TIMEOUT(VALUE)                                   \
    {                                                                 \
        unsigned i;                                                   \
        for (i = 0; i < CLUSTER_N; i++) {                             \
            raft_set_install_snapshot_timeout(CLUSTER_RAFT(i), VALUE);\
        }                                                             \
    }

/******************************************************************************
 *
 * Successfully install a snapshot
 *
 *****************************************************************************/

SUITE(snapshot)

/* Install a snapshot on a follower that has fallen behind. */
TEST(snapshot, installOne, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    CLUSTER_SATURATE_BOTHWAYS(0, 2);

    /* Apply a few entries, to force a snapshot to be taken. */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect the follower and wait for it to catch up */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);
    CLUSTER_STEP_UNTIL_APPLIED(2, 4, 5000);

    /* Check that the leader has sent a snapshot */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);
    return MUNIT_OK;
}

/* Install snapshot times out and leader retries */
TEST(snapshot, installOneTimeOut, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries, to force a snapshot to be taken. Drop all network
     * traffic between servers 0 and 2 in order for AppendEntries RPCs to not
     * be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect both servers and set a high disk latency on server 2 so that
     * the InstallSnapshot RPC will time out */
    CLUSTER_SET_DISK_LATENCY(2, 300);
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Wait a while and check that the leader has sent a snapshot */
    CLUSTER_STEP_UNTIL_ELAPSED(300);
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);

    /* Wait for the snapshot to be installed */
    CLUSTER_STEP_UNTIL_APPLIED(2, 4, 5000);

    /* Assert that the leader has retried the InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 2);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 2);

    return MUNIT_OK;
}

/* Install snapshot to an offline node */
TEST(snapshot, installOneDisconnectedFromBeginningReconnects, setUp, tearDown,
     0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries, to force a snapshot to be taken. Disconnect
     * servers 0 and 2 so that the network calls return failure status */
    CLUSTER_DISCONNECT(0, 2);
    CLUSTER_DISCONNECT(2, 0);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Wait a while so leader detects offline node */
    CLUSTER_STEP_UNTIL_ELAPSED(2000);

    /* Assert that the leader doesn't try sending a snapshot to an offline
     * node */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);

    CLUSTER_RECONNECT(0, 2);
    CLUSTER_RECONNECT(2, 0);

    /* Wait for the snapshot to be installed */
    CLUSTER_STEP_UNTIL_APPLIED(2, 4, 5000);

    /* Assert that the leader has sent an InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);

    return MUNIT_OK;
}

/* Install snapshot to an offline node that went down during operation */
TEST(snapshot, installOneDisconnectedDuringOperationReconnects, setUp,
     tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Wait for follower to catch up*/
    CLUSTER_STEP_UNTIL_APPLIED(2, 4, 5000);

    /* Assert that the leader hasn't sent an InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);

    CLUSTER_DISCONNECT(0, 2);
    CLUSTER_DISCONNECT(2, 0);

    /* Wait a while so leader detects offline node */
    CLUSTER_STEP_UNTIL_ELAPSED(2000);

    /* Apply a few more entries */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Assert that the leader doesn't try sending snapshot to an offline
     * node */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);

    CLUSTER_RECONNECT(0, 2);
    CLUSTER_RECONNECT(2, 0);
    CLUSTER_STEP_UNTIL_APPLIED(2, 7, 5000);

    /* Assert that the leader has tried sending an InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);
    munit_assert_int(CLUSTER_N_RECV(2, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);

    return MUNIT_OK;
}

/* No snapshots sent to killed nodes */
TEST(snapshot, noSnapshotInstallToKilled, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Kill a server */
    CLUSTER_KILL(2);

    /* Apply a few entries */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Wait a while */
    CLUSTER_STEP_UNTIL_ELAPSED(4000);

    /* Assert that the leader hasn't sent an InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);

    return MUNIT_OK;
}

/* Install snapshot times out and leader retries, afterwards AppendEntries
 * resume */
TEST(snapshot, installOneTimeOutAppendAfter, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries, to force a snapshot to be taken.
Drop all network
     * traffic between servers 0 and 2 in order for AppendEntries RPCs to not
     * be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect both servers and set a high disk latency on server 2 so that
     * the InstallSnapshot RPC will time out */
    CLUSTER_SET_DISK_LATENCY(2, 300);
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Wait for the snapshot to be installed */
    CLUSTER_STEP_UNTIL_APPLIED(2, 4, 5000);

    /* Append a few entries and check if they are replicated */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(2, 5, 5000);

    /* Assert that the leader has retried the InstallSnapshot RPC */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), ==, 2);

    return MUNIT_OK;
}

/* Install 2 snapshots that both time out and assure the follower catches up */
TEST(snapshot, installMultipleTimeOut, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries, to force a snapshot to be taken. Drop all network
     * traffic between servers 0 and 2 in order for AppendEntries RPCs to not
     * be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect both servers and set a high disk latency on server 2 so that
     * the InstallSnapshot RPC will time out */
    CLUSTER_SET_DISK_LATENCY(2, 300);
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Step until the snapshot times out */
    CLUSTER_STEP_UNTIL_ELAPSED(400);

    /* Apply another few entries, to force a new snapshot to be taken. Drop
     * all traffic between servers 0 and 2 in order for AppendEntries RPCs to
     * not be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect the follower */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);
    CLUSTER_STEP_UNTIL_APPLIED(2, 7, 5000);

    /* Assert that the leader has sent multiple InstallSnapshot RPCs */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), >=, 2);

    return MUNIT_OK;
}

/* Install 2 snapshots that both time out, launch a few regular AppendEntries
 * and assure the follower catches up */
TEST(snapshot, installMultipleTimeOutAppendAfter, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    SET_SNAPSHOT_TIMEOUT(200);

    /* Apply a few entries, to force a snapshot to be taken. Drop all network
     * traffic between servers 0 and 2 in order for AppendEntries RPCs to not
     * be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect both servers and set a high disk latency on server 2 so that
     * the InstallSnapshot RPC will time out */
    CLUSTER_SET_DISK_LATENCY(2, 300);
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Step until the snapshot times out */
    CLUSTER_STEP_UNTIL_ELAPSED(400);

    /* Apply another few entries, to force a new snapshot to be taken. Drop
     * all traffic between servers 0 and 2 in order for AppendEntries RPCs to
     * not be replicated */
    CLUSTER_SATURATE_BOTHWAYS(0, 2);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Reconnect the follower */
    CLUSTER_DESATURATE_BOTHWAYS(0, 2);

    /* Append a few entries and make sure the follower catches up */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(2, 9, 5000);

    /* Assert that the leader has sent multiple InstallSnapshot RPCs */
    munit_assert_int(CLUSTER_N_SEND(0, RAFT_IO_INSTALL_SNAPSHOT), >=, 2);

    return MUNIT_OK;
}

/* True when the given server is in the middle of installing a received
 * snapshot (a put request is in flight and nothing stored yet). */
static bool server_installing_snapshot(struct raft_fixture *f, void* data)
{
    (void) f;
    const struct raft *r = data;
    return r->snapshot.put.data != NULL && r->last_stored == 0;
}

/* True when the given server is taking a snapshot of its own log. */
static bool server_taking_snapshot(struct raft_fixture *f, void* data)
{
    (void) f;
    const struct raft *r = data;
    return r->snapshot.put.data != NULL && r->last_stored != 0;
}

/* True when the given server has no snapshot put request in flight. */
static bool server_snapshot_done(struct raft_fixture *f, void *data)
{
    (void) f;
    const struct raft *r = data;
    return r->snapshot.put.data == NULL;
}

/* Follower receives HeartBeats during the installation of a snapshot */
TEST(snapshot, installSnapshotHeartBeats, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);
    CLUSTER_SATURATE_BOTHWAYS(0, 1);

    /* Apply a few entries, to force a snapshot to be taken. */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Set a large disk latency on the follower, this will allow some
     * heartbeats to be sent during the snapshot installation */
    CLUSTER_SET_DISK_LATENCY(1, 2000);

    munit_assert_uint(CLUSTER_N_RECV(1, RAFT_IO_INSTALL_SNAPSHOT), ==, 0);

    /* Step the cluster until server 1 installs a snapshot */
    const struct raft *r = CLUSTER_RAFT(1);
    CLUSTER_DESATURATE_BOTHWAYS(0, 1);
    CLUSTER_STEP_UNTIL(server_installing_snapshot, (void*) r, 2000);
    munit_assert_uint(CLUSTER_N_RECV(1, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);

    /* Count the number of AppendEntries RPCs received during the snapshot
     * install*/
    unsigned before = CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES);
    CLUSTER_STEP_UNTIL(server_snapshot_done, (void*) r, 5000);
    unsigned after = CLUSTER_N_RECV(1, RAFT_IO_APPEND_ENTRIES);
    munit_assert_uint(before, <, after);

    /* Check that the InstallSnapshot RPC was not resent */
    munit_assert_uint(CLUSTER_N_RECV(1, RAFT_IO_INSTALL_SNAPSHOT), ==, 1);

    /* Check that the snapshot was applied and we can still make progress */
    CLUSTER_STEP_UNTIL_APPLIED(1, 4, 5000);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(1, 6, 5000);

    return MUNIT_OK;
}

/* InstallSnapshot RPC arrives while persisting Entries */
TEST(snapshot, installSnapshotDuringEntriesWrite, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set a large disk latency on the follower, this will allow a
     * InstallSnapshot RPC to arrive while the entries are being persisted. */
    CLUSTER_SET_DISK_LATENCY(1, 2000);
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);

    /* Replicate some entries, these will take a while to persist */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Make sure leader can't successfully send any more entries */
    CLUSTER_DISCONNECT(0,1);
    CLUSTER_MAKE_PROGRESS; /* Snapshot taken here */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS; /* Snapshot taken here */
    CLUSTER_MAKE_PROGRESS;

    /* Snapshot with index 6 is sent while follower is still writing the
     * entries to disk that arrived before the disconnect. */
    CLUSTER_RECONNECT(0,1);

    /* Make sure follower is up to date */
    CLUSTER_STEP_UNTIL_APPLIED(1, 7, 5000);

    return MUNIT_OK;
}

/* Follower receives AppendEntries RPCs while taking a snapshot */
TEST(snapshot, takeSnapshotAppendEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    (void)params;

    /* Set very low threshold and trailing entries number */
    SET_SNAPSHOT_THRESHOLD(3);
    SET_SNAPSHOT_TRAILING(1);

    /* Set a large disk latency on the follower, this will allow AppendEntries
     * to be sent while a snapshot is taken */
    CLUSTER_SET_DISK_LATENCY(1, 2000);

    /* Apply a few entries, to force a snapshot to be taken.
 */
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;

    /* Step the cluster until server 1 takes a snapshot */
    const struct raft *r = CLUSTER_RAFT(1);
    CLUSTER_STEP_UNTIL(server_taking_snapshot, (void*) r, 2000);

    /* Send AppendEntries RPCs while server 1 is taking a snapshot */
    static struct raft_apply reqs[5];
    for (int i = 0; i < 5; i++) {
        CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, &reqs[i], 1, NULL);
    }
    CLUSTER_STEP_UNTIL(server_snapshot_done, (void*) r, 5000);

    /* Make sure the AppendEntries are applied and we can make progress */
    CLUSTER_STEP_UNTIL_APPLIED(1, 9, 5000);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(1, 11, 5000);
    return MUNIT_OK;
}
raft-0.11.3/test/integration/test_start.c000066400000000000000000000136501415614527300204070ustar00rootroot00000000000000
#include "../lib/cluster.h"
#include "../lib/runner.h"

/******************************************************************************
 *
 * Fixture with a fake raft_io instance.
 *
 *****************************************************************************/

struct fixture
{
    FIXTURE_CLUSTER;
};

/******************************************************************************
 *
 * Helper macros
 *
 *****************************************************************************/

/* Bootstrap the I'th server. */
#define BOOTSTRAP(I)                                     \
    do {                                                 \
        struct raft_configuration _configuration;        \
        int _rv;                                         \
        struct raft *_raft;                              \
        CLUSTER_CONFIGURATION(&_configuration);          \
        _raft = CLUSTER_RAFT(I);                         \
        _rv = raft_bootstrap(_raft, &_configuration);    \
        munit_assert_int(_rv, ==, 0);                    \
        raft_configuration_close(&_configuration);       \
    } while (0)

/******************************************************************************
 *
 * Set up a cluster with a single server.
 *
 *****************************************************************************/

static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data)
{
    struct fixture *f = munit_malloc(sizeof *f);
    SETUP_CLUSTER(1);
    return f;
}

static void tearDown(void *data)
{
    struct fixture *f = data;
    TEAR_DOWN_CLUSTER;
    free(f);
}

/******************************************************************************
 *
 * raft_start
 *
 *****************************************************************************/

SUITE(raft_start)

/* There are two servers. The first has a snapshot present and no other
 * entries. */
TEST(raft_start, oneSnapshotAndNoEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    CLUSTER_SET_SNAPSHOT(0 /* server index                                  */,
                         6 /* last index                                    */,
                         2 /* last term                                     */,
                         1 /* conf index                                    */,
                         5 /* x                                             */,
                         7 /* y                                             */);
    CLUSTER_SET_TERM(0, 2);
    BOOTSTRAP(1);
    CLUSTER_START;
    CLUSTER_MAKE_PROGRESS;
    return MUNIT_OK;
}

/* There are two servers. The first has a snapshot along with some follow-up
 * entries. */
TEST(raft_start, oneSnapshotAndSomeFollowUpEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_entry entries[2];
    struct raft_fsm *fsm;

    CLUSTER_GROW;
    BOOTSTRAP(1);

    entries[0].type = RAFT_COMMAND;
    entries[0].term = 2;
    FsmEncodeSetX(6, &entries[0].buf);
    entries[1].type = RAFT_COMMAND;
    entries[1].term = 2;
    FsmEncodeAddY(2, &entries[1].buf);

    CLUSTER_SET_SNAPSHOT(0 /*                                               */,
                         6 /* last index                                    */,
                         2 /* last term                                     */,
                         1 /* conf index                                    */,
                         5 /* x                                             */,
                         7 /* y                                             */);
    CLUSTER_ADD_ENTRY(0, &entries[0]);
    CLUSTER_ADD_ENTRY(1, &entries[1]);
    CLUSTER_SET_TERM(0, 2);
    CLUSTER_START;
    CLUSTER_MAKE_PROGRESS;

    fsm = CLUSTER_FSM(0);
    munit_assert_int(FsmGetX(fsm), ==, 7);

    return MUNIT_OK;
}

/******************************************************************************
 *
 * Start with entries present on disk.
 *
 *****************************************************************************/

/* There are 3 servers. The first has no entries present at all */
TEST(raft_start, noEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    CLUSTER_GROW;
    BOOTSTRAP(1);
    BOOTSTRAP(2);
    CLUSTER_START;
    CLUSTER_MAKE_PROGRESS;
    return MUNIT_OK;
}

/* There are 3 servers, the first has some entries, the others don't. */
TEST(raft_start, twoEntries, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    struct raft_configuration configuration;
    struct raft_entry entry;
    struct raft_fsm *fsm;
    unsigned i;
    int rv;

    CLUSTER_GROW;
    CLUSTER_GROW;

    CLUSTER_CONFIGURATION(&configuration);
    rv = raft_bootstrap(CLUSTER_RAFT(0), &configuration);
    munit_assert_int(rv, ==, 0);
    raft_configuration_close(&configuration);

    entry.type = RAFT_COMMAND;
    entry.term = 3;
    FsmEncodeSetX(123, &entry.buf);
    CLUSTER_ADD_ENTRY(0, &entry);
    CLUSTER_SET_TERM(0, 3);

    BOOTSTRAP(1);
    BOOTSTRAP(2);

    CLUSTER_START;
    CLUSTER_ELECT(0);
    CLUSTER_MAKE_PROGRESS;
    CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_N, 3, 3000);

    for (i = 0; i < CLUSTER_N; i++) {
        fsm = CLUSTER_FSM(i);
        munit_assert_int(FsmGetX(fsm), ==, 124);
    }

    return MUNIT_OK;
}

/* There is a single voting server in the cluster, which immediately elects
 * itself when starting. */
TEST(raft_start, singleVotingSelfElect, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_BOOTSTRAP;
    CLUSTER_START;
    munit_assert_int(CLUSTER_STATE(0), ==, RAFT_LEADER);
    CLUSTER_MAKE_PROGRESS;
    return MUNIT_OK;
}

/* There are two servers in the cluster, one is voting and the other is
 * not. When started, the non-voting server does not elect itself. */
TEST(raft_start, singleVotingNotUs, setUp, tearDown, 0, NULL)
{
    struct fixture *f = data;
    CLUSTER_GROW;
    CLUSTER_BOOTSTRAP_N_VOTING(1);
    CLUSTER_START;
    munit_assert_int(CLUSTER_STATE(1), ==, RAFT_FOLLOWER);
    CLUSTER_MAKE_PROGRESS;
    return MUNIT_OK;
}
raft-0.11.3/test/integration/test_strerror.c000066400000000000000000000026371415614527300211370ustar00rootroot00000000000000
#include "../../include/raft.h"
#include "../lib/runner.h"

/******************************************************************************
 *
 * raft_strerror
 *
 *****************************************************************************/

SUITE(raft_strerror)

/* X-macro listing every public raft error code, used to generate one test
 * case per code. */
#define ERR_CODE_MAP(X)          \
    X(RAFT_NOMEM)                \
    X(RAFT_BADID)                \
    X(RAFT_DUPLICATEID)          \
    X(RAFT_DUPLICATEADDRESS)     \
    X(RAFT_BADROLE)              \
    X(RAFT_MALFORMED)            \
    X(RAFT_NOTLEADER)            \
    X(RAFT_LEADERSHIPLOST)       \
    X(RAFT_SHUTDOWN)             \
    X(RAFT_CANTBOOTSTRAP)        \
    X(RAFT_CANTCHANGE)           \
    X(RAFT_CORRUPT)              \
    X(RAFT_CANCELED)             \
    X(RAFT_NAMETOOLONG)          \
    X(RAFT_TOOBIG)               \
    X(RAFT_NOCONNECTION)         \
    X(RAFT_BUSY)                 \
    X(RAFT_IOERR)

/* Each generated test asserts that raft_strerror() returns a non-NULL string
 * for the given code. */
#define TEST_CASE_STRERROR(CODE)                       \
    TEST(raft_strerror, CODE, NULL, NULL, 0, NULL)     \
    {                                                  \
        (void)data;                                    \
        (void)params;                                  \
        munit_assert_not_null(raft_strerror(CODE));    \
        return MUNIT_OK;                               \
    }

ERR_CODE_MAP(TEST_CASE_STRERROR)

TEST(raft_strerror, default, NULL, NULL, 0, NULL)
{
    (void)data;
    (void)params;
    munit_assert_string_equal(raft_strerror(666), "unknown error");
    return MUNIT_OK;
}
raft-0.11.3/test/integration/test_tick.c000066400000000000000000000162751415614527300202100ustar00rootroot00000000000000
#include "../lib/cluster.h"
#include "../lib/runner.h"

/******************************************************************************
 *
 * Fixture
 *
 *****************************************************************************/

struct fixture
{
    FIXTURE_CLUSTER;
};

static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data)
{
    struct fixture *f = munit_malloc(sizeof *f);
    const char *n_voting_param = munit_parameters_get(params, "n_voting");
    unsigned n = 3;
unsigned n_voting = n; if (n_voting_param != NULL) { n_voting = atoi(n_voting_param); } SETUP_CLUSTER(n); CLUSTER_BOOTSTRAP_N_VOTING(n_voting); CLUSTER_START; return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert the current value of the timer of the I'th raft instance */ #define ASSERT_ELECTION_TIMER(I, MSECS) \ { \ struct raft *raft_ = CLUSTER_RAFT(I); \ munit_assert_int( \ raft_->io->time(raft_->io) - raft_->election_timer_start, ==, \ MSECS); \ } /* Assert the current state of the I'th raft instance. */ #define ASSERT_STATE(I, STATE) munit_assert_int(CLUSTER_STATE(I), ==, STATE); /****************************************************************************** * * Tick callback * *****************************************************************************/ SUITE(tick) /* Internal timers are updated according to the given time delta. */ TEST(tick, electionTimer, setUp, tearDown, 0, NULL) { struct fixture *f = data; (void)params; CLUSTER_STEP; ASSERT_ELECTION_TIMER(0, 100); CLUSTER_STEP; ASSERT_ELECTION_TIMER(1, 100); CLUSTER_STEP; ASSERT_ELECTION_TIMER(2, 100); CLUSTER_STEP; ASSERT_ELECTION_TIMER(0, 200); return MUNIT_OK; } /* If the election timeout expires, the follower is a voting server, and it * hasn't voted yet in this term, then become candidate and start a new * election. */ TEST(tick, candidate, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); (void)params; CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout); /* The term has been incremented. */ munit_assert_int(raft->current_term, ==, 2); /* We have voted for ourselves. 
*/ munit_assert_int(raft->voted_for, ==, 1); /* We are candidate */ ASSERT_STATE(0, RAFT_CANDIDATE); /* The votes array is initialized */ munit_assert_ptr_not_null(raft->candidate_state.votes); munit_assert_true(raft->candidate_state.votes[0]); munit_assert_false(raft->candidate_state.votes[1]); return MUNIT_OK; } /* If the election timeout has not elapsed, stay follower. */ TEST(tick, electionTimerNotExpired, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); (void)params; CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout - 100); ASSERT_STATE(0, RAFT_FOLLOWER); return MUNIT_OK; } static char *elapse_non_voter_n_voting[] = {"1", NULL}; static MunitParameterEnum elapse_non_voter_params[] = { {"n_voting", elapse_non_voter_n_voting}, {NULL, NULL}, }; /* If the election timeout has elapsed, but we're not voters, stay follower. */ TEST(tick, not_voter, setUp, tearDown, 0, elapse_non_voter_params) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(1); (void)params; /* Prevent the timer of the first server from expiring. */ raft_fixture_set_randomized_election_timeout(&f->cluster, 0, 2000); raft_set_election_timeout(CLUSTER_RAFT(0), 2000); CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout + 100); ASSERT_STATE(1, RAFT_FOLLOWER); return MUNIT_OK; } /* If we're leader election timeout elapses without hearing from a majority of * the cluster, step down. */ TEST(tick, no_contact, setUp, tearDown, 0, NULL) { struct fixture *f = data; (void)params; CLUSTER_ELECT(0); CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_SATURATE_BOTHWAYS(0, 2); /* Wait for the leader to step down. */ CLUSTER_STEP_UNTIL_STATE_IS(0, RAFT_FOLLOWER, 2000); return MUNIT_OK; } /* If we're candidate and the election timeout has elapsed, start a new * election. 
*/ TEST(tick, new_election, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); (void)params; CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_SATURATE_BOTHWAYS(0, 2); /* Become candidate */ CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout); /* Expire the election timeout */ CLUSTER_STEP_UNTIL_ELAPSED( raft->candidate_state.randomized_election_timeout); /* The term has been incremented and saved to stable store. */ munit_assert_int(raft->current_term, ==, 3); /* We have voted for ourselves. */ munit_assert_int(raft->voted_for, ==, 1); /* We are still candidate */ ASSERT_STATE(0, RAFT_CANDIDATE); /* The votes array is initialized */ munit_assert_ptr_not_null(raft->candidate_state.votes); munit_assert_true(raft->candidate_state.votes[0]); munit_assert_false(raft->candidate_state.votes[1]); return MUNIT_OK; } /* If the election timeout has not elapsed, stay candidate. */ TEST(tick, during_election, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); (void)params; CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_SATURATE_BOTHWAYS(0, 2); /* Become candidate */ CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout); /* Make some time elapse, but not enough to trigger the timeout */ CLUSTER_STEP_UNTIL_ELAPSED( raft->candidate_state.randomized_election_timeout - 100); /* We are still candidate at the same term */ ASSERT_STATE(0, RAFT_CANDIDATE); munit_assert_int(raft->current_term, ==, 2); return MUNIT_OK; } static char *elapse_request_vote_only_to_voters_n_voting[] = {"2", NULL}; static MunitParameterEnum elapse_request_vote_only_to_voters_params[] = { {"n_voting", elapse_request_vote_only_to_voters_n_voting}, {NULL, NULL}, }; /* Vote requests are sent only to voting servers. 
*/ TEST(tick, request_vote_only_to_voters, setUp, tearDown, 0, elapse_request_vote_only_to_voters_params) { struct fixture *f = data; struct raft *raft = CLUSTER_RAFT(0); (void)params; CLUSTER_SATURATE_BOTHWAYS(0, 1); CLUSTER_SATURATE_BOTHWAYS(0, 2); /* Become candidate */ CLUSTER_STEP_UNTIL_ELAPSED( raft->follower_state.randomized_election_timeout); /* We have sent vote requests only to the voting server */ //__assert_request_vote(f, 2, 2, 1, 1); return MUNIT_OK; } raft-0.11.3/test/integration/test_transfer.c000066400000000000000000000144251415614527300210770ustar00rootroot00000000000000#include "../lib/cluster.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture with a test raft cluster. * *****************************************************************************/ struct fixture { FIXTURE_CLUSTER; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ static void transferCb(struct raft_transfer *req) { bool *done = req->data; munit_assert_false(*done); *done = true; } static bool transferCbHasFired(struct raft_fixture *f, void *arg) { bool *done = arg; (void)f; return *done; } /* Submit a transfer leadership request against the I'th server. */ #define TRANSFER_SUBMIT(I, ID) \ struct raft *_raft = CLUSTER_RAFT(I); \ struct raft_transfer _req; \ bool _done = false; \ int _rv; \ _req.data = &_done; \ _rv = raft_transfer(_raft, &_req, ID, transferCb); \ munit_assert_int(_rv, ==, 0); /* Wait until the transfer leadership request completes. */ #define TRANSFER_WAIT CLUSTER_STEP_UNTIL(transferCbHasFired, &_done, 2000) /* Submit a transfer leadership request and wait for it to complete. 
*/ #define TRANSFER(I, ID) \ do { \ TRANSFER_SUBMIT(I, ID); \ TRANSFER_WAIT; \ } while (0) /* Submit a transfer leadership request against the I'th server and assert that * the given error is returned. */ #define TRANSFER_ERROR(I, ID, RV, ERRMSG) \ do { \ struct raft_transfer __req; \ int __rv; \ __rv = raft_transfer(CLUSTER_RAFT(I), &__req, ID, NULL); \ munit_assert_int(__rv, ==, RV); \ munit_assert_string_equal(CLUSTER_ERRMSG(I), ERRMSG); \ } while (0) /****************************************************************************** * * Set up a cluster with a three servers. * *****************************************************************************/ static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_CLUSTER(3); CLUSTER_BOOTSTRAP; CLUSTER_START; CLUSTER_ELECT(0); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_CLUSTER; free(f); } /****************************************************************************** * * raft_transfer * *****************************************************************************/ SUITE(raft_transfer) /* The follower we ask to transfer leadership to is up-to-date. */ TEST(raft_transfer, upToDate, setUp, tearDown, 0, NULL) { struct fixture *f = data; TRANSFER(0, 2); CLUSTER_STEP_UNTIL_HAS_LEADER(1000); munit_assert_int(CLUSTER_LEADER, ==, 1); return MUNIT_OK; } /* The follower we ask to transfer leadership to needs to catch up. */ TEST(raft_transfer, catchUp, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply req; CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, &req, 1, NULL); TRANSFER(0, 2); CLUSTER_STEP_UNTIL_HAS_LEADER(1000); munit_assert_int(CLUSTER_LEADER, ==, 1); return MUNIT_OK; } /* The follower we ask to transfer leadership to is down and the leadership * transfer does not succeed. 
*/ TEST(raft_transfer, expire, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_apply req; CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, &req, 1, NULL); CLUSTER_KILL(1); TRANSFER(0, 2); munit_assert_int(CLUSTER_LEADER, ==, 0); return MUNIT_OK; } /* The given ID doesn't match any server in the current configuration. */ TEST(raft_transfer, unknownServer, setUp, tearDown, 0, NULL) { struct fixture *f = data; TRANSFER_ERROR(0, 4, RAFT_BADID, "server ID is not valid"); return MUNIT_OK; } /* Submitting a transfer request twice is an error. */ TEST(raft_transfer, twice, setUp, tearDown, 0, NULL) { struct fixture *f = data; TRANSFER_SUBMIT(0, 2); TRANSFER_ERROR(0, 3, RAFT_NOTLEADER, "server is not the leader"); TRANSFER_WAIT; return MUNIT_OK; } /* If the given ID is zero, the target is selected automatically. */ TEST(raft_transfer, autoSelect, setUp, tearDown, 0, NULL) { struct fixture *f = data; TRANSFER(0, 0); CLUSTER_STEP_UNTIL_HAS_LEADER(1000); munit_assert_int(CLUSTER_LEADER, !=, 0); return MUNIT_OK; } /* If the given ID is zero, the target is selected automatically. Followers that * are up-to-date are preferred. */ TEST(raft_transfer, autoSelectUpToDate, setUp, tearDown, 0, NULL) { struct fixture *f = data; CLUSTER_KILL(1); CLUSTER_MAKE_PROGRESS; TRANSFER(0, 0); CLUSTER_STEP_UNTIL_HAS_LEADER(1000); munit_assert_int(CLUSTER_LEADER, ==, 2); return MUNIT_OK; } /* It's possible to transfer leadership also after the server has been * demoted. 
*/ TEST(raft_transfer, afterDemotion, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_change req; struct raft *raft = CLUSTER_RAFT(0); int rv; CLUSTER_ADD(&req); CLUSTER_STEP_UNTIL_APPLIED(0, 2, 1000); CLUSTER_ASSIGN(&req, RAFT_VOTER); CLUSTER_STEP_UNTIL_APPLIED(0, 3, 1000); rv = raft_assign(raft, &req, raft->id, RAFT_SPARE, NULL); munit_assert_int(rv, ==, 0); CLUSTER_STEP_UNTIL_APPLIED(0, 4, 1000); TRANSFER_ERROR(0, 2, RAFT_NOTLEADER, "server is not the leader"); return MUNIT_OK; } static char *cluster_pre_vote[] = {"0", "1", NULL}; static char *cluster_heartbeat[] = {"1", "100", NULL}; static MunitParameterEnum _params[] = { {CLUSTER_PRE_VOTE_PARAM, cluster_pre_vote}, {CLUSTER_HEARTBEAT_PARAM, cluster_heartbeat}, {NULL, NULL}, }; /* It's possible to transfer leadership also when pre-vote is active */ TEST(raft_transfer, preVote, setUp, tearDown, 0, _params) { struct fixture *f = data; TRANSFER(0, 2); CLUSTER_STEP_UNTIL_HAS_LEADER(1000); munit_assert_int(CLUSTER_LEADER, ==, 1); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_append.c000066400000000000000000000544651415614527300212440ustar00rootroot00000000000000#include "append_helpers.h" #include "../lib/runner.h" #include "../lib/uv.h" #include "../lib/aio.h" #include "../../src/uv.h" /* Maximum number of blocks a segment can have */ #define MAX_SEGMENT_BLOCKS 4 /* This block size should work fine for all file systems. */ #define SEGMENT_BLOCK_SIZE 4096 /* Default segment size */ #define SEGMENT_SIZE 4096 * MAX_SEGMENT_BLOCKS /****************************************************************************** * * Fixture with a libuv-based raft_io instance. * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; int count; /* To generate deterministic entry data */ }; /****************************************************************************** * * Set up and tear down. 
* *****************************************************************************/ static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_UV; raft_uv_set_block_size(&f->io, SEGMENT_BLOCK_SIZE); raft_uv_set_segment_size(&f->io, SEGMENT_SIZE); f->count = 0; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; if (f == NULL) { return; } TEAR_DOWN_UV_DEPS; free(f); } static void tearDown(void *data) { struct fixture *f = data; if (f == NULL) { return; } TEAR_DOWN_UV; tearDownDeps(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Shutdown the fixture's raft_io instance, then load all entries on disk using * a new raft_io instance, and assert that there are N entries with a total data * size of TOTAL_DATA_SIZE bytes. */ #define ASSERT_ENTRIES(N, TOTAL_DATA_SIZE) \ TEAR_DOWN_UV; \ do { \ struct uv_loop_s _loop; \ struct raft_uv_transport _transport; \ struct raft_io _io; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _i; \ size_t _n; \ void *_batch = NULL; \ size_t _total_data_size = 0; \ int _rv; \ \ _rv = uv_loop_init(&_loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_tcp_init(&_transport, &_loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_init(&_io, &_loop, f->dir, &_transport); \ munit_assert_int(_rv, ==, 0); \ _rv = _io.init(&_io, 1, "1"); \ if (_rv != 0) { \ munit_errorf("io->init(): %s (%d)", _io.errmsg, _rv); \ } \ _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index, \ &_entries, &_n); \ if (_rv != 0) { \ munit_errorf("io->load(): %s (%d)", _io.errmsg, _rv); \ } \ _io.close(&_io, NULL); \ uv_run(&_loop, UV_RUN_NOWAIT); \ raft_uv_close(&_io); \ raft_uv_tcp_close(&_transport); \ uv_loop_close(&_loop); \ \ 
munit_assert_ptr_null(_snapshot); \ munit_assert_int(_n, ==, N); \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ uint64_t _value = *(uint64_t *)_entry->buf.base; \ munit_assert_int(_entry->term, ==, 1); \ munit_assert_int(_entry->type, ==, RAFT_COMMAND); \ munit_assert_int(_value, ==, _i); \ munit_assert_ptr_not_null(_entry->batch); \ } \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ if (_entry->batch != _batch) { \ _batch = _entry->batch; \ raft_free(_batch); \ } \ _total_data_size += _entry->buf.len; \ } \ raft_free(_entries); \ munit_assert_int(_total_data_size, ==, TOTAL_DATA_SIZE); \ } while (0); /****************************************************************************** * * raft_io->append() * *****************************************************************************/ SUITE(append) /* Append the very first batch of entries. */ TEST(append, first, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(1, 64); ASSERT_ENTRIES(1, 64); return MUNIT_OK; } /* As soon as the backend starts writing the first open segment, a second one * and a third one get prepared. */ TEST(append, prepareSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 64); while (!DirHasFile(f->dir, "open-3")) { LOOP_RUN(1); } munit_assert_true(DirHasFile(f->dir, "open-1")); munit_assert_true(DirHasFile(f->dir, "open-2")); munit_assert_true(DirHasFile(f->dir, "open-3")); return MUNIT_OK; } /* Once the first segment fills up, it gets finalized, and an additional one * gets prepared, to maintain the available segments pool size. 
*/ TEST(append, finalizeSegment, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE); APPEND(1, 64); while (!DirHasFile(f->dir, "open-4")) { LOOP_RUN(1); } munit_assert_true( DirHasFile(f->dir, "0000000000000001-0000000000000004")); munit_assert_false(DirHasFile(f->dir, "open-1")); munit_assert_true(DirHasFile(f->dir, "open-4")); return MUNIT_OK; } /* The very first batch of entries to append is bigger than the regular open * segment size. */ TEST(append, firstBig, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE); ASSERT_ENTRIES(MAX_SEGMENT_BLOCKS, MAX_SEGMENT_BLOCKS * SEGMENT_BLOCK_SIZE); return MUNIT_OK; } /* The second batch of entries to append is bigger than the regular open * segment size. */ TEST(append, secondBig, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 64); APPEND(MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE); return MUNIT_OK; } /* Schedule multiple appends each one exceeding the segment size. */ TEST(append, severalBig, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 2, MAX_SEGMENT_BLOCKS * SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(1, 2, MAX_SEGMENT_BLOCKS * SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(2, 2, MAX_SEGMENT_BLOCKS * SEGMENT_BLOCK_SIZE); APPEND_WAIT(0); APPEND_WAIT(1); APPEND_WAIT(2); ASSERT_ENTRIES(6, 6 * MAX_SEGMENT_BLOCKS * SEGMENT_BLOCK_SIZE); return MUNIT_OK; } /* Write the very first entry and then another one, both fitting in the same * block. */ TEST(append, fitBlock, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(1, 64); APPEND(1, 64); ASSERT_ENTRIES(2, 128); return MUNIT_OK; } /* Write an entry that fills the first block exactly and then another one. 
*/ TEST(append, matchBlock, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; size_t size; size = SEGMENT_BLOCK_SIZE; size -= sizeof(uint64_t) + /* Format */ sizeof(uint64_t) + /* Checksums */ 8 + 16; /* Header */ APPEND(1, size); APPEND(1, 64); ASSERT_ENTRIES(2, size + 64); return MUNIT_OK; } /* Write an entry that exceeds the first block, then another one that fits in * the second block, then a third one that fills the rest of the second block * plus the whole third block exactly, and finally a fourth entry that fits in * the fourth block */ TEST(append, exceedBlock, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; size_t written; size_t size1; size_t size2; size1 = SEGMENT_BLOCK_SIZE; APPEND(1, size1); APPEND(1, 64); written = sizeof(uint64_t) + /* Format version */ 2 * sizeof(uint32_t) + /* CRC sums of first batch */ 8 + 16 + /* Header of first batch */ size1 + /* Size of first batch */ 2 * sizeof(uint32_t) + /* CRC of second batch */ 8 + 16 + /* Header of second batch */ 64; /* Size of second batch */ /* Write a third entry that fills the second block exactly */ size2 = SEGMENT_BLOCK_SIZE - (written % SEGMENT_BLOCK_SIZE); size2 -= (2 * sizeof(uint32_t) + 8 + 16); size2 += SEGMENT_BLOCK_SIZE; APPEND(1, size2); /* Write a fourth entry */ APPEND(1, 64); ASSERT_ENTRIES(4, size1 + 64 + size2 + 64); return MUNIT_OK; } /* If an append request is submitted before the write operation of the previous * append request is started, then a single write will be performed for both * requests. */ TEST(append, batch, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 1, 64); APPEND_SUBMIT(1, 1, 64); APPEND_WAIT(0); APPEND_WAIT(1); return MUNIT_OK; } /* An append request submitted while a write operation is in progress gets * executed only when the write completes. 
*/ TEST(append, wait, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 1, 64); LOOP_RUN(1); APPEND_SUBMIT(1, 1, 64); APPEND_WAIT(0); APPEND_WAIT(1); return MUNIT_OK; } /* Several batches with different size gets appended in fast pace, forcing the * segment arena to grow. */ TEST(append, resizeArena, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 2, 64); APPEND_SUBMIT(1, 1, SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(2, 2, 64); APPEND_SUBMIT(3, 1, SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(4, 1, SEGMENT_BLOCK_SIZE); APPEND_WAIT(0); APPEND_WAIT(1); APPEND_WAIT(2); APPEND_WAIT(3); APPEND_WAIT(4); ASSERT_ENTRIES(7, 64 * 4 + SEGMENT_BLOCK_SIZE * 3); return MUNIT_OK; } /* A few append requests get queued, then a truncate request comes in and other * append requests right after, before truncation is fully completed. */ TEST(append, truncate, setUp, tearDown, 0, NULL) { struct fixture *f = data; int rv; return MUNIT_SKIP; /* FIXME: flaky */ APPEND(2, 64); APPEND_SUBMIT(0, 2, 64); rv = f->io.truncate(&f->io, 2); munit_assert_int(rv, ==, 0); APPEND_SUBMIT(1, 2, 64); APPEND_WAIT(0); APPEND_WAIT(1); return MUNIT_OK; } /* A few append requests get queued, then a truncate request comes in and other * append requests right after, before truncation is fully completed. However * the backend is closed before the truncation request can be processed. */ TEST(append, truncateClosing, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; int rv; APPEND(2, 64); APPEND_SUBMIT(0, 2, 64); rv = f->io.truncate(&f->io, 2); munit_assert_int(rv, ==, 0); APPEND_SUBMIT(1, 2, 64); APPEND_EXPECT(1, RAFT_CANCELED); TEAR_DOWN_UV; return MUNIT_OK; } /* A few append requests get queued, however the backend is closed before * preparing the second segment completes. 
*/ TEST(append, prepareClosing, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 2, 64); LOOP_RUN(1); TEAR_DOWN_UV; return MUNIT_OK; } /* The counters of the open segments get increased as they are closed. */ TEST(append, counter, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t size = SEGMENT_BLOCK_SIZE; int i; for (i = 0; i < 10; i++) { APPEND(1, size); } munit_assert_true( DirHasFile(f->dir, "0000000000000001-0000000000000003")); munit_assert_true( DirHasFile(f->dir, "0000000000000004-0000000000000006")); munit_assert_true(DirHasFile(f->dir, "open-4")); return MUNIT_OK; } /* If the I/O instance is closed, all pending append requests get canceled. */ TEST(append, cancel, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 1, 64); APPEND_EXPECT(0, RAFT_CANCELED); TEAR_DOWN_UV; return MUNIT_OK; } /* The creation of the current open segment fails because there's no space. */ TEST(append, noSpaceUponPrepareCurrent, setUp, tearDown, 0, DirTmpfsParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; raft_uv_set_segment_size(&f->io, SEGMENT_BLOCK_SIZE * 32768); APPEND_FAILURE( 1, 64, RAFT_NOSPACE, "create segment open-1: not enough space to allocate 134217728 bytes"); return MUNIT_OK; } /* The creation of a spare open segment fails because there's no space. */ TEST(append, noSpaceUponPrepareSpare, setUp, tearDown, 0, DirTmpfsParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; #if defined(__powerpc64__) /* XXX: fails on ppc64el */ return MUNIT_SKIP; #endif raft_uv_set_segment_size(&f->io, SEGMENT_BLOCK_SIZE * 2); DirFill(f->dir, SEGMENT_BLOCK_SIZE * 3); APPEND(1, SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(0, 1, SEGMENT_BLOCK_SIZE); APPEND_EXPECT(0, RAFT_NOSPACE); APPEND_WAIT(0); return MUNIT_OK; } /* The write request fails because there's not enough space. 
*/ TEST(append, noSpaceUponWrite, setUp, tearDownDeps, 0, DirTmpfsParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; #if defined(__powerpc64__) /* XXX: fails on ppc64el */ TEAR_DOWN_UV; return MUNIT_SKIP; #endif raft_uv_set_segment_size(&f->io, SEGMENT_BLOCK_SIZE); DirFill(f->dir, SEGMENT_BLOCK_SIZE * 2); APPEND(1, 64); APPEND_FAILURE(1, (SEGMENT_BLOCK_SIZE + 128), RAFT_NOSPACE, "short write: 4096 bytes instead of 8192"); DirRemoveFile(f->dir, ".fill"); ASSERT_ENTRIES(1, 64); return MUNIT_OK; } /* A few requests fail because not enough disk space is available. Eventually * the space is released and the request succeeds. */ TEST(append, noSpaceResolved, setUp, tearDownDeps, 0, DirTmpfsParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; #if defined(__powerpc64__) /* XXX: fails on ppc64el */ TEAR_DOWN_UV; return MUNIT_SKIP; #endif DirFill(f->dir, SEGMENT_BLOCK_SIZE); APPEND_FAILURE( 1, 64, RAFT_NOSPACE, "create segment open-1: not enough space to allocate 16384 bytes"); APPEND_FAILURE( 1, 64, RAFT_NOSPACE, "create segment open-2: not enough space to allocate 16384 bytes"); DirRemoveFile(f->dir, ".fill"); f->count = 0; /* Reset the data counter */ APPEND(1, 64); ASSERT_ENTRIES(1, 64); return MUNIT_OK; } /* An error occurs while performing a write. */ TEST(append, writeError, setUp, tearDown, 0, NULL) { struct fixture *f = data; aio_context_t ctx = 0; /* FIXME: doesn't fail anymore after * https://github.com/CanonicalLtd/raft/pull/49 */ return MUNIT_SKIP; APPEND_SUBMIT(0, 1, 64); AioFill(&ctx, 0); APPEND_WAIT(0); AioDestroy(ctx); return MUNIT_OK; } static char *oomHeapFaultDelay[] = {"1", /* FIXME "2", */ NULL}; static char *oomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomParams[] = { {TEST_HEAP_FAULT_DELAY, oomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory conditions. 
*/ TEST(append, oom, setUp, tearDown, 0, oomParams) { struct fixture *f = data; HEAP_FAULT_ENABLE; APPEND_ERROR(1, 64, RAFT_NOMEM, ""); return MUNIT_OK; } /* The uv instance is closed while a write request is in progress. */ TEST(append, closeDuringWrite, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* TODO: broken */ return MUNIT_SKIP; APPEND_SUBMIT(0, 1, 64); LOOP_RUN(1); TEAR_DOWN_UV; return MUNIT_OK; } /* When the backend is closed, all unused open segments get removed. */ TEST(append, removeSegmentUponClose, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(1, 64); while (!DirHasFile(f->dir, "open-2")) { LOOP_RUN(1); } TEAR_DOWN_UV; munit_assert_false(DirHasFile(f->dir, "open-2")); return MUNIT_OK; } /* When the backend is closed, all pending prepare get requests get canceled. */ TEST(append, cancelPrepareRequest, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* TODO: find a way to test a prepare request cancelation */ return MUNIT_SKIP; APPEND(MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE); APPEND_SUBMIT(0, 1, 64); APPEND_EXPECT(0, RAFT_CANCELED); TEAR_DOWN_UV; return MUNIT_OK; } /* When the writer gets closed it tells the writer to close the segment that * it's currently writing. */ TEST(append, currentSegment, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(1, 64); TEAR_DOWN_UV; munit_assert_true( DirHasFile(f->dir, "0000000000000001-0000000000000001")); return MUNIT_OK; } /* The kernel has ran out of available AIO events. 
*/ TEST(append, ioSetupError, setUp, tearDown, 0, NULL) { struct fixture *f = data; aio_context_t ctx = 0; int rv; rv = AioFill(&ctx, 0); if (rv != 0) { return MUNIT_SKIP; } APPEND_FAILURE(1, 64, RAFT_TOOMANY, "setup writer for open-1: AIO events user limit exceeded"); return MUNIT_OK; } /*=========================================================================== Test interaction between UvAppend and UvBarrier ===========================================================================*/ struct barrierData { int current; /* Count the number of finished AppendEntries RPCs */ int expected; /* Expected number of finished AppendEntries RPCs */ bool done; /* @true if the Barrier CB has fired */ bool expectDone; /* Expect the Barrier CB to have fired or not */ struct uv *uv; }; static void barrierCbCompareCounter(struct UvBarrier *barrier) { struct barrierData *bd = barrier->data; munit_assert_false(bd->done); bd->done = true; struct uv *uv = bd->uv; UvUnblock(uv); munit_assert_int(bd->current, ==, bd->expected); } static void appendCbIncreaseCounterAssertResult(struct raft_io_append *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; struct barrierData *bd = result->data; munit_assert_true(bd->done == bd->expectDone); bd->current += 1; } /* Fill up 3 segments worth of AppendEntries RPC's. * Request a Barrier and expect that the AppendEntries RPC's are finished before * the Barrier callback is fired. 
*/ TEST(append, barrierOpenSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct barrierData bd = {0}; bd.current = 0; bd.expected = 3; bd.done = false; bd.expectDone = false; bd.uv = f->io.impl; APPEND_SUBMIT_CB_DATA(0, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); APPEND_SUBMIT_CB_DATA(1, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); APPEND_SUBMIT_CB_DATA(2, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); struct UvBarrier barrier = {0}; barrier.data = (void*) &bd; UvBarrier(f->io.impl, 1, &barrier, barrierCbCompareCounter); /* Make sure every callback fired */ LOOP_RUN_UNTIL(&bd.done); APPEND_WAIT(0); APPEND_WAIT(1); APPEND_WAIT(2); return MUNIT_OK; } /* Request a Barrier and expect that the no AppendEntries RPC's are finished before * the Barrier callback is fired. */ TEST(append, barrierNoOpenSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct barrierData bd = {0}; bd.current = 0; bd.expected = 0; bd.done = false; bd.expectDone = true; bd.uv = f->io.impl; struct UvBarrier barrier = {0}; barrier.data = (void*) &bd; UvBarrier(f->io.impl, 1, &barrier, barrierCbCompareCounter); APPEND_SUBMIT_CB_DATA(0, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); APPEND_SUBMIT_CB_DATA(1, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); APPEND_SUBMIT_CB_DATA(2, MAX_SEGMENT_BLOCKS, SEGMENT_BLOCK_SIZE, appendCbIncreaseCounterAssertResult, &bd); /* Make sure every callback fired */ LOOP_RUN_UNTIL(&bd.done); APPEND_WAIT(0); APPEND_WAIT(1); APPEND_WAIT(2); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_bootstrap.c000066400000000000000000000057501415614527300220030ustar00rootroot00000000000000#include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a libuv-based raft_io instance 
and an empty configuration. * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; struct raft_configuration conf; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Add a server to the fixture's configuration. */ #define CONFIGURATION_ADD(ID, ADDRESS) \ { \ int rv_; \ rv_ = raft_configuration_add(&f->conf, ID, ADDRESS, RAFT_VOTER); \ munit_assert_int(rv_, ==, 0); \ } /* Invoke f->io->bootstrap() and assert that no error occurs. */ #define BOOTSTRAP \ { \ int rv_; \ rv_ = f->io.bootstrap(&f->io, &f->conf); \ munit_assert_int(rv_, ==, 0); \ } /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_UV; raft_configuration_init(&f->conf); return f; } static void tearDown(void *data) { struct fixture *f = data; raft_configuration_close(&f->conf); TEAR_DOWN_UV; TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * raft_io->bootstrap() * *****************************************************************************/ SUITE(bootstrap) /* Invoke f->io->bootstrap() and assert that it returns the given error code and * message. */ #define BOOTSTRAP_ERROR(RV, ERRMSG) \ { \ int rv_; \ rv_ = f->io.bootstrap(&f->io, &f->conf); \ munit_assert_int(rv_, ==, RV); \ munit_assert_string_equal(f->io.errmsg, ERRMSG); \ } /* Bootstrap a pristine server. */ TEST(bootstrap, pristine, setUp, tearDown, 0, NULL) { struct fixture *f = data; CONFIGURATION_ADD(1, "1"); BOOTSTRAP; return MUNIT_OK; } /* The data directory already has metadata files with a non-zero term. 
*/ TEST(bootstrap, termIsNonZero, setUp, tearDown, 0, NULL) { struct fixture *f = data; CONFIGURATION_ADD(1, "1"); BOOTSTRAP; BOOTSTRAP_ERROR(RAFT_CANTBOOTSTRAP, "metadata contains term 1"); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_init.c000066400000000000000000000223541415614527300207300ustar00rootroot00000000000000#include "../../include/raft/uv.h" #include "../../src/byte.h" #include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a non-initialized raft_io instance and uv dependencies. * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; bool closed; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ static void closeCb(struct raft_io *io) { struct fixture *f = io->data; f->closed = true; } /* Invoke raft_uv_init() and assert that no error occurs. */ #define INIT(DIR) \ do { \ int _rv; \ _rv = raft_uv_init(&f->io, &f->loop, DIR, &f->transport); \ munit_assert_int(_rv, ==, 0); \ _rv = f->io.init(&f->io, 1, "1"); \ munit_assert_int(_rv, ==, 0); \ } while (0) /* Invoke raft_io->close(). */ #define CLOSE \ do { \ f->io.close(&f->io, closeCb); \ LOOP_RUN_UNTIL(&f->closed); \ raft_uv_close(&f->io); \ } while (0) /* Invoke raft_uv_init() and assert that the given error code is returned and * the given error message set. */ #define INIT_ERROR(DIR, RV, ERRMSG) \ do { \ int _rv; \ _rv = raft_uv_init(&f->io, &f->loop, DIR, &f->transport); \ munit_assert_int(_rv, ==, 0); \ _rv = f->io.init(&f->io, 1, "1"); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(f->io.errmsg, ERRMSG); \ CLOSE; \ } while (0) /* Write either the metadata1 or metadata2 file, filling it with the given * values. 
*/ #define WRITE_METADATA_FILE(N, FORMAT, VERSION, TERM, VOTED_FOR) \ { \ uint8_t buf[8 * 4]; \ void *cursor = buf; \ char filename[strlen("metadataN") + 1]; \ sprintf(filename, "metadata%d", N); \ bytePut64(&cursor, FORMAT); \ bytePut64(&cursor, VERSION); \ bytePut64(&cursor, TERM); \ bytePut64(&cursor, VOTED_FOR); \ DirWriteFile(f->dir, filename, buf, sizeof buf); \ } #define LONG_DIR \ "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" \ "/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ "/ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" \ "/ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" \ "/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" \ "/fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" \ "/ggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg" \ "/hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" \ "/iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii" \ "/jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" \ "/kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk" \ "/lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll" \ "/mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; f->io.data = f; f->closed = false; return f; } static void tearDown(void *data) { struct fixture *f = data; if (f == NULL) { return; } TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * raft_io->init() * *****************************************************************************/ SUITE(init) TEST(init, dirTooLong, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_io io = {0}; int rv; rv = 
raft_uv_init(&io, &f->loop, LONG_DIR, &f->transport); munit_assert_int(rv, ==, RAFT_NAMETOOLONG); munit_assert_string_equal(io.errmsg, "directory path too long"); return 0; } #if defined(RWF_NOWAIT) static char *oomHeapFaultDelay[] = {"1", "2", NULL}; #else static char *oomHeapFaultDelay[] = {"1", NULL}; #endif static char *oomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomParams[] = { {TEST_HEAP_FAULT_DELAY, oomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory conditions. */ TEST(init, oom, setUp, tearDown, 0, oomParams) { struct fixture *f = data; #if defined(__i686__) /* XXX: tmpfs seems to not support O_DIRECT */ return MUNIT_SKIP; #endif #if defined(__powerpc64__) /* XXX: fails on ppc64el */ return MUNIT_SKIP; #endif HEAP_FAULT_ENABLE; INIT_ERROR(f->dir, RAFT_NOMEM, "out of memory"); return 0; } /* The given directory does not exist. */ TEST(init, dirDoesNotExist, setUp, tearDown, 0, NULL) { struct fixture *f = data; INIT_ERROR("/foo/bar/egg/baz", RAFT_NOTFOUND, "directory '/foo/bar/egg/baz' does not exist"); return MUNIT_OK; } /* The given directory not accessible */ TEST(init, dirNotAccessible, setUp, tearDown, 0, NULL) { struct fixture *f = data; char errmsg[RAFT_ERRMSG_BUF_SIZE]; sprintf(errmsg, "directory '%s' is not writable", f->dir); DirMakeUnexecutable(f->dir); INIT_ERROR(f->dir, RAFT_INVALID, errmsg); return MUNIT_OK; } /* No space is left for probing I/O capabilities. */ TEST(init, noSpace, setUp, tearDown, 0, DirTmpfsParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; DirFill(f->dir, 4); INIT_ERROR(f->dir, RAFT_NOSPACE, "create I/O capabilities probe file: not enough space to " "allocate 4096 bytes"); return MUNIT_OK; } /* The metadata1 file has not the expected number of bytes. In this case the * file is not considered at all, and the effect is as if this was a brand new * server. 
*/ TEST(init, metadataOneTooShort, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t buf[16] = {0}; DirWriteFile(f->dir, "metadata1", buf, sizeof buf); INIT(f->dir); CLOSE; return MUNIT_OK; } /* The metadata1 file has not the expected format. */ TEST(init, metadataOneBadFormat, setUp, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 2, /* Format */ 1, /* Version */ 1, /* Term */ 0 /* Voted for */); INIT_ERROR(f->dir, RAFT_MALFORMED, "decode content of metadata1: bad format version 2"); return MUNIT_OK; } /* The metadata1 file has not a valid version. */ TEST(init, metadataOneBadVersion, setUp, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 1, /* Format */ 0, /* Version */ 1, /* Term */ 0 /* Voted for */); INIT_ERROR(f->dir, RAFT_CORRUPT, "decode content of metadata1: version is set to zero"); return MUNIT_OK; } /* The data directory has both metadata files, but they have the same * version. */ TEST(init, metadataOneAndTwoSameVersion, setUp, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 1, /* Format */ 2, /* Version */ 3, /* Term */ 0 /* Voted for */); WRITE_METADATA_FILE(2, /* Metadata file index */ 1, /* Format */ 2, /* Version */ 2, /* Term */ 0 /* Voted for */); INIT_ERROR(f->dir, RAFT_CORRUPT, "metadata1 and metadata2 are both at version 2"); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_load.c000066400000000000000000001355561415614527300207150ustar00rootroot00000000000000#include "../../src/byte.h" #include "../../src/uv.h" #include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a non-initialized libuv-based raft_io instance. 
* *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ static void closeCb(struct raft_io *io) { bool *done = io->data; *done = true; } static void appendCb(struct raft_io_append *req, int status) { bool *done = req->data; munit_assert_int(status, ==, 0); *done = true; } static void snapshotPutCb(struct raft_io_snapshot_put *req, int status) { bool *done = req->data; munit_assert_int(status, ==, 0); *done = true; } struct snapshot { raft_term term; raft_index index; uint64_t data; }; #define WORD_SIZE 8 /* Maximum number of blocks a segment can have */ #define MAX_SEGMENT_BLOCKS 4 /* This block size should work fine for all file systems. */ #define SEGMENT_BLOCK_SIZE 4096 /* Desired segment size */ #define SEGMENT_SIZE SEGMENT_BLOCK_SIZE *MAX_SEGMENT_BLOCKS #define CLOSED_SEGMENT_FILENAME(START, END) \ "000000000000000" #START \ "-" \ "000000000000000" #END /* Check if open segment file exists. */ #define HAS_OPEN_SEGMENT_FILE(COUNT) DirHasFile(f->dir, "open-" #COUNT) /* Check if closed segment file exists. */ #define HAS_CLOSED_SEGMENT_FILE(START, END) \ DirHasFile(f->dir, CLOSED_SEGMENT_FILENAME(START, END)) /* Initialize a standalone raft_io instance and use it to append N batches of * entries, each containing one entry. DATA should be an integer that will be * used as base value for the data of the first entry, and will be then * incremented for subsequent entries. 
*/ #define APPEND(N, DATA) \ do { \ struct raft_uv_transport _transport; \ struct raft_io _io; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _i; \ size_t _n; \ void *_batch = NULL; \ struct raft_entry _new_entry; \ uint64_t _new_entry_data; \ uint64_t _data = DATA; \ struct raft_io_append _req; \ bool _done = false; \ int _rv; \ \ /* Initialize the instance, loading existing data, but discarding \ * it. This makes sure that the start index is correctly set. */ \ _rv = raft_uv_tcp_init(&_transport, &f->loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_init(&_io, &f->loop, f->dir, &_transport); \ munit_assert_int(_rv, ==, 0); \ _rv = _io.init(&_io, 1, "1"); \ munit_assert_int(_rv, ==, 0); \ raft_uv_set_block_size(&_io, SEGMENT_BLOCK_SIZE); \ raft_uv_set_segment_size(&_io, SEGMENT_SIZE); \ _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index, \ &_entries, &_n); \ munit_assert_int(_rv, ==, 0); \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ if (_entry->batch != _batch) { \ _batch = _entry->batch; \ raft_free(_batch); \ } \ } \ if (_entries != NULL) { \ raft_free(_entries); \ } \ if (_snapshot != NULL) { \ raft_configuration_close(&_snapshot->configuration); \ munit_assert_int(_snapshot->n_bufs, ==, 1); \ raft_free(_snapshot->bufs[0].base); \ raft_free(_snapshot->bufs); \ raft_free(_snapshot); \ } \ \ /* Append the new entries. 
*/ \ for (_i = 0; _i < N; _i++) { \ struct raft_entry *entry = &_new_entry; \ entry->term = 1; \ entry->type = RAFT_COMMAND; \ entry->buf.base = &_new_entry_data; \ entry->buf.len = sizeof _new_entry_data; \ entry->batch = NULL; \ munit_assert_ptr_not_null(entry->buf.base); \ memset(entry->buf.base, 0, entry->buf.len); \ *(uint64_t *)entry->buf.base = _data; \ _data++; \ _req.data = &_done; \ _rv = _io.append(&_io, &_req, entry, 1, appendCb); \ munit_assert_int(_rv, ==, 0); \ LOOP_RUN_UNTIL(&_done); \ _done = false; \ } \ \ /* Shutdown the standalone raft_io instance. */ \ _done = false; \ _io.data = &_done; \ _io.close(&_io, closeCb); \ LOOP_RUN_UNTIL(&_done); \ raft_uv_close(&_io); \ raft_uv_tcp_close(&_transport); \ } while (0); /* Initialize a standalone raft_io instance and use it to persist a new snapshot * at the given INDEX and TERM. DATA should be an integer that will be used as * as snapshot content. */ #define SNAPSHOT_PUT(TERM, INDEX, DATA) \ do { \ struct raft_uv_transport _transport; \ struct raft_io _io; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _i; \ size_t _n; \ void *_batch = NULL; \ struct raft_snapshot _new_snapshot; \ struct raft_buffer _new_snapshot_buf; \ uint64_t _new_snapshot_data = DATA; \ struct raft_io_snapshot_put _req; \ bool _done = false; \ int _rv; \ \ /* Initialize the instance, loading existing data, but discarding \ * it. This makes sure that the start index is correctly set. 
*/ \ _rv = raft_uv_tcp_init(&_transport, &f->loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_init(&_io, &f->loop, f->dir, &_transport); \ munit_assert_int(_rv, ==, 0); \ _rv = _io.init(&_io, 1, "1"); \ munit_assert_int(_rv, ==, 0); \ raft_uv_set_block_size(&_io, SEGMENT_BLOCK_SIZE); \ raft_uv_set_segment_size(&_io, SEGMENT_SIZE); \ _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index, \ &_entries, &_n); \ munit_assert_int(_rv, ==, 0); \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ if (_entry->batch != _batch) { \ _batch = _entry->batch; \ raft_free(_batch); \ } \ } \ if (_entries != NULL) { \ raft_free(_entries); \ } \ if (_snapshot != NULL) { \ raft_configuration_close(&_snapshot->configuration); \ munit_assert_int(_snapshot->n_bufs, ==, 1); \ raft_free(_snapshot->bufs[0].base); \ raft_free(_snapshot->bufs); \ raft_free(_snapshot); \ } \ \ /* Persist the new snapshot. */ \ _new_snapshot.index = INDEX; \ _new_snapshot.term = TERM; \ raft_configuration_init(&_new_snapshot.configuration); \ _rv = raft_configuration_add(&_new_snapshot.configuration, 1, "1", \ RAFT_VOTER); \ munit_assert_int(_rv, ==, 0); \ _new_snapshot.bufs = &_new_snapshot_buf; \ _new_snapshot.n_bufs = 1; \ _new_snapshot_buf.base = &_new_snapshot_data; \ _new_snapshot_buf.len = sizeof _new_snapshot_data; \ _req.data = &_done; \ _rv = \ _io.snapshot_put(&_io, 10, &_req, &_new_snapshot, snapshotPutCb); \ munit_assert_int(_rv, ==, 0); \ LOOP_RUN_UNTIL(&_done); \ raft_configuration_close(&_new_snapshot.configuration); \ \ /* Shutdown the standalone raft_io instance. */ \ _done = false; \ _io.data = &_done; \ _io.close(&_io, closeCb); \ LOOP_RUN_UNTIL(&_done); \ raft_uv_close(&_io); \ raft_uv_tcp_close(&_transport); \ } while (0); /* Forcibly turn a closed segment into an open one, by renaming the underlying * file and growing its size. 
*/ #define UNFINALIZE(FIRST_INDEX, LAST_INDEX, COUNTER) \ do { \ const char *_filename1 = \ CLOSED_SEGMENT_FILENAME(FIRST_INDEX, LAST_INDEX); \ char _filename2[64]; \ sprintf(_filename2, "open-%u", (unsigned)COUNTER); \ munit_assert_true(DirHasFile(f->dir, _filename1)); \ munit_assert_false(DirHasFile(f->dir, _filename2)); \ DirRenameFile(f->dir, _filename1, _filename2); \ DirGrowFile(f->dir, _filename2, SEGMENT_SIZE); \ } while (0) /* Initialize the raft_io instance, then call raft_io->load() and assert that it * returns the given error code and message. */ #define LOAD_ERROR(RV, ERRMSG) \ do { \ int _rv; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _n; \ SETUP_UV; \ _rv = f->io.load(&f->io, &_term, &_voted_for, &_snapshot, \ &_start_index, &_entries, &_n); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(f->io.errmsg, ERRMSG); \ } while (0) /* Initialize the raft_io instance, then invoke raft_io->load() and assert that * it returns the given state. If non-NULL, SNAPSHOT points to a struct snapshot * object whose attributes must match the loaded snapshot. ENTRIES_DATA is * supposed to be the integer stored in the data of first loaded entry. 
*/ #define LOAD(TERM, VOTED_FOR, SNAPSHOT, START_INDEX, ENTRIES_DATA, N_ENTRIES) \ do { \ int _rv; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _n; \ void *_batch = NULL; \ uint64_t _data = ENTRIES_DATA; \ unsigned _i; \ SETUP_UV; \ _rv = f->io.load(&f->io, &_term, &_voted_for, &_snapshot, \ &_start_index, &_entries, &_n); \ munit_assert_int(_rv, ==, 0); \ munit_assert_int(_term, ==, TERM); \ munit_assert_int(_voted_for, ==, VOTED_FOR); \ munit_assert_int(_start_index, ==, START_INDEX); \ if (_snapshot != NULL) { \ struct snapshot *_expected = (struct snapshot *)(SNAPSHOT); \ munit_assert_ptr_not_null(_snapshot); \ munit_assert_int(_snapshot->term, ==, _expected->term); \ munit_assert_int(_snapshot->index, ==, _expected->index); \ munit_assert_int(_snapshot->n_bufs, ==, 1); \ munit_assert_int(*(uint64_t *)_snapshot->bufs[0].base, ==, \ _expected->data); \ raft_configuration_close(&_snapshot->configuration); \ raft_free(_snapshot->bufs[0].base); \ raft_free(_snapshot->bufs); \ raft_free(_snapshot); \ } \ if (_n != 0) { \ munit_assert_int(_n, ==, N_ENTRIES); \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ uint64_t _value = *(uint64_t *)_entry->buf.base; \ munit_assert_int(_value, ==, _data); \ _data++; \ } \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ if (_entry->batch != _batch) { \ _batch = _entry->batch; \ raft_free(_batch); \ } \ } \ raft_free(_entries); \ } \ } while (0) /****************************************************************************** * * Set up and tear down. 
* *****************************************************************************/ static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_UV; TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * raft_io->load() * *****************************************************************************/ SUITE(load) /* Load the initial state of a pristine server. */ TEST(load, emptyDir, setUp, tearDown, 0, NULL) { struct fixture *f = data; LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); return MUNIT_OK; } static char *unknownFiles[] = { "garbage", "0000000000000000000000000001-00000000001garbage", "open-1garbage", NULL, }; static MunitParameterEnum unknownFilesParams[] = { {"filename", unknownFiles}, {NULL, NULL}, }; /* Files that are not part of the raft state are ignored. 
*/ TEST(load, ignoreUnknownFiles, setUp, tearDown, 0, unknownFilesParams) { struct fixture *f = data; const char *filename = munit_parameters_get(params, "filename"); DirWriteFileWithZeros(f->dir, filename, 128); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); return MUNIT_OK; } static char *unusableFiles[] = { "tmp-0000000001221212-0000000001221217", "tmp-snapshot-15-8260687-512469866", "snapshot-525-43326736-880259052", "snapshot-999-13371337-880259052.meta", "snapshot-20-8260687-512469866", "snapshot-88-8260687-512469866.meta", "snapshot-88-8260999-512469866.meta", "tmp-snapshot-88-8260999-512469866.meta", "tmp-snapshot-33-8260687-512469866", "snapshot-33-8260687-512469866.meta", "tmp-metadata1", "tmp-metadata2", "tmp-open1", "tmp-open13", NULL }; static MunitParameterEnum unusableFilesParams[] = { {"filename", unusableFiles}, {NULL, NULL}, }; /* Files that can no longer be used are removed. */ TEST(load, removeUnusableFiles, setUp, tearDown, 0, unusableFilesParams) { struct fixture *f = data; const char *filename = munit_parameters_get(params, "filename"); DirWriteFileWithZeros(f->dir, filename, 128); munit_assert_true(DirHasFile(f->dir, filename)); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); munit_assert_false(DirHasFile(f->dir, filename)); return MUNIT_OK; } /* The data directory has an empty open segment. */ TEST(load, emptyOpenSegment, setUp, tearDown, 0, NULL) { struct fixture *f = data; DirWriteFile(f->dir, "open-1", NULL, 0); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); /* The empty segment has been removed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(1)); return MUNIT_OK; } /* The data directory has a freshly allocated open segment filled with zeros. 
*/ TEST(load, openSegmentWithTrailingZeros, setUp, tearDown, 0, NULL) { struct fixture *f = data; DirWriteFileWithZeros(f->dir, "open-1", 256); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); /* The empty segment has been removed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(1)); return MUNIT_OK; } /* The data directory has a valid closed and open segments. */ TEST(load, bothOpenAndClosedSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(2, 1); APPEND(1, 3); APPEND(1, 4); UNFINALIZE(4, 4, 1); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 4 /* n entries */ ); return MUNIT_OK; } /* The data directory has an allocated open segment which contains non-zero * corrupted data in its second batch. */ TEST(load, openSegmentWithNonZeroData, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint64_t corrupt = 123456789; APPEND(2, 1); UNFINALIZE(1, 2, 1); DirOverwriteFile(f->dir, "open-1", &corrupt, sizeof corrupt, 60); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 1 /* n entries */ ); /* The segment has been removed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(1)); return MUNIT_OK; } /* The data directory has an open segment with a partially written batch that * needs to be truncated. */ TEST(load, openSegmentWithIncompleteBatch, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t zero[256]; APPEND(2, 1); UNFINALIZE(1, 2, 1); memset(zero, 0, sizeof zero); DirOverwriteFile(f->dir, "open-1", &zero, sizeof zero, 62); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 1 /* n entries */ ); return MUNIT_OK; } /* The data directory has an open segment whose first batch is only * partially written. In that case the segment gets removed. 
*/ TEST(load, openSegmentWithIncompleteFirstBatch, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t buf[4 * WORD_SIZE] = { 1, 0, 0, 0, 0, 0, 0, 0, /* Format version */ 0, 0, 0, 0, 0, 0, 0, 0, /* CRC32 checksums */ 0, 0, 0, 0, 0, 0, 0, 0, /* Number of entries */ 0, 0, 0, 0, 0, 0, 0, 0 /* Batch data */ }; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirOverwriteFile(f->dir, "open-1", buf, sizeof buf, 0); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); return MUNIT_OK; } /* The data directory has two segments, with the second having an entry. */ TEST(load, twoOpenSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); APPEND(1, 2); UNFINALIZE(1, 1, 1); UNFINALIZE(2, 2, 2); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 2 /* n entries */ ); /* The first and second segments have been renamed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(1)); munit_assert_false(HAS_OPEN_SEGMENT_FILE(2)); munit_assert_true(HAS_CLOSED_SEGMENT_FILE(1, 1)); munit_assert_true(HAS_CLOSED_SEGMENT_FILE(2, 2)); return MUNIT_OK; } /* The data directory has two open segments, with the second one filled with * zeros. */ TEST(load, secondOpenSegmentIsAllZeros, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirWriteFileWithZeros(f->dir, "open-2", SEGMENT_SIZE); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 1 /* n entries */ ); /* The first segment has been renamed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(1)); munit_assert_true(HAS_CLOSED_SEGMENT_FILE(1, 1)); /* The second segment has been removed. */ munit_assert_false(HAS_OPEN_SEGMENT_FILE(2)); return MUNIT_OK; } /* The data directory has a valid open segment. 
*/ TEST(load, openSegment, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); UNFINALIZE(1, 1, 1); LOAD(0, /* term */ 0, /* voted for */ NULL, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 1 /* n entries */ ); return MUNIT_OK; } /* There is exactly one snapshot and no segments. */ TEST(load, onlyOneSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 1, /* index */ 1 /* data */ }; SNAPSHOT_PUT(1, 1, 1); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 2, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); return MUNIT_OK; } /* There are several snapshots, including an incomplete one. The last one is * loaded and the incomplete or older ones are removed. */ TEST(load, manySnapshots, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 2, /* term */ 9, /* index */ 4 /* data */ }; char filename[64]; uint64_t now; /* Take a snapshot but then remove the data file, as if the server crashed * before it could complete writing it. */ uv_update_time(&f->loop); now = uv_now(&f->loop); sprintf(filename, "snapshot-1-8-%ju", now); SNAPSHOT_PUT(1, 8, 1); DirRemoveFile(f->dir, filename); SNAPSHOT_PUT(1, 8, 2); SNAPSHOT_PUT(2, 6, 3); SNAPSHOT_PUT(2, 9, 4); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 10, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); /* The orphaned .meta file is removed */ char meta_filename[128]; sprintf(meta_filename, "%s%s", filename, UV__SNAPSHOT_META_SUFFIX); munit_assert_false(DirHasFile(f->dir,meta_filename)); return MUNIT_OK; } /* There are two snapshots, but the last one has an empty data file. The first * one is loaded and the empty one is discarded. 
*/ TEST(load, emptySnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 4, /* index */ 1 /* data */ }; char filename[64]; uint64_t now; SNAPSHOT_PUT(1, 4, 1); /* Take a snapshot but then truncate the data file, as if the server ran out * of space before it could write it. */ uv_update_time(&f->loop); now = uv_now(&f->loop); sprintf(filename, "snapshot-2-6-%ju", now); SNAPSHOT_PUT(2, 6, 2); DirTruncateFile(f->dir, filename, 0); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 5, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); return MUNIT_OK; } /* There is an orphaned snapshot and an orphaned snapshot .meta file, * make sure they are removed */ TEST(load, orphanedSnapshotFiles, setUp, tearDown, 0, NULL) { struct fixture *f = data; uv_update_time(&f->loop); uint64_t now = uv_now(&f->loop); struct snapshot expected_snapshot = { 2, /* term */ 16, /* index */ 4 /* data */ }; char filename1_removed[64]; char metafilename1_removed[64]; char filename2_removed[64]; char metafilename2_removed[64]; /* Take a snapshot but then remove the data file, as if the server crashed * before it could complete writing it. 
*/ sprintf(filename1_removed, "snapshot-2-18-%ju", now); sprintf(metafilename1_removed, "snapshot-2-18-%ju%s", now, UV__SNAPSHOT_META_SUFFIX); SNAPSHOT_PUT(2, 18, 1); munit_assert_true(DirHasFile(f->dir, filename1_removed)); munit_assert_true(DirHasFile(f->dir, metafilename1_removed)); DirRemoveFile(f->dir, filename1_removed); /* Take a snapshot but then remove the .meta file */ now = uv_now(&f->loop); sprintf(filename2_removed, "snapshot-2-19-%ju", now); sprintf(metafilename2_removed, "snapshot-2-19-%ju%s", now, UV__SNAPSHOT_META_SUFFIX); SNAPSHOT_PUT(2, 19, 2); munit_assert_true(DirHasFile(f->dir, filename2_removed)); munit_assert_true(DirHasFile(f->dir, metafilename2_removed)); DirRemoveFile(f->dir, metafilename2_removed); /* Take a valid snapshot and make sure it's loaded */ SNAPSHOT_PUT(2, 16, 4); LOAD(0, /* term */ 0, /* voted for */ &expected_snapshot, /* snapshot */ 17, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); /* The orphaned files are removed */ munit_assert_false(DirHasFile(f->dir, metafilename1_removed)); munit_assert_false(DirHasFile(f->dir, filename2_removed)); return MUNIT_OK; } /* The data directory has a closed segment with entries that are no longer * needed, since they are included in a snapshot. We still keep those segments * and just let the next snapshot logic delete them. */ TEST(load, closedSegmentWithEntriesBehindSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 2, /* index */ 1 /* data */ }; APPEND(1, 1); SNAPSHOT_PUT(1, 2, 1); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 3, /* start index */ 0, /* data for first loaded entry */ 0 /* n entries */ ); munit_assert_true(HAS_CLOSED_SEGMENT_FILE(1, 1)); return MUNIT_OK; } /* The data directory has a closed segment with entries that are no longer * needed, since they are included in a snapshot. 
However it also has an open * segment that has enough entries to reach the snapshot last index. */ TEST(load, openSegmentWithEntriesPastSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 2, /* index */ 1 /* data */ }; APPEND(1, 1); APPEND(1, 2); SNAPSHOT_PUT(1, 2, 1); UNFINALIZE(2, 2, 1); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 2 /* n entries */ ); return MUNIT_OK; } /* The data directory has a closed segment whose filename encodes a number of * entries which is different then ones it actually contains. */ TEST(load, closedSegmentWithInconsistentFilename, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(3, 1); DirRenameFile(f->dir, "0000000000000001-0000000000000003", "0000000000000001-0000000000000004"); LOAD_ERROR(RAFT_CORRUPT, "load closed segment 0000000000000001-0000000000000004: found 3 " "entries (expected 4)"); return MUNIT_OK; } /* The data directory has a closed segment with entries that are no longer * needed, since they are included in a snapshot. It also has an open segment, * however that does not have enough entries to reach the snapshot last * index. */ TEST(load, openSegmentWithEntriesBehindSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); APPEND(1, 2); SNAPSHOT_PUT(1, 3, 1); UNFINALIZE(2, 2, 1); LOAD_ERROR(RAFT_CORRUPT, "last entry on disk has index 2, which is behind last " "snapshot's index 3"); return MUNIT_OK; } /* The data directory contains a snapshot and an open segment containing a valid * entry, and no closed segments. 
*/ TEST(load, openSegmentNoClosedSegmentsSnapshotPresent, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 3, /* index */ 1 /* data */ }; SNAPSHOT_PUT(1, 3, 1); APPEND(1, 4); UNFINALIZE(4, 4, 1); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 4, /* start index */ 4, /* data for first loaded entry */ 1 /* n entries */ ); return MUNIT_OK; } /* The data directory has several closed segments, all with entries compatible * with the snapshot. */ TEST(load, closedSegmentsOverlappingWithSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 4, /* index */ 1 /* data */ }; APPEND(1, 1); APPEND(2, 2); APPEND(3, 4); SNAPSHOT_PUT(1, 4, 1); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 1, /* start index */ 1, /* data for first loaded entry */ 6 /* n entries */ ); return MUNIT_OK; } /* The data directory has several closed segments, some of which have a gap, * which is still compatible with the snapshot. */ TEST(load, nonContiguousClosedSegments, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct snapshot snapshot = { 1, /* term */ 4, /* index */ 1 /* data */ }; APPEND(1, 1); APPEND(2, 2); APPEND(3, 4); SNAPSHOT_PUT(1, 4, 1); DirRemoveFile(f->dir, CLOSED_SEGMENT_FILENAME(2, 3)); LOAD(0, /* term */ 0, /* voted for */ &snapshot, /* snapshot */ 4, /* start index */ 4, /* data for first loaded entry */ 3 /* n entries */ ); return MUNIT_OK; } /* If the data directory has a closed segment whose start index is beyond the * snapshot's last index, an error is returned. 
*/ TEST(load, closedSegmentWithEntriesPastSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint64_t now; char errmsg[128]; APPEND(5, 1); APPEND(1, 5); uv_update_time(&f->loop); now = uv_now(&f->loop); sprintf(errmsg, "closed segment 0000000000000006-0000000000000006 is past last " "snapshot snapshot-1-4-%ju", now); SNAPSHOT_PUT(1, 4, 1); DirRemoveFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 5)); LOAD_ERROR(RAFT_CORRUPT, errmsg); return MUNIT_OK; } /* The data directory has an open segment which has incomplete format data. */ TEST(load, openSegmentWithIncompleteFormat, setUp, tearDown, 0, NULL) { struct fixture *f = data; DirWriteFileWithZeros(f->dir, "open-1", WORD_SIZE / 2); LOAD_ERROR(RAFT_IOERR, "load open segment open-1: file has only 4 bytes"); return MUNIT_OK; } /* The data directory has an open segment which has an incomplete batch * preamble. */ TEST(load, openSegmentWithIncompletePreamble, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t offset = WORD_SIZE /* Format version */ + WORD_SIZE /* Checksums */; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirTruncateFile(f->dir, "open-1", offset); LOAD_ERROR(RAFT_IOERR, "load open segment open-1: entries batch 1 starting at byte 16: " "read preamble: short read: 0 bytes instead of 8"); return MUNIT_OK; } /* The data directory has an open segment which has incomplete batch header. */ TEST(load, openSegmentWithIncompleteBatchHeader, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t offset = WORD_SIZE + /* Format version */ WORD_SIZE + /* Checksums */ WORD_SIZE + /* Number of entries */ WORD_SIZE /* Partial batch header */; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirTruncateFile(f->dir, "open-1", offset); LOAD_ERROR(RAFT_IOERR, "load open segment open-1: entries batch 1 starting at byte 8: " "read header: short read: 8 bytes instead of 16"); return MUNIT_OK; } /* The data directory has an open segment which has incomplete batch data. 
*/ TEST(load, openSegmentWithIncompleteBatchData, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t offset = WORD_SIZE + /* Format version */ WORD_SIZE + /* Checksums */ WORD_SIZE + /* Number of entries */ WORD_SIZE + /* Entry term */ WORD_SIZE + /* Entry type and data size */ WORD_SIZE / 2 /* Partial entry data */; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirTruncateFile(f->dir, "open-1", offset); LOAD_ERROR(RAFT_IOERR, "load open segment open-1: entries batch 1 starting at byte 8: " "read data: short read: 4 bytes instead of 8"); return MUNIT_OK; } /* The data directory has a closed segment which has corrupted batch header. */ TEST(load, closedSegmentWithCorruptedBatchHeader, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t offset = WORD_SIZE /* Format version */; uint64_t corrupted = 12345678; APPEND(1, 1); DirOverwriteFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 1), &corrupted, sizeof corrupted, offset); LOAD_ERROR(RAFT_CORRUPT, "load closed segment 0000000000000001-0000000000000001: entries " "batch 1 starting at byte 8: header checksum mismatch"); return MUNIT_OK; } /* The data directory has a closed segment which has corrupted batch data. */ TEST(load, closedSegmentWithCorruptedBatchData, setUp, tearDown, 0, NULL) { struct fixture *f = data; size_t offset = WORD_SIZE /* Format version */ + WORD_SIZE / 2 /* Header checksum */; uint32_t corrupted = 123456789; APPEND(1, 1); DirOverwriteFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 1), &corrupted, sizeof corrupted, offset); LOAD_ERROR(RAFT_CORRUPT, "load closed segment 0000000000000001-0000000000000001: entries " "batch 1 starting at byte 8: data checksum mismatch"); return MUNIT_OK; } /* The data directory has a closed segment whose first index does not match what * we expect. 
*/ TEST(load, closedSegmentWithBadIndex, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); APPEND(1, 2); DirRemoveFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 1)); LOAD_ERROR(RAFT_CORRUPT, "unexpected closed segment 0000000000000002-0000000000000002: " "first index should have been 1"); return MUNIT_OK; } /* The data directory has an empty closed segment. */ TEST(load, emptyClosedSegment, setUp, tearDown, 0, NULL) { struct fixture *f = data; DirWriteFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 1), NULL, 0); LOAD_ERROR( RAFT_CORRUPT, "load closed segment 0000000000000001-0000000000000001: file is empty"); return MUNIT_OK; } /* The data directory has a closed segment with an unexpected format. */ TEST(load, closedSegmentWithBadFormat, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t buf[8] = {2, 0, 0, 0, 0, 0, 0, 0}; DirWriteFile(f->dir, CLOSED_SEGMENT_FILENAME(1, 1), buf, sizeof buf); LOAD_ERROR(RAFT_CORRUPT, "load closed segment 0000000000000001-0000000000000001: " "unexpected format version 2"); return MUNIT_OK; } /* The data directory has an open segment which is not readable. */ TEST(load, openSegmentWithNoAccessPermission, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirMakeFileUnreadable(f->dir, "open-1"); LOAD_ERROR(RAFT_IOERR, "load open segment open-1: read file: open: permission denied"); return MUNIT_OK; } /* The data directory has an open segment with format set to 0 and non-zero * content. */ TEST(load, openSegmentWithZeroFormatAndThenData, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint64_t version = 0 /* Format version */; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirOverwriteFile(f->dir, "open-1", &version, sizeof version, 0); LOAD_ERROR(RAFT_CORRUPT, "load open segment open-1: unexpected format version 0"); return MUNIT_OK; } /* The data directory has an open segment with an unexpected format. 
*/ TEST(load, openSegmentWithBadFormat, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t version[8] = {2, 0, 0, 0, 0, 0, 0, 0}; APPEND(1, 1); UNFINALIZE(1, 1, 1); DirOverwriteFile(f->dir, "open-1", version, sizeof version, 0); LOAD_ERROR(RAFT_CORRUPT, "load open segment open-1: unexpected format version 2"); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_recover.c000066400000000000000000000042741415614527300214330ustar00rootroot00000000000000#include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; }; static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_UV; return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_UV; TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * raft_io->recover() * *****************************************************************************/ SUITE(recover) /* Invoke recover and assert that it fails with the given error. */ #define RECOVER_ERROR(RV, CONF) \ { \ int rv_; \ rv_ = f->io.recover(&f->io, CONF); \ munit_assert_int(rv_, ==, RV); \ } /* Invoke recover and assert that it succeeds */ #define RECOVER(CONF) RECOVER_ERROR(0, CONF) /* If the instance has been already initialized, an error is returned. */ /* A new configuration is saved as last entry on disk. 
*/ TEST(recover, newConfiguration, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_configuration configuration1; struct raft_configuration configuration2; int rv; /* Bootstrap using an initial configuration */ raft_configuration_init(&configuration1); rv = raft_configuration_add(&configuration1, 1, "1", RAFT_VOTER); munit_assert_int(rv, ==, 0); rv = raft_configuration_add(&configuration1, 2, "2", RAFT_VOTER); munit_assert_int(rv, ==, 0); rv = f->io.bootstrap(&f->io, &configuration1); munit_assert_int(rv, ==, 0); /* Bootstrap using a different configuration */ raft_configuration_init(&configuration2); rv = raft_configuration_add(&configuration2, 1, "1", RAFT_VOTER); munit_assert_int(rv, ==, 0); RECOVER(&configuration2); raft_configuration_close(&configuration1); raft_configuration_close(&configuration2); return 0; } raft-0.11.3/test/integration/test_uv_recv.c000066400000000000000000000420711415614527300207220ustar00rootroot00000000000000#include "../lib/runner.h" #include "../lib/tcp.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a libuv-based raft_io instance. 
* *****************************************************************************/ struct peer { struct uv_loop_s loop; struct raft_uv_transport transport; struct raft_io io; }; struct fixture { FIXTURE_UV_DEPS; FIXTURE_TCP; FIXTURE_UV; struct peer peer; bool closed; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { struct raft_message *message; bool done; }; static void recvCb(struct raft_io *io, struct raft_message *m1) { struct result *result = io->data; struct raft_message *m2 = result->message; unsigned i; munit_assert_int(m1->type, ==, m2->type); switch (m1->type) { case RAFT_IO_REQUEST_VOTE: munit_assert_int(m1->request_vote.term, ==, m2->request_vote.term); munit_assert_int(m1->request_vote.candidate_id, ==, m2->request_vote.candidate_id); munit_assert_int(m1->request_vote.last_log_index, ==, m2->request_vote.last_log_index); munit_assert_int(m1->request_vote.last_log_term, ==, m2->request_vote.last_log_term); munit_assert_int(m1->request_vote.disrupt_leader, ==, m2->request_vote.disrupt_leader); break; case RAFT_IO_REQUEST_VOTE_RESULT: munit_assert_int(m1->request_vote_result.term, ==, m2->request_vote_result.term); munit_assert_int(m1->request_vote_result.vote_granted, ==, m2->request_vote_result.vote_granted); break; case RAFT_IO_APPEND_ENTRIES: munit_assert_int(m1->append_entries.n_entries, ==, m2->append_entries.n_entries); for (i = 0; i < m1->append_entries.n_entries; i++) { struct raft_entry *entry1 = &m1->append_entries.entries[i]; struct raft_entry *entry2 = &m2->append_entries.entries[i]; munit_assert_int(entry1->term, ==, entry2->term); munit_assert_int(entry1->type, ==, entry2->type); munit_assert_int(entry1->buf.len, ==, entry2->buf.len); munit_assert_int( memcmp(entry1->buf.base, entry2->buf.base, entry1->buf.len), ==, 0); } if (m1->append_entries.n_entries > 0) { 
raft_free(m1->append_entries.entries[0].batch); raft_free(m1->append_entries.entries); } break; case RAFT_IO_APPEND_ENTRIES_RESULT: munit_assert_int(m1->append_entries_result.term, ==, m2->append_entries_result.term); munit_assert_int(m1->append_entries_result.rejected, ==, m2->append_entries_result.rejected); munit_assert_int(m1->append_entries_result.last_log_index, ==, m2->append_entries_result.last_log_index); break; case RAFT_IO_INSTALL_SNAPSHOT: munit_assert_int(m1->install_snapshot.conf.n, ==, m2->install_snapshot.conf.n); for (i = 0; i < m1->install_snapshot.conf.n; i++) { struct raft_server *s1 = &m1->install_snapshot.conf.servers[i]; struct raft_server *s2 = &m2->install_snapshot.conf.servers[i]; munit_assert_int(s1->id, ==, s2->id); munit_assert_string_equal(s1->address, s2->address); munit_assert_int(s1->role, ==, s2->role); } munit_assert_int(m1->install_snapshot.data.len, ==, m2->install_snapshot.data.len); munit_assert_int(memcmp(m1->install_snapshot.data.base, m2->install_snapshot.data.base, m2->install_snapshot.data.len), ==, 0); raft_configuration_close(&m1->install_snapshot.conf); raft_free(m1->install_snapshot.data.base); break; case RAFT_IO_TIMEOUT_NOW: munit_assert_int(m1->timeout_now.term, ==, m2->timeout_now.term); munit_assert_int(m1->timeout_now.last_log_index, ==, m2->timeout_now.last_log_index); munit_assert_int(m1->timeout_now.last_log_term, ==, m2->timeout_now.last_log_term); break; }; result->done = true; } static void peerSendCb(struct raft_io_send *req, int status) { bool *done = req->data; munit_assert_int(status, ==, 0); *done = true; } static void peerCloseCb(struct raft_io *io) { bool *done = io->data; *done = true; } /* Set up the fixture's peer raft_io instance. 
*/ #define PEER_SETUP \ do { \ struct uv_loop_s *_loop = &f->peer.loop; \ struct raft_uv_transport *_transport = &f->peer.transport; \ struct raft_io *_io = &f->peer.io; \ int _rv; \ _rv = uv_loop_init(_loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_tcp_init(_transport, _loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_init(_io, _loop, f->dir, _transport); \ munit_assert_int(_rv, ==, 0); \ _rv = _io->init(_io, 2, "127.0.0.1:9002"); \ munit_assert_int(_rv, ==, 0); \ } while (0) /* Tear down the fixture's peer raft_io instance. */ #define PEER_TEAR_DOWN \ do { \ struct uv_loop_s *_loop = &f->peer.loop; \ struct raft_uv_transport *_transport = &f->peer.transport; \ struct raft_io *_io = &f->peer.io; \ bool _done = false; \ int _i; \ _done = false; \ _io->data = &_done; \ _io->close(_io, peerCloseCb); \ for (_i = 0; _i < 10; _i++) { \ if (_done) { \ break; \ } \ uv_run(_loop, UV_RUN_ONCE); \ } \ uv_run(_loop, UV_RUN_DEFAULT); \ munit_assert_true(_done); \ raft_uv_close(_io); \ raft_uv_tcp_close(_transport); \ uv_loop_close(_loop); \ } while (0) /* Send a message to the main fixture's raft_io instance using the fixture's * peer instance. */ #define PEER_SEND(MESSAGE) \ do { \ struct uv_loop_s *_loop = &f->peer.loop; \ struct raft_io *_io = &f->peer.io; \ struct raft_io_send _req; \ bool _done = false; \ int _i; \ int _rv; \ (MESSAGE)->server_id = 1; \ (MESSAGE)->server_address = "127.0.0.1:9001"; \ _req.data = &_done; \ _rv = _io->send(_io, &_req, MESSAGE, peerSendCb); \ munit_assert_int(_rv, ==, 0); \ for (_i = 0; _i < 10; _i++) { \ if (_done) { \ break; \ } \ uv_run(_loop, UV_RUN_ONCE); \ } \ munit_assert_true(_done); \ } while (0) /* Establish a connection and send an handshake using plain TCP. 
*/ #define PEER_HANDSHAKE \ do { \ uint8_t _handshake[] = { \ 6, 6, 6, 0, 0, 0, 0, 0, /* Protocol */ \ 1, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ \ 2, 0, 0, 0, 0, 0, 0, 0, /* Address length, in words */ \ 0, 0, 0, 0, 0, 0, 0, 0, /* First address word */ \ 0, 0, 0, 0, 0, 0, 0, 0 /* Second address word */ \ }; \ sprintf((char *)&_handshake[24], "127.0.0.1:666"); \ TCP_CLIENT_CONNECT(9001); \ TCP_CLIENT_SEND(_handshake, sizeof _handshake); \ } while (0); /* Run the loop until a new message is received. Assert that the received * message matches the given one. */ #define RECV(MESSAGE) \ do { \ struct result _result = {MESSAGE, false}; \ f->io.data = &_result; \ LOOP_RUN_UNTIL(&_result.done); \ } while (0) /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_TCP; PEER_SETUP; f->io.data = f; f->closed = false; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; PEER_TEAR_DOWN; TEAR_DOWN_TCP; TEAR_DOWN_UV_DEPS; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); int rv; SETUP_UV; f->io.data = f; rv = f->io.start(&f->io, 10000, NULL, recvCb); munit_assert_int(rv, ==, 0); return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_UV; tearDownDeps(f); } /****************************************************************************** * * raft_io_recv_cb * *****************************************************************************/ SUITE(recv) /* Receive the very first message over the connection. 
*/ TEST(recv, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_REQUEST_VOTE; message.request_vote.candidate_id = 2; message.request_vote.last_log_index = 123; message.request_vote.last_log_term = 2; message.request_vote.disrupt_leader = false; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive the a first message then another one. */ TEST(recv, second, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_REQUEST_VOTE; message.request_vote.candidate_id = 2; message.request_vote.last_log_index = 123; message.request_vote.last_log_term = 2; message.request_vote.disrupt_leader = true; PEER_SEND(&message); RECV(&message); PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive a RequestVote result message. */ TEST(recv, requestVoteResult, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_REQUEST_VOTE_RESULT; message.request_vote_result.term = 3; message.request_vote_result.vote_granted = true; message.request_vote_result.pre_vote = raft_tribool_false; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive an AppendEntries message with two entries. */ TEST(recv, appendEntries, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entries[2]; struct raft_message message; uint8_t data1[8] = {1, 2, 3, 4, 5, 6, 7, 8}; uint8_t data2[8] = {8, 7, 6, 5, 4, 3, 2, 1}; entries[0].type = RAFT_COMMAND; entries[0].buf.base = data1; entries[0].buf.len = sizeof data1; entries[1].type = RAFT_COMMAND; entries[1].buf.base = data2; entries[1].buf.len = sizeof data2; message.type = RAFT_IO_APPEND_ENTRIES; message.append_entries.entries = entries; message.append_entries.n_entries = 2; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive an AppendEntries message with no entries (i.e. an heartbeat). 
*/ TEST(recv, heartbeat, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_APPEND_ENTRIES; message.append_entries.entries = NULL; message.append_entries.n_entries = 0; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive an AppendEntries result f->peer.message. */ TEST(recv, appendEntriesResult, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_APPEND_ENTRIES_RESULT; message.append_entries_result.term = 3; message.append_entries_result.rejected = 0; message.append_entries_result.last_log_index = 123; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* Receive an InstallSnapshot message. */ TEST(recv, installSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; uint8_t snapshot_data[8] = {1, 2, 3, 4, 5, 6, 7, 8}; int rv; message.type = RAFT_IO_INSTALL_SNAPSHOT; message.install_snapshot.term = 2; message.install_snapshot.last_index = 123; message.install_snapshot.last_term = 1; raft_configuration_init(&message.install_snapshot.conf); rv = raft_configuration_add(&message.install_snapshot.conf, 1, "1", RAFT_VOTER); munit_assert_int(rv, ==, 0); message.install_snapshot.data.len = sizeof snapshot_data; message.install_snapshot.data.base = snapshot_data; PEER_SEND(&message); RECV(&message); raft_configuration_close(&message.install_snapshot.conf); return MUNIT_OK; } /* Receive a TimeoutNow message. */ TEST(recv, timeoutNow, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_message message; message.type = RAFT_IO_TIMEOUT_NOW; message.timeout_now.term = 3; message.timeout_now.last_log_index = 123; message.timeout_now.last_log_term = 2; PEER_SEND(&message); RECV(&message); return MUNIT_OK; } /* The handshake fails because of an unexpected protocon version. 
*/ TEST(recv, badProtocol, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t handshake[] = { 6, 6, 6, 0, 0, 0, 0, 0, /* Protocol */ 1, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ 2, 0, 0, 0, 0, 0, 0, 0 /* Address length */ }; TCP_CLIENT_CONNECT(9001); TCP_CLIENT_SEND(handshake, sizeof handshake); LOOP_RUN(2); return MUNIT_OK; } /* A message can't have zero length. */ TEST(recv, badSize, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t header[] = { 1, 0, 0, 0, 0, 0, 0, 0, /* Message type */ 0, 0, 0, 0, 0, 0, 0, 0 /* Message size */ }; PEER_HANDSHAKE; TCP_CLIENT_SEND(header, sizeof header); LOOP_RUN(2); return MUNIT_OK; } /* A message with a bad type causes the connection to be aborted. */ TEST(recv, badType, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t header[] = { 1, 2, 3, 4, 5, 6, 7, 8, /* Message type */ 0, 0, 0, 0, 0, 0, 0, 0 /* Message size */ }; PEER_HANDSHAKE; TCP_CLIENT_SEND(header, sizeof header); LOOP_RUN(2); return MUNIT_OK; } /* The backend is closed just before accepting a new connection. */ TEST(recv, closeBeforeAccept, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; uint8_t header[] = { 1, 2, 3, 4, 5, 6, 7, 8, /* Message type */ 0, 0, 0, 0, 0, 0, 0, 0 /* Message size */ }; PEER_HANDSHAKE; TCP_CLIENT_SEND(header, sizeof header); LOOP_RUN(1); TEAR_DOWN_UV; return MUNIT_OK; } /* The backend is closed after receiving the header of an AppendEntries * message. */ TEST(recv, closeAfterAppendEntriesHeader, setUp, tearDown, 0, NULL) { /* TODO */ return MUNIT_SKIP; } raft-0.11.3/test/integration/test_uv_send.c000066400000000000000000000301731415614527300207140ustar00rootroot00000000000000#include #include "../lib/runner.h" #include "../lib/tcp.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a libuv-based raft_io instance and some pre-set messages. 
* *****************************************************************************/ #define N_MESSAGES 5 struct fixture { FIXTURE_UV_DEPS; FIXTURE_TCP_SERVER; FIXTURE_UV; struct raft_message messages[N_MESSAGES]; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; static void sendCbAssertResult(struct raft_io_send *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } /* Get I'th fixture's message. */ #define MESSAGE(I) (&f->messages[I]) /* Submit a send request for the I'th fixture's message. */ #define SEND_SUBMIT(I, RV, STATUS) \ struct raft_io_send _req##I; \ struct result _result##I = {STATUS, false}; \ int _rv##I; \ _req##I.data = &_result##I; \ _rv##I = \ f->io.send(&f->io, &_req##I, &f->messages[I], sendCbAssertResult); \ munit_assert_int(_rv##I, ==, RV) /* Wait for the submit request of the I'th message to finish. */ #define SEND_WAIT(I) LOOP_RUN_UNTIL(&_result##I.done) /* Submit a send request for the I'th fixture's message and wait for the * operation to successfully complete. */ #define SEND(I) \ do { \ SEND_SUBMIT(I, 0 /* rv */, 0 /* status */); \ SEND_WAIT(I); \ } while (0) /* Submit a send request and assert that it fails synchronously with the * given error code and message. */ #define SEND_ERROR(I, RV, ERRMSG) \ do { \ SEND_SUBMIT(I, RV, 0 /* status */); \ /* munit_assert_string_equal(f->transport.errmsg, ERRMSG);*/ \ } while (0) /* Submit a send request and wait for the operation to fail with the given code * and message. */ #define SEND_FAILURE(I, STATUS, ERRMSG) \ do { \ SEND_SUBMIT(I, 0 /* rv */, STATUS); \ SEND_WAIT(I); \ /*munit_assert_string_equal(f->transport.errmsg, ERRMSG);*/ \ } while (0) /****************************************************************************** * * Set up and tear down. 
* *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_TCP_SERVER; f->io.data = f; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; TEAR_DOWN_TCP_SERVER; TEAR_DOWN_UV_DEPS; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); unsigned i; SETUP_UV; raft_uv_set_connect_retry_delay(&f->io, 1); for (i = 0; i < N_MESSAGES; i++) { struct raft_message *message = &f->messages[i]; message->type = RAFT_IO_REQUEST_VOTE; message->server_id = 1; message->server_address = f->server.address; } return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_UV; tearDownDeps(f); } /****************************************************************************** * * raft_io->send() * *****************************************************************************/ SUITE(send) /* The first time a request is sent to a server a connection attempt is * triggered. If the connection succeeds the request gets written out. */ TEST(send, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; SEND(0); return MUNIT_OK; } /* The second time a request is sent it re-uses the connection that was already * established */ TEST(send, second, setUp, tearDown, 0, NULL) { struct fixture *f = data; SEND(0); SEND(0); return MUNIT_OK; } /* Submit a few send requests in parallel. */ TEST(send, parallel, setUp, tearDown, 0, NULL) { struct fixture *f = data; SEND_SUBMIT(0 /* message */, 0 /* rv */, 0 /* status */); SEND_SUBMIT(1 /* message */, 0 /* rv */, 0 /* status */); SEND_WAIT(0); SEND_WAIT(1); return MUNIT_OK; } /* Send a request vote result message. 
*/ TEST(send, voteResult, setUp, tearDown, 0, NULL) { struct fixture *f = data; MESSAGE(0)->type = RAFT_IO_REQUEST_VOTE_RESULT; SEND(0); return MUNIT_OK; } /* Send an append entries message. */ TEST(send, appendEntries, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry entries[2]; entries[0].buf.base = raft_malloc(16); entries[0].buf.len = 16; entries[1].buf.base = raft_malloc(8); entries[1].buf.len = 8; MESSAGE(0)->type = RAFT_IO_APPEND_ENTRIES; MESSAGE(0)->append_entries.entries = entries; MESSAGE(0)->append_entries.n_entries = 2; SEND(0); raft_free(entries[0].buf.base); raft_free(entries[1].buf.base); return MUNIT_OK; } /* Send an append entries message with zero entries (i.e. a heartbeat). */ TEST(send, heartbeat, setUp, tearDown, 0, NULL) { struct fixture *f = data; MESSAGE(0)->type = RAFT_IO_APPEND_ENTRIES; MESSAGE(0)->append_entries.entries = NULL; MESSAGE(0)->append_entries.n_entries = 0; SEND(0); return MUNIT_OK; } /* Send an append entries result message. */ TEST(send, appendEntriesResult, setUp, tearDown, 0, NULL) { struct fixture *f = data; MESSAGE(0)->type = RAFT_IO_APPEND_ENTRIES_RESULT; SEND(0); return MUNIT_OK; } /* Send an install snapshot message. */ TEST(send, installSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_install_snapshot *p = &MESSAGE(0)->install_snapshot; int rv; MESSAGE(0)->type = RAFT_IO_INSTALL_SNAPSHOT; raft_configuration_init(&p->conf); rv = raft_configuration_add(&p->conf, 1, "1", RAFT_VOTER); munit_assert_int(rv, ==, 0); p->data.len = 8; p->data.base = raft_malloc(p->data.len); SEND(0); raft_configuration_close(&p->conf); raft_free(p->data.base); return MUNIT_OK; } /* A connection attempt fails asynchronously after the connect function * returns. 
*/ TEST(send, noConnection, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; MESSAGE(0)->server_address = "127.0.0.1:123456"; SEND_SUBMIT(0 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); TEAR_DOWN_UV; return MUNIT_OK; } /* The message has an invalid IPv4 address. */ TEST(send, badAddress, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; MESSAGE(0)->server_address = "1"; SEND_SUBMIT(0 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); TEAR_DOWN_UV; return MUNIT_OK; } /* Make sure UvSend doesn't use a stale connection for a certain server id * by first sending a message to a valid address and then sending a message to * an invalid address, making sure the valid connection is not reused. * Afterwards assert that a send to the correct address still succeeds. */ TEST(send, changeToUnconnectedAddress, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; /* Send a message to a server and a connected address */ SEND(0); /* Send a message to the same server, but update the address to an * unconnected address and assert it fails. */ munit_assert_ullong(MESSAGE(0)->server_id, ==, MESSAGE(1)->server_id); MESSAGE(1)->server_address = "127.0.0.2:1"; SEND_SUBMIT(1 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); /* Send another message to the same server and connected address */ munit_assert_ullong(MESSAGE(0)->server_id, ==, MESSAGE(2)->server_id); SEND(2); /* Send another message to the same server and connected address */ munit_assert_ullong(MESSAGE(0)->server_id, ==, MESSAGE(3)->server_id); SEND(3); TEAR_DOWN_UV; return MUNIT_OK; } /* The message has an invalid type. */ TEST(send, badMessage, setUp, tearDown, 0, NULL) { struct fixture *f = data; MESSAGE(0)->type = 666; SEND_ERROR(0, RAFT_MALFORMED, ""); return MUNIT_OK; } /* Old send requests that have accumulated and could not yet be sent are * progressively evicted. 
*/ TEST(send, evictOldPending, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; TCP_SERVER_STOP; SEND_SUBMIT(0 /* message */, 0 /* rv */, RAFT_NOCONNECTION /* status */); SEND_SUBMIT(1 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); SEND_SUBMIT(2 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); SEND_SUBMIT(3 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); SEND_WAIT(0); TEAR_DOWN_UV; return MUNIT_OK; } /* After the connection is established the peer dies and then comes back a * little bit later. */ TEST(send, reconnectAfterWriteError, setUp, tearDown, 0, NULL) { struct fixture *f = data; int socket; SEND(0); socket = TcpServerAccept(&f->server); close(socket); SEND_FAILURE(0, RAFT_IOERR, ""); SEND(0); return MUNIT_OK; } /* After the connection is established the peer dies and then comes back a * little bit later. At the time the peer died there where several writes * pending. */ TEST(send, reconnectAfterMultipleWriteErrors, setUp, tearDown, 0, NULL) { struct fixture *f = data; int socket; signal(SIGPIPE, SIG_IGN); SEND(0); socket = TcpServerAccept(&f->server); close(socket); SEND_SUBMIT(1 /* message */, 0 /* rv */, RAFT_IOERR /* status */); SEND_SUBMIT(2 /* message */, 0 /* rv */, RAFT_IOERR /* status */); SEND_WAIT(1); SEND_WAIT(2); SEND(3); return MUNIT_OK; } static char *oomHeapFaultDelay[] = {"0", "1", "2", "3", "4", NULL}; static char *oomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomParams[] = { {TEST_HEAP_FAULT_DELAY, oomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory conditions. 
*/ TEST(send, oom, setUp, tearDown, 0, oomParams) { struct fixture *f = data; HEAP_FAULT_ENABLE; SEND_ERROR(0, RAFT_NOMEM, ""); return MUNIT_OK; } static char *oomAsyncHeapFaultDelay[] = {"2", NULL}; static char *oomAsyncHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomAsyncParams[] = { {TEST_HEAP_FAULT_DELAY, oomAsyncHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomAsyncHeapFaultRepeat}, {NULL, NULL}, }; /* Transient out of memory error happening after send() has returned. */ TEST(send, oomAsync, setUp, tearDown, 0, oomAsyncParams) { struct fixture *f = data; SEND(0); return MUNIT_OK; } /* The backend gets closed while there is a pending write. */ TEST(send, closeDuringWrite, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; struct raft_entry entry; /* Set a very large message that is likely to fill the socket buffer. * TODO: figure a more deterministic way to choose the value. */ entry.buf.len = 1024 * 1024 * 8; entry.buf.base = raft_malloc(entry.buf.len); MESSAGE(0)->type = RAFT_IO_APPEND_ENTRIES; MESSAGE(0)->append_entries.entries = &entry; MESSAGE(0)->append_entries.n_entries = 1; SEND_SUBMIT(0 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); TEAR_DOWN_UV; raft_free(entry.buf.base); return MUNIT_OK; } /* The backend gets closed while there is a pending connect request. */ TEST(send, closeDuringConnection, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; SEND_SUBMIT(0 /* message */, 0 /* rv */, RAFT_CANCELED /* status */); TEAR_DOWN_UV; return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_set_term.c000066400000000000000000000216201415614527300216020ustar00rootroot00000000000000#include "../../include/raft/uv.h" #include "../../src/byte.h" #include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a libuv-based raft_io instance. 
* *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; bool closed; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ static void closeCb(struct raft_io *io) { struct fixture *f = io->data; f->closed = true; } /* Invoke raft_uv_init() and assert that no error occurs. */ #define INIT \ do { \ int _rv; \ _rv = raft_uv_init(&f->io, &f->loop, f->dir, &f->transport); \ munit_assert_int(_rv, ==, 0); \ _rv = f->io.init(&f->io, 1, "1"); \ munit_assert_int(_rv, ==, 0); \ } while (0) /* Invoke raft_io->close(). */ #define CLOSE \ do { \ f->io.close(&f->io, closeCb); \ LOOP_RUN_UNTIL(&f->closed); \ raft_uv_close(&f->io); \ } while (0) /* Invoke f->io->set_term() and assert that no error occurs. */ #define SET_TERM(TERM) \ do { \ int _rv; \ _rv = f->io.set_term(&f->io, TERM); \ munit_assert_int(_rv, ==, 0); \ } while (0) /* Invoke f->io->set_term() and assert that the given error code is returned and * the given error message set. */ #define SET_TERM_ERROR(TERM, RV, ERRMSG) \ do { \ int _rv; \ _rv = f->io.set_term(&f->io, TERM); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(f->io.errmsg_(&f->io), ERRMSG); \ } while (0) /* Write either the metadata1 or metadata2 file, filling it with the given * values. */ #define WRITE_METADATA_FILE(N, FORMAT, VERSION, TERM, VOTED_FOR) \ { \ uint8_t buf[8 * 4]; \ void *cursor = buf; \ char filename[strlen("metadataN") + 1]; \ sprintf(filename, "metadata%d", N); \ bytePut64(&cursor, FORMAT); \ bytePut64(&cursor, VERSION); \ bytePut64(&cursor, TERM); \ bytePut64(&cursor, VOTED_FOR); \ DirWriteFile(f->dir, filename, buf, sizeof buf); \ } /* Assert that the content of either the metadata1 or metadata2 file match the * given values. 
*/ #define ASSERT_METADATA_FILE(N, VERSION, TERM, VOTED_FOR) \ { \ uint8_t buf2[8 * 4]; \ const void *cursor = buf2; \ char filename[strlen("metadataN") + 1]; \ sprintf(filename, "metadata%d", N); \ DirReadFile(f->dir, filename, buf2, sizeof buf2); \ munit_assert_int(byteGet64(&cursor), ==, 1); \ munit_assert_int(byteGet64(&cursor), ==, VERSION); \ munit_assert_int(byteGet64(&cursor), ==, TERM); \ munit_assert_int(byteGet64(&cursor), ==, VOTED_FOR); \ } /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; f->io.data = f; f->closed = false; return f; } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); INIT; return f; } static void tearDown(void *data) { struct fixture *f = data; CLOSE; TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * raft_io->set_term() * *****************************************************************************/ SUITE(set_term) /* The very first time set_term() is called, the metadata1 file gets written. */ TEST(set_term, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; SET_TERM(1); ASSERT_METADATA_FILE(1, 1, 1, 0); munit_assert_false(DirHasFile(f->dir, "metadata2")); return MUNIT_OK; } /* The second time set_term() is called, the metadata2 file gets written. */ TEST(set_term, second, setUp, tearDown, 0, NULL) { struct fixture *f = data; SET_TERM(1); SET_TERM(2); ASSERT_METADATA_FILE(1, 1, 1, 0); ASSERT_METADATA_FILE(2, 2, 2, 0); return MUNIT_OK; } /* The third time set_term() is called, the metadata1 file gets overwritten. 
*/ TEST(set_term, third, setUp, tearDown, 0, NULL) { struct fixture *f = data; SET_TERM(1); SET_TERM(2); SET_TERM(3); ASSERT_METADATA_FILE(1, 3, 3, 0); ASSERT_METADATA_FILE(2, 2, 2, 0); return MUNIT_OK; } /* The fourth time set_term() is called, the metadata2 file gets overwritten. */ TEST(set_term, fourth, setUp, tearDown, 0, NULL) { struct fixture *f = data; SET_TERM(1); SET_TERM(2); SET_TERM(3); SET_TERM(4); ASSERT_METADATA_FILE(1, 3, 3, 0); ASSERT_METADATA_FILE(2, 4, 4, 0); return MUNIT_OK; } /* If the data directory has a single metadata1 file, the first time set_data() * is called, the second metadata file gets created. */ TEST(set_term, metadataOneExists, setUpDeps, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 1, /* Format */ 1, /* Version */ 1, /* Term */ 0 /* Voted for */); INIT; SET_TERM(2); ASSERT_METADATA_FILE(1, 1, 1, 0); ASSERT_METADATA_FILE(2, 2, 2, 0); return MUNIT_OK; } /* The data directory has both metadata files, but metadata1 is greater. */ TEST(set_term, metadataOneIsGreater, setUpDeps, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 1, /* Format */ 3, /* Version */ 3, /* Term */ 0 /* Voted for */); WRITE_METADATA_FILE(2, /* Metadata file index */ 1, /* Format */ 2, /* Version */ 2, /* Term */ 0 /* Voted for */); INIT; SET_TERM(4); ASSERT_METADATA_FILE(1 /* n */, 3 /* version */, 3 /* term */, 0 /* voted for */); ASSERT_METADATA_FILE(2 /* n */, 4 /* version */, 4 /* term */, 0 /* voted for */); return MUNIT_OK; } /* The data directory has both metadata files, but metadata2 is greater. 
*/ TEST(set_term, metadataTwoIsGreater, setUpDeps, tearDown, 0, NULL) { struct fixture *f = data; WRITE_METADATA_FILE(1, /* Metadata file index */ 1, /* Format */ 1, /* Version */ 1, /* Term */ 0 /* Voted for */); WRITE_METADATA_FILE(2, /* Metadata file index */ 1, /* Format */ 2, /* Version */ 2, /* Term */ 0 /* Voted for */); INIT; SET_TERM(2); ASSERT_METADATA_FILE(1 /* n */, 3 /* version */, 2 /* term */, 0 /* voted for */); ASSERT_METADATA_FILE(2 /* n */, 2 /* version */, 2 /* term */, 0 /* voted for */); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_snapshot_put.c000066400000000000000000000234571415614527300225210ustar00rootroot00000000000000#include #include "append_helpers.h" #include "../lib/runner.h" #include "../lib/tcp.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture with a libuv-based raft_io instance. * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; bool closed; int count; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct snapshot { raft_term term; raft_index index; uint64_t data; bool done; }; static void snapshotPutCbAssertResult(struct raft_io_snapshot_put *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } static void snapshotGetCbAssertResult(struct raft_io_snapshot_get *req, struct raft_snapshot *snapshot, int status) { struct snapshot *expect = req->data; munit_assert_int(status, ==, 0); munit_assert_ptr_not_null(snapshot); munit_assert_int(snapshot->term, ==, expect->term); munit_assert_int(snapshot->index, ==, snapshot->index); expect->done = true; raft_configuration_close(&snapshot->configuration); raft_free(snapshot->bufs[0].base); raft_free(snapshot->bufs); raft_free(snapshot); } 
/* Submit a request to truncate the log at N */ #define TRUNCATE(N) \ { \ int _rv; \ _rv = f->io.truncate(&f->io, N); \ munit_assert_int(_rv, ==, 0); \ } #define SNAPSHOT_PUT_REQ(TRAILING, INDEX, RV, STATUS) \ struct raft_snapshot _snapshot; \ struct raft_buffer _snapshot_buf; \ uint64_t _snapshot_data; \ struct raft_io_snapshot_put _req; \ struct result _result = {STATUS, false, NULL}; \ int _rv; \ _snapshot.term = 1; \ _snapshot.index = INDEX; \ raft_configuration_init(&_snapshot.configuration); \ _rv = raft_configuration_add(&_snapshot.configuration, 1, "1", \ RAFT_STANDBY); \ munit_assert_int(_rv, ==, 0); \ _snapshot.bufs = &_snapshot_buf; \ _snapshot.n_bufs = 1; \ _snapshot_buf.base = &_snapshot_data; \ _snapshot_buf.len = sizeof _snapshot_data; \ _req.data = &_result; \ _rv = f->io.snapshot_put(&f->io, TRAILING, &_req, &_snapshot, \ snapshotPutCbAssertResult); \ munit_assert_int(_rv, ==, RV) /* Submit a snapshot put request for the given snapshot and wait for the * operation to successfully complete. */ #define SNAPSHOT_PUT(TRAILING, INDEX) \ do { \ SNAPSHOT_PUT_REQ(TRAILING, INDEX, 0 /* rv */, 0 /* status */); \ LOOP_RUN_UNTIL(&_result.done); \ raft_configuration_close(&_snapshot.configuration); \ } while (0) /* Submit a snapshot put request and assert that it fails synchronously with the * given error code and message. */ #define SNAPSHOT_PUT_ERROR(SNAPSHOT, TRAILING, RV, ERRMSG) \ do { \ SNAPSHOT_PUT_REQ(SNAPSHOT, TRAILING, RV, 0 /* status */); \ /* munit_assert_string_equal(f->transport.errmsg, ERRMSG);*/ \ } while (0) /* Submit a snapshot put request and wait for the operation to fail with the * given code and message. */ #define SNAPSHOT_PUT_FAILURE(STATUS, ERRMSG) \ do { \ SNAPSHOT_PUT_REQ(0 /* rv */, STATUS); \ LOOP_RUN_UNTIL(&_result.done); \ /*munit_assert_string_equal(f->transport.errmsg, ERRMSG);*/ \ } while (0) /* Use raft_io->snapshot_get to load the last snapshot and compare it with the * given parameters. 
*/ #define ASSERT_SNAPSHOT(TERM, INDEX, DATA) \ do { \ struct raft_io_snapshot_get _req; \ struct snapshot _expect = {TERM, INDEX, DATA, false}; \ int _rv; \ _req.data = &_expect; \ _rv = f->io.snapshot_get(&f->io, &_req, snapshotGetCbAssertResult); \ munit_assert_int(_rv, ==, 0); \ LOOP_RUN_UNTIL(&_expect.done); \ } while (0) /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; f->io.data = f; f->closed = false; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; TEAR_DOWN_UV_DEPS; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); SETUP_UV; return f; } static void tearDown(void *data) { struct fixture *f = data; TEAR_DOWN_UV; tearDownDeps(f); } /****************************************************************************** * * raft_io->snapshot_put * *****************************************************************************/ SUITE(snapshot_put) /* Put the first snapshot. */ TEST(snapshot_put, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; SNAPSHOT_PUT(10, /* trailing */ 1 /* index */ ); ASSERT_SNAPSHOT(1, 1, 1); return MUNIT_OK; } /* If the number of closed entries is less than the given trailing amount, no * segment is deleted. 
*/ TEST(snapshot_put, entriesLessThanTrailing, setUp, tearDown, 0, NULL) { struct fixture *f = data; unsigned i; raft_uv_set_segment_size( &f->io, 4096); /* Lower the number of block to force finalizing */ for (i = 0; i < 40; i++) { APPEND(10, 8); } SNAPSHOT_PUT(128, /* trailing */ 100 /* index */ ); munit_assert_true(DirHasFile(f->dir, "0000000000000001-0000000000000150")); munit_assert_true(DirHasFile(f->dir, "0000000000000151-0000000000000300")); return MUNIT_OK; } /* If the number of closed entries is greater than the given trailing amount, * closed segments that are fully past the trailing amount get deleted. */ TEST(snapshot_put, entriesMoreThanTrailing, setUp, tearDown, 0, NULL) { struct fixture *f = data; unsigned i; raft_uv_set_segment_size( &f->io, 4096); /* Lower the number of block to force finalizing */ for (i = 0; i < 40; i++) { APPEND(10, 8); } SNAPSHOT_PUT(128, /* trailing */ 280 /* index */ ); munit_assert_false(DirHasFile(f->dir, "0000000000000001-0000000000000150")); munit_assert_true(DirHasFile(f->dir, "0000000000000151-0000000000000300")); return MUNIT_OK; } /* Request to install a snapshot. */ TEST(snapshot_put, install, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(4, 8); SNAPSHOT_PUT(0, /* trailing */ 1 /* index */ ); return MUNIT_OK; } /* Request to install a snapshot without compression. */ TEST(snapshot_put, installNoCompression, setUp, tearDown, 0, NULL) { struct fixture *f = data; raft_uv_set_snapshot_compression(&f->io, false); APPEND(4, 8); SNAPSHOT_PUT(0, /* trailing */ 1 /* index */ ); return MUNIT_OK; } /* Request to install a snapshot, no previous entry is present. */ TEST(snapshot_put, installWithoutPreviousEntries, setUp, tearDown, 0, NULL) { struct fixture *f = data; SNAPSHOT_PUT(0, /* trailing */ 1 /* index */ ); return MUNIT_OK; } /* Request to install a couple of snapshots in a row, no previous entry is present. 
*/ TEST(snapshot_put, installMultipleWithoutPreviousEntries, setUp, tearDown, 0, NULL) { struct fixture *f = data; SNAPSHOT_PUT(0, /* trailing */ 1 /* index */ ); SNAPSHOT_PUT(0, /* trailing */ 3 /* index */ ); SNAPSHOT_PUT(0, /* trailing */ 1337 /* index */ ); return MUNIT_OK; } /* Request to install a couple of snapshots in a row, AppendEntries Requests * happen before, meanwhile and after */ TEST(snapshot_put, installMultipleAppendEntriesInBetween, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, 256, 8); APPEND_SUBMIT(1, 256, 8); SNAPSHOT_PUT(0, /* trailing */ 1 /* index */ ); APPEND_WAIT(0); APPEND_WAIT(1); APPEND_SUBMIT(2, 256, 8); APPEND_SUBMIT(3, 256, 8); SNAPSHOT_PUT(0, /* trailing */ 100 /* index */ ); APPEND_WAIT(2); APPEND_WAIT(3); APPEND_SUBMIT(4, 256, 8); APPEND_SUBMIT(5, 256, 8); APPEND_WAIT(4); APPEND_WAIT(5); return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_tcp_connect.c000066400000000000000000000175051415614527300222660ustar00rootroot00000000000000#include "../../include/raft.h" #include "../../include/raft/uv.h" #include "../lib/heap.h" #include "../lib/loop.h" #include "../lib/runner.h" #include "../lib/tcp.h" /****************************************************************************** * * Fixture with a TCP-based raft_uv_transport. 
* *****************************************************************************/ struct fixture { FIXTURE_HEAP; FIXTURE_LOOP; FIXTURE_TCP_SERVER; struct raft_uv_transport transport; bool closed; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; static void closeCb(struct raft_uv_transport *transport) { struct fixture *f = transport->data; f->closed = true; } static void connectCbAssertResult(struct raft_uv_connect *req, struct uv_stream_s *stream, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); if (status == 0) { uv_close((struct uv_handle_s *)stream, (uv_close_cb)raft_free); } result->done = true; } #define INIT \ do { \ int _rv; \ _rv = f->transport.init(&f->transport, 1, "127.0.0.1:9000"); \ munit_assert_int(_rv, ==, 0); \ f->transport.data = f; \ f->closed = false; \ } while (0) #define CLOSE_SUBMIT \ munit_assert_false(f->closed); \ f->transport.close(&f->transport, closeCb); \ munit_assert_false(f->closed) #define CLOSE_WAIT LOOP_RUN_UNTIL(&f->closed) #define CLOSE \ CLOSE_SUBMIT; \ CLOSE_WAIT #define CONNECT_REQ(ID, ADDRESS, RV, STATUS) \ struct raft_uv_connect _req; \ struct result _result = {STATUS, false}; \ int _rv; \ _req.data = &_result; \ _rv = f->transport.connect(&f->transport, &_req, ID, ADDRESS, \ connectCbAssertResult); \ munit_assert_int(_rv, ==, RV) /* Try to submit a connect request and assert that the given error code and * message are returned. */ #define CONNECT_ERROR(ID, ADDRESS, RV, ERRMSG) \ { \ CONNECT_REQ(ID, ADDRESS, RV /* rv */, 0 /* status */); \ munit_assert_string_equal(f->transport.errmsg, ERRMSG); \ } /* Submit a connect request with the given parameters and wait for the operation * to successfully complete. 
*/ #define CONNECT(ID, ADDRESS) \ { \ CONNECT_REQ(ID, ADDRESS, 0 /* rv */, 0 /* status */); \ LOOP_RUN_UNTIL(&_result.done); \ } /* Submit a connect request with the given parameters and wait for the operation * to fail with the given code and message. */ #define CONNECT_FAILURE(ID, ADDRESS, STATUS, ERRMSG) \ { \ CONNECT_REQ(ID, ADDRESS, 0 /* rv */, STATUS); \ LOOP_RUN_UNTIL(&_result.done); \ munit_assert_string_equal(f->transport.errmsg, ERRMSG); \ } /* Submit a connect request with the given parameters, close the transport after * N loop iterations and assert that the request got canceled. */ #define CONNECT_CLOSE(ID, ADDRESS, N) \ { \ CONNECT_REQ(ID, ADDRESS, 0 /* rv */, RAFT_CANCELED); \ LOOP_RUN(N); \ CLOSE_SUBMIT; \ munit_assert_false(_result.done); \ LOOP_RUN_UNTIL(&_result.done); \ CLOSE_WAIT; \ } /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); int rv; SET_UP_HEAP; SETUP_LOOP; SETUP_TCP_SERVER; rv = raft_uv_tcp_init(&f->transport, &f->loop); munit_assert_int(rv, ==, 0); return f; } static void tearDownDeps(void *data) { struct fixture *f = data; LOOP_STOP; raft_uv_tcp_close(&f->transport); TEAR_DOWN_TCP_SERVER; TEAR_DOWN_LOOP; TEAR_DOWN_HEAP; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); INIT; return f; } static void tearDown(void *data) { struct fixture *f = data; CLOSE; tearDownDeps(f); } /****************************************************************************** * * raft_uv_transport->connect() * *****************************************************************************/ #define BOGUS_ADDRESS "127.0.0.1:6666" SUITE(tcp_connect) /* Successfully connect to the peer. 
*/ TEST(tcp_connect, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; CONNECT(2, TCP_SERVER_ADDRESS); return MUNIT_OK; } /* The peer has shutdown */ TEST(tcp_connect, refused, setUp, tearDown, 0, NULL) { struct fixture *f = data; TCP_SERVER_STOP; CONNECT_FAILURE(2, BOGUS_ADDRESS, RAFT_NOCONNECTION, "uv_tcp_connect(): connection refused"); return MUNIT_OK; } static char *oomHeapFaultDelay[] = {"0", "1", "2", NULL}; static char *oomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomParams[] = { {TEST_HEAP_FAULT_DELAY, oomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory conditions. */ TEST(tcp_connect, oom, setUp, tearDown, 0, oomParams) { struct fixture *f = data; HEAP_FAULT_ENABLE; CONNECT_ERROR(2, BOGUS_ADDRESS, RAFT_NOMEM, "out of memory"); return MUNIT_OK; } /* The transport is closed immediately after a connect request as been * submitted. The request's callback is invoked with RAFT_CANCELED. */ TEST(tcp_connect, closeImmediately, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; CONNECT_CLOSE(2, TCP_SERVER_ADDRESS, 0); return MUNIT_OK; } /* The transport gets closed during the handshake. */ TEST(tcp_connect, closeDuringHandshake, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; CONNECT_CLOSE(2, TCP_SERVER_ADDRESS, 1); return MUNIT_OK; } static void checkCb(struct uv_check_s *check) { struct fixture *f = check->data; CLOSE_SUBMIT; uv_close((struct uv_handle_s *)check, NULL); } /* The transport gets closed right after a connection failure, while the * connection attempt is being aborted. */ TEST(tcp_connect, closeDuringAbort, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; struct uv_check_s check; int rv; /* Use a check handle in order to close the transport in the same loop * iteration where the connection failure occurs. 
*/ rv = uv_check_init(&f->loop, &check); munit_assert_int(rv, ==, 0); check.data = f; uv_check_start(&check, checkCb); CONNECT_REQ(2, BOGUS_ADDRESS, 0, RAFT_NOCONNECTION); LOOP_RUN(1); LOOP_RUN_UNTIL(&_result.done); CLOSE_WAIT; return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_tcp_listen.c000066400000000000000000000202531415614527300221250ustar00rootroot00000000000000#include "../../include/raft.h" #include "../../include/raft/uv.h" #include "../../src/byte.h" #include "../lib/heap.h" #include "../lib/loop.h" #include "../lib/runner.h" #include "../lib/tcp.h" /****************************************************************************** * * Fixture with a TCP-based raft_uv_transport. * *****************************************************************************/ struct fixture { FIXTURE_HEAP; FIXTURE_LOOP; FIXTURE_TCP; struct raft_uv_transport transport; bool accepted; bool closed; struct { uint8_t buf[sizeof(uint64_t) + /* Protocol version */ sizeof(uint64_t) + /* Server ID */ sizeof(uint64_t) + /* Length of address */ sizeof(uint64_t) * 2 /* Address */]; size_t offset; } handshake; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ #define PEER_ID 2 #define PEER_ADDRESS "127.0.0.1:666" static void closeCb(struct raft_uv_transport *transport) { struct fixture *f = transport->data; f->closed = true; } static void acceptCb(struct raft_uv_transport *t, raft_id id, const char *address, struct uv_stream_s *stream) { struct fixture *f = t->data; munit_assert_int(id, ==, PEER_ID); munit_assert_string_equal(address, PEER_ADDRESS); f->accepted = true; uv_close((struct uv_handle_s *)stream, (uv_close_cb)raft_free); } #define INIT \ do { \ int _rv; \ _rv = raft_uv_tcp_init(&f->transport, &f->loop); \ munit_assert_int(_rv, ==, 0); \ _rv = f->transport.init(&f->transport, 1, "127.0.0.1:9000"); \ munit_assert_int(_rv, ==, 0); \ 
f->transport.data = f; \ f->closed = false; \ } while (0) #define CLOSE \ do { \ f->transport.close(&f->transport, closeCb); \ LOOP_RUN_UNTIL(&f->closed); \ raft_uv_tcp_close(&f->transport); \ } while (0) /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SET_UP_HEAP; SETUP_LOOP; SETUP_TCP; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; TEAR_DOWN_TCP; TEAR_DOWN_LOOP; TEAR_DOWN_HEAP; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); void *cursor; int rv; /* test_tcp_listen(&f->tcp); */ INIT; f->accepted = false; f->handshake.offset = 0; cursor = f->handshake.buf; bytePut64(&cursor, 1); bytePut64(&cursor, PEER_ID); bytePut64(&cursor, 16); strcpy(cursor, PEER_ADDRESS); rv = f->transport.listen(&f->transport, acceptCb); munit_assert_int(rv, ==, 0); return f; } static void tearDown(void *data) { struct fixture *f = data; CLOSE; tearDownDeps(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Connect to the listening socket of the transport, creating a new connection * that is waiting to be accepted. */ #define PEER_CONNECT TCP_CLIENT_CONNECT(9000) /* Make the peer close the connection. */ #define PEER_CLOSE TCP_CLIENT_CLOSE /* Make the connected client send handshake data. */ #define PEER_HANDSHAKE \ do { \ size_t n = sizeof f->handshake.buf; \ TCP_CLIENT_SEND(f->handshake.buf, n); \ } while (0) /* Make the connected client send partial handshake data: only N bytes will be * sent, starting from the offset of the last call. 
*/ #define PEER_HANDSHAKE_PARTIAL(N) \ do { \ TCP_CLIENT_SEND(f->handshake.buf + f->handshake.offset, N); \ } while (0) /* After a PEER_CONNECT() call, spin the event loop until the connected * callback of the listening TCP handle gets called. */ #define LOOP_RUN_UNTIL_CONNECTED LOOP_RUN(1); /* After a PEER_HANDSHAKE_PARTIAL() call, spin the event loop until the read * callback gets called. */ #define LOOP_RUN_UNTIL_READ LOOP_RUN(1); /* Spin the event loop until the accept callback gets eventually invoked. */ #define ACCEPT LOOP_RUN_UNTIL(&f->accepted); /****************************************************************************** * * Success scenarios. * *****************************************************************************/ SUITE(tcp_listen) /* If the handshake is successful, the accept callback is invoked. */ TEST(tcp_listen, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; PEER_CONNECT; PEER_HANDSHAKE; ACCEPT; return MUNIT_OK; } /* The client sends us a bad protocol version */ TEST(tcp_listen, badProtocol, setUp, tearDown, 0, NULL) { struct fixture *f = data; memset(f->handshake.buf, 999, sizeof(uint64_t)); PEER_CONNECT; PEER_HANDSHAKE; LOOP_RUN_UNTIL_CONNECTED; LOOP_RUN_UNTIL_READ; return MUNIT_OK; } /* Parameters for sending a partial handshake */ static char *partialHandshakeN[] = {"8", "16", "24", "32", NULL}; static MunitParameterEnum peerAbortParams[] = { {"n", partialHandshakeN}, {NULL, NULL}, }; /* The peer closes the connection after having sent a partial handshake. */ TEST(tcp_listen, peerAbort, setUp, tearDown, 0, peerAbortParams) { struct fixture *f = data; const char *n = munit_parameters_get(params, "n"); PEER_CONNECT; PEER_HANDSHAKE_PARTIAL(atoi(n)); LOOP_RUN_UNTIL_CONNECTED; LOOP_RUN_UNTIL_READ; PEER_CLOSE; return MUNIT_OK; } /* TODO: skip "2" because it makes libuv crash, as it calls abort(). 
See also * https://github.com/libuv/libuv/issues/1948 */ static char *oomHeapFaultDelay[] = {"0", "1", "3", NULL}; static char *oomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum oomParams[] = { {TEST_HEAP_FAULT_DELAY, oomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, oomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory conditions */ TEST(tcp_listen, oom, setUp, tearDown, 0, oomParams) { struct fixture *f = data; PEER_CONNECT; PEER_HANDSHAKE; HEAP_FAULT_ENABLE; /* Run as much as possible. */ uv_run(&f->loop, UV_RUN_NOWAIT); uv_run(&f->loop, UV_RUN_NOWAIT); uv_run(&f->loop, UV_RUN_NOWAIT); return MUNIT_OK; } /* Close the transport right after an incoming connection becomes pending, but * it hasn't been accepted yet. */ TEST(tcp_listen, pending, setUp, tearDown, 0, NULL) { struct fixture *f = data; PEER_CONNECT; return MUNIT_OK; } /* Close the transport right after an incoming connection gets accepted, and the * peer hasn't sent handshake data yet. */ TEST(tcp_listen, closeBeforeHandshake, setUp, tearDown, 0, NULL) { struct fixture *f = data; PEER_CONNECT; LOOP_RUN_UNTIL_CONNECTED; return MUNIT_OK; } static MunitParameterEnum closeDuringHandshake[] = { {"n", partialHandshakeN}, {NULL, NULL}, }; /* Close the transport right after the peer has started to send handshake data, * but isn't done with it yet. 
*/ TEST(tcp_listen, handshake, setUp, tearDown, 0, closeDuringHandshake) { struct fixture *f = data; const char *n_param = munit_parameters_get(params, "n"); PEER_CONNECT; PEER_HANDSHAKE_PARTIAL(atoi(n_param)); LOOP_RUN_UNTIL_CONNECTED; LOOP_RUN_UNTIL_READ; return MUNIT_OK; } raft-0.11.3/test/integration/test_uv_truncate.c000066400000000000000000000274061415614527300216150ustar00rootroot00000000000000#include "../lib/runner.h" #include "../lib/uv.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_UV_DEPS; FIXTURE_UV; int count; /* To generate deterministic entry data */ }; /****************************************************************************** * * Helper macros * *****************************************************************************/ struct result { int status; bool done; }; static void appendCbAssertResult(struct raft_io_append *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } /* Declare and fill the entries array for the append request identified by * I. The array will have N entries, and each entry will have a data buffer of * SIZE bytes.*/ #define ENTRIES(I, N, SIZE) \ struct raft_entry _entries##I[N]; \ uint8_t _entries_data##I[N * SIZE]; \ do { \ int _i; \ for (_i = 0; _i < N; _i++) { \ struct raft_entry *entry = &_entries##I[_i]; \ entry->term = 1; \ entry->type = RAFT_COMMAND; \ entry->buf.base = &_entries_data##I[_i * SIZE]; \ entry->buf.len = SIZE; \ entry->batch = NULL; \ munit_assert_ptr_not_null(entry->buf.base); \ memset(entry->buf.base, 0, entry->buf.len); \ f->count++; \ *(uint64_t *)entry->buf.base = f->count; \ } \ } while (0) /* Submit an append request identified by I, with N_ENTRIES entries, each one of * size ENTRY_SIZE). 
*/ #define APPEND_SUBMIT(I, N_ENTRIES, ENTRY_SIZE) \ struct raft_io_append _req##I; \ struct result _result##I = {0, false}; \ int _rv##I; \ ENTRIES(I, N_ENTRIES, ENTRY_SIZE); \ _req##I.data = &_result##I; \ _rv##I = f->io.append(&f->io, &_req##I, _entries##I, N_ENTRIES, \ appendCbAssertResult); \ munit_assert_int(_rv##I, ==, 0) /* Wait for the append request identified by I to complete. */ #define APPEND_WAIT(I) LOOP_RUN_UNTIL(&_result##I.done) #define APPEND_EXPECT(I, STATUS) _result##I.status = STATUS /* Submit an append request and wait for it to successfully complete. */ #define APPEND(N) \ do { \ APPEND_SUBMIT(9999, N, 8); \ APPEND_WAIT(9999); \ } while (0) #define TRUNCATE(N) \ do { \ int rv_; \ rv_ = f->io.truncate(&f->io, N); \ munit_assert_int(rv_, ==, 0); \ } while (0) /****************************************************************************** * * Set up and tear down. * *****************************************************************************/ static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SETUP_UV_DEPS; SETUP_UV; f->count = 0; return f; } static void tearDownDeps(void *data) { struct fixture *f = data; TEAR_DOWN_UV_DEPS; free(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Shutdown the fixture's raft_io instance, then load all entries on disk using * a new raft_io instance, and assert that there are N entries with data * matching the DATA array. */ #define ASSERT_ENTRIES(N, ...) 
\ TEAR_DOWN_UV; \ do { \ struct uv_loop_s _loop; \ struct raft_uv_transport _transport; \ struct raft_io _io; \ struct raft_tracer _tracer; \ raft_term _term; \ raft_id _voted_for; \ struct raft_snapshot *_snapshot; \ raft_index _start_index; \ struct raft_entry *_entries; \ size_t _i; \ size_t _n; \ void *_batch = NULL; \ unsigned _data[N] = {__VA_ARGS__}; \ int _rv; \ \ _rv = uv_loop_init(&_loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_tcp_init(&_transport, &_loop); \ munit_assert_int(_rv, ==, 0); \ _rv = raft_uv_init(&_io, &_loop, f->dir, &_transport); \ munit_assert_int(_rv, ==, 0); \ _tracer.emit = TracerEmit; \ raft_uv_set_tracer(&_io, &_tracer); \ _rv = _io.init(&_io, 1, "1"); \ munit_assert_int(_rv, ==, 0); \ _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index, \ &_entries, &_n); \ munit_assert_int(_rv, ==, 0); \ _io.close(&_io, NULL); \ uv_run(&_loop, UV_RUN_NOWAIT); \ raft_uv_close(&_io); \ raft_uv_tcp_close(&_transport); \ uv_loop_close(&_loop); \ \ munit_assert_ptr_null(_snapshot); \ munit_assert_int(_n, ==, N); \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ uint64_t _value = *(uint64_t *)_entry->buf.base; \ munit_assert_int(_entry->term, ==, 1); \ munit_assert_int(_entry->type, ==, RAFT_COMMAND); \ munit_assert_int(_value, ==, _data[_i]); \ munit_assert_ptr_not_null(_entry->batch); \ } \ for (_i = 0; _i < _n; _i++) { \ struct raft_entry *_entry = &_entries[_i]; \ if (_entry->batch != _batch) { \ _batch = _entry->batch; \ raft_free(_batch); \ } \ } \ raft_free(_entries); \ } while (0); /****************************************************************************** * * raft_io->truncate() * *****************************************************************************/ SUITE(truncate) /* If the index to truncate is at the start of a segment, that segment and all * subsequent ones are removed. 
*/ TEST(truncate, wholeSegment, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(3); TRUNCATE(1); APPEND(1); ASSERT_ENTRIES(1 /* n entries */, 4 /* entries data */); return MUNIT_OK; } /* The index to truncate is the same as the last appended entry. */ TEST(truncate, sameAsLastIndex, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(3); TRUNCATE(3); APPEND(1); ASSERT_ENTRIES(3 /* n entries */, 1, 2, 4 /* entries data */); return MUNIT_OK; } /* If the index to truncate is not at the start of a segment, that segment gets * truncated. */ TEST(truncate, partialSegment, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND(3); APPEND(1); TRUNCATE(2); APPEND(1); ASSERT_ENTRIES(2, /* n entries */ 1, 5 /* entries data */ ); return MUNIT_OK; } /* The truncate request is issued while an append request is still pending. */ TEST(truncate, pendingAppend, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, /* request ID */ 3, /* n entries */ 8 /* entry size */ ); TRUNCATE(2 /* truncation index */); APPEND(1); ASSERT_ENTRIES(2, /* n entries */ 1, 4 /* entries data */ ); return MUNIT_OK; } /* Multiple truncate requests pending at the same time. */ TEST(truncate, multiplePending, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, /* request ID */ 3, /* n entries */ 8 /* entry size */ ); TRUNCATE(2 /* truncation index */); APPEND_SUBMIT(1, /* request ID */ 2, /* n entries */ 8 /* entry size */ ); TRUNCATE(3 /* truncation index */); APPEND(1); ASSERT_ENTRIES(3, /* n entries */ 1, 4, 6 /* entries data */ ); return MUNIT_OK; } /* The truncate request gets canceled because we're closing. 
*/ TEST(truncate, closing, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, /* request ID */ 3, /* n entries */ 8 /* entry size */ ); TRUNCATE(2 /* truncation index */); APPEND_EXPECT(0, /* request ID */ RAFT_CANCELED /* status */ ); TEAR_DOWN_UV; return MUNIT_OK; } /* Multiple truncate requests get canceled because we're closing. */ TEST(truncate, closingMultiple, setUp, tearDownDeps, 0, NULL) { struct fixture *f = data; APPEND_SUBMIT(0, /* request ID */ 3, /* n entries */ 8 /* entry size */ ); TRUNCATE(2 /* truncation index */); APPEND_SUBMIT(1, /* request ID */ 2, /* n entries */ 8 /* entry size */ ); TRUNCATE(3 /* truncation index */); APPEND_EXPECT(0, /* request ID */ RAFT_CANCELED /* status */ ); APPEND_EXPECT(1, /* request ID */ RAFT_CANCELED /* status */ ); TEAR_DOWN_UV; return MUNIT_OK; } raft-0.11.3/test/lib/000077500000000000000000000000001415614527300142655ustar00rootroot00000000000000raft-0.11.3/test/lib/aio.c000066400000000000000000000027141415614527300152050ustar00rootroot00000000000000#include "aio.h" #include #include #include #include #include "munit.h" int AioFill(aio_context_t *ctx, unsigned n) { char buf[256]; int fd; int rv; int limit; int used; /* Figure out how many events are available. */ fd = open("/proc/sys/fs/aio-max-nr", O_RDONLY); munit_assert_int(fd, !=, -1); rv = read(fd, buf, sizeof buf); munit_assert_int(rv, !=, -1); close(fd); limit = atoi(buf); munit_assert_int(limit, >, 0); /* Figure out how many events are in use. */ fd = open("/proc/sys/fs/aio-nr", O_RDONLY); munit_assert_int(fd, !=, -1); rv = read(fd, buf, sizeof buf); munit_assert_int(rv, !=, -1); close(fd); used = atoi(buf); munit_assert_int(used, >=, 0); /* Best effort check that nothing process is using AIO. Our own unit tests * case use up to 2 event slots at the time this function is called, so we * don't consider those. 
*/ if (used > 2) { return -1; } rv = syscall(__NR_io_setup, limit - used - n, ctx); if (rv != 0) { /* The `limit - used - n` calculation is racy and io_setup can fail with * EAGAIN if in meantime another proces has reserved some events */ munit_assert_int(errno, ==, EAGAIN); return -1; } return 0; } void AioDestroy(aio_context_t ctx) { int rv; rv = syscall(__NR_io_destroy, ctx); munit_assert_int(rv, ==, 0); } raft-0.11.3/test/lib/aio.h000066400000000000000000000012011415614527300152000ustar00rootroot00000000000000/* Utilities around the Kernel AIO sub-system. */ #ifndef TEST_AIO_H #define TEST_AIO_H #include /* Fill the AIO subsystem resources by allocating a lot of events to the given * context, and leaving only @n events available for subsequent calls to * @io_setup. * * Return -1 if it looks like there is another process already using the AIO * subsystem, which would most probably make the calling test flaky because there * won't be exactly @n events available anymore. */ int AioFill(aio_context_t *ctx, unsigned n); /* Destroy the given AIO context. 
*/ void AioDestroy(aio_context_t ctx); #endif /* TEST_AIO_H */ raft-0.11.3/test/lib/cluster.c000066400000000000000000000026301415614527300161130ustar00rootroot00000000000000#include "cluster.h" static void randomize(struct raft_fixture *f, unsigned i, int what) { struct raft *raft = raft_fixture_get(f, i); switch (what) { case RAFT_FIXTURE_TICK: /* TODO: provide an API to inspect how much time has elapsed since * the last election timer reset */ if (raft->election_timer_start == raft->io->time(raft->io)) { raft_fixture_set_randomized_election_timeout( f, i, munit_rand_int_range(raft->election_timeout, raft->election_timeout * 2)); } break; case RAFT_FIXTURE_DISK: raft_fixture_set_disk_latency(f, i, munit_rand_int_range(10, 25)); break; case RAFT_FIXTURE_NETWORK: raft_fixture_set_network_latency(f, i, munit_rand_int_range(25, 50)); break; default: munit_assert(0); break; } } void cluster_randomize_init(struct raft_fixture *f) { unsigned i; for (i = 0; i < raft_fixture_n(f); i++) { randomize(f, i, RAFT_FIXTURE_TICK); randomize(f, i, RAFT_FIXTURE_DISK); randomize(f, i, RAFT_FIXTURE_NETWORK); } } void cluster_randomize(struct raft_fixture *f, struct raft_fixture_event *event) { randomize(f, event->server_index, event->type); } raft-0.11.3/test/lib/cluster.h000066400000000000000000000505441415614527300161270ustar00rootroot00000000000000/* Setup and drive a test raft cluster. */ #ifndef TEST_CLUSTER_H #define TEST_CLUSTER_H #include #include "../../include/raft.h" #include "../../include/raft/fixture.h" #include "fsm.h" #include "heap.h" #include "munit.h" #include "snapshot.h" #define FIXTURE_CLUSTER \ FIXTURE_HEAP; \ struct raft_fsm fsms[RAFT_FIXTURE_MAX_SERVERS]; \ struct raft_fixture cluster /* N is the default number of servers, but can be tweaked with the cluster-n * parameter. 
*/ #define SETUP_CLUSTER(DEFAULT_N) \ SET_UP_HEAP; \ do { \ unsigned _n = DEFAULT_N; \ bool _pre_vote = false; \ unsigned _hb = 0; \ unsigned _i; \ int _rv; \ if (munit_parameters_get(params, CLUSTER_N_PARAM) != NULL) { \ _n = atoi(munit_parameters_get(params, CLUSTER_N_PARAM)); \ } \ if (munit_parameters_get(params, CLUSTER_PRE_VOTE_PARAM) != NULL) { \ _pre_vote = \ atoi(munit_parameters_get(params, CLUSTER_PRE_VOTE_PARAM)); \ } \ if (munit_parameters_get(params, CLUSTER_HEARTBEAT_PARAM) != NULL) { \ _hb = \ atoi(munit_parameters_get(params, CLUSTER_HEARTBEAT_PARAM)); \ } \ munit_assert_int(_n, >, 0); \ for (_i = 0; _i < _n; _i++) { \ FsmInit(&f->fsms[_i]); \ } \ _rv = raft_fixture_init(&f->cluster, _n, f->fsms); \ munit_assert_int(_rv, ==, 0); \ for (_i = 0; _i < _n; _i++) { \ raft_set_pre_vote(raft_fixture_get(&f->cluster, _i), _pre_vote); \ if (_hb) { \ raft_set_heartbeat_timeout(raft_fixture_get(&f->cluster, _i),\ _hb); \ } \ } \ } while (0) #define TEAR_DOWN_CLUSTER \ do { \ unsigned i; \ raft_fixture_close(&f->cluster); \ for (i = 0; i < CLUSTER_N; i++) { \ FsmClose(&f->fsms[i]); \ } \ } while (0); \ TEAR_DOWN_HEAP; /* Munit parameter for setting the number of servers */ #define CLUSTER_N_PARAM "cluster-n" /* Munit parameter for setting the number of voting servers */ #define CLUSTER_N_VOTING_PARAM "cluster-n-voting" /* Munit parameter for enabling pre-vote */ #define CLUSTER_PRE_VOTE_PARAM "cluster-pre-vote" /* Munit parameter for setting HeartBeat timeout */ #define CLUSTER_HEARTBEAT_PARAM "cluster-heartbeat" /* Get the number of servers in the cluster. */ #define CLUSTER_N raft_fixture_n(&f->cluster) /* Get the cluster time. */ #define CLUSTER_TIME raft_fixture_time(&f->cluster) /* Index of the current leader, or CLUSTER_N if there's no leader. */ #define CLUSTER_LEADER raft_fixture_leader_index(&f->cluster) /* True if the cluster has a leader. */ #define CLUSTER_HAS_LEADER CLUSTER_LEADER < CLUSTER_N /* Get the struct raft object of the I'th server. 
*/ #define CLUSTER_RAFT(I) raft_fixture_get(&f->cluster, I) /* Get the state of the I'th server. */ #define CLUSTER_STATE(I) raft_state(raft_fixture_get(&f->cluster, I)) /* Get the current term of the I'th server. */ #define CLUSTER_TERM(I) raft_fixture_get(&f->cluster, I)->current_term /* Get the struct fsm object of the I'th server. */ #define CLUSTER_FSM(I) &f->fsms[I] /* Return the last applied index on the I'th server. */ #define CLUSTER_LAST_APPLIED(I) \ raft_last_applied(raft_fixture_get(&f->cluster, I)) /* Return the ID of the server the I'th server has voted for. */ #define CLUSTER_VOTED_FOR(I) raft_fixture_voted_for(&f->cluster, I) /* Return a description of the last error occurred on the I'th server. */ #define CLUSTER_ERRMSG(I) raft_errmsg(CLUSTER_RAFT(I)) /* Populate the given configuration with all servers in the fixture. All servers * will be voting. */ #define CLUSTER_CONFIGURATION(CONF) \ { \ int rv_; \ rv_ = raft_fixture_configuration(&f->cluster, CLUSTER_N, CONF); \ munit_assert_int(rv_, ==, 0); \ } /* Bootstrap all servers in the cluster. All servers will be voting, unless the * cluster-n-voting parameter is used. */ #define CLUSTER_BOOTSTRAP \ { \ unsigned n_ = CLUSTER_N; \ int rv_; \ struct raft_configuration configuration; \ if (munit_parameters_get(params, CLUSTER_N_VOTING_PARAM) != NULL) { \ n_ = atoi(munit_parameters_get(params, CLUSTER_N_VOTING_PARAM)); \ } \ rv_ = raft_fixture_configuration(&f->cluster, n_, &configuration); \ munit_assert_int(rv_, ==, 0); \ rv_ = raft_fixture_bootstrap(&f->cluster, &configuration); \ munit_assert_int(rv_, ==, 0); \ raft_configuration_close(&configuration); \ } /* Bootstrap all servers in the cluster. Only the first N servers will be * voting. 
*/ #define CLUSTER_BOOTSTRAP_N_VOTING(N) \ { \ int rv_; \ struct raft_configuration configuration_; \ rv_ = raft_fixture_configuration(&f->cluster, N, &configuration_); \ munit_assert_int(rv_, ==, 0); \ rv_ = raft_fixture_bootstrap(&f->cluster, &configuration_); \ munit_assert_int(rv_, ==, 0); \ raft_configuration_close(&configuration_); \ } /* Start all servers in the test cluster. */ #define CLUSTER_START \ { \ int rc; \ rc = raft_fixture_start(&f->cluster); \ munit_assert_int(rc, ==, 0); \ } /* Step the cluster. */ #define CLUSTER_STEP raft_fixture_step(&f->cluster); /* Step the cluster N times. */ #define CLUSTER_STEP_N(N) \ { \ unsigned i_; \ for (i_ = 0; i_ < N; i_++) { \ raft_fixture_step(&f->cluster); \ } \ } /* Step until the given function becomes true. */ #define CLUSTER_STEP_UNTIL(FUNC, ARG, MSECS) \ { \ bool done_; \ done_ = raft_fixture_step_until(&f->cluster, FUNC, ARG, MSECS); \ munit_assert_true(done_); \ } /* Step the cluster until a leader is elected or #MAX_MSECS have elapsed. */ #define CLUSTER_STEP_UNTIL_ELAPSED(MSECS) \ raft_fixture_step_until_elapsed(&f->cluster, MSECS) /* Step the cluster until a leader is elected or #MAX_MSECS have elapsed. */ #define CLUSTER_STEP_UNTIL_HAS_LEADER(MAX_MSECS) \ { \ bool done; \ done = raft_fixture_step_until_has_leader(&f->cluster, MAX_MSECS); \ munit_assert_true(done); \ munit_assert_true(CLUSTER_HAS_LEADER); \ } /* Step the cluster until there's no leader or #MAX_MSECS have elapsed. */ #define CLUSTER_STEP_UNTIL_HAS_NO_LEADER(MAX_MSECS) \ { \ bool done; \ done = raft_fixture_step_until_has_no_leader(&f->cluster, MAX_MSECS); \ munit_assert_true(done); \ munit_assert_false(CLUSTER_HAS_LEADER); \ } /* Step the cluster until the given index was applied by the given server (or * all if N) or #MAX_MSECS have elapsed. 
*/ #define CLUSTER_STEP_UNTIL_APPLIED(I, INDEX, MAX_MSECS) \ { \ bool done; \ done = \ raft_fixture_step_until_applied(&f->cluster, I, INDEX, MAX_MSECS); \ munit_assert_true(done); \ } /* Step the cluster until the state of the server with the given index matches * the given value, or #MAX_MSECS have elapsed. */ #define CLUSTER_STEP_UNTIL_STATE_IS(I, STATE, MAX_MSECS) \ { \ bool done; \ done = raft_fixture_step_until_state_is(&f->cluster, I, STATE, \ MAX_MSECS); \ munit_assert_true(done); \ } /* Step the cluster until the term of the server with the given index matches * the given value, or #MAX_MSECS have elapsed. */ #define CLUSTER_STEP_UNTIL_TERM_IS(I, TERM, MAX_MSECS) \ { \ bool done; \ done = \ raft_fixture_step_until_term_is(&f->cluster, I, TERM, MAX_MSECS); \ munit_assert_true(done); \ } /* Step the cluster until server I has voted for server J, or #MAX_MSECS have * elapsed. */ #define CLUSTER_STEP_UNTIL_VOTED_FOR(I, J, MAX_MSECS) \ { \ bool done; \ done = \ raft_fixture_step_until_voted_for(&f->cluster, I, J, MAX_MSECS); \ munit_assert_true(done); \ } /* Step the cluster until all messages from server I to server J have been * delivered, or #MAX_MSECS elapse. */ #define CLUSTER_STEP_UNTIL_DELIVERED(I, J, MAX_MSECS) \ { \ bool done; \ done = \ raft_fixture_step_until_delivered(&f->cluster, I, J, MAX_MSECS); \ munit_assert_true(done); \ } /* Request to apply an FSM command to add the given value to x. */ #define CLUSTER_APPLY_ADD_X(I, REQ, VALUE, CB) \ { \ struct raft_buffer buf_; \ struct raft *raft_; \ int rv_; \ FsmEncodeAddX(VALUE, &buf_); \ raft_ = raft_fixture_get(&f->cluster, I); \ rv_ = raft_apply(raft_, REQ, &buf_, 1, CB); \ munit_assert_int(rv_, ==, 0); \ } /* Kill the I'th server. */ #define CLUSTER_KILL(I) raft_fixture_kill(&f->cluster, I); /* Kill the leader. */ #define CLUSTER_KILL_LEADER CLUSTER_KILL(CLUSTER_LEADER) /* Kill a majority of servers, except the leader (if there is one). 
*/ #define CLUSTER_KILL_MAJORITY \ { \ size_t i2; \ size_t n; \ for (i2 = 0, n = 0; n < (CLUSTER_N / 2) + 1; i2++) { \ if (i2 == CLUSTER_LEADER) { \ continue; \ } \ CLUSTER_KILL(i2) \ n++; \ } \ } /* Grow the cluster adding one server. */ #define CLUSTER_GROW \ { \ int rv_; \ FsmInit(&f->fsms[CLUSTER_N]); \ rv_ = raft_fixture_grow(&f->cluster, &f->fsms[CLUSTER_N]); \ munit_assert_int(rv_, ==, 0); \ } /* Add a new pristine server to the cluster, connected to all others. Then * submit a request to add it to the configuration as an idle server. */ #define CLUSTER_ADD(REQ) \ { \ int rc; \ struct raft *new_raft; \ CLUSTER_GROW; \ rc = raft_start(CLUSTER_RAFT(CLUSTER_N - 1)); \ munit_assert_int(rc, ==, 0); \ new_raft = CLUSTER_RAFT(CLUSTER_N - 1); \ rc = raft_add(CLUSTER_RAFT(CLUSTER_LEADER), REQ, new_raft->id, \ new_raft->address, NULL); \ munit_assert_int(rc, ==, 0); \ } /* Assign the given role to the server that was added last. */ #define CLUSTER_ASSIGN(REQ, ROLE) \ do { \ unsigned _id; \ int _rv; \ _id = CLUSTER_N; /* Last server that was added. */ \ _rv = raft_assign(CLUSTER_RAFT(CLUSTER_LEADER), REQ, _id, ROLE, NULL); \ munit_assert_int(_rv, ==, 0); \ } while (0) /* Ensure that the cluster can make progress from the current state. * * - If no leader is present, wait for one to be elected. * - Submit a request to apply a new FSM command and wait for it to complete. */ #define CLUSTER_MAKE_PROGRESS \ { \ struct raft_apply *req_ = munit_malloc(sizeof *req_); \ if (!(CLUSTER_HAS_LEADER)) { \ CLUSTER_STEP_UNTIL_HAS_LEADER(10000); \ } \ CLUSTER_APPLY_ADD_X(CLUSTER_LEADER, req_, 1, NULL); \ CLUSTER_STEP_UNTIL_APPLIED(CLUSTER_LEADER, req_->index, 3000); \ free(req_); \ } /* Elect the I'th server. */ #define CLUSTER_ELECT(I) raft_fixture_elect(&f->cluster, I) /* Depose the current leader */ #define CLUSTER_DEPOSE raft_fixture_depose(&f->cluster) /* Disconnect I from J. */ #define CLUSTER_DISCONNECT(I, J) raft_fixture_disconnect(&f->cluster, I, J) /* Reconnect I to J. 
*/ #define CLUSTER_RECONNECT(I, J) raft_fixture_reconnect(&f->cluster, I, J) /* Saturate the connection from I to J. */ #define CLUSTER_SATURATE(I, J) raft_fixture_saturate(&f->cluster, I, J) /* Saturate the connection from I to J and from J to I, in both directions. */ #define CLUSTER_SATURATE_BOTHWAYS(I, J) \ CLUSTER_SATURATE(I, J); \ CLUSTER_SATURATE(J, I) /* Desaturate the connection between I and J, making messages flow again. */ #define CLUSTER_DESATURATE(I, J) raft_fixture_desaturate(&f->cluster, I, J) /* Reconnect two servers. */ #define CLUSTER_DESATURATE_BOTHWAYS(I, J) \ CLUSTER_DESATURATE(I, J); \ CLUSTER_DESATURATE(J, I) /* Set the network latency of outgoing messages of server I. */ #define CLUSTER_SET_NETWORK_LATENCY(I, MSECS) \ raft_fixture_set_network_latency(&f->cluster, I, MSECS) /* Set the disk I/O latency of server I. */ #define CLUSTER_SET_DISK_LATENCY(I, MSECS) \ raft_fixture_set_disk_latency(&f->cluster, I, MSECS) /* Set the term persisted on the I'th server. This must be called before * starting the cluster. */ #define CLUSTER_SET_TERM(I, TERM) raft_fixture_set_term(&f->cluster, I, TERM) /* Set the snapshot persisted on the I'th server. This must be called before * starting the cluster. */ #define CLUSTER_SET_SNAPSHOT(I, LAST_INDEX, LAST_TERM, CONF_INDEX, X, Y) \ { \ struct raft_configuration configuration_; \ struct raft_snapshot *snapshot_; \ CLUSTER_CONFIGURATION(&configuration_); \ CREATE_SNAPSHOT(snapshot_, LAST_INDEX, LAST_TERM, configuration_, \ CONF_INDEX, X, Y); \ raft_fixture_set_snapshot(&f->cluster, I, snapshot_); \ } /* Add a persisted entry to the I'th server. This must be called before * starting the cluster. */ #define CLUSTER_ADD_ENTRY(I, ENTRY) \ raft_fixture_add_entry(&f->cluster, I, ENTRY) /* Add an entry to the ones persisted on the I'th server. This must be called * before starting the cluster. 
*/ #define CLUSTER_ADD_ENTRY(I, ENTRY) \ raft_fixture_add_entry(&f->cluster, I, ENTRY) /* Make an I/O error occur on the I'th server after @DELAY operations. */ #define CLUSTER_IO_FAULT(I, DELAY, REPEAT) \ raft_fixture_io_fault(&f->cluster, I, DELAY, REPEAT) /* Return the number of messages sent by the given server. */ #define CLUSTER_N_SEND(I, TYPE) raft_fixture_n_send(&f->cluster, I, TYPE) /* Return the number of messages sent by the given server. */ #define CLUSTER_N_RECV(I, TYPE) raft_fixture_n_recv(&f->cluster, I, TYPE) /* Set a fixture hook that randomizes election timeouts, disk latency and * network latency. */ #define CLUSTER_RANDOMIZE \ cluster_randomize_init(&f->cluster); \ raft_fixture_hook(&f->cluster, cluster_randomize) void cluster_randomize_init(struct raft_fixture *f); void cluster_randomize(struct raft_fixture *f, struct raft_fixture_event *event); #endif /* TEST_CLUSTER_H */ raft-0.11.3/test/lib/dir.c000066400000000000000000000220661415614527300152150ustar00rootroot00000000000000#include "dir.h" #include #include #include #include #include #include #include #include #include #include #define SEP "/" #define TEMPLATE "raft-test-XXXXXX" #define TEST_DIR_TEMPLATE "./tmp/%s/raft-test-XXXXXX" static char *dirAll[] = {"tmpfs", "ext4", "btrfs", "xfs", "zfs", NULL}; static char *dirTmpfs[] = {"tmpfs", NULL}; static char *dirAio[] = {"btrfs", "ext4", "xfs", NULL}; static char *dirNoAio[] = {"tmpfs", "zfs", NULL}; MunitParameterEnum DirTmpfsParams[] = { {DIR_FS_PARAM, dirTmpfs}, {NULL, NULL}, }; MunitParameterEnum DirAllParams[] = { {DIR_FS_PARAM, dirAll}, {NULL, NULL}, }; MunitParameterEnum DirAioParams[] = { {DIR_FS_PARAM, dirAio}, {NULL, NULL}, }; MunitParameterEnum DirNoAioParams[] = { {DIR_FS_PARAM, dirNoAio}, {NULL, NULL}, }; /* Create a temporary directory in the given parent directory. 
*/ static char *dirMakeTemp(const char *parent) { char *dir; if (parent == NULL) { return NULL; } dir = munit_malloc(strlen(parent) + strlen(SEP) + strlen(TEMPLATE) + 1); sprintf(dir, "%s%s%s", parent, SEP, TEMPLATE); if (mkdtemp(dir) == NULL) { munit_error(strerror(errno)); } return dir; } void *DirSetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { const char *fs = munit_parameters_get(params, DIR_FS_PARAM); if (fs == NULL) { return dirMakeTemp("/tmp"); } else if (strcmp(fs, "tmpfs") == 0) { return DirTmpfsSetUp(params, user_data); } else if (strcmp(fs, "ext4") == 0) { return DirExt4SetUp(params, user_data); } else if (strcmp(fs, "btrfs") == 0) { return DirBtrfsSetUp(params, user_data); } else if (strcmp(fs, "zfs") == 0) { return DirZfsSetUp(params, user_data); } else if (strcmp(fs, "xfs") == 0) { return DirXfsSetUp(params, user_data); } munit_errorf("Unsupported file system %s", fs); return NULL; } void *DirTmpfsSetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { return dirMakeTemp(getenv("RAFT_TMP_TMPFS")); } void *DirExt4SetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { return dirMakeTemp(getenv("RAFT_TMP_EXT4")); } void *DirBtrfsSetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { return dirMakeTemp(getenv("RAFT_TMP_BTRFS")); } void *DirZfsSetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { return dirMakeTemp(getenv("RAFT_TMP_ZFS")); } void *DirXfsSetUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { return dirMakeTemp(getenv("RAFT_TMP_XFS")); } /* Wrapper around remove(), compatible with ntfw. 
*/ static int dirRemoveFn(const char *path, MUNIT_UNUSED const struct stat *sbuf, MUNIT_UNUSED int type, MUNIT_UNUSED struct FTW *ftwb) { return remove(path); } static void dirRemove(char *dir) { int rv; rv = chmod(dir, 0755); munit_assert_int(rv, ==, 0); rv = nftw(dir, dirRemoveFn, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS); munit_assert_int(rv, ==, 0); } static bool dirExists(const char *dir) { struct stat sb; int rv; rv = stat(dir, &sb); if (rv == -1) { munit_assert_int(errno, ==, ENOENT); return false; } return true; } void DirTearDown(void *data) { char *dir = data; if (dir == NULL) { return; } if (dirExists(dir)) { dirRemove(dir); } free(dir); } /* Join the given @dir and @filename into @path. */ static void joinPath(const char *dir, const char *filename, char *path) { strcpy(path, dir); strcat(path, "/"); strcat(path, filename); } void DirWriteFile(const char *dir, const char *filename, const void *buf, const size_t n) { char path[256]; int fd; int rv; joinPath(dir, filename, path); fd = open(path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); munit_assert_int(fd, !=, -1); rv = write(fd, buf, n); munit_assert_int(rv, ==, n); close(fd); } void DirWriteFileWithZeros(const char *dir, const char *filename, const size_t n) { void *buf = munit_malloc(n); DirWriteFile(dir, filename, buf, n); free(buf); } void DirOverwriteFile(const char *dir, const char *filename, const void *buf, const size_t n, const off_t whence) { char path[256]; int fd; int rv; off_t size; joinPath(dir, filename, path); fd = open(path, O_RDWR, S_IRUSR | S_IWUSR); munit_assert_int(fd, !=, -1); /* Get the size of the file */ size = lseek(fd, 0, SEEK_END); if (whence == 0) { munit_assert_int(size, >=, n); lseek(fd, 0, SEEK_SET); } else if (whence > 0) { munit_assert_int(whence, <=, size); munit_assert_int(size - whence, >=, n); lseek(fd, whence, SEEK_SET); } else { munit_assert_int(-whence, <=, size); munit_assert_int(-whence, >=, n); lseek(fd, whence, SEEK_END); } rv = write(fd, buf, n); munit_assert_int(rv, 
==, n); close(fd); } void DirTruncateFile(const char *dir, const char *filename, const size_t n) { char path[256]; int fd; int rv; joinPath(dir, filename, path); fd = open(path, O_RDWR, S_IRUSR | S_IWUSR); munit_assert_int(fd, !=, -1); rv = ftruncate(fd, n); munit_assert_int(rv, ==, 0); rv = close(fd); munit_assert_int(rv, ==, 0); } void DirGrowFile(const char *dir, const char *filename, const size_t n) { char path[256]; int fd; struct stat sb; void *buf; size_t size; int rv; joinPath(dir, filename, path); fd = open(path, O_RDWR, S_IRUSR | S_IWUSR); munit_assert_int(fd, !=, -1); rv = fstat(fd, &sb); munit_assert_int(rv, ==, 0); munit_assert_int(sb.st_size, <=, n); /* Fill with zeros. */ lseek(fd, sb.st_size, SEEK_SET); size = n - sb.st_size; buf = munit_malloc(size); rv = write(fd, buf, size); munit_assert_int(rv, ==, size); free(buf); rv = close(fd); munit_assert_int(rv, ==, 0); } void DirRenameFile(const char *dir, const char *filename1, const char *filename2) { char path1[256]; char path2[256]; int rv; joinPath(dir, filename1, path1); joinPath(dir, filename2, path2); rv = rename(path1, path2); munit_assert_int(rv, ==, 0); } void DirRemoveFile(const char *dir, const char *filename) { char path[256]; int rv; joinPath(dir, filename, path); rv = unlink(path); munit_assert_int(rv, ==, 0); } void DirReadFile(const char *dir, const char *filename, void *buf, const size_t n) { char path[256]; int fd; int rv; joinPath(dir, filename, path); fd = open(path, O_RDONLY); if (fd == -1) { munit_logf(MUNIT_LOG_ERROR, "read file '%s': %s", path, strerror(errno)); } rv = read(fd, buf, n); munit_assert_int(rv, ==, n); close(fd); } void DirMakeUnexecutable(const char *dir) { int rv; rv = chmod(dir, 0); munit_assert_int(rv, ==, 0); } void DirMakeUnwritable(const char *dir) { int rv; rv = chmod(dir, 0500); munit_assert_int(rv, ==, 0); } void DirMakeFileUnreadable(const char *dir, const char *filename) { char path[256]; int rv; joinPath(dir, filename, path); rv = chmod(path, 0); 
munit_assert_int(rv, ==, 0); } bool DirHasFile(const char *dir, const char *filename) { char path[256]; int fd; joinPath(dir, filename, path); fd = open(path, O_RDONLY); if (fd == -1) { munit_assert_true(errno == ENOENT || errno == EACCES); return false; } close(fd); return true; } void DirFill(const char *dir, const size_t n) { char path[256]; const char *filename = ".fill"; struct statvfs fs; size_t size; int fd; int rv; rv = statvfs(dir, &fs); munit_assert_int(rv, ==, 0); size = fs.f_bsize * fs.f_bavail; if (n > 0) { munit_assert_int(size, >=, n); } joinPath(dir, filename, path); fd = open(path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); munit_assert_int(fd, !=, -1); rv = posix_fallocate(fd, 0, size - n); munit_assert_int(rv, ==, 0); /* If n is zero, make sure any further write fails with ENOSPC */ if (n == 0) { char buf[4096]; int i; rv = lseek(fd, 0, SEEK_END); munit_assert_int(rv, !=, -1); for (i = 0; i < 40; i++) { rv = write(fd, buf, sizeof buf); if (rv < 0) { break; } } munit_assert_int(rv, ==, -1); munit_assert_int(errno, ==, ENOSPC); } close(fd); } raft-0.11.3/test/lib/dir.h000066400000000000000000000121471415614527300152210ustar00rootroot00000000000000/* Test directory utilities. * * This module sports helpers to create temporary directories backed by various * file systems, read/write files in them, check for the presence of files * etc. */ #ifndef TEST_DIR_H #define TEST_DIR_H #include #include "munit.h" /* Munit parameter defining the file system type backing the temporary directory * created by test_dir_setup(). * * The various file systems must have been previously setup with the fs.sh * script. */ #define DIR_FS_PARAM "dir-fs" #define FIXTURE_DIR char *dir #define SET_UP_DIR \ f->dir = DirSetUp(params, user_data); \ if (f->dir == NULL) { /* Fs not available, test must be skipped. */ \ free(f); \ return NULL; \ } #define TEAR_DOWN_DIR DirTearDown(f->dir) /* Contain a single DIR_FS_PARAM parameter set to all supported file system * types. 
*/ extern MunitParameterEnum DirAllParams[]; /* Contain a single DIR_FS_PARAM parameter set to tmpfs. */ extern MunitParameterEnum DirTmpfsParams[]; /* Contain a single DIR_FS_PARAM parameter set to all file systems with * proper AIO support (i.e. NOWAIT works). */ extern MunitParameterEnum DirAioParams[]; /* Contain a single DIR_FS_PARAM parameter set to all file systems without * proper AIO support (i.e. NOWAIT does not work). */ extern MunitParameterEnum DirNoAioParams[]; /* Create a temporary test directory. * * Return a pointer the path of the created directory. */ void *DirSetUp(const MunitParameter params[], void *user_data); /* Create a temporary test directory backed by tmpfs. * * Return a pointer the path of the created directory, or NULL if no tmpfs file * system is available. */ void *DirTmpfsSetUp(const MunitParameter params[], void *user_data); /* Create a temporary test directory backed by ext4. * * Return a pointer the path of the created directory, or NULL if no ext4 file * system is available. */ void *DirExt4SetUp(const MunitParameter params[], void *user_data); /* Create a temporary test directory backed by btrfs. * * Return a pointer the path of the created directory, or NULL if no btrfs file * system is available. */ void *DirBtrfsSetUp(const MunitParameter params[], void *user_data); /* Create a temporary test directory backed by zfs. * * Return a pointer the path of the created directory, or NULL if no zfs file * system is available. */ void *DirZfsSetUp(const MunitParameter params[], void *user_data); /* Create a temporary test directory backed by xfs. * * Return a pointer the path of the created directory, or NULL if no xfs file * system is available. */ void *DirXfsSetUp(const MunitParameter params[], void *user_data); /* Recursively remove a temporary directory. */ void DirTearDown(void *data); /* Write the given @buf to the given @filename in the given @dir. 
*/ void DirWriteFile(const char *dir, const char *filename, const void *buf, const size_t n); /* Write the given @filename and fill it with zeros. */ void DirWriteFileWithZeros(const char *dir, const char *filename, const size_t n); /* Overwrite @n bytes of the given file with the given @buf data. * * If @whence is zero, overwrite the first @n bytes of the file. If @whence is * positive overwrite the @n bytes starting at offset @whence. If @whence is * negative overwrite @n bytes starting at @whence bytes from the end of the * file. */ void DirOverwriteFile(const char *dir, const char *filename, const void *buf, const size_t n, const off_t whence); /* Truncate the given file, leaving only the first @n bytes. */ void DirTruncateFile(const char *dir, const char *filename, const size_t n); /* Grow the given file to the given size, filling the new bytes with zeros. */ void DirGrowFile(const char *dir, const char *filename, const size_t n); /* Rename a file in the given directory from filename1 to filename2. */ void DirRenameFile(const char *dir, const char *filename1, const char *filename2); /* Remove a file. */ void DirRemoveFile(const char *dir, const char *filename); /* Read into @buf the content of the given @filename in the given @dir. */ void DirReadFile(const char *dir, const char *filename, void *buf, const size_t n); /* Make the given directory not executable, so files can't be open. */ void DirMakeUnexecutable(const char *dir); /* Make the given directory not writable. */ void DirMakeUnwritable(const char *dir); /* Make the given file not readable. */ void DirMakeFileUnreadable(const char *dir, const char *filename); /* Check if the given directory has the given file. */ bool DirHasFile(const char *dir, const char *filename); /* Fill the underlying file system of the given dir, leaving only n bytes free. 
*/ void DirFill(const char *dir, const size_t n); #endif /* TEST_DIR_H */ raft-0.11.3/test/lib/fault.c000066400000000000000000000024501415614527300155450ustar00rootroot00000000000000#include "fault.h" #include "munit.h" void FaultInit(struct Fault *f) { f->countdown = -1; f->n = -1; f->paused = false; } bool FaultTick(struct Fault *f) { if (MUNIT_UNLIKELY(f->paused)) { return false; } /* If the initial delay parameter was set to -1, then never fail. This is * the most common case. */ if (MUNIT_LIKELY(f->countdown < 0)) { return false; } /* If we did not yet reach 'delay' ticks, then just decrease the countdown. */ if (f->countdown > 0) { f->countdown--; return false; } munit_assert_int(f->countdown, ==, 0); /* We reached 'delay' ticks, let's see how many times we have to trigger the * fault, if any. */ if (f->n < 0) { /* Trigger the fault forever. */ return true; } if (f->n > 0) { /* Trigger the fault at least this time. */ f->n--; return true; } munit_assert_int(f->n, ==, 0); /* We reached 'repeat' ticks, let's stop triggering the fault. */ f->countdown--; return false; } void FaultConfig(struct Fault *f, int delay, int repeat) { f->countdown = delay; f->n = repeat; } void FaultPause(struct Fault *f) { f->paused = true; } void FaultResume(struct Fault *f) { f->paused = false; } raft-0.11.3/test/lib/fault.h000066400000000000000000000016041415614527300155520ustar00rootroot00000000000000/* Helper for test components supporting fault injection. */ #ifndef TEST_FAULT_H #define TEST_FAULT_H #include /* Information about a fault that should occur in a component. */ struct Fault { int countdown; /* Trigger the fault when this counter gets to zero. */ int n; /* Repeat the fault this many times. Default is -1. */ bool paused; /* Pause fault triggering. */ }; /* Initialize a fault. */ void FaultInit(struct Fault *f); /* Advance the counters of the fault. Return true if the fault should be * triggered, false otherwise. 
*/ bool FaultTick(struct Fault *f); /* Configure the fault with the given values. */ void FaultConfig(struct Fault *f, int delay, int repeat); /* Pause triggering configured faults. */ void FaultPause(struct Fault *f); /* Resume triggering configured faults. */ void FaultResume(struct Fault *f); #endif /* TESTFAULT_H */ raft-0.11.3/test/lib/fs.sh000077500000000000000000000044341415614527300152410ustar00rootroot00000000000000#!/bin/sh -e # Setup loopback disk devices to test the raft I/O implementation against # various file systems. usage() { echo "usage: $0 setup|teardown [types]" } if [ "${#}" -lt 1 ]; then usage exit 1 fi cmd="${1}" shift types="tmpfs" # Check if loop devices are available, we might be running inside an # unprivileged container if sudo losetup -f > /dev/null 2>&1; then types="$types ext4" if [ "$(which mkfs.btrfs)" != "" ]; then types="$types btrfs" fi if [ "$(which mkfs.xfs)" != "" ]; then types="$types xfs" fi if [ "$(which zfs)" != "" ]; then types="$types zfs" fi if [ "${#}" -gt 0 ]; then types="${@}" fi fi if [ "${cmd}" = "detect" ]; then vars="" for type in $types; do vars="${vars}RAFT_TMP_$(echo ${type} | tr [a-z] [A-Z])=./tmp/${type} " done echo $vars exit 0 fi if [ "${cmd}" = "setup" ]; then mkdir ./tmp for type in $types; do echo -n "Creating $type loop device mount..." # Create the fs mount point mkdir "./tmp/${type}" if [ "$type" = "tmpfs" ]; then # For tmpfs we don't need a loopback disk device. 
sudo mount -t tmpfs -o size=32m tmpfs ./tmp/tmpfs else # Create a loopback disk device dd if=/dev/zero of="./tmp/.${type}" bs=4096 count=28672 > /dev/null 2>&1 loop=$(sudo losetup -f) sudo losetup "${loop}" "./tmp/.${type}" # Initialize the file system if [ "$type" = "zfs" ]; then sudo zpool create raft "${loop}" sudo zfs create -o mountpoint=$(pwd)/tmp/zfs raft/zfs else sudo mkfs.${type} "${loop}" > /dev/null 2>&1 sudo mount "${loop}" "./tmp/${type}" fi fi sudo chown $USER "./tmp/${type}" echo " done" done exit 0 fi if [ "${cmd}" = "teardown" ]; then for type in $types; do echo -n "Deleting $type loop device mount..." sudo umount "./tmp/${type}" rm -rf "./tmp/${type}" if [ "$type" != "tmpfs" ]; then # For zfs we need to destroy the pool if [ "$type" = "zfs" ]; then sudo zpool destroy raft fi # For regular file systems, remove the loopback disk device. loop=$(sudo losetup -a | grep ".${type}" | cut -f 1 -d :) sudo losetup -d "${loop}" rm "./tmp/.${type}" fi echo " done" done rmdir ./tmp exit 0 fi usage exit 1 raft-0.11.3/test/lib/fsm.c000066400000000000000000000075331415614527300152260ustar00rootroot00000000000000#include "fsm.h" #include "../../src/byte.h" #include "munit.h" /* In-memory implementation of the raft_fsm interface. 
*/ struct fsm { int x; int y; }; /* Command codes */ enum { SET_X = 1, SET_Y, ADD_X, ADD_Y }; static int fsmApply(struct raft_fsm *fsm, const struct raft_buffer *buf, void **result) { struct fsm *f = fsm->data; const void *cursor = buf->base; unsigned command; int value; if (buf->len != 16) { return -1; } command = (unsigned)byteGet64(&cursor); value = (int)byteGet64(&cursor); switch (command) { case SET_X: f->x = value; break; case SET_Y: f->y = value; break; case ADD_X: f->x += value; break; case ADD_Y: f->y += value; break; default: return -1; } *result = NULL; return 0; } static int fsmRestore(struct raft_fsm *fsm, struct raft_buffer *buf) { struct fsm *f = fsm->data; const void *cursor = buf->base; munit_assert_int(buf->len, ==, sizeof(uint64_t) * 2); f->x = byteGet64(&cursor); f->y = byteGet64(&cursor); raft_free(buf->base); return 0; } static int fsmEncodeSnapshot(int x, int y, struct raft_buffer *bufs[], unsigned *n_bufs) { struct raft_buffer *buf; void *cursor; *n_bufs = 1; *bufs = raft_malloc(sizeof **bufs); if (*bufs == NULL) { return RAFT_NOMEM; } buf = &(*bufs)[0]; buf->len = sizeof(uint64_t) * 2; buf->base = raft_malloc(buf->len); if (buf->base == NULL) { return RAFT_NOMEM; } cursor = (*bufs)[0].base; bytePut64(&cursor, x); bytePut64(&cursor, y); return 0; } static int fsmSnapshot(struct raft_fsm *fsm, struct raft_buffer *bufs[], unsigned *n_bufs) { struct fsm *f = fsm->data; return fsmEncodeSnapshot(f->x, f->y, bufs, n_bufs); } void FsmInit(struct raft_fsm *fsm) { struct fsm *f = munit_malloc(sizeof *fsm); f->x = 0; f->y = 0; fsm->version = 1; fsm->data = f; fsm->apply = fsmApply; fsm->snapshot = fsmSnapshot; fsm->restore = fsmRestore; } void FsmClose(struct raft_fsm *fsm) { struct fsm *f = fsm->data; free(f); } void FsmEncodeSetX(const int value, struct raft_buffer *buf) { void *cursor; buf->base = raft_malloc(16); buf->len = 16; munit_assert_ptr_not_null(buf->base); cursor = buf->base; bytePut64(&cursor, SET_X); bytePut64(&cursor, value); } void 
FsmEncodeAddX(const int value, struct raft_buffer *buf) { void *cursor; buf->base = raft_malloc(16); buf->len = 16; munit_assert_ptr_not_null(buf->base); cursor = buf->base; bytePut64(&cursor, ADD_X); bytePut64(&cursor, value); } void FsmEncodeSetY(const int value, struct raft_buffer *buf) { void *cursor; buf->base = raft_malloc(16); buf->len = 16; munit_assert_ptr_not_null(buf->base); cursor = buf->base; bytePut64(&cursor, SET_Y); bytePut64(&cursor, value); } void FsmEncodeAddY(const int value, struct raft_buffer *buf) { void *cursor; buf->base = raft_malloc(16); buf->len = 16; munit_assert_ptr_not_null(buf->base); cursor = buf->base; bytePut64(&cursor, ADD_Y); bytePut64(&cursor, value); } void FsmEncodeSnapshot(int x, int y, struct raft_buffer *bufs[], unsigned *n_bufs) { int rc; rc = fsmEncodeSnapshot(x, y, bufs, n_bufs); munit_assert_int(rc, ==, 0); } int FsmGetX(struct raft_fsm *fsm) { struct fsm *f = fsm->data; return f->x; } int FsmGetY(struct raft_fsm *fsm) { struct fsm *f = fsm->data; return f->y; } raft-0.11.3/test/lib/fsm.h000066400000000000000000000021031415614527300152170ustar00rootroot00000000000000/* Test implementation of the raft_fsm interface, with fault injection. * * The test FSM supports only two commands: setting x and setting y. */ #ifndef TEST_FSM_H #define TEST_FSM_H #include "../../include/raft.h" void FsmInit(struct raft_fsm *fsm); void FsmClose(struct raft_fsm *fsm); /* Encode a command to set x to the given value. */ void FsmEncodeSetX(int value, struct raft_buffer *buf); /* Encode a command to add the given value to x. */ void FsmEncodeAddX(int value, struct raft_buffer *buf); /* Encode a command to set y to the given value. */ void FsmEncodeSetY(int value, struct raft_buffer *buf); /* Encode a command to add the given value to y. */ void FsmEncodeAddY(int value, struct raft_buffer *buf); /* Encode a snapshot of an FSM with the given values for x and y. 
*/ void FsmEncodeSnapshot(int x, int y, struct raft_buffer *bufs[], unsigned *n_bufs); /* Return the current value of x or y. */ int FsmGetX(struct raft_fsm *fsm); int FsmGetY(struct raft_fsm *fsm); #endif /* TEST_FSM_H */ raft-0.11.3/test/lib/heap.c000066400000000000000000000060621415614527300153520ustar00rootroot00000000000000#include "heap.h" #include #include "fault.h" #include "munit.h" struct heap { int n; /* Number of outstanding allocations. */ size_t alignment; /* Value of last aligned alloc */ struct Fault fault; /* Fault trigger. */ }; static void heapInit(struct heap *h) { h->n = 0; h->alignment = 0; FaultInit(&h->fault); } static void *heapMalloc(void *data, size_t size) { struct heap *h = data; if (FaultTick(&h->fault)) { return NULL; } h->n++; return munit_malloc(size); } static void heapFree(void *data, void *ptr) { struct heap *h = data; h->n--; free(ptr); } static void *heapCalloc(void *data, size_t nmemb, size_t size) { struct heap *h = data; if (FaultTick(&h->fault)) { return NULL; } h->n++; return munit_calloc(nmemb, size); } static void *heapRealloc(void *data, void *ptr, size_t size) { struct heap *h = data; if (FaultTick(&h->fault)) { return NULL; } /* Increase the number of allocation only if ptr is NULL, since otherwise * realloc is a malloc plus a free. 
*/ if (ptr == NULL) { h->n++; } ptr = realloc(ptr, size); if (size == 0) { munit_assert_ptr_null(ptr); } else { munit_assert_ptr_not_null(ptr); } return ptr; } static void *heapAlignedAlloc(void *data, size_t alignment, size_t size) { struct heap *h = data; void *p; if (FaultTick(&h->fault)) { return NULL; } h->n++; p = aligned_alloc(alignment, size); munit_assert_ptr_not_null(p); h->alignment = alignment; return p; } static void heapAlignedFree(void *data, size_t alignment, void *ptr) { struct heap *h = data; munit_assert_int(alignment, ==, h->alignment); heapFree(data, ptr); } static int getIntParam(const MunitParameter params[], const char *name) { const char *value = munit_parameters_get(params, name); return value != NULL ? atoi(value) : 0; } void HeapSetUp(const MunitParameter params[], struct raft_heap *h) { struct heap *heap = munit_malloc(sizeof *heap); int delay = getIntParam(params, TEST_HEAP_FAULT_DELAY); int repeat = getIntParam(params, TEST_HEAP_FAULT_REPEAT); munit_assert_ptr_not_null(h); heapInit(heap); FaultConfig(&heap->fault, delay, repeat); h->data = heap; h->malloc = heapMalloc; h->free = heapFree; h->calloc = heapCalloc; h->realloc = heapRealloc; h->aligned_alloc = heapAlignedAlloc; h->aligned_free = heapAlignedFree; raft_heap_set(h); FaultPause(&heap->fault); } void HeapTearDown(struct raft_heap *h) { struct heap *heap = h->data; if (heap->n != 0) { munit_errorf("memory leak: %d outstanding allocations", heap->n); } free(heap); raft_heap_set_default(); } void HeapFaultConfig(struct raft_heap *h, int delay, int repeat) { struct heap *heap = h->data; FaultConfig(&heap->fault, delay, repeat); } void HeapFaultEnable(struct raft_heap *h) { struct heap *heap = h->data; FaultResume(&heap->fault); } raft-0.11.3/test/lib/heap.h000066400000000000000000000023171415614527300153560ustar00rootroot00000000000000/* Add support for fault injection and leak detection to stdlib's malloc() * family. 
*/ #ifndef TEST_HEAP_H #define TEST_HEAP_H #include "../../include/raft.h" #include "munit.h" /* Munit parameter defining after how many API calls the test raft_heap * implementation should start failing and return errors. The default is -1, * meaning that no failure will ever occur. */ #define TEST_HEAP_FAULT_DELAY "heap-fault-delay" /* Munit parameter defining how many consecutive times API calls against the * test raft_heap implementation should keep failing after they started * failing. This parameter has an effect only if 'store-fail-delay' is 0 or * greater. The default is 1, and -1 means "keep failing forever". */ #define TEST_HEAP_FAULT_REPEAT "heap-fault-repeat" /* Macro helpers. */ #define FIXTURE_HEAP struct raft_heap heap #define SET_UP_HEAP HeapSetUp(params, &f->heap) #define TEAR_DOWN_HEAP HeapTearDown(&f->heap) #define HEAP_FAULT_ENABLE HeapFaultEnable(&f->heap) void HeapSetUp(const MunitParameter params[], struct raft_heap *h); void HeapTearDown(struct raft_heap *h); void HeapFaultConfig(struct raft_heap *h, int delay, int repeat); void HeapFaultEnable(struct raft_heap *h); #endif /* TEST_HEAP_H */ raft-0.11.3/test/lib/loop.c000066400000000000000000000002301415614527300153750ustar00rootroot00000000000000#include "loop.h" void test_loop_walk_cb(uv_handle_t *handle, void *arg) { (void)arg; munit_logf(MUNIT_LOG_INFO, "handle %d", handle->type); } raft-0.11.3/test/lib/loop.h000066400000000000000000000136031415614527300154120ustar00rootroot00000000000000/* Add support for using the libuv loop in tests. */ #ifndef TEST_LOOP_H #define TEST_LOOP_H #include #include "../../include/raft.h" #include "munit.h" /* Max n. of loop iterations ran by a single function call */ #define LOOP_MAX_RUN 20 #define FIXTURE_LOOP struct uv_loop_s loop /* Older libuv versions might try to free() memory that was not allocated. 
*/ #if HAVE_DECL_UV_FS_O_CREAT #define LOOP_REPLACE_ALLOCATOR \ _rv = uv_replace_allocator(raft_malloc, raft_realloc, raft_calloc, \ raft_free); \ munit_assert_int(_rv, ==, 0) #else #define LOOP_REPLACE_ALLOCATOR #endif #define SETUP_LOOP \ { \ int _rv; \ LOOP_REPLACE_ALLOCATOR; \ _rv = uv_loop_init(&f->loop); \ munit_assert_int(_rv, ==, 0); \ } #define TEAR_DOWN_LOOP \ { \ int rv_; \ int alive_ = uv_loop_alive(&f->loop); \ if (alive_ != 0) { \ LOOP_STOP; \ } \ rv_ = uv_loop_close(&f->loop); \ if (rv_ != 0) { \ uv_walk(&f->loop, test_loop_walk_cb, NULL); \ munit_errorf("uv_loop_close: %s (%d)", uv_strerror(rv_), rv_); \ } \ rv_ = uv_replace_allocator(malloc, realloc, calloc, free); \ munit_assert_int(rv_, ==, 0); \ } /* Run the loop until there are no pending active handles or the given amount of * iterations is reached. */ #define LOOP_RUN(N) \ { \ unsigned i__; \ int rv__; \ for (i__ = 0; i__ < N; i__++) { \ rv__ = uv_run(&f->loop, UV_RUN_ONCE); \ if (rv__ < 0) { \ munit_errorf("uv_run: %s (%d)", uv_strerror(rv__), rv__); \ } \ if (rv__ == 0) { \ break; \ } \ } \ } /* Run the loop until the value stored through the given boolean pointer is * true. * * If the loop exhausts all active handles or if #LOOP_MAX_RUN is reached, the * test fails. */ #define LOOP_RUN_UNTIL(CONDITION) \ { \ unsigned __i; \ int __rv; \ for (__i = 0; __i < LOOP_MAX_RUN; __i++) { \ if (*(CONDITION)) { \ break; \ } \ __rv = uv_run(&f->loop, UV_RUN_ONCE); \ if (__rv < 0) { \ munit_errorf("uv_run: %s (%d)", uv_strerror(__rv), __rv); \ } \ if (__rv == 0) { \ if (*(CONDITION)) { \ break; \ } \ munit_errorf("uv_run: stopped after %u iterations", __i + 1); \ } \ } \ if (!*(CONDITION)) { \ munit_errorf("uv_run: condition not met in %d iterations", \ LOOP_MAX_RUN); \ } \ } /* Run the loop until there are no pending active handles. * * If there are still pending active handles after LOOP_MAX_RUN iterations, the * test will fail. * * This is meant to be used in tear down functions. 
*/ #define LOOP_STOP \ { \ int alive__; \ LOOP_RUN(LOOP_MAX_RUN); \ alive__ = uv_loop_alive(&f->loop); \ if (alive__ != 0) { \ munit_error("loop has still pending active handles"); \ } \ } void test_loop_walk_cb(uv_handle_t *handle, void *arg); #endif /* TEST_LOOP_H */ raft-0.11.3/test/lib/macros.h000066400000000000000000000004771415614527300157320ustar00rootroot00000000000000/** * Miscellaneous test macros. */ #ifndef TEST_MACROS_H_ #define TEST_MACROS_H_ #define GET_2ND_ARG(arg1, arg2, ...) arg2 #define GET_3RD_ARG(arg1, arg2, arg3, ...) arg3 #define GET_4TH_ARG(arg1, arg2, arg3, arg4, ...) arg4 #define GET_5TH_ARG(arg1, arg2, arg3, arg4, arg5, ...) arg5 #endif /* TEST_MACROS_H_ */ raft-0.11.3/test/lib/munit.c000066400000000000000000002056461415614527300156020ustar00rootroot00000000000000/* Copyright (c) 2013-2018 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*** Configuration ***/ /* This is just where the output from the test goes. 
It's really just * meant to let you choose stdout or stderr, but if anyone really want * to direct it to a file let me know, it would be fairly easy to * support. */ #if !defined(MUNIT_OUTPUT_FILE) # define MUNIT_OUTPUT_FILE stdout #endif /* This is a bit more useful; it tells µnit how to format the seconds in * timed tests. If your tests run for longer you might want to reduce * it, and if your computer is really fast and your tests are tiny you * can increase it. */ #if !defined(MUNIT_TEST_TIME_FORMAT) # define MUNIT_TEST_TIME_FORMAT "0.8f" #endif /* If you have long test names you might want to consider bumping * this. The result information takes 43 characters. */ #if !defined(MUNIT_TEST_NAME_LEN) # define MUNIT_TEST_NAME_LEN 37 #endif /* If you don't like the timing information, you can disable it by * defining MUNIT_DISABLE_TIMING. */ #if !defined(MUNIT_DISABLE_TIMING) # define MUNIT_ENABLE_TIMING #endif /*** End configuration ***/ #if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) # undef _POSIX_C_SOURCE #endif #if !defined(_POSIX_C_SOURCE) # define _POSIX_C_SOURCE 200809L #endif /* Solaris freaks out if you try to use a POSIX or SUS standard without * the "right" C standard. */ #if defined(_XOPEN_SOURCE) # undef _XOPEN_SOURCE #endif #if defined(__STDC_VERSION__) # if __STDC_VERSION__ >= 201112L # define _XOPEN_SOURCE 700 # elif __STDC_VERSION__ >= 199901L # define _XOPEN_SOURCE 600 # endif #endif /* Because, according to Microsoft, POSIX is deprecated. You've got * to appreciate the chutzpah. 
*/ #if defined(_MSC_VER) && !defined(_CRT_NONSTDC_NO_DEPRECATE) # define _CRT_NONSTDC_NO_DEPRECATE #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) # include #elif defined(_WIN32) /* https://msdn.microsoft.com/en-us/library/tf4dy80a.aspx */ #endif #include #include #include #include #include #include #include #include #if !defined(MUNIT_NO_NL_LANGINFO) && !defined(_WIN32) #define MUNIT_NL_LANGINFO #include #include #include #endif #if !defined(_WIN32) # include # include # include #else # include # include # include # if !defined(STDERR_FILENO) # define STDERR_FILENO _fileno(stderr) # endif #endif #include "munit.h" #define MUNIT_STRINGIFY(x) #x #define MUNIT_XSTRINGIFY(x) MUNIT_STRINGIFY(x) #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_CC) || defined(__IBMCPP__) # define MUNIT_THREAD_LOCAL __thread #elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201102L)) || defined(_Thread_local) # define MUNIT_THREAD_LOCAL _Thread_local #elif defined(_WIN32) # define MUNIT_THREAD_LOCAL __declspec(thread) #endif /* MSVC 12.0 will emit a warning at /W4 for code like 'do { ... } * while (0)', or 'do { ... } while (true)'. I'm pretty sure nobody * at Microsoft compiles with /W4. 
*/ #if defined(_MSC_VER) && (_MSC_VER <= 1800) #pragma warning(disable: 4127) #endif #if defined(_WIN32) || defined(__EMSCRIPTEN__) # define MUNIT_NO_FORK #endif #if defined(__EMSCRIPTEN__) # define MUNIT_NO_BUFFER #endif /*** Logging ***/ static MunitLogLevel munit_log_level_visible = MUNIT_LOG_INFO; static MunitLogLevel munit_log_level_fatal = MUNIT_LOG_ERROR; #if defined(MUNIT_THREAD_LOCAL) static MUNIT_THREAD_LOCAL bool munit_error_jmp_buf_valid = false; static MUNIT_THREAD_LOCAL jmp_buf munit_error_jmp_buf; #endif #if defined(MUNIT_THREAD_LOCAL) && defined(MUNIT_ALWAYS_TEAR_DOWN) static MUNIT_THREAD_LOCAL bool munit_tear_down_jmp_buf_valid = false; static MUNIT_THREAD_LOCAL jmp_buf munit_tear_down_jmp_buf; #endif /* At certain warning levels, mingw will trigger warnings about * suggesting the format attribute, which we've explicitly *not* set * because it will then choke on our attempts to use the MS-specific * I64 modifier for size_t (which we have to use since MSVC doesn't * support the C99 z modifier). */ #if defined(__MINGW32__) || defined(__MINGW64__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wsuggest-attribute=format" #endif MUNIT_PRINTF(5,0) static void munit_logf_exv(MunitLogLevel level, FILE* fp, const char* filename, int line, const char* format, va_list ap) { if (level < munit_log_level_visible) return; switch (level) { case MUNIT_LOG_DEBUG: fputs("Debug", fp); break; case MUNIT_LOG_INFO: fputs("Info", fp); break; case MUNIT_LOG_WARNING: fputs("Warning", fp); break; case MUNIT_LOG_ERROR: fputs("Error", fp); break; default: munit_logf_ex(MUNIT_LOG_ERROR, filename, line, "Invalid log level (%d)", level); return; } fputs(": ", fp); if (filename != NULL) fprintf(fp, "%s:%d: ", filename, line); vfprintf(fp, format, ap); fputc('\n', fp); } MUNIT_PRINTF(3,4) static void munit_logf_internal(MunitLogLevel level, FILE* fp, const char* format, ...) 
{ va_list ap; va_start(ap, format); munit_logf_exv(level, fp, NULL, 0, format, ap); va_end(ap); } static void munit_log_internal(MunitLogLevel level, FILE* fp, const char* message) { munit_logf_internal(level, fp, "%s", message); } void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...) { va_list ap; va_start(ap, format); munit_logf_exv(level, stderr, filename, line, format, ap); va_end(ap); if (level >= munit_log_level_fatal) { #if defined(MUNIT_THREAD_LOCAL) if (munit_error_jmp_buf_valid) longjmp(munit_error_jmp_buf, 1); #endif abort(); } } void munit_errorf_ex(const char* filename, int line, const char* format, ...) { va_list ap; va_start(ap, format); munit_logf_exv(MUNIT_LOG_ERROR, stderr, filename, line, format, ap); va_end(ap); #if defined(MUNIT_THREAD_LOCAL) && defined(MUNIT_ALWAYS_TEAR_DOWN) if (munit_tear_down_jmp_buf_valid) longjmp(munit_tear_down_jmp_buf, 1); #endif #if defined(MUNIT_THREAD_LOCAL) if (munit_error_jmp_buf_valid) longjmp(munit_error_jmp_buf, 1); #endif abort(); } #if defined(__MINGW32__) || defined(__MINGW64__) #pragma GCC diagnostic pop #endif #if !defined(MUNIT_STRERROR_LEN) # define MUNIT_STRERROR_LEN 80 #endif static void munit_log_errno(MunitLogLevel level, FILE* fp, const char* msg) { #if defined(MUNIT_NO_STRERROR_R) || (defined(__MINGW32__) && !defined(MINGW_HAS_SECURE_API)) munit_logf_internal(level, fp, "%s: %s (%d)", msg, strerror(errno), errno); #else char munit_error_str[MUNIT_STRERROR_LEN]; munit_error_str[0] = '\0'; #if !defined(_WIN32) strerror_r(errno, munit_error_str, MUNIT_STRERROR_LEN); #else strerror_s(munit_error_str, MUNIT_STRERROR_LEN, errno); #endif munit_logf_internal(level, fp, "%s: %s (%d)", msg, munit_error_str, errno); #endif } /*** Memory allocation ***/ void* munit_malloc_ex(const char* filename, int line, size_t size) { void* ptr; if (size == 0) return NULL; ptr = calloc(1, size); if (MUNIT_UNLIKELY(ptr == NULL)) { munit_logf_ex(MUNIT_LOG_ERROR, filename, line, 
"Failed to allocate %" MUNIT_SIZE_MODIFIER "u bytes.", size); } return ptr; } /*** Timer code ***/ #if defined(MUNIT_ENABLE_TIMING) #define psnip_uint64_t munit_uint64_t #define psnip_uint32_t munit_uint32_t /* Code copied from portable-snippets * . If you need to * change something, please do it there so we can keep the code in * sync. */ /* Clocks (v1) * Portable Snippets - https://github.com/nemequ/portable-snippets * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. For * details, see the Creative Commons Zero 1.0 Universal license at * https://creativecommons.org/publicdomain/zero/1.0/ */ #if !defined(PSNIP_CLOCK_H) #define PSNIP_CLOCK_H #if !defined(psnip_uint64_t) # include "../exact-int/exact-int.h" #endif #if !defined(PSNIP_CLOCK_STATIC_INLINE) # if defined(__GNUC__) # define PSNIP_CLOCK__COMPILER_ATTRIBUTES __attribute__((__unused__)) # else # define PSNIP_CLOCK__COMPILER_ATTRIBUTES # endif # define PSNIP_CLOCK__FUNCTION PSNIP_CLOCK__COMPILER_ATTRIBUTES static #endif enum PsnipClockType { /* This clock provides the current time, in units since 1970-01-01 * 00:00:00 UTC not including leap seconds. In other words, UNIX * time. Keep in mind that this clock doesn't account for leap * seconds, and can go backwards (think NTP adjustments). */ PSNIP_CLOCK_TYPE_WALL = 1, /* The CPU time is a clock which increases only when the current * process is active (i.e., it doesn't increment while blocking on * I/O). */ PSNIP_CLOCK_TYPE_CPU = 2, /* Monotonic time is always running (unlike CPU time), but it only ever moves forward unless you reboot the system. Things like NTP adjustments have no effect on this clock. 
*/ PSNIP_CLOCK_TYPE_MONOTONIC = 3 }; struct PsnipClockTimespec { psnip_uint64_t seconds; psnip_uint64_t nanoseconds; }; /* Methods we support: */ #define PSNIP_CLOCK_METHOD_CLOCK_GETTIME 1 #define PSNIP_CLOCK_METHOD_TIME 2 #define PSNIP_CLOCK_METHOD_GETTIMEOFDAY 3 #define PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER 4 #define PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME 5 #define PSNIP_CLOCK_METHOD_CLOCK 6 #define PSNIP_CLOCK_METHOD_GETPROCESSTIMES 7 #define PSNIP_CLOCK_METHOD_GETRUSAGE 8 #define PSNIP_CLOCK_METHOD_GETSYSTEMTIMEPRECISEASFILETIME 9 #define PSNIP_CLOCK_METHOD_GETTICKCOUNT64 10 #include #if defined(HEDLEY_UNREACHABLE) # define PSNIP_CLOCK_UNREACHABLE() HEDLEY_UNREACHABLE() #else # define PSNIP_CLOCK_UNREACHABLE() assert(0) #endif /* Choose an implementation */ /* #undef PSNIP_CLOCK_WALL_METHOD */ /* #undef PSNIP_CLOCK_CPU_METHOD */ /* #undef PSNIP_CLOCK_MONOTONIC_METHOD */ /* We want to be able to detect the libc implementation, so we include ( isn't available everywhere). */ #if defined(__unix__) || defined(__unix) || defined(__linux__) # include # include #endif #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) /* These are known to work without librt. If you know of others * please let us know so we can add them. 
*/ # if \ (defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17))) || \ (defined(__FreeBSD__)) # define PSNIP_CLOCK_HAVE_CLOCK_GETTIME # elif !defined(PSNIP_CLOCK_NO_LIBRT) # define PSNIP_CLOCK_HAVE_CLOCK_GETTIME # endif #endif #if defined(_WIN32) # if !defined(PSNIP_CLOCK_CPU_METHOD) # define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_GETPROCESSTIMES # endif # if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) # define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER # endif #endif #if defined(__MACH__) && !defined(__gnu_hurd__) # if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) # define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME # endif #endif #if defined(PSNIP_CLOCK_HAVE_CLOCK_GETTIME) # include # if !defined(PSNIP_CLOCK_WALL_METHOD) # if defined(CLOCK_REALTIME_PRECISE) # define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_WALL CLOCK_REALTIME_PRECISE # elif !defined(__sun) # define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_WALL CLOCK_REALTIME # endif # endif # if !defined(PSNIP_CLOCK_CPU_METHOD) # if defined(_POSIX_CPUTIME) || defined(CLOCK_PROCESS_CPUTIME_ID) # define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_CPU CLOCK_PROCESS_CPUTIME_ID # elif defined(CLOCK_VIRTUAL) # define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_CPU CLOCK_VIRTUAL # endif # endif # if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) # if defined(CLOCK_MONOTONIC_RAW) # define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC # elif defined(CLOCK_MONOTONIC_PRECISE) # define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC_PRECISE # elif defined(_POSIX_MONOTONIC_CLOCK) || defined(CLOCK_MONOTONIC) # define 
PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME # define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC # endif # endif #endif #if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200112L) # if !defined(PSNIP_CLOCK_WALL_METHOD) # define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_GETTIMEOFDAY # endif #endif #if !defined(PSNIP_CLOCK_WALL_METHOD) # define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_TIME #endif #if !defined(PSNIP_CLOCK_CPU_METHOD) # define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK #endif /* Primarily here for testing. */ #if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) && defined(PSNIP_CLOCK_REQUIRE_MONOTONIC) # error No monotonic clock found. #endif /* Implementations */ #if \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) || \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) || \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_TIME)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_TIME)) # include #endif #if \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY)) # include #endif #if \ 
(defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) || \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64)) # include #endif #if \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE)) # include # include #endif #if \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME)) # include # include # include #endif /*** Implementations ***/ #define PSNIP_CLOCK_NSEC_PER_SEC ((psnip_uint32_t) (1000000000ULL)) #if \ (defined(PSNIP_CLOCK_CPU_METHOD) && (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) || \ (defined(PSNIP_CLOCK_WALL_METHOD) && (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) || \ (defined(PSNIP_CLOCK_MONOTONIC_METHOD) && (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock__clock_getres (clockid_t clk_id) { struct timespec res; int r; r = clock_getres(clk_id, &res); if (r != 0) return 0; return (psnip_uint32_t) 
(PSNIP_CLOCK_NSEC_PER_SEC / res.tv_nsec); } PSNIP_CLOCK__FUNCTION int psnip_clock__clock_gettime (clockid_t clk_id, struct PsnipClockTimespec* res) { struct timespec ts; if (clock_gettime(clk_id, &ts) != 0) return -10; res->seconds = (psnip_uint64_t) (ts.tv_sec); res->nanoseconds = (psnip_uint64_t) (ts.tv_nsec); return 0; } #endif PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_wall_get_precision (void) { #if !defined(PSNIP_CLOCK_WALL_METHOD) return 0; #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_WALL); #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY return 1000000; #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME return 1; #else return 0; #endif } PSNIP_CLOCK__FUNCTION int psnip_clock_wall_get_time (struct PsnipClockTimespec* res) { (void) res; #if !defined(PSNIP_CLOCK_WALL_METHOD) return -2; #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_WALL, res); #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME res->seconds = time(NULL); res->nanoseconds = 0; #elif defined(PSNIP_CLOCK_WALL_METHOD) && PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY struct timeval tv; if (gettimeofday(&tv, NULL) != 0) return -6; res->seconds = tv.tv_sec; res->nanoseconds = tv.tv_usec * 1000; #else return -2; #endif return 0; } PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_cpu_get_precision (void) { #if !defined(PSNIP_CLOCK_CPU_METHOD) return 0; #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_CPU); #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK return 
CLOCKS_PER_SEC; #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES return PSNIP_CLOCK_NSEC_PER_SEC / 100; #else return 0; #endif } PSNIP_CLOCK__FUNCTION int psnip_clock_cpu_get_time (struct PsnipClockTimespec* res) { #if !defined(PSNIP_CLOCK_CPU_METHOD) (void) res; return -2; #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_CPU, res); #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK clock_t t = clock(); if (t == ((clock_t) -1)) return -5; res->seconds = t / CLOCKS_PER_SEC; res->nanoseconds = (t % CLOCKS_PER_SEC) * (PSNIP_CLOCK_NSEC_PER_SEC / CLOCKS_PER_SEC); #elif defined(PSNIP_CLOCK_CPU_METHOD) && PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES FILETIME CreationTime, ExitTime, KernelTime, UserTime; LARGE_INTEGER date, adjust; if (!GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime, &KernelTime, &UserTime)) return -7; /* http://www.frenk.com/2009/12/convert-filetime-to-unix-timestamp/ */ date.HighPart = UserTime.dwHighDateTime; date.LowPart = UserTime.dwLowDateTime; adjust.QuadPart = 11644473600000 * 10000; date.QuadPart -= adjust.QuadPart; res->seconds = date.QuadPart / 10000000; res->nanoseconds = (date.QuadPart % 10000000) * (PSNIP_CLOCK_NSEC_PER_SEC / 100); #elif PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE struct rusage usage; if (getrusage(RUSAGE_SELF, &usage) != 0) return -8; res->seconds = usage.ru_utime.tv_sec; res->nanoseconds = tv.tv_usec * 1000; #else (void) res; return -2; #endif return 0; } PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_monotonic_get_precision (void) { #if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) return 0; #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC); #elif 
defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME static mach_timebase_info_data_t tbi = { 0, }; if (tbi.denom == 0) mach_timebase_info(&tbi); return (psnip_uint32_t) (tbi.numer / tbi.denom); #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64 return 1000; #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER LARGE_INTEGER Frequency; QueryPerformanceFrequency(&Frequency); return (psnip_uint32_t) ((Frequency.QuadPart > PSNIP_CLOCK_NSEC_PER_SEC) ? PSNIP_CLOCK_NSEC_PER_SEC : Frequency.QuadPart); #else return 0; #endif } PSNIP_CLOCK__FUNCTION int psnip_clock_monotonic_get_time (struct PsnipClockTimespec* res) { #if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) (void) res; return -2; #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC, res); #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME psnip_uint64_t nsec = mach_absolute_time(); static mach_timebase_info_data_t tbi = { 0, }; if (tbi.denom == 0) mach_timebase_info(&tbi); nsec *= ((psnip_uint64_t) tbi.numer) / ((psnip_uint64_t) tbi.denom); res->seconds = nsec / PSNIP_CLOCK_NSEC_PER_SEC; res->nanoseconds = nsec % PSNIP_CLOCK_NSEC_PER_SEC; #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) && PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER LARGE_INTEGER t, f; if (QueryPerformanceCounter(&t) == 0) return -12; QueryPerformanceFrequency(&f); res->seconds = t.QuadPart / f.QuadPart; res->nanoseconds = t.QuadPart % f.QuadPart; if (f.QuadPart > PSNIP_CLOCK_NSEC_PER_SEC) res->nanoseconds /= f.QuadPart / PSNIP_CLOCK_NSEC_PER_SEC; else res->nanoseconds *= PSNIP_CLOCK_NSEC_PER_SEC / f.QuadPart; #elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) 
&& PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64 const ULONGLONG msec = GetTickCount64(); res->seconds = msec / 1000; res->nanoseconds = sec % 1000; #else return -2; #endif return 0; } /* Returns the number of ticks per second for the specified clock. * For example, a clock with millisecond precision would return 1000, * and a clock with 1 second (such as the time() function) would * return 1. * * If the requested clock isn't available, it will return 0. * Hopefully this will be rare, but if it happens to you please let us * know so we can work on finding a way to support your system. * * Note that different clocks on the same system often have a * different precisions. */ PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_get_precision (enum PsnipClockType clock_type) { switch (clock_type) { case PSNIP_CLOCK_TYPE_MONOTONIC: return psnip_clock_monotonic_get_precision (); case PSNIP_CLOCK_TYPE_CPU: return psnip_clock_cpu_get_precision (); case PSNIP_CLOCK_TYPE_WALL: return psnip_clock_wall_get_precision (); } PSNIP_CLOCK_UNREACHABLE(); return 0; } /* Set the provided timespec to the requested time. Returns 0 on * success, or a negative value on failure. 
*/ PSNIP_CLOCK__FUNCTION int psnip_clock_get_time (enum PsnipClockType clock_type, struct PsnipClockTimespec* res) { assert(res != NULL); switch (clock_type) { case PSNIP_CLOCK_TYPE_MONOTONIC: return psnip_clock_monotonic_get_time (res); case PSNIP_CLOCK_TYPE_CPU: return psnip_clock_cpu_get_time (res); case PSNIP_CLOCK_TYPE_WALL: return psnip_clock_wall_get_time (res); } return -1; } #endif /* !defined(PSNIP_CLOCK_H) */ static psnip_uint64_t munit_clock_get_elapsed(struct PsnipClockTimespec* start, struct PsnipClockTimespec* end) { psnip_uint64_t r = (end->seconds - start->seconds) * PSNIP_CLOCK_NSEC_PER_SEC; if (end->nanoseconds < start->nanoseconds) { r -= (start->nanoseconds - end->nanoseconds); } else { r += (end->nanoseconds - start->nanoseconds); } return r; } #else # include #endif /* defined(MUNIT_ENABLE_TIMING) */ /*** PRNG stuff ***/ /* This is (unless I screwed up, which is entirely possible) the * version of PCG with 32-bit state. It was chosen because it has a * small enough state that we should reliably be able to use CAS * instead of requiring a lock for thread-safety. * * If I did screw up, I probably will not bother changing it unless * there is a significant bias. It's really not important this be * particularly strong, as long as it is fairly random it's much more * important that it be reproducible, so bug reports have a better * chance of being reproducible. 
*/ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) && !defined(__EMSCRIPTEN__) && (!defined(__GNUC_MINOR__) || (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)) # define HAVE_STDATOMIC #elif defined(__clang__) # if __has_extension(c_atomic) # define HAVE_CLANG_ATOMICS # endif #endif /* Workaround for http://llvm.org/bugs/show_bug.cgi?id=26911 */ #if defined(__clang__) && defined(_WIN32) # undef HAVE_STDATOMIC # if defined(__c2__) # undef HAVE_CLANG_ATOMICS # endif #endif #if defined(_OPENMP) # define ATOMIC_UINT32_T uint32_t # define ATOMIC_UINT32_INIT(x) (x) #elif defined(HAVE_STDATOMIC) # include # define ATOMIC_UINT32_T _Atomic uint32_t # define ATOMIC_UINT32_INIT(x) ATOMIC_VAR_INIT(x) #elif defined(HAVE_CLANG_ATOMICS) # define ATOMIC_UINT32_T _Atomic uint32_t # define ATOMIC_UINT32_INIT(x) (x) #elif defined(_WIN32) # define ATOMIC_UINT32_T volatile LONG # define ATOMIC_UINT32_INIT(x) (x) #else # define ATOMIC_UINT32_T volatile uint32_t # define ATOMIC_UINT32_INIT(x) (x) #endif static ATOMIC_UINT32_T munit_rand_state = ATOMIC_UINT32_INIT(42); #if defined(_OPENMP) static inline void munit_atomic_store(ATOMIC_UINT32_T* dest, ATOMIC_UINT32_T value) { #pragma omp critical (munit_atomics) *dest = value; } static inline uint32_t munit_atomic_load(ATOMIC_UINT32_T* src) { int ret; #pragma omp critical (munit_atomics) ret = *src; return ret; } static inline uint32_t munit_atomic_cas(ATOMIC_UINT32_T* dest, ATOMIC_UINT32_T* expected, ATOMIC_UINT32_T desired) { bool ret; #pragma omp critical (munit_atomics) { if (*dest == *expected) { *dest = desired; ret = true; } else { ret = false; } } return ret; } #elif defined(HAVE_STDATOMIC) # define munit_atomic_store(dest, value) atomic_store(dest, value) # define munit_atomic_load(src) atomic_load(src) # define munit_atomic_cas(dest, expected, value) atomic_compare_exchange_weak(dest, expected, value) #elif defined(HAVE_CLANG_ATOMICS) # define munit_atomic_store(dest, value) 
__c11_atomic_store(dest, value, __ATOMIC_SEQ_CST) # define munit_atomic_load(src) __c11_atomic_load(src, __ATOMIC_SEQ_CST) # define munit_atomic_cas(dest, expected, value) __c11_atomic_compare_exchange_weak(dest, expected, value, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) #elif defined(__GNUC__) && (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) # define munit_atomic_store(dest, value) __atomic_store_n(dest, value, __ATOMIC_SEQ_CST) # define munit_atomic_load(src) __atomic_load_n(src, __ATOMIC_SEQ_CST) # define munit_atomic_cas(dest, expected, value) __atomic_compare_exchange_n(dest, expected, value, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) #elif defined(__GNUC__) && (__GNUC__ >= 4) # define munit_atomic_store(dest,value) do { *(dest) = (value); } while (0) # define munit_atomic_load(src) (*(src)) # define munit_atomic_cas(dest, expected, value) __sync_bool_compare_and_swap(dest, *expected, value) #elif defined(_WIN32) /* Untested */ # define munit_atomic_store(dest,value) do { *(dest) = (value); } while (0) # define munit_atomic_load(src) (*(src)) # define munit_atomic_cas(dest, expected, value) InterlockedCompareExchange((dest), (value), *(expected)) #else # warning No atomic implementation, PRNG will not be thread-safe # define munit_atomic_store(dest, value) do { *(dest) = (value); } while (0) # define munit_atomic_load(src) (*(src)) static inline bool munit_atomic_cas(ATOMIC_UINT32_T* dest, ATOMIC_UINT32_T* expected, ATOMIC_UINT32_T desired) { if (*dest == *expected) { *dest = desired; return true; } else { return false; } } #endif #define MUNIT_PRNG_MULTIPLIER (747796405U) #define MUNIT_PRNG_INCREMENT (1729U) static munit_uint32_t munit_rand_next_state(munit_uint32_t state) { return state * MUNIT_PRNG_MULTIPLIER + MUNIT_PRNG_INCREMENT; } static munit_uint32_t munit_rand_from_state(munit_uint32_t state) { munit_uint32_t res = ((state >> ((state >> 28) + 4)) ^ state) * (277803737U); res ^= res >> 22; return res; } void munit_rand_seed(munit_uint32_t seed) { 
  /* Pre-mix the seed through the LCG so similar seeds diverge quickly. */
  munit_uint32_t state = munit_rand_next_state(seed + MUNIT_PRNG_INCREMENT);
  munit_atomic_store(&munit_rand_state, state);
}

/* Derive an initial seed from the wall clock (nanoseconds when timing is
 * enabled, otherwise time(NULL)). */
static munit_uint32_t
munit_rand_generate_seed(void) {
  munit_uint32_t seed, state;
#if defined(MUNIT_ENABLE_TIMING)
  struct PsnipClockTimespec wc = { 0, 0 };
  psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &wc);
  seed = (munit_uint32_t) wc.nanoseconds;
#else
  seed = (munit_uint32_t) time(NULL);
#endif
  state = munit_rand_next_state(seed + MUNIT_PRNG_INCREMENT);
  return munit_rand_from_state(state);
}

/* Advance *state and return the value produced by the previous state. */
static munit_uint32_t
munit_rand_state_uint32(munit_uint32_t* state) {
  const munit_uint32_t old = *state;
  *state = munit_rand_next_state(old);
  return munit_rand_from_state(old);
}

/* Thread-safe random 32-bit value: CAS-retry loop over the global state. */
munit_uint32_t
munit_rand_uint32(void) {
  munit_uint32_t old, state;

  do {
    old = munit_atomic_load(&munit_rand_state);
    state = munit_rand_next_state(old);
  } while (!munit_atomic_cas(&munit_rand_state, &old, state));

  return munit_rand_from_state(old);
}

/* Fill `data` with `size` random bytes using the local *state, 4 bytes at
 * a time with a partial word at the end. */
static void
munit_rand_state_memory(munit_uint32_t* state, size_t size, munit_uint8_t data[MUNIT_ARRAY_PARAM(size)]) {
  size_t members_remaining = size / sizeof(munit_uint32_t);
  size_t bytes_remaining = size % sizeof(munit_uint32_t);
  munit_uint8_t* b = data;
  munit_uint32_t rv;
  while (members_remaining-- > 0) {
    rv = munit_rand_state_uint32(state);
    memcpy(b, &rv, sizeof(munit_uint32_t));
    b += sizeof(munit_uint32_t);
  }
  if (bytes_remaining != 0) {
    rv = munit_rand_state_uint32(state);
    memcpy(b, &rv, bytes_remaining);
  }
}

/* Thread-safe version of munit_rand_state_memory: work on a local copy of
 * the global state and publish it with CAS, retrying on contention. */
void
munit_rand_memory(size_t size, munit_uint8_t data[MUNIT_ARRAY_PARAM(size)]) {
  munit_uint32_t old, state;

  do {
    state = old = munit_atomic_load(&munit_rand_state);
    munit_rand_state_memory(&state, size, data);
  } while (!munit_atomic_cas(&munit_rand_state, &old, state));
}

/* Uniform value in [0, max] (inclusive), rejection-sampled to avoid
 * modulo bias; `salt` perturbs the stream per call site. */
static munit_uint32_t
munit_rand_state_at_most(munit_uint32_t* state, munit_uint32_t salt, munit_uint32_t max) {
  /* We want (UINT32_MAX + 1) % max, which in unsigned arithmetic is the same
   * as (UINT32_MAX + 1 - max) % max = -max % max.  We compute -max using not
   * to avoid compiler warnings. */
  const munit_uint32_t min = (~max + 1U) % max;
  munit_uint32_t x;

  if (max == (~((munit_uint32_t) 0U)))
    return munit_rand_state_uint32(state) ^ salt;

  max++;

  do {
    x = munit_rand_state_uint32(state) ^ salt;
  } while (x < min);

  return x % max;
}

/* Thread-safe wrapper around munit_rand_state_at_most (CAS-retry loop). */
static munit_uint32_t
munit_rand_at_most(munit_uint32_t salt, munit_uint32_t max) {
  munit_uint32_t old, state;
  munit_uint32_t retval;

  do {
    state = old = munit_atomic_load(&munit_rand_state);
    retval = munit_rand_state_at_most(&state, salt, max);
  } while (!munit_atomic_cas(&munit_rand_state, &old, state));

  return retval;
}

/* Uniform int in [min, max]; arguments may be given in either order, and
 * the span is clamped to the 32-bit range. */
int
munit_rand_int_range(int min, int max) {
  munit_uint64_t range = (munit_uint64_t) max - (munit_uint64_t) min;

  if (min > max)
    return munit_rand_int_range(max, min);

  if (range > (~((munit_uint32_t) 0U)))
    range = (~((munit_uint32_t) 0U));

  return min + munit_rand_at_most(0, (munit_uint32_t) range);
}

/* Uniform double in [0, 1), thread-safe via the usual CAS-retry loop. */
double
munit_rand_double(void) {
  munit_uint32_t old, state;
  double retval = 0.0;

  do {
    state = old = munit_atomic_load(&munit_rand_state);
    /* See http://mumble.net/~campbell/tmp/random_real.c for how to do
     * this right.  Patches welcome if you feel that this is too
     * biased.
*/ retval = munit_rand_state_uint32(&state) / ((~((munit_uint32_t) 0U)) + 1.0); } while (!munit_atomic_cas(&munit_rand_state, &old, state)); return retval; } /*** Test suite handling ***/ typedef struct { unsigned int successful; unsigned int skipped; unsigned int failed; unsigned int errored; #if defined(MUNIT_ENABLE_TIMING) munit_uint64_t cpu_clock; munit_uint64_t wall_clock; #endif } MunitReport; typedef struct { const char* prefix; const MunitSuite* suite; const char** tests; munit_uint32_t seed; unsigned int iterations; MunitParameter* parameters; bool single_parameter_mode; void* user_data; MunitReport report; bool colorize; bool fork; bool show_stderr; bool fatal_failures; } MunitTestRunner; const char* munit_parameters_get(const MunitParameter params[], const char* key) { const MunitParameter* param; for (param = params ; param != NULL && param->name != NULL ; param++) if (strcmp(param->name, key) == 0) return param->value; return NULL; } #if defined(MUNIT_ENABLE_TIMING) static void munit_print_time(FILE* fp, munit_uint64_t nanoseconds) { fprintf(fp, "%" MUNIT_TEST_TIME_FORMAT, ((double) nanoseconds) / ((double) PSNIP_CLOCK_NSEC_PER_SEC)); } #endif /* Add a parameter to an array of parameters. */ static MunitResult munit_parameters_add(size_t* params_size, MunitParameter* params[MUNIT_ARRAY_PARAM(*params_size)], char* name, char* value) { *params = realloc(*params, sizeof(MunitParameter) * (*params_size + 2)); if (*params == NULL) return MUNIT_ERROR; (*params)[*params_size].name = name; (*params)[*params_size].value = value; (*params_size)++; (*params)[*params_size].name = NULL; (*params)[*params_size].value = NULL; return MUNIT_OK; } /* Concatenate two strings, but just return one of the components * unaltered if the other is NULL or "". */ static char* munit_maybe_concat(size_t* len, char* prefix, char* suffix) { char* res; size_t res_l; const size_t prefix_l = prefix != NULL ? strlen(prefix) : 0; const size_t suffix_l = suffix != NULL ? 
    strlen(suffix) : 0;

  if (prefix_l == 0 && suffix_l == 0) {
    res = NULL;
    res_l = 0;
  } else if (prefix_l == 0 && suffix_l != 0) {
    /* Only one part is non-empty: return it without copying. */
    res = suffix;
    res_l = suffix_l;
  } else if (prefix_l != 0 && suffix_l == 0) {
    res = prefix;
    res_l = prefix_l;
  } else {
    /* Both non-empty: allocate and join.  NOTE(review): malloc result is
     * not checked before the memcpy — confirm OOM policy with upstream. */
    res_l = prefix_l + suffix_l;
    res = malloc(res_l + 1);
    memcpy(res, prefix, prefix_l);
    memcpy(res + prefix_l, suffix, suffix_l);
    res[res_l] = 0;
  }

  if (len != NULL)
    *len = res_l;

  return res;
}

/* Possibly free a string returned by munit_maybe_concat.  Frees only when
 * the result was a fresh allocation (i.e. not one of the inputs). */
static void
munit_maybe_free_concat(char* s, const char* prefix, const char* suffix) {
  if (prefix != s && suffix != s)
    free(s);
}

/* Cheap string hash function (djb2), just used to salt the PRNG. */
static munit_uint32_t
munit_str_hash(const char* name) {
  const char *p;
  munit_uint32_t h = 5381U;

  for (p = name; *p != '\0'; p++)
    h = (h << 5) + h + *p;

  return h;
}

/* Copy everything from fd `from` to fd `to` until EOF or a write error,
 * handling short writes. */
static void
munit_splice(int from, int to) {
  munit_uint8_t buf[1024];
#if !defined(_WIN32)
  ssize_t len;
  ssize_t bytes_written;
  ssize_t write_res;
#else
  int len;
  int bytes_written;
  int write_res;
#endif
  do {
    len = read(from, buf, sizeof(buf));
    if (len > 0) {
      bytes_written = 0;
      do {
        write_res = write(to, buf + bytes_written, len - bytes_written);
        if (write_res < 0)
          break;
        bytes_written += write_res;
      } while (bytes_written < len);
    } else
      break;
  } while (true);
}

/* This is the part that should be handled in the child process.  Runs the
 * test (with setup/tear_down) for the requested number of iterations,
 * accumulating results and timings into `report`. */
static MunitResult
munit_test_runner_exec(MunitTestRunner* runner, const MunitTest* test, const MunitParameter params[], MunitReport* report) {
  unsigned int iterations = runner->iterations;
  MunitResult result = MUNIT_FAIL;
#if defined(MUNIT_ENABLE_TIMING)
  struct PsnipClockTimespec wall_clock_begin = { 0, 0 }, wall_clock_end = { 0, 0 };
  struct PsnipClockTimespec cpu_clock_begin = { 0, 0 }, cpu_clock_end = { 0, 0 };
#endif
  unsigned int i = 0;

  /* Single-iteration tests always run once; 0 means "use the suite's
   * default". */
  if ((test->options & MUNIT_TEST_OPTION_SINGLE_ITERATION) == MUNIT_TEST_OPTION_SINGLE_ITERATION)
    iterations = 1;
  else if (iterations == 0)
    iterations =
      runner->suite->iterations;

  munit_rand_seed(runner->seed);
  do {
    void* data = (test->setup == NULL) ? runner->user_data : test->setup(params, runner->user_data);

#if defined(MUNIT_ENABLE_TIMING)
    psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &wall_clock_begin);
    psnip_clock_get_time(PSNIP_CLOCK_TYPE_CPU, &cpu_clock_begin);
#endif

#if defined(MUNIT_THREAD_LOCAL) && defined(MUNIT_ALWAYS_TEAR_DOWN)
    /* Arrange for tear_down to run even if the test longjmps out on an
     * assertion failure: the first setjmp return installs the buffer, a
     * longjmp back here runs tear_down and re-raises via the error
     * buffer. */
    if (test->tear_down != NULL) {
      if (MUNIT_UNLIKELY(setjmp(munit_tear_down_jmp_buf) != 0)) {
        test->tear_down(data);
        longjmp(munit_error_jmp_buf, 1);
      } else {
        munit_tear_down_jmp_buf_valid = true;
      }
    }
#endif

    result = test->test(params, data);

#if defined(MUNIT_ENABLE_TIMING)
    psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &wall_clock_end);
    psnip_clock_get_time(PSNIP_CLOCK_TYPE_CPU, &cpu_clock_end);
#endif

    if (test->tear_down != NULL)
      test->tear_down(data);

    if (MUNIT_LIKELY(result == MUNIT_OK)) {
      report->successful++;
#if defined(MUNIT_ENABLE_TIMING)
      report->wall_clock += munit_clock_get_elapsed(&wall_clock_begin, &wall_clock_end);
      report->cpu_clock += munit_clock_get_elapsed(&cpu_clock_begin, &cpu_clock_end);
#endif
    } else {
      /* Any non-OK result stops the iteration loop. */
      switch ((int) result) {
        case MUNIT_SKIP:
          report->skipped++;
          break;
        case MUNIT_FAIL:
          report->failed++;
          break;
        case MUNIT_ERROR:
          report->errored++;
          break;
        default:
          break;
      }
      break;
    }
  } while (++i < iterations);

  return result;
}

/* Short result labels; emoticon variants are opt-in. */
#if defined(MUNIT_EMOTICON)
#  define MUNIT_RESULT_STRING_OK    ":)"
#  define MUNIT_RESULT_STRING_SKIP  ":|"
#  define MUNIT_RESULT_STRING_FAIL  ":("
#  define MUNIT_RESULT_STRING_ERROR ":o"
#  define MUNIT_RESULT_STRING_TODO  ":/"
#else
#  define MUNIT_RESULT_STRING_OK    "OK   "
#  define MUNIT_RESULT_STRING_SKIP  "SKIP "
#  define MUNIT_RESULT_STRING_FAIL  "FAIL "
#  define MUNIT_RESULT_STRING_ERROR "ERROR"
#  define MUNIT_RESULT_STRING_TODO  "TODO "
#endif

/* Print `string`, wrapped in an ANSI color escape when colorization is
 * enabled ('1' red, '2' green, '3' yellow). */
static void
munit_test_runner_print_color(const MunitTestRunner* runner, const char* string, char color) {
  if (runner->colorize)
    fprintf(MUNIT_OUTPUT_FILE, "\x1b[3%cm%s\x1b[39m", color, string);
  else
    fputs(string, MUNIT_OUTPUT_FILE);
}

#if !defined(MUNIT_NO_BUFFER)
/* Redirect the process's stderr into stderr_buf (a temp file) so test
 * output can be replayed later; returns a dup of the original stderr fd
 * for munit_restore_stderr, or -1 if there is nothing to restore. */
static int
munit_replace_stderr(FILE* stderr_buf) {
  if (stderr_buf != NULL) {
    const int orig_stderr = dup(STDERR_FILENO);

    int errfd = fileno(stderr_buf);
    if (MUNIT_UNLIKELY(errfd == -1)) {
      exit(EXIT_FAILURE);
    }

    dup2(errfd, STDERR_FILENO);

    return orig_stderr;
  }

  return -1;
}

/* Undo munit_replace_stderr. */
static void
munit_restore_stderr(int orig_stderr) {
  if (orig_stderr != -1) {
    dup2(orig_stderr, STDERR_FILENO);
    close(orig_stderr);
  }
}
#endif /* !defined(MUNIT_NO_BUFFER) */

/* Run a test with the specified parameters.  When forking is enabled the
 * test runs in a child process that ships its MunitReport back through a
 * pipe, so a crashing test cannot take down the runner; otherwise it runs
 * in-process guarded by setjmp.  Afterwards the result line is printed
 * and buffered stderr is replayed on failure (or when --show-stderr). */
static void
munit_test_runner_run_test_with_params(MunitTestRunner* runner, const MunitTest* test, const MunitParameter params[]) {
  MunitResult result = MUNIT_OK;
  MunitReport report = {
    0, 0, 0, 0,
#if defined(MUNIT_ENABLE_TIMING)
    0, 0
#endif
  };
  unsigned int output_l;
  bool first;
  const MunitParameter* param;
  FILE* stderr_buf;
#if !defined(MUNIT_NO_FORK)
  int pipefd[2];
  pid_t fork_pid;
  ssize_t bytes_written = 0;
  ssize_t write_res;
  ssize_t bytes_read = 0;
  ssize_t read_res;
  int status = 0;
  pid_t changed_pid;
#endif

  /* Echo the parameter assignment, padded to the test-name column. */
  if (params != NULL) {
    output_l = 2;
    fputs("  ", MUNIT_OUTPUT_FILE);
    first = true;
    for (param = params ; param != NULL && param->name != NULL ; param++) {
      if (!first) {
        fputs(", ", MUNIT_OUTPUT_FILE);
        output_l += 2;
      } else {
        first = false;
      }

      output_l += fprintf(MUNIT_OUTPUT_FILE, "%s=%s", param->name, param->value);
    }
    while (output_l++ < MUNIT_TEST_NAME_LEN) {
      fputc(' ', MUNIT_OUTPUT_FILE);
    }
  }

  fflush(MUNIT_OUTPUT_FILE);

  /* Buffer stderr in a temp file so it can be replayed after the result
   * line. */
  stderr_buf = NULL;
#if !defined(_WIN32) || defined(__MINGW32__)
  stderr_buf = tmpfile();
#else
  tmpfile_s(&stderr_buf);
#endif
  if (stderr_buf == NULL) {
    munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to create buffer for stderr");
    result = MUNIT_ERROR;
    goto print_result;
  }

#if !defined(MUNIT_NO_FORK)
  if (runner->fork) {
    pipefd[0] = -1;
    pipefd[1] = -1;
    if (pipe(pipefd) != 0) {
      munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to create pipe");
      result = MUNIT_ERROR;
      goto print_result;
    }

    fork_pid = fork();
    if (fork_pid == 0) {
      /* Child: run the test and write the raw MunitReport struct into the
       * pipe, looping over short writes. */
      int orig_stderr;

      close(pipefd[0]);

      orig_stderr = munit_replace_stderr(stderr_buf);
      munit_test_runner_exec(runner, test, params, &report);

      /* Note that we don't restore stderr.  This is so we can buffer
       * things written to stderr later on (such as by
       * asan/tsan/ubsan, valgrind, etc.) */
      close(orig_stderr);

      do {
        write_res = write(pipefd[1], ((munit_uint8_t*) (&report)) + bytes_written, sizeof(report) - bytes_written);
        if (write_res < 0) {
          if (stderr_buf != NULL) {
            munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to write to pipe");
          }
          exit(EXIT_FAILURE);
        }
        bytes_written += write_res;
      } while ((size_t) bytes_written < sizeof(report));

      if (stderr_buf != NULL)
        fclose(stderr_buf);
      close(pipefd[1]);

      exit(EXIT_SUCCESS);
    } else if (fork_pid == -1) {
      /* fork() itself failed: count it as an error. */
      close(pipefd[0]);
      close(pipefd[1]);
      if (stderr_buf != NULL) {
        munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to fork");
      }
      report.errored++;
      result = MUNIT_ERROR;
    } else {
      /* Parent: read the child's report, then reap it and diagnose
       * abnormal exits (signal, stop, truncated report). */
      close(pipefd[1]);
      do {
        read_res = read(pipefd[0], ((munit_uint8_t*) (&report)) + bytes_read, sizeof(report) - bytes_read);
        if (read_res < 1)
          break;
        bytes_read += read_res;
      } while (bytes_read < (ssize_t) sizeof(report));

      changed_pid = waitpid(fork_pid, &status, 0);

      if (MUNIT_LIKELY(changed_pid == fork_pid) && MUNIT_LIKELY(WIFEXITED(status))) {
        if (bytes_read != sizeof(report)) {
          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf, "child exited unexpectedly with status %d", WEXITSTATUS(status));
          report.errored++;
        } else if (WEXITSTATUS(status) != EXIT_SUCCESS) {
          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf, "child exited with status %d", WEXITSTATUS(status));
          report.errored++;
        }
      } else {
        if (WIFSIGNALED(status)) {
#if defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 700)
          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf, "child killed by signal %d (%s)", WTERMSIG(status), strsignal(WTERMSIG(status)));
#else
          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf, "child killed by signal %d", WTERMSIG(status));
#endif
        } else if (WIFSTOPPED(status)) {
          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf, "child stopped by signal %d", WSTOPSIG(status));
        }
        report.errored++;
      }

      close(pipefd[0]);
      waitpid(fork_pid, NULL, 0);
    }
  } else
#endif
  {
    /* In-process path (no fork): volatile because of the setjmp below. */
#if !defined(MUNIT_NO_BUFFER)
    const volatile int orig_stderr = munit_replace_stderr(stderr_buf);
#endif

#if defined(MUNIT_THREAD_LOCAL)
    if (MUNIT_UNLIKELY(setjmp(munit_error_jmp_buf) != 0)) {
      result = MUNIT_FAIL;
      report.failed++;
    } else {
      munit_error_jmp_buf_valid = true;
      result = munit_test_runner_exec(runner, test, params, &report);
    }
#else
    result = munit_test_runner_exec(runner, test, params, &report);
#endif

#if !defined(MUNIT_NO_BUFFER)
    munit_restore_stderr(orig_stderr);
#endif

    /* Here just so that the label is used on Windows and we don't get
     * a warning */
    goto print_result;
  }

 print_result:

  /* Print the bracketed result label; TODO tests invert success.  With
   * timing enabled, per-iteration and total times are appended. */
  fputs("[ ", MUNIT_OUTPUT_FILE);
  if ((test->options & MUNIT_TEST_OPTION_TODO) == MUNIT_TEST_OPTION_TODO) {
    if (report.failed != 0 || report.errored != 0 || report.skipped != 0) {
      munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_TODO, '3');
      result = MUNIT_OK;
    } else {
      munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_ERROR, '1');
      if (MUNIT_LIKELY(stderr_buf != NULL))
        munit_log_internal(MUNIT_LOG_ERROR, stderr_buf, "Test marked TODO, but was successful.");
      runner->report.failed++;
      result = MUNIT_ERROR;
    }
  } else if (report.failed > 0) {
    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_FAIL, '1');
    runner->report.failed++;
    result = MUNIT_FAIL;
  } else if (report.errored > 0) {
    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_ERROR, '1');
    runner->report.errored++;
    result = MUNIT_ERROR;
  } else if (report.skipped > 0) {
    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_SKIP, '3');
    runner->report.skipped++;
    result = MUNIT_SKIP;
  } else if (report.successful > 1) {
    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_OK, '2');
#if defined(MUNIT_ENABLE_TIMING)
    fputs(" ] [ ", MUNIT_OUTPUT_FILE);
    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock / report.successful);
    fputs(" / ", MUNIT_OUTPUT_FILE);
    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock / report.successful);
    fprintf(MUNIT_OUTPUT_FILE, " CPU ]\n  %-" MUNIT_XSTRINGIFY(MUNIT_TEST_NAME_LEN) "s Total: [ ", "");
    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock);
    fputs(" / ", MUNIT_OUTPUT_FILE);
    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock);
    fputs(" CPU", MUNIT_OUTPUT_FILE);
#endif
    runner->report.successful++;
    result = MUNIT_OK;
  } else if (report.successful > 0) {
    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_OK, '2');
#if defined(MUNIT_ENABLE_TIMING)
    fputs(" ] [ ", MUNIT_OUTPUT_FILE);
    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock);
    fputs(" / ", MUNIT_OUTPUT_FILE);
    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock);
    fputs(" CPU", MUNIT_OUTPUT_FILE);
#endif
    runner->report.successful++;
    result = MUNIT_OK;
  }
  fputs(" ]\n", MUNIT_OUTPUT_FILE);

  if (stderr_buf != NULL) {
    if (result == MUNIT_FAIL || result == MUNIT_ERROR || runner->show_stderr) {
      fflush(MUNIT_OUTPUT_FILE);
      rewind(stderr_buf);
      munit_splice(fileno(stderr_buf), STDERR_FILENO);
      fflush(stderr);
    }

    fclose(stderr_buf);
  }
}

/* Recursively expand wildcard parameters: `p` walks the placeholder
 * entries appended to `params`; for each possible value of p's enum the
 * test is run (or the next placeholder is expanded). */
static void
munit_test_runner_run_test_wild(MunitTestRunner* runner,
                                const MunitTest* test,
                                const char* test_name,
                                MunitParameter* params,
                                MunitParameter* p) {
  const MunitParameterEnum* pe;
  char** values;
  MunitParameter* next;

  for (pe = test->parameters ; pe != NULL && pe->name != NULL ; pe++) {
    if (p->name == pe->name)
      break;
  }

  if (pe == NULL)
    return;

  for (values = pe->values ; *values != NULL ; values++) {
    next = p + 1;
    p->value = *values;
    if (next->name == NULL) {
      /* Last placeholder filled: run the test. */
      munit_test_runner_run_test_with_params(runner, test, params);
    } else {
      munit_test_runner_run_test_wild(runner, test, test_name, params, next);
    }
    if (runner->fatal_failures && (runner->report.failed != 0 || runner->report.errored != 0))
      break;
  }
}

/* Run a single test, with every combination of parameters
 * requested.
*/ static void munit_test_runner_run_test(MunitTestRunner* runner, const MunitTest* test, const char* prefix) { char* test_name = munit_maybe_concat(NULL, (char*) prefix, (char*) test->name); /* The array of parameters to pass to * munit_test_runner_run_test_with_params */ MunitParameter* params = NULL; size_t params_l = 0; /* Wildcard parameters are parameters which have possible values * specified in the test, but no specific value was passed to the * CLI. That means we want to run the test once for every * possible combination of parameter values or, if --single was * passed to the CLI, a single time with a random set of * parameters. */ MunitParameter* wild_params = NULL; size_t wild_params_l = 0; const MunitParameterEnum* pe; const MunitParameter* cli_p; bool filled; unsigned int possible; char** vals; size_t first_wild; const MunitParameter* wp; int pidx; munit_rand_seed(runner->seed); fprintf(MUNIT_OUTPUT_FILE, "%-" MUNIT_XSTRINGIFY(MUNIT_TEST_NAME_LEN) "s", test_name); if (test->parameters == NULL) { /* No parameters. Simple, nice. */ munit_test_runner_run_test_with_params(runner, test, NULL); } else { fputc('\n', MUNIT_OUTPUT_FILE); for (pe = test->parameters ; pe != NULL && pe->name != NULL ; pe++) { /* Did we received a value for this parameter from the CLI? */ filled = false; for (cli_p = runner->parameters ; cli_p != NULL && cli_p->name != NULL ; cli_p++) { if (strcmp(cli_p->name, pe->name) == 0) { if (MUNIT_UNLIKELY(munit_parameters_add(¶ms_l, ¶ms, pe->name, cli_p->value) != MUNIT_OK)) goto cleanup; filled = true; break; } } if (filled) continue; /* Nothing from CLI, is the enum NULL/empty? We're not a * fuzzer… */ if (pe->values == NULL || pe->values[0] == NULL) continue; /* If --single was passed to the CLI, choose a value from the * list of possibilities randomly. 
*/ if (runner->single_parameter_mode) { possible = 0; for (vals = pe->values ; *vals != NULL ; vals++) possible++; /* We want the tests to be reproducible, even if you're only * running a single test, but we don't want every test with * the same number of parameters to choose the same parameter * number, so use the test name as a primitive salt. */ pidx = munit_rand_at_most(munit_str_hash(test_name), possible - 1); if (MUNIT_UNLIKELY(munit_parameters_add(¶ms_l, ¶ms, pe->name, pe->values[pidx]) != MUNIT_OK)) goto cleanup; } else { /* We want to try every permutation. Put in a placeholder * entry, we'll iterate through them later. */ if (MUNIT_UNLIKELY(munit_parameters_add(&wild_params_l, &wild_params, pe->name, NULL) != MUNIT_OK)) goto cleanup; } } if (wild_params_l != 0) { first_wild = params_l; for (wp = wild_params ; wp != NULL && wp->name != NULL ; wp++) { for (pe = test->parameters ; pe != NULL && pe->name != NULL && pe->values != NULL ; pe++) { if (strcmp(wp->name, pe->name) == 0) { if (MUNIT_UNLIKELY(munit_parameters_add(¶ms_l, ¶ms, pe->name, pe->values[0]) != MUNIT_OK)) goto cleanup; } } } munit_test_runner_run_test_wild(runner, test, test_name, params, params + first_wild); } else { munit_test_runner_run_test_with_params(runner, test, params); } cleanup: free(params); free(wild_params); } munit_maybe_free_concat(test_name, prefix, test->name); } /* Recurse through the suite and run all the tests. If a list of * tests to run was provided on the command line, run only those * tests. */ static void munit_test_runner_run_suite(MunitTestRunner* runner, const MunitSuite* suite, const char* prefix) { size_t pre_l; char* pre = munit_maybe_concat(&pre_l, (char*) prefix, (char*) suite->prefix); const MunitTest* test; const char** test_name; const MunitSuite* child_suite; /* Run the tests. 
   */
  for (test = suite->tests ; test != NULL && test->test != NULL ; test++) {
    if (runner->tests != NULL) { /* Specific tests were requested on the CLI */
      for (test_name = runner->tests ; test_name != NULL && *test_name != NULL ; test_name++) {
        /* Match if the requested name starts with this suite's prefix and
         * the remainder is a prefix of the test's name. */
        if ((pre_l == 0 || strncmp(pre, *test_name, pre_l) == 0) &&
            strncmp(test->name, *test_name + pre_l, strlen(*test_name + pre_l)) == 0) {
          munit_test_runner_run_test(runner, test, pre);
          if (runner->fatal_failures && (runner->report.failed != 0 || runner->report.errored != 0))
            goto cleanup;
        }
      }
    } else { /* Run all tests */
      munit_test_runner_run_test(runner, test, pre);
    }
  }

  if (runner->fatal_failures && (runner->report.failed != 0 || runner->report.errored != 0))
    goto cleanup;

  /* Run any child suites. */
  for (child_suite = suite->suites ; child_suite != NULL && child_suite->prefix != NULL ; child_suite++) {
    munit_test_runner_run_suite(runner, child_suite, pre);
  }

 cleanup:

  munit_maybe_free_concat(pre, prefix, suite->prefix);
}

/* Entry point for the runner: run the configured suite tree. */
static void
munit_test_runner_run(MunitTestRunner* runner) {
  munit_test_runner_run_suite(runner, runner->suite, NULL);
}

/* Print CLI usage, the version banner, and any custom arguments' help. */
static void
munit_print_help(int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)], void* user_data, const MunitArgument arguments[]) {
  const MunitArgument* arg;

  (void) argc;

  printf("USAGE: %s [OPTIONS...] [TEST...]\n\n", argv[0]);
  puts(" --seed SEED\n"
       "           Value used to seed the PRNG.  Must be a 32-bit integer in decimal\n"
       "           notation with no separators (commas, decimals, spaces, etc.), or\n"
       "           hexadecimal prefixed by \"0x\".\n"
       " --iterations N\n"
       "           Run each test N times.  0 means the default number.\n"
       " --param name value\n"
       "           A parameter key/value pair which will be passed to any test with\n"
       "           takes a parameter of that name.  If not provided, the test will be\n"
       "           run once for each possible parameter value.\n"
       " --list    Write a list of all available tests.\n"
       " --list-params\n"
       "           Write a list of all available tests and their possible parameters.\n"
       " --single  Run each parameterized test in a single configuration instead of\n"
       "           every possible combination\n"
       " --log-visible debug|info|warning|error\n"
       " --log-fatal debug|info|warning|error\n"
       "           Set the level at which messages of different severities are visible,\n"
       "           or cause the test to terminate.\n"
#if !defined(MUNIT_NO_FORK)
       " --no-fork Do not execute tests in a child process.  If this option is supplied\n"
       "           and a test crashes (including by failing an assertion), no further\n"
       "           tests will be performed.\n"
#endif
       " --fatal-failures\n"
       "           Stop executing tests as soon as a failure is found.\n"
       " --show-stderr\n"
       "           Show data written to stderr by the tests, even if the test succeeds.\n"
       " --color auto|always|never\n"
       "           Colorize (or don't) the output.\n"
       /* 12345678901234567890123456789012345678901234567890123456789012345678901234567890 */
       " --help    Print this help message and exit.\n");
#if defined(MUNIT_NL_LANGINFO)
  setlocale(LC_ALL, "");
  /* Use the fancy name only when the locale's codeset is UTF-8. */
  fputs((strcasecmp("UTF-8", nl_langinfo(CODESET)) == 0) ?
"µnit" : "munit", stdout); #else puts("munit"); #endif printf(" %d.%d.%d\n" "Full documentation at: https://nemequ.github.io/munit/\n", (MUNIT_CURRENT_VERSION >> 16) & 0xff, (MUNIT_CURRENT_VERSION >> 8) & 0xff, (MUNIT_CURRENT_VERSION >> 0) & 0xff); for (arg = arguments ; arg != NULL && arg->name != NULL ; arg++) arg->write_help(arg, user_data); } static const MunitArgument* munit_arguments_find(const MunitArgument arguments[], const char* name) { const MunitArgument* arg; for (arg = arguments ; arg != NULL && arg->name != NULL ; arg++) if (strcmp(arg->name, name) == 0) return arg; return NULL; } static void munit_suite_list_tests(const MunitSuite* suite, bool show_params, const char* prefix) { size_t pre_l; char* pre = munit_maybe_concat(&pre_l, (char*) prefix, (char*) suite->prefix); const MunitTest* test; const MunitParameterEnum* params; bool first; char** val; const MunitSuite* child_suite; for (test = suite->tests ; test != NULL && test->name != NULL ; test++) { if (pre != NULL) fputs(pre, stdout); puts(test->name); if (show_params) { for (params = test->parameters ; params != NULL && params->name != NULL ; params++) { fprintf(stdout, " - %s: ", params->name); if (params->values == NULL) { puts("Any"); } else { first = true; for (val = params->values ; *val != NULL ; val++ ) { if(!first) { fputs(", ", stdout); } else { first = false; } fputs(*val, stdout); } putc('\n', stdout); } } } } for (child_suite = suite->suites ; child_suite != NULL && child_suite->prefix != NULL ; child_suite++) { munit_suite_list_tests(child_suite, show_params, pre); } munit_maybe_free_concat(pre, prefix, suite->prefix); } static bool munit_stream_supports_ansi(FILE *stream) { #if !defined(_WIN32) return isatty(fileno(stream)); #else #if !defined(__MINGW32__) size_t ansicon_size = 0; #endif if (isatty(fileno(stream))) { #if !defined(__MINGW32__) getenv_s(&ansicon_size, NULL, 0, "ANSICON"); return ansicon_size != 0; #else return getenv("ANSICON") != NULL; #endif } return false; #endif 
} int munit_suite_main_custom(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc)], const MunitArgument arguments[]) { int result = EXIT_FAILURE; MunitTestRunner runner; size_t parameters_size = 0; size_t tests_size = 0; int arg; char* envptr; unsigned long ts; char* endptr; unsigned long long iterations; MunitLogLevel level; const MunitArgument* argument; const char** runner_tests; unsigned int tests_run; unsigned int tests_total; runner.prefix = NULL; runner.suite = NULL; runner.tests = NULL; runner.seed = 0; runner.iterations = 0; runner.parameters = NULL; runner.single_parameter_mode = false; runner.user_data = NULL; runner.report.successful = 0; runner.report.skipped = 0; runner.report.failed = 0; runner.report.errored = 0; #if defined(MUNIT_ENABLE_TIMING) runner.report.cpu_clock = 0; runner.report.wall_clock = 0; #endif runner.colorize = false; #if !defined(_WIN32) runner.fork = true; #else runner.fork = false; #endif runner.show_stderr = false; runner.fatal_failures = false; runner.suite = suite; runner.user_data = user_data; runner.seed = munit_rand_generate_seed(); runner.colorize = munit_stream_supports_ansi(MUNIT_OUTPUT_FILE); for (arg = 1 ; arg < argc ; arg++) { if (strncmp("--", argv[arg], 2) == 0) { if (strcmp("seed", argv[arg] + 2) == 0) { if (arg + 1 >= argc) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "%s requires an argument", argv[arg]); goto cleanup; } envptr = argv[arg + 1]; ts = strtoul(argv[arg + 1], &envptr, 0); if (*envptr != '\0' || ts > (~((munit_uint32_t) 0U))) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "invalid value ('%s') passed to %s", argv[arg + 1], argv[arg]); goto cleanup; } runner.seed = (munit_uint32_t) ts; arg++; } else if (strcmp("iterations", argv[arg] + 2) == 0) { if (arg + 1 >= argc) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "%s requires an argument", argv[arg]); goto cleanup; } endptr = argv[arg + 1]; iterations = strtoul(argv[arg + 1], &endptr, 0); if (*endptr != '\0' 
|| iterations > UINT_MAX) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "invalid value ('%s') passed to %s", argv[arg + 1], argv[arg]); goto cleanup; } runner.iterations = (unsigned int) iterations; arg++; } else if (strcmp("param", argv[arg] + 2) == 0) { if (arg + 2 >= argc) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "%s requires two arguments", argv[arg]); goto cleanup; } runner.parameters = realloc(runner.parameters, sizeof(MunitParameter) * (parameters_size + 2)); if (runner.parameters == NULL) { munit_log_internal(MUNIT_LOG_ERROR, stderr, "failed to allocate memory"); goto cleanup; } runner.parameters[parameters_size].name = (char*) argv[arg + 1]; runner.parameters[parameters_size].value = (char*) argv[arg + 2]; parameters_size++; runner.parameters[parameters_size].name = NULL; runner.parameters[parameters_size].value = NULL; arg += 2; } else if (strcmp("color", argv[arg] + 2) == 0) { if (arg + 1 >= argc) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "%s requires an argument", argv[arg]); goto cleanup; } if (strcmp(argv[arg + 1], "always") == 0) runner.colorize = true; else if (strcmp(argv[arg + 1], "never") == 0) runner.colorize = false; else if (strcmp(argv[arg + 1], "auto") == 0) runner.colorize = munit_stream_supports_ansi(MUNIT_OUTPUT_FILE); else { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "invalid value ('%s') passed to %s", argv[arg + 1], argv[arg]); goto cleanup; } arg++; } else if (strcmp("help", argv[arg] + 2) == 0) { munit_print_help(argc, argv, user_data, arguments); result = EXIT_SUCCESS; goto cleanup; } else if (strcmp("single", argv[arg] + 2) == 0) { runner.single_parameter_mode = true; } else if (strcmp("show-stderr", argv[arg] + 2) == 0) { runner.show_stderr = true; #if !defined(_WIN32) } else if (strcmp("no-fork", argv[arg] + 2) == 0) { runner.fork = false; #endif } else if (strcmp("fatal-failures", argv[arg] + 2) == 0) { runner.fatal_failures = true; } else if (strcmp("log-visible", argv[arg] + 2) == 0 || strcmp("log-fatal", 
argv[arg] + 2) == 0) { if (arg + 1 >= argc) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "%s requires an argument", argv[arg]); goto cleanup; } if (strcmp(argv[arg + 1], "debug") == 0) level = MUNIT_LOG_DEBUG; else if (strcmp(argv[arg + 1], "info") == 0) level = MUNIT_LOG_INFO; else if (strcmp(argv[arg + 1], "warning") == 0) level = MUNIT_LOG_WARNING; else if (strcmp(argv[arg + 1], "error") == 0) level = MUNIT_LOG_ERROR; else { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "invalid value ('%s') passed to %s", argv[arg + 1], argv[arg]); goto cleanup; } if (strcmp("log-visible", argv[arg] + 2) == 0) munit_log_level_visible = level; else munit_log_level_fatal = level; arg++; } else if (strcmp("list", argv[arg] + 2) == 0) { munit_suite_list_tests(suite, false, NULL); result = EXIT_SUCCESS; goto cleanup; } else if (strcmp("list-params", argv[arg] + 2) == 0) { munit_suite_list_tests(suite, true, NULL); result = EXIT_SUCCESS; goto cleanup; } else { argument = munit_arguments_find(arguments, argv[arg] + 2); if (argument == NULL) { munit_logf_internal(MUNIT_LOG_ERROR, stderr, "unknown argument ('%s')", argv[arg]); goto cleanup; } if (!argument->parse_argument(suite, user_data, &arg, argc, argv)) goto cleanup; } } else { runner_tests = realloc((void*) runner.tests, sizeof(char*) * (tests_size + 2)); if (runner_tests == NULL) { munit_log_internal(MUNIT_LOG_ERROR, stderr, "failed to allocate memory"); goto cleanup; } runner.tests = runner_tests; runner.tests[tests_size++] = argv[arg]; runner.tests[tests_size] = NULL; } } fflush(stderr); fprintf(MUNIT_OUTPUT_FILE, "Running test suite with seed 0x%08" PRIx32 "...\n", runner.seed); munit_test_runner_run(&runner); tests_run = runner.report.successful + runner.report.failed + runner.report.errored; tests_total = tests_run + runner.report.skipped; if (tests_run == 0) { fprintf(stderr, "No tests run, %d (100%%) skipped.\n", runner.report.skipped); } else { fprintf(MUNIT_OUTPUT_FILE, "%d of %d (%0.0f%%) tests successful, %d (%0.0f%%) 
test skipped.\n", runner.report.successful, tests_run, (((double) runner.report.successful) / ((double) tests_run)) * 100.0, runner.report.skipped, (((double) runner.report.skipped) / ((double) tests_total)) * 100.0); } if (runner.report.failed == 0 && runner.report.errored == 0) { result = EXIT_SUCCESS; } cleanup: free(runner.parameters); free((void*) runner.tests); return result; } int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc)]) { return munit_suite_main_custom(suite, user_data, argc, argv, NULL); } raft-0.11.3/test/lib/munit.h000066400000000000000000000422131415614527300155740ustar00rootroot00000000000000/* µnit Testing Framework * Copyright (c) 2013-2017 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #if !defined(MUNIT_H) #define MUNIT_H #include #include #define MUNIT_VERSION(major, minor, revision) \ (((major) << 16) | ((minor) << 8) | (revision)) #define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1) #if defined(_MSC_VER) && (_MSC_VER < 1600) # define munit_int8_t __int8 # define munit_uint8_t unsigned __int8 # define munit_int16_t __int16 # define munit_uint16_t unsigned __int16 # define munit_int32_t __int32 # define munit_uint32_t unsigned __int32 # define munit_int64_t __int64 # define munit_uint64_t unsigned __int64 #else # include # define munit_int8_t int8_t # define munit_uint8_t uint8_t # define munit_int16_t int16_t # define munit_uint16_t uint16_t # define munit_int32_t int32_t # define munit_uint32_t uint32_t # define munit_int64_t int64_t # define munit_uint64_t uint64_t #endif #if defined(_MSC_VER) && (_MSC_VER < 1800) # if !defined(PRIi8) # define PRIi8 "i" # endif # if !defined(PRIi16) # define PRIi16 "i" # endif # if !defined(PRIi32) # define PRIi32 "i" # endif # if !defined(PRIi64) # define PRIi64 "I64i" # endif # if !defined(PRId8) # define PRId8 "d" # endif # if !defined(PRId16) # define PRId16 "d" # endif # if !defined(PRId32) # define PRId32 "d" # endif # if !defined(PRId64) # define PRId64 "I64d" # endif # if !defined(PRIx8) # define PRIx8 "x" # endif # if !defined(PRIx16) # define PRIx16 "x" # endif # if !defined(PRIx32) # define PRIx32 "x" # endif # if !defined(PRIx64) # define PRIx64 "I64x" # endif # if !defined(PRIu8) # define PRIu8 "u" # endif # if !defined(PRIu16) # define PRIu16 "u" # endif # if !defined(PRIu32) # define PRIu32 "u" # endif # if !defined(PRIu64) # define PRIu64 "I64u" # endif # if !defined(bool) # define bool int # endif # if !defined(true) # define true (!0) # endif # if !defined(false) # define false (!!0) # endif #else # include # include #endif #if defined(__cplusplus) extern "C" { #endif #if defined(__GNUC__) # define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1)) # define MUNIT_UNLIKELY(expr) 
(__builtin_expect ((expr), 0)) # define MUNIT_UNUSED __attribute__((__unused__)) #else # define MUNIT_LIKELY(expr) (expr) # define MUNIT_UNLIKELY(expr) (expr) # define MUNIT_UNUSED #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI) # define MUNIT_ARRAY_PARAM(name) name #else # define MUNIT_ARRAY_PARAM(name) #endif #if !defined(_WIN32) # define MUNIT_SIZE_MODIFIER "z" # define MUNIT_CHAR_MODIFIER "hh" # define MUNIT_SHORT_MODIFIER "h" #else # if defined(_M_X64) || defined(__amd64__) # define MUNIT_SIZE_MODIFIER "I64" # else # define MUNIT_SIZE_MODIFIER "" # endif # define MUNIT_CHAR_MODIFIER "" # define MUNIT_SHORT_MODIFIER "" #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L # define MUNIT_NO_RETURN _Noreturn #elif defined(__GNUC__) # define MUNIT_NO_RETURN __attribute__((__noreturn__)) #elif defined(_MSC_VER) # define MUNIT_NO_RETURN __declspec(noreturn) #else # define MUNIT_NO_RETURN #endif #if defined(_MSC_VER) && (_MSC_VER >= 1500) # define MUNIT__PUSH_DISABLE_MSVC_C4127 __pragma(warning(push)) __pragma(warning(disable:4127)) # define MUNIT__POP_DISABLE_MSVC_C4127 __pragma(warning(pop)) #else # define MUNIT__PUSH_DISABLE_MSVC_C4127 # define MUNIT__POP_DISABLE_MSVC_C4127 #endif typedef enum { MUNIT_LOG_DEBUG, MUNIT_LOG_INFO, MUNIT_LOG_WARNING, MUNIT_LOG_ERROR } MunitLogLevel; #if defined(__GNUC__) && !defined(__MINGW32__) # define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check))) #else # define MUNIT_PRINTF(string_index, first_to_check) #endif MUNIT_PRINTF(4, 5) void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...); #define munit_logf(level, format, ...) 
\ munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__) #define munit_log(level, msg) \ munit_logf(level, "%s", msg) MUNIT_NO_RETURN MUNIT_PRINTF(3, 4) void munit_errorf_ex(const char* filename, int line, const char* format, ...); #define munit_errorf(format, ...) \ munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__) #define munit_error(msg) \ munit_errorf("%s", msg) #define munit_assert(expr) \ do { \ if (!MUNIT_LIKELY(expr)) { \ munit_error("assertion failed: " #expr); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_true(expr) \ do { \ if (!MUNIT_LIKELY(expr)) { \ munit_error("assertion failed: " #expr " is not true"); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_false(expr) \ do { \ if (!MUNIT_LIKELY(!(expr))) { \ munit_error("assertion failed: " #expr " is not false"); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ do { \ T munit_tmp_a_ = (a); \ T munit_tmp_b_ = (b); \ if (!(munit_tmp_a_ op munit_tmp_b_)) { \ munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \ #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_type(T, fmt, a, op, b) \ munit_assert_type_full("", "", T, fmt, a, op, b) #define munit_assert_char(a, op, b) \ munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b) #define munit_assert_uchar(a, op, b) \ munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b) #define munit_assert_short(a, op, b) \ munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b) #define munit_assert_ushort(a, op, b) \ munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b) #define munit_assert_int(a, op, b) \ 
munit_assert_type(int, "d", a, op, b) #define munit_assert_uint(a, op, b) \ munit_assert_type(unsigned int, "u", a, op, b) #define munit_assert_long(a, op, b) \ munit_assert_type(long int, "ld", a, op, b) #define munit_assert_ulong(a, op, b) \ munit_assert_type(unsigned long int, "lu", a, op, b) #define munit_assert_llong(a, op, b) \ munit_assert_type(long long int, "lld", a, op, b) #define munit_assert_ullong(a, op, b) \ munit_assert_type(unsigned long long int, "llu", a, op, b) #define munit_assert_size(a, op, b) \ munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b) #define munit_assert_float(a, op, b) \ munit_assert_type(float, "f", a, op, b) #define munit_assert_double(a, op, b) \ munit_assert_type(double, "g", a, op, b) #define munit_assert_ptr(a, op, b) \ munit_assert_type(const void*, "p", a, op, b) #define munit_assert_int8(a, op, b) \ munit_assert_type(munit_int8_t, PRIi8, a, op, b) #define munit_assert_uint8(a, op, b) \ munit_assert_type(munit_uint8_t, PRIu8, a, op, b) #define munit_assert_int16(a, op, b) \ munit_assert_type(munit_int16_t, PRIi16, a, op, b) #define munit_assert_uint16(a, op, b) \ munit_assert_type(munit_uint16_t, PRIu16, a, op, b) #define munit_assert_int32(a, op, b) \ munit_assert_type(munit_int32_t, PRIi32, a, op, b) #define munit_assert_uint32(a, op, b) \ munit_assert_type(munit_uint32_t, PRIu32, a, op, b) #define munit_assert_int64(a, op, b) \ munit_assert_type(munit_int64_t, PRIi64, a, op, b) #define munit_assert_uint64(a, op, b) \ munit_assert_type(munit_uint64_t, PRIu64, a, op, b) #define munit_assert_double_equal(a, b, precision) \ do { \ const double munit_tmp_a_ = (a); \ const double munit_tmp_b_ = (b); \ const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \ -(munit_tmp_a_ - munit_tmp_b_) : \ (munit_tmp_a_ - munit_tmp_b_); \ if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \ munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)", \ #a, #b, munit_tmp_a_, munit_tmp_b_); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #include #define munit_assert_string_equal(a, b) \ do { \ const char* munit_tmp_a_ = a; \ const char* munit_tmp_b_ = b; \ if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \ munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \ #a, #b, munit_tmp_a_, munit_tmp_b_); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_string_not_equal(a, b) \ do { \ const char* munit_tmp_a_ = a; \ const char* munit_tmp_b_ = b; \ if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \ munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \ #a, #b, munit_tmp_a_, munit_tmp_b_); \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_memory_equal(size, a, b) \ do { \ const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \ const size_t munit_tmp_size_ = (size); \ if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_)) != 0) { \ size_t munit_tmp_pos_; \ for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \ if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \ munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \ #a, #b, munit_tmp_pos_); \ break; \ } \ } \ } \ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_memory_not_equal(size, a, b) \ do { \ const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \ const size_t munit_tmp_size_ = (size); \ if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_)) == 0) { \ munit_errorf("assertion failed: memory %s != %s (%zu bytes)", \ #a, #b, munit_tmp_size_); \ } 
\ MUNIT__PUSH_DISABLE_MSVC_C4127 \ } while (0) \ MUNIT__POP_DISABLE_MSVC_C4127 #define munit_assert_ptr_equal(a, b) \ munit_assert_ptr(a, ==, b) #define munit_assert_ptr_not_equal(a, b) \ munit_assert_ptr(a, !=, b) #define munit_assert_null(ptr) \ munit_assert_ptr(ptr, ==, NULL) #define munit_assert_not_null(ptr) \ munit_assert_ptr(ptr, !=, NULL) #define munit_assert_ptr_null(ptr) \ munit_assert_ptr(ptr, ==, NULL) #define munit_assert_ptr_not_null(ptr) \ munit_assert_ptr(ptr, !=, NULL) /*** Memory allocation ***/ void* munit_malloc_ex(const char* filename, int line, size_t size); #define munit_malloc(size) \ munit_malloc_ex(__FILE__, __LINE__, (size)) #define munit_new(type) \ ((type*) munit_malloc(sizeof(type))) #define munit_calloc(nmemb, size) \ munit_malloc((nmemb) * (size)) #define munit_newa(type, nmemb) \ ((type*) munit_calloc((nmemb), sizeof(type))) /*** Random number generation ***/ void munit_rand_seed(munit_uint32_t seed); munit_uint32_t munit_rand_uint32(void); int munit_rand_int_range(int min, int max); double munit_rand_double(void); void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]); /*** Tests and Suites ***/ typedef enum { /* Test successful */ MUNIT_OK, /* Test failed */ MUNIT_FAIL, /* Test was skipped */ MUNIT_SKIP, /* Test failed due to circumstances not intended to be tested * (things like network errors, invalid parameter value, failure to * allocate memory in the test harness, etc.). 
*/ MUNIT_ERROR } MunitResult; typedef struct { char* name; char** values; } MunitParameterEnum; typedef struct { char* name; char* value; } MunitParameter; const char* munit_parameters_get(const MunitParameter params[], const char* key); typedef enum { MUNIT_TEST_OPTION_NONE = 0, MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0, MUNIT_TEST_OPTION_TODO = 1 << 1 } MunitTestOptions; typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture); typedef void* (* MunitTestSetup)(const MunitParameter params[], void* user_data); typedef void (* MunitTestTearDown)(void* fixture); typedef struct { char* name; MunitTestFunc test; MunitTestSetup setup; MunitTestTearDown tear_down; MunitTestOptions options; MunitParameterEnum* parameters; } MunitTest; typedef enum { MUNIT_SUITE_OPTION_NONE = 0 } MunitSuiteOptions; typedef struct MunitSuite_ MunitSuite; struct MunitSuite_ { char* prefix; MunitTest* tests; MunitSuite* suites; unsigned int iterations; MunitSuiteOptions options; }; int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc)]); /* Note: I'm not very happy with this API; it's likely to change if I * figure out something better. Suggestions welcome. 
*/ typedef struct MunitArgument_ MunitArgument; struct MunitArgument_ { char* name; bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc)]); void (* write_help)(const MunitArgument* argument, void* user_data); }; int munit_suite_main_custom(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc)], const MunitArgument arguments[]); #if defined(MUNIT_ENABLE_ASSERT_ALIASES) #define assert_true(expr) munit_assert_true(expr) #define assert_false(expr) munit_assert_false(expr) #define assert_char(a, op, b) munit_assert_char(a, op, b) #define assert_uchar(a, op, b) munit_assert_uchar(a, op, b) #define assert_short(a, op, b) munit_assert_short(a, op, b) #define assert_ushort(a, op, b) munit_assert_ushort(a, op, b) #define assert_int(a, op, b) munit_assert_int(a, op, b) #define assert_uint(a, op, b) munit_assert_uint(a, op, b) #define assert_long(a, op, b) munit_assert_long(a, op, b) #define assert_ulong(a, op, b) munit_assert_ulong(a, op, b) #define assert_llong(a, op, b) munit_assert_llong(a, op, b) #define assert_ullong(a, op, b) munit_assert_ullong(a, op, b) #define assert_size(a, op, b) munit_assert_size(a, op, b) #define assert_float(a, op, b) munit_assert_float(a, op, b) #define assert_double(a, op, b) munit_assert_double(a, op, b) #define assert_ptr(a, op, b) munit_assert_ptr(a, op, b) #define assert_int8(a, op, b) munit_assert_int8(a, op, b) #define assert_uint8(a, op, b) munit_assert_uint8(a, op, b) #define assert_int16(a, op, b) munit_assert_int16(a, op, b) #define assert_uint16(a, op, b) munit_assert_uint16(a, op, b) #define assert_int32(a, op, b) munit_assert_int32(a, op, b) #define assert_uint32(a, op, b) munit_assert_uint32(a, op, b) #define assert_int64(a, op, b) munit_assert_int64(a, op, b) #define assert_uint64(a, op, b) munit_assert_uint64(a, op, b) #define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision) #define 
assert_string_equal(a, b) munit_assert_string_equal(a, b) #define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b) #define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b) #define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b) #define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b) #define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b) #define assert_ptr_null(ptr) munit_assert_null_equal(ptr) #define assert_ptr_not_null(ptr) munit_assert_not_null(ptr) #define assert_null(ptr) munit_assert_null(ptr) #define assert_not_null(ptr) munit_assert_not_null(ptr) #endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */ #if defined(__cplusplus) } #endif #endif /* !defined(MUNIT_H) */ #if defined(MUNIT_ENABLE_ASSERT_ALIASES) # if defined(assert) # undef assert # endif # define assert(expr) munit_assert(expr) #endif raft-0.11.3/test/lib/runner.h000066400000000000000000000132421415614527300157510ustar00rootroot00000000000000/* Convenience macros to reduce munit boiler plate. */ #ifndef TEST_RUNNER_H_ #define TEST_RUNNER_H_ #include "munit.h" /* Top-level suites array declaration. * * These top-level suites hold all module-level child suites and must be defined * and then set as child suites of a root suite created at runtime by the test * runner's main(). This can be done using the TEST_RUNNER macro. */ extern MunitSuite _main_suites[]; extern int _main_suites_n; /* Maximum number of test cases for each suite */ #define SUITE__CAP 128 /* Define the top-level suites array and the main() function of the test. */ #define RUNNER(NAME) \ MunitSuite _main_suites[SUITE__CAP]; \ int _main_suites_n = 0; \ \ int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc)]) \ { \ MunitSuite suite = {(char *)"", NULL, _main_suites, 1, 0}; \ return munit_suite_main(&suite, (void *)NAME, argc, argv); \ } /* Declare and register a new test suite #S belonging to the file's test module. 
* * A test suite is a pair of static variables: * * static MunitTest _##S##_suites[SUITE__CAP] * static MunitTest _##S##_tests[SUITE__CAP] * * The tests and suites attributes of the next available MunitSuite slot in the * _module_suites array will be set to the suite's tests and suites arrays, and * the prefix attribute of the slot will be set to /S. */ #define SUITE(S) \ SUITE__DECLARE(S) \ SUITE__ADD_CHILD(main, #S, S) /* Declare and register a new test. */ #define TEST(S, C, SETUP, TEAR_DOWN, OPTIONS, PARAMS) \ static MunitResult test_##S##_##C(const MunitParameter params[], \ void *data); \ TEST__ADD_TO_SUITE(S, C, SETUP, TEAR_DOWN, OPTIONS, PARAMS) \ static MunitResult test_##S##_##C( \ MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *data) #define SKIP_IF_NO_FIXTURE \ if (f == NULL) { \ return MUNIT_SKIP; \ } /* Declare the MunitSuite[] and the MunitTest[] arrays that compose the test * suite identified by S. */ #define SUITE__DECLARE(S) \ static MunitSuite _##S##_suites[SUITE__CAP]; \ static MunitTest _##S##_tests[SUITE__CAP]; \ static MunitTestSetup _##S##_setup = NULL; \ static MunitTestTearDown _##S##_tear_down = NULL; \ static int _##S##_suites_n = 0; \ static int _##S##_tests_n = 0; \ __attribute__((constructor(101))) static void _##S##_init(void) \ { \ memset(_##S##_suites, 0, sizeof(_##S##_suites)); \ memset(_##S##_tests, 0, sizeof(_##S##_tests)); \ (void)_##S##_suites_n; \ (void)_##S##_tests_n; \ (void)_##S##_setup; \ (void)_##S##_tear_down; \ } /* Set the tests and suites attributes of the next available slot of the * MunitSuite[] array of S1 to the MunitTest[] and MunitSuite[] arrays of S2, * using the given PREFIX. 
*/ #define SUITE__ADD_CHILD(S1, PREFIX, S2) \ __attribute__((constructor(102))) static void _##S1##_##S2##_init(void) \ { \ int n = _##S1##_suites_n; \ _##S1##_suites[n].prefix = PREFIX; \ _##S1##_suites[n].tests = _##S2##_tests; \ _##S1##_suites[n].suites = _##S2##_suites; \ _##S1##_suites[n].iterations = 0; \ _##S1##_suites[n].options = 0; \ _##S1##_suites_n = n + 1; \ } /* Add a test case to the MunitTest[] array of suite S. */ #define TEST__ADD_TO_SUITE(S, C, SETUP, TEAR_DOWN, OPTIONS, PARAMS) \ __attribute__((constructor(103))) static void _##S##_tests_##C##_init(void) \ { \ MunitTest *tests = _##S##_tests; \ int n = _##S##_tests_n; \ TEST__SET_IN_ARRAY(tests, n, "/" #C, test_##S##_##C, SETUP, TEAR_DOWN, \ OPTIONS, PARAMS); \ _##S##_tests_n = n + 1; \ } /* Set the values of the I'th test case slot in the given test array */ #define TEST__SET_IN_ARRAY(TESTS, I, NAME, FUNC, SETUP, TEAR_DOWN, OPTIONS, \ PARAMS) \ TESTS[I].name = NAME; \ TESTS[I].test = FUNC; \ TESTS[I].setup = SETUP; \ TESTS[I].tear_down = TEAR_DOWN; \ TESTS[I].options = OPTIONS; \ TESTS[I].parameters = PARAMS #endif /* TEST_RUNNER_H_ */ raft-0.11.3/test/lib/snapshot.h000066400000000000000000000020431415614527300162740ustar00rootroot00000000000000/** * Raft snapshot test helpers. */ #ifndef TEST_SNAPSHOT_H #define TEST_SNAPSHOT_H #include "../../include/raft.h" #include "../../src/configuration.h" /** * Allocate and create the given snapshot, using the given @LAST_INDEX, * @LAST_TERM, the given @CONF, and generating an FSM snapshot using @X and @Y. 
*/ #define CREATE_SNAPSHOT(SNAPSHOT, LAST_INDEX, LAST_TERM, CONF, CONF_INDEX, X, \ Y) \ SNAPSHOT = raft_malloc(sizeof *SNAPSHOT); \ munit_assert_ptr_not_null(SNAPSHOT); \ SNAPSHOT->index = LAST_INDEX; \ SNAPSHOT->term = LAST_TERM; \ SNAPSHOT->configuration = CONF; \ SNAPSHOT->configuration_index = CONF_INDEX; \ FsmEncodeSnapshot(X, Y, &SNAPSHOT->bufs, &SNAPSHOT->n_bufs) #endif /* TEST_CONFIGURATION_H */ raft-0.11.3/test/lib/tcp.c000066400000000000000000000127711415614527300152270ustar00rootroot00000000000000#include "tcp.h" #include #include #include #include void TcpServerInit(struct TcpServer *s) { struct sockaddr_in addr; socklen_t size = sizeof addr; int rv; /* Initialize the socket address structure. */ memset(&addr, 0, size); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = 0; /* Get a random free port */ /* Create the server socket. */ s->socket = socket(AF_INET, SOCK_STREAM, 0); if (s->socket == -1) { munit_errorf("tcp server: socket(): %s", strerror(errno)); } /* Bind the socket. */ rv = bind(s->socket, (struct sockaddr *)&addr, size); if (rv == -1) { munit_errorf("tcp server: bind(): %s", strerror(errno)); } /* Start listening. */ rv = listen(s->socket, 1); if (rv == -1) { munit_errorf("tcp server: listen(): %s", strerror(errno)); } /* Get the actual addressed assigned by the kernel and save it back in the * relevant field. 
*/ rv = getsockname(s->socket, (struct sockaddr *)&addr, &size); if (rv != 0) { munit_errorf("tcp: getsockname(): %s", strerror(errno)); } sprintf(s->address, "127.0.0.1:%d", htons(addr.sin_port)); } void TcpServerClose(struct TcpServer *s) { int rv; if (s->socket == -1) { return; } rv = close(s->socket); if (rv == -1) { munit_errorf("tcp server: close(): %s", strerror(errno)); } } int TcpServerAccept(struct TcpServer *s) { int socket; struct sockaddr_in address; socklen_t size; size = sizeof(address); socket = accept(s->socket, (struct sockaddr *)&address, &size); if (socket < 0) { munit_errorf("tcp server: accept(): %s", strerror(errno)); } return socket; } void TcpServerStop(struct TcpServer *s) { int rv; rv = close(s->socket); if (rv == -1) { munit_errorf("tcp server: close(): %s", strerror(errno)); } s->socket = -1; } void test_tcp_setup(const MunitParameter params[], struct test_tcp *t) { (void)params; t->server.socket = -1; t->client.socket = -1; } void test_tcp_tear_down(struct test_tcp *t) { int rv; if (t->server.socket != -1) { rv = close(t->server.socket); if (rv == -1) { munit_errorf("tcp: close(): %s", strerror(errno)); } } if (t->client.socket != -1) { rv = close(t->client.socket); if (rv == -1) { munit_errorf("tcp: close(): %s", strerror(errno)); } } } void test_tcp_listen(struct test_tcp *t) { struct sockaddr_in addr; socklen_t size = sizeof addr; int rv; /* Initialize the socket address structure. */ memset(&addr, 0, size); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = 0; /* Get a random free port */ /* Create the server socket. */ t->server.socket = socket(AF_INET, SOCK_STREAM, 0); if (t->server.socket == -1) { munit_errorf("tcp: socket(): %s", strerror(errno)); } /* Bind the socket. */ rv = bind(t->server.socket, (struct sockaddr *)&addr, size); if (rv == -1) { munit_errorf("tcp: bind(): %s", strerror(errno)); } /* Start listening. 
*/ rv = listen(t->server.socket, 1); if (rv == -1) { munit_errorf("tcp: listen(): %s", strerror(errno)); } /* Get the actual addressed assigned by the kernel and save it back in * the relevant test_socket__server field (pointed to by address). */ rv = getsockname(t->server.socket, (struct sockaddr *)&addr, &size); if (rv != 0) { munit_errorf("tcp: getsockname(): %s", strerror(errno)); } sprintf(t->server.address, "127.0.0.1:%d", htons(addr.sin_port)); } const char *test_tcp_address(struct test_tcp *t) { return t->server.address; } void test_tcp_connect(struct test_tcp *t, int port) { struct sockaddr_in addr; int rv; /* Create the client socket. */ t->client.socket = socket(AF_INET, SOCK_STREAM, 0); if (t->client.socket == -1) { munit_errorf("tcp: socket(): %s", strerror(errno)); } /* Initialize the socket address structure. */ memset(&addr, 0, sizeof addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = htons(port); /* Connect */ rv = connect(t->client.socket, (struct sockaddr *)&addr, sizeof addr); if (rv == -1) { munit_errorf("tcp: connect(): %s", strerror(errno)); } } void test_tcp_close(struct test_tcp *t) { int rv; rv = close(t->client.socket); if (rv == -1) { munit_errorf("tcp: close(): %s", strerror(errno)); } t->client.socket = -1; } void test_tcp_stop(struct test_tcp *t) { int rv; rv = close(t->server.socket); if (rv == -1) { munit_errorf("tcp: close(): %s", strerror(errno)); } t->server.socket = -1; } void test_tcp_send(struct test_tcp *t, const void *buf, int len) { int rv; rv = write(t->client.socket, buf, len); if (rv == -1) { munit_errorf("tcp: write(): %s", strerror(errno)); } if (rv != len) { munit_errorf("tcp: write(): only %d bytes written", rv); } } int test_tcp_accept(struct test_tcp *t) { int socket; struct sockaddr_in address; socklen_t size; size = sizeof(address); socket = accept(t->server.socket, (struct sockaddr *)&address, &size); if (socket < 0) { munit_errorf("tcp: accept(): %s", 
strerror(errno)); } return socket; } raft-0.11.3/test/lib/tcp.h000066400000000000000000000052661415614527300152350ustar00rootroot00000000000000/* Test TCP utilities. * * This module sports helpers to create server or client sockets, and * send/receive data through them. */ #ifndef TEST_TCP_H #define TEST_TCP_H #include "munit.h" /* Macro helpers. */ #define FIXTURE_TCP_SERVER struct TcpServer server #define SETUP_TCP_SERVER TcpServerInit(&f->server) #define TEAR_DOWN_TCP_SERVER TcpServerClose(&f->server) #define TCP_SERVER_STOP TcpServerStop(&f->server) #define TCP_SERVER_ADDRESS f->server.address #define FIXTURE_TCP struct test_tcp tcp #define SETUP_TCP test_tcp_setup(params, &f->tcp) #define TEAR_DOWN_TCP test_tcp_tear_down(&f->tcp) #define TCP_CLIENT_CONNECT(PORT) test_tcp_connect(&f->tcp, PORT) #define TCP_CLIENT_SEND(BUF, N) test_tcp_send(&f->tcp, BUF, N) #define TCP_CLIENT_CLOSE test_tcp_close(&f->tcp) struct TcpServer { int socket; /* Socket listening to incoming connections */ char address[128]; /* IPv4 address of the server, with port */ }; void TcpServerInit(struct TcpServer *s); void TcpServerClose(struct TcpServer *s); /* Accept inbound client connection and return the relevant socket. */ int TcpServerAccept(struct TcpServer *s); /* Close the server socket. */ void TcpServerStop(struct TcpServer *s); struct TcpClient { int socket; /* Socket connected to a server. */ }; void TcpClientInit(struct TcpClient *s); void TcpClientClose(struct TcpClient *s); /* Object that can be used to setup and control a TCP server and/or client. */ struct test_tcp { struct { int socket; /* Socket listening to incoming connections */ char address[128]; /* IPv4 address of the server, with port */ } server; struct { int socket; /* Socket connected to another host */ } client; }; /** * Bind the server socket of the given test TCP host to localhost and start * listening to it. 
*/ void test_tcp_setup(const MunitParameter params[], struct test_tcp *t); void test_tcp_tear_down(struct test_tcp *t); /** * Start listening to a random free port on localhost. */ void test_tcp_listen(struct test_tcp *t); /** * Return the address of the server socket created with @test_tcp_listen. */ const char *test_tcp_address(struct test_tcp *t); /** * Connect the client socket to the given port on localhost. */ void test_tcp_connect(struct test_tcp *t, int port); /** * Close the client socket. */ void test_tcp_close(struct test_tcp *t); /** * Send data using the client socket. */ void test_tcp_send(struct test_tcp *t, const void *buf, int len); /** * Accept inbound client connection and return the relevant socket. */ int test_tcp_accept(struct test_tcp *t); /** * Close the server socket. */ void test_tcp_stop(struct test_tcp *t); #endif /* TEST_TCP_H */ raft-0.11.3/test/lib/tracer.c000066400000000000000000000004021415614527300157050ustar00rootroot00000000000000#include "tracer.h" #include "munit.h" void TracerEmit(struct raft_tracer *t, const char *file, int line, const char *message) { (void)t; fprintf(stderr, "%20s:%*d - %s\n", file, 3, line, message); } raft-0.11.3/test/lib/tracer.h000066400000000000000000000006421415614527300157200ustar00rootroot00000000000000/* Raft tracer that emits messages to stderr. */ #ifndef TEST_TRACER_H #define TEST_TRACER_H #include "../../include/raft.h" #define FIXTURE_TRACER struct raft_tracer tracer #define SET_UP_TRACER f->tracer.emit = TracerEmit #define TEAR_DOWN_TRACER void TracerEmit(struct raft_tracer *t, const char *file, int line, const char *message); #endif /* TEST_TRACER_H */ raft-0.11.3/test/lib/uv.h000066400000000000000000000042661415614527300151000ustar00rootroot00000000000000/* Helpers around the libuv-based implementation of the raft_io interface. 
*/ #ifndef TEST_UV_H #define TEST_UV_H #include "../../include/raft.h" #include "../../include/raft/uv.h" #include "dir.h" #include "heap.h" #include "loop.h" #include "tracer.h" #define FIXTURE_UV_TRANSPORT struct raft_uv_transport transport #define SETUP_UV_TRANSPORT \ do { \ int rv_; \ rv_ = raft_uv_tcp_init(&f->transport, &f->loop); \ munit_assert_int(rv_, ==, 0); \ } while (0) #define TEAR_DOWN_UV_TRANSPORT raft_uv_tcp_close(&f->transport) #define FIXTURE_UV_DEPS \ FIXTURE_DIR; \ FIXTURE_HEAP; \ FIXTURE_LOOP; \ FIXTURE_TRACER; \ FIXTURE_UV_TRANSPORT #define SETUP_UV_DEPS \ SET_UP_DIR; \ SET_UP_HEAP; \ SETUP_LOOP; \ SET_UP_TRACER; \ SETUP_UV_TRANSPORT #define TEAR_DOWN_UV_DEPS \ TEAR_DOWN_UV_TRANSPORT; \ TEAR_DOWN_TRACER; \ TEAR_DOWN_LOOP; \ TEAR_DOWN_HEAP; \ TEAR_DOWN_DIR #define FIXTURE_UV struct raft_io io #define SETUP_UV \ do { \ int rv_; \ rv_ = raft_uv_init(&f->io, &f->loop, f->dir, &f->transport); \ munit_assert_int(rv_, ==, 0); \ raft_uv_set_tracer(&f->io, &f->tracer); \ rv_ = f->io.init(&f->io, 1, "127.0.0.1:9001"); \ munit_assert_int(rv_, ==, 0); \ } while (0) MUNIT_UNUSED static void uvCloseCb(struct raft_io *io) { bool *closed = io->data; *closed = true; } #define TEAR_DOWN_UV \ do { \ bool _closed = false; \ f->io.data = &_closed; \ f->io.close(&f->io, uvCloseCb); \ LOOP_RUN_UNTIL(&_closed); \ raft_uv_close(&f->io); \ } while (0) #endif /* TEST_UV_H */ raft-0.11.3/test/unit/000077500000000000000000000000001415614527300144765ustar00rootroot00000000000000raft-0.11.3/test/unit/main_core.c000066400000000000000000000000531415614527300165740ustar00rootroot00000000000000#include "../lib/runner.h" RUNNER("core") raft-0.11.3/test/unit/main_uv.c000066400000000000000000000000511415614527300162740ustar00rootroot00000000000000#include "../lib/runner.h" RUNNER("uv") raft-0.11.3/test/unit/test_byte.c000066400000000000000000000113061415614527300166450ustar00rootroot00000000000000#include #include #include "../../src/byte.h" #include "../lib/runner.h" 
/****************************************************************************** * * Helper macros * *****************************************************************************/ #define CRC32(VALUE) byteCrc32(&(VALUE), sizeof VALUE, 0) /****************************************************************************** * * byteCrc32 * *****************************************************************************/ SUITE(byteCrc32) /* The same data produces the same sum. */ TEST(byteCrc32, valid, NULL, NULL, 0, NULL) { uint64_t value1 = 123456789; uint64_t value2 = 123456789; munit_assert_int(CRC32(value1), ==, CRC32(value2)); return MUNIT_OK; } /* Different data produces a different sum. */ TEST(byteCrc32, invalid, NULL, NULL, 0, NULL) { uint64_t value1 = 123456789; uint64_t value2 = 123466789; munit_assert_int(CRC32(value1), !=, CRC32(value2)); return MUNIT_OK; } /****************************************************************************** * * Convert to little endian representation (least significant byte first). * *****************************************************************************/ SUITE(byteFlip) /* Convert a 32-bit number. */ TEST(byteFlip, 32, NULL, NULL, 0, NULL) { uint32_t value; unsigned i; value = byteFlip32(0x03020100); for (i = 0; i < 4; i++) { munit_assert_int(*((uint8_t *)&value + i), ==, i); } return MUNIT_OK; } /* Convert a 64-bit number. 
*/ TEST(byteFlip, 64, NULL, NULL, 0, NULL) { uint64_t value; unsigned i; value = byteFlip64(0x0706050403020100); for (i = 0; i < 8; i++) { munit_assert_int(*((uint8_t *)&value + i), ==, i); } return MUNIT_OK; } /****************************************************************************** * * byteGetString * *****************************************************************************/ SUITE(byteGetString) TEST(byteGetString, success, NULL, NULL, 0, NULL) { uint8_t buf[] = {'h', 'e', 'l', 'l', 'o', 0}; const void *cursor = buf; munit_assert_string_equal(byteGetString(&cursor, sizeof buf), "hello"); munit_assert_ptr_equal(cursor, buf + sizeof buf); return MUNIT_OK; } TEST(byteGetString, malformed, NULL, NULL, 0, NULL) { uint8_t buf[] = {'h', 'e', 'l', 'l', 'o', 'w'}; const void *cursor = buf; munit_assert_ptr_equal(byteGetString(&cursor, sizeof buf), NULL); munit_assert_ptr_equal(cursor, buf); return MUNIT_OK; } /****************************************************************************** * * byteGet64Unaligned * *****************************************************************************/ SUITE(byteGet64Unaligned) TEST(byteGet64Unaligned, success, NULL, NULL, 0, NULL) { uint8_t *buf = munit_malloc(sizeof(uint64_t) * 2); void *cursor1 = buf + 1; const void *cursor2 = buf + 1; bytePut64Unaligned(&cursor1, 1); munit_assert_int(byteGet64Unaligned(&cursor2), ==, 1); free(buf); return MUNIT_OK; } /****************************************************************************** * * byteSha1 * *****************************************************************************/ /* Assert that the 20 bytes contained in VALUE match the given DIGEST * hexadecimal representation. 
*/ #define ASSERT_SHA1(VALUE, DIGEST) \ do { \ char _digest[41]; \ unsigned _i; \ for (_i = 0; _i < 20; _i++) { \ unsigned _j = _i * 2; \ sprintf(&_digest[_j], "%.2x", value[_i]); \ _digest[_j] = toupper(_digest[_j]); \ _digest[_j + 1] = toupper(_digest[_j + 1]); \ } \ _digest[40] = '\0'; \ munit_assert_string_equal(_digest, DIGEST); \ } while (0) SUITE(byteSha1) TEST(byteSha1, abc, NULL, NULL, 0, NULL) { struct byteSha1 sha1; uint8_t text[] = "abc"; uint8_t value[20]; byteSha1Init(&sha1); byteSha1Update(&sha1, text, sizeof text - 1); byteSha1Digest(&sha1, value); ASSERT_SHA1(value, "A9993E364706816ABA3E25717850C26C9CD0D89D"); return MUNIT_OK; } TEST(byteSha1, abcbd, NULL, NULL, 0, NULL) { struct byteSha1 sha1; uint8_t text[] = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; uint8_t value[20]; byteSha1Init(&sha1); byteSha1Update(&sha1, text, sizeof text - 1); byteSha1Digest(&sha1, value); ASSERT_SHA1(value, "84983E441C3BD26EBAAE4AA1F95129E5E54670F1"); return MUNIT_OK; } raft-0.11.3/test/unit/test_compress.c000066400000000000000000000205661415614527300175450ustar00rootroot00000000000000#include "../../src/byte.h" #include "../../src/compress.h" #include "../lib/munit.h" #include "../lib/runner.h" #include #ifdef LZ4_AVAILABLE #include #endif SUITE(Compress) struct raft_buffer getBufWithRandom(size_t len) { struct raft_buffer buf = {0}; buf.len = len; buf.base = munit_malloc(buf.len); munit_assert_ptr_not_null(buf.base); size_t offset = 0; /* Write as many random ints in buf as possible */ for(size_t n = buf.len / sizeof(int); n > 0; n--) { *((int*)(buf.base) + offset) = rand(); offset += 1; } /* Fill the remaining bytes */ size_t rem = buf.len % sizeof(int); /* Offset will now be used in char* arithmetic */ offset *= sizeof(int); if (rem) { int r_int = rand(); for (unsigned i = 0; i < rem; i++) { *((char*)buf.base + offset) = *((char*)&r_int + i); offset++; } } munit_assert_ulong(offset, ==, buf.len); return buf; } struct raft_buffer 
getBufWithNonRandom(size_t len) { struct raft_buffer buf = {0}; buf.len = len; buf.base = munit_malloc(buf.len); munit_assert_ptr_not_null(buf.base); memset(buf.base, 0xAC, buf.len); return buf; } #ifdef LZ4_AVAILABLE static void sha1(struct raft_buffer bufs[], unsigned n_bufs, uint8_t value[20]) { struct byteSha1 sha; byteSha1Init(&sha); for (unsigned i = 0; i < n_bufs; i++) { byteSha1Update(&sha, (const uint8_t *)bufs[i].base, (uint32_t)bufs[i].len); } byteSha1Digest(&sha, value); } static char* len_one_params[] = { /* 16B 1KB 64KB 4MB 128MB */ "16", "1024", "65536", "4194304", "134217728", /* Around Blocksize*/ "65516", "65517", "65518", "65521", "65535", "65537", "65551", "65555", "65556", /* Ugly lengths */ "1", "9", "123450", "1337", "6655111", NULL }; static MunitParameterEnum random_one_params[] = { { "len_one", len_one_params }, { NULL, NULL }, }; TEST(Compress, compressDecompressRandomOne, NULL, NULL, 0, random_one_params) { char errmsg[RAFT_ERRMSG_BUF_SIZE] = {0}; struct raft_buffer compressed = {0}; struct raft_buffer decompressed = {0}; uint8_t sha1_virgin[20] = {0}; uint8_t sha1_decompressed[20] = {1}; /* Fill a buffer with random data */ size_t len = strtoul(munit_parameters_get(params, "len_one"), NULL, 0); struct raft_buffer buf = getBufWithRandom(len); /* Assert that after compression and decompression the data is unchanged */ sha1(&buf, 1, sha1_virgin); munit_assert_int(Compress(&buf, 1, &compressed, errmsg), ==, 0); free(buf.base); munit_assert_true(IsCompressed(compressed.base, compressed.len)); munit_assert_int(Decompress(compressed, &decompressed, errmsg), ==, 0); munit_assert_ulong(decompressed.len, ==, len); sha1(&decompressed, 1, sha1_decompressed); munit_assert_int(memcmp(sha1_virgin, sha1_decompressed, 20), ==, 0); raft_free(compressed.base); raft_free(decompressed.base); return MUNIT_OK; } static char* len_nonrandom_one_params[] = { #ifdef __arm__ /* 4KB 64KB 4MB 1GB INT_MAX (larger alocations fail on arm 32-bit */ "4096", "65536", 
"4194304", "1073741824", "2147483647", #else /* 4KB 64KB 4MB 1GB 2GB + 200MB */ "4096", "65536", "4194304", "1073741824", "2357198848", #endif /* Around Blocksize*/ "65516", "65517", "65518", "65521", "65535", "65537", "65551", "65555", "65556", /* Ugly lengths */ "993450", "31337", "83883825", NULL }; static MunitParameterEnum nonrandom_one_params[] = { { "len_one", len_nonrandom_one_params }, { NULL, NULL }, }; TEST(Compress, compressDecompressNonRandomOne, NULL, NULL, 0, nonrandom_one_params) { char errmsg[RAFT_ERRMSG_BUF_SIZE] = {0}; struct raft_buffer compressed = {0}; struct raft_buffer decompressed = {0}; uint8_t sha1_virgin[20] = {0}; uint8_t sha1_decompressed[20] = {1}; /* Fill a buffer with non-random data */ size_t len = strtoul(munit_parameters_get(params, "len_one"), NULL, 0); struct raft_buffer buf = getBufWithNonRandom(len); /* Assert that after compression and decompression the data is unchanged and * that the compressed data is actually smaller */ sha1(&buf, 1, sha1_virgin); munit_assert_int(Compress(&buf, 1, &compressed, errmsg), ==, 0); free(buf.base); munit_assert_true(IsCompressed(compressed.base, compressed.len)); munit_assert_ulong(compressed.len, <, buf.len); munit_assert_int(Decompress(compressed, &decompressed, errmsg), ==, 0); munit_assert_ulong(decompressed.len, ==, len); sha1(&decompressed, 1, sha1_decompressed); munit_assert_int(memcmp(sha1_virgin, sha1_decompressed, 20), ==, 0); raft_free(compressed.base); raft_free(decompressed.base); return MUNIT_OK; } static char* len_two_params[] = { "4194304", "13373", "66", NULL }; static MunitParameterEnum random_two_params[] = { { "len_one", len_one_params }, { "len_two", len_two_params }, { NULL, NULL }, }; TEST(Compress, compressDecompressRandomTwo, NULL, NULL, 0, random_two_params) { char errmsg[RAFT_ERRMSG_BUF_SIZE] = {0}; struct raft_buffer compressed = {0}; struct raft_buffer decompressed = {0}; uint8_t sha1_virgin[20] = {0}; uint8_t sha1_decompressed[20] = {1}; /* Fill two buffers with 
random data */ size_t len1 = strtoul(munit_parameters_get(params, "len_one"), NULL, 0); struct raft_buffer buf1 = getBufWithRandom(len1); size_t len2 = strtoul(munit_parameters_get(params, "len_two"), NULL, 0); struct raft_buffer buf2 = getBufWithRandom(len2); struct raft_buffer bufs[2] = { buf1, buf2 }; /* Assert that after compression and decompression the data is unchanged */ sha1(bufs, 2, sha1_virgin); munit_assert_int(Compress(bufs, 2, &compressed, errmsg), ==, 0); free(buf1.base); free(buf2.base); munit_assert_true(IsCompressed(compressed.base, compressed.len)); munit_assert_int(Decompress(compressed, &decompressed, errmsg), ==, 0); munit_assert_ulong(decompressed.len, ==, buf1.len + buf2.len); sha1(&decompressed, 1, sha1_decompressed); munit_assert_int(memcmp(sha1_virgin, sha1_decompressed, 20), ==, 0); raft_free(compressed.base); raft_free(decompressed.base); return MUNIT_OK; } TEST(Compress, compressDecompressCorruption, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE] = {0}; struct raft_buffer compressed = {0}; struct raft_buffer decompressed = {0}; /* Fill a buffer with random data */ size_t len = 2048; struct raft_buffer buf = getBufWithRandom(len); munit_assert_int(Compress(&buf, 1, &compressed, errmsg), ==, 0); munit_assert_true(IsCompressed(compressed.base, compressed.len)); /* Corrupt the a data byte after the header */ munit_assert_ulong(LZ4F_HEADER_SIZE_MAX_RAFT, <, compressed.len); ((char*)compressed.base)[LZ4F_HEADER_SIZE_MAX_RAFT] += 1; munit_assert_int(Decompress(compressed, &decompressed, errmsg), !=, 0); munit_assert_string_equal(errmsg, "LZ4F_decompress ERROR_contentChecksum_invalid"); munit_assert_ptr_null(decompressed.base); raft_free(compressed.base); free(buf.base); return MUNIT_OK; } #else TEST(Compress, lz4Disabled, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE] = {0}; struct raft_buffer compressed = {0}; /* Fill a buffer with random data */ size_t len = 2048; struct raft_buffer buf = getBufWithRandom(len); 
munit_assert_int(Compress(&buf, 1, &compressed, errmsg), ==, RAFT_INVALID); munit_assert_ptr_null(compressed.base); free(buf.base); return MUNIT_OK; } #endif /* LZ4_AVAILABLE */ static const char LZ4_MAGIC[4] = {0x04, 0x22, 0x4d, 0x18}; TEST(Compress, isCompressedTooSmall, NULL, NULL, 0, NULL) { munit_assert_false(IsCompressed(&LZ4_MAGIC[1], sizeof(LZ4_MAGIC)-1)); return MUNIT_OK; } TEST(Compress, isCompressedNull, NULL, NULL, 0, NULL) { munit_assert_false(IsCompressed(NULL, sizeof(LZ4_MAGIC))); return MUNIT_OK; } TEST(Compress, isCompressed, NULL, NULL, 0, NULL) { munit_assert_true(IsCompressed(LZ4_MAGIC, sizeof(LZ4_MAGIC))); return MUNIT_OK; } TEST(Compress, notCompressed, NULL, NULL, 0, NULL) { char not_compressed[4] = {0x18, 0x4d, 0x22, 0x04}; munit_assert_false(IsCompressed(not_compressed, sizeof(not_compressed))); return MUNIT_OK; } raft-0.11.3/test/unit/test_configuration.c000066400000000000000000000453071415614527300205610ustar00rootroot00000000000000#include "../../src/byte.h" #include "../../src/configuration.h" #include "../lib/heap.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_HEAP; struct raft_configuration configuration; }; static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SET_UP_HEAP; configurationInit(&f->configuration); return f; } static void tearDown(void *data) { struct fixture *f = data; configurationClose(&f->configuration); TEAR_DOWN_HEAP; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Accessors */ #define VOTER_COUNT configurationVoterCount(&f->configuration) #define INDEX_OF(ID) configurationIndexOf(&f->configuration, ID) #define 
INDEX_OF_VOTER(ID) configurationIndexOfVoter(&f->configuration, ID) #define GET(ID) configurationGet(&f->configuration, ID) /* Add a server to the fixture's configuration. */ #define ADD_RV(ID, ADDRESS, ROLE) \ configurationAdd(&f->configuration, ID, ADDRESS, ROLE) #define ADD(...) munit_assert_int(ADD_RV(__VA_ARGS__), ==, 0) #define ADD_ERROR(RV, ...) munit_assert_int(ADD_RV(__VA_ARGS__), ==, RV) /* Remove a server from the fixture's configuration */ #define REMOVE_RV(ID) configurationRemove(&f->configuration, ID) #define REMOVE(...) munit_assert_int(REMOVE_RV(__VA_ARGS__), ==, 0) #define REMOVE_ERROR(RV, ...) munit_assert_int(REMOVE_RV(__VA_ARGS__), ==, RV) /* Copy the fixture's configuration into the given one. */ #define COPY_RV(CONF) configurationCopy(&f->configuration, CONF) #define COPY(...) munit_assert_int(COPY_RV(__VA_ARGS__), ==, 0) #define COPY_ERROR(RV, ...) munit_assert_int(COPY_RV(__VA_ARGS__), ==, RV) /* Encode the fixture's configuration into the given buffer. */ #define ENCODE_RV(BUF) configurationEncode(&f->configuration, BUF) #define ENCODE(...) munit_assert_int(ENCODE_RV(__VA_ARGS__), ==, 0) #define ENCODE_ERROR(RV, ...) munit_assert_int(ENCODE_RV(__VA_ARGS__), ==, RV) /* Decode the given buffer into the fixture's configuration. */ #define DECODE_RV(BUF) configurationDecode(BUF, &f->configuration) #define DECODE(...) munit_assert_int(DECODE_RV(__VA_ARGS__), ==, 0) #define DECODE_ERROR(RV, ...) munit_assert_int(DECODE_RV(__VA_ARGS__), ==, RV) /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert that the fixture's configuration has n servers. 
*/ #define ASSERT_N(N) \ { \ munit_assert_int(f->configuration.n, ==, N); \ if (N == 0) { \ munit_assert_ptr_null(f->configuration.servers); \ } else { \ munit_assert_ptr_not_null(f->configuration.servers); \ } \ } /* Assert that the attributes of the I'th server in the fixture's configuration * match the given values. */ #define ASSERT_SERVER(I, ID, ADDRESS, ROLE) \ { \ struct raft_server *server; \ munit_assert_int(I, <, f->configuration.n); \ server = &f->configuration.servers[I]; \ munit_assert_int(server->id, ==, ID); \ munit_assert_string_equal(server->address, ADDRESS); \ munit_assert_int(server->role, ==, ROLE); \ } /****************************************************************************** * * configurationVoterCount * *****************************************************************************/ SUITE(configurationVoterCount) /* All servers are voting. */ TEST(configurationVoterCount, all_voters, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_VOTER); ADD(2, "192.168.1.2:666", RAFT_VOTER); munit_assert_int(VOTER_COUNT, ==, 2); return MUNIT_OK; } /* Return only voting servers. */ TEST(configurationVoterCount, filter, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_VOTER); ADD(2, "192.168.1.2:666", RAFT_STANDBY); munit_assert_int(VOTER_COUNT, ==, 1); return MUNIT_OK; } /****************************************************************************** * * configurationIndexOf * *****************************************************************************/ SUITE(configurationIndexOf) /* If a matching server is found, it's index is returned. */ TEST(configurationIndexOf, match, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_VOTER); ADD(2, "192.168.1.2:666", RAFT_STANDBY); munit_assert_int(INDEX_OF(2), ==, 1); return MUNIT_OK; } /* If no matching server is found, the length of the configuration is * returned. 
*/ TEST(configurationIndexOf, no_match, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); munit_assert_int(INDEX_OF(3), ==, f->configuration.n); return MUNIT_OK; } /****************************************************************************** * * configurationIndexOfVoter * *****************************************************************************/ SUITE(configurationIndexOfVoter) /* The index of the matching voting server (relative to the number of voting servers) is returned. */ TEST(configurationIndexOfVoter, match, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_STANDBY); ADD(2, "192.168.1.2:666", RAFT_VOTER); ADD(3, "192.168.1.3:666", RAFT_VOTER); munit_assert_int(INDEX_OF_VOTER(3), ==, 1); return MUNIT_OK; } /* If no matching server is found, the length of the configuration is * returned. */ TEST(configurationIndexOfVoter, no_match, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_VOTER); munit_assert_int(INDEX_OF_VOTER(3), ==, 1); return MUNIT_OK; } /* If the server exists but is non-voting, the length of the configuration is * returned. */ TEST(configurationIndexOfVoter, non_voting, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "192.168.1.1:666", RAFT_STANDBY); munit_assert_int(INDEX_OF_VOTER(1), ==, 1); return MUNIT_OK; } /****************************************************************************** * * configurationGet * *****************************************************************************/ SUITE(configurationGet) /* If a matching server is found, it's returned. 
*/ TEST(configurationGet, match, setUp, tearDown, 0, NULL) { struct fixture *f = data; const struct raft_server *server; ADD(1, "192.168.1.1:666", RAFT_VOTER); ADD(2, "192.168.1.2:666", RAFT_STANDBY); server = GET(2); munit_assert_ptr_not_null(server); munit_assert_int(server->id, ==, 2); munit_assert_string_equal(server->address, "192.168.1.2:666"); return MUNIT_OK; } /* If no matching server is found, NULL is returned. */ TEST(configurationGet, no_match, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); munit_assert_ptr_null(GET(3)); return MUNIT_OK; } /****************************************************************************** * * configurationCopy * *****************************************************************************/ SUITE(configurationCopy) /* Copy a configuration containing two servers */ TEST(configurationCopy, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_configuration configuration; ADD(1, "192.168.1.1:666", RAFT_STANDBY); ADD(2, "192.168.1.2:666", RAFT_VOTER); COPY(&configuration); munit_assert_int(configuration.n, ==, 2); munit_assert_int(configuration.servers[0].id, ==, 1); munit_assert_int(configuration.servers[1].id, ==, 2); configurationClose(&configuration); return MUNIT_OK; } /* Out of memory */ TEST(configurationCopy, oom, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_configuration configuration; ADD(1, "192.168.1.1:666", RAFT_STANDBY); HeapFaultConfig(&f->heap, 0, 1); HeapFaultEnable(&f->heap); COPY_ERROR(RAFT_NOMEM, &configuration); return MUNIT_OK; } /****************************************************************************** * * raft_configuration_add * *****************************************************************************/ SUITE(configurationAdd) /* Add a server to the configuration. 
*/ TEST(configurationAdd, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ASSERT_N(1); ASSERT_SERVER(0, 1, "127.0.0.1:666", RAFT_VOTER); return MUNIT_OK; } /* Add two servers to the configuration. */ TEST(configurationAdd, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD(2, "192.168.1.1:666", RAFT_STANDBY); ASSERT_N(2); ASSERT_SERVER(0, 1, "127.0.0.1:666", RAFT_VOTER); ASSERT_SERVER(1, 2, "192.168.1.1:666", RAFT_STANDBY); return MUNIT_OK; } /* Add a server with an ID which is already in use. */ TEST(configurationAdd, duplicateId, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD_ERROR(RAFT_DUPLICATEID, 1, "192.168.1.1:666", RAFT_STANDBY); return MUNIT_OK; } /* Add a server with an address which is already in use. */ TEST(configurationAdd, duplicateAddress, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD_ERROR(RAFT_DUPLICATEADDRESS, 2, "127.0.0.1:666", RAFT_STANDBY); return MUNIT_OK; } /* Add a server with an invalid role. */ TEST(configurationAdd, invalidRole, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD_ERROR(RAFT_BADROLE, 2, "127.0.0.1:666", 666); return MUNIT_OK; } static char *add_oom_heap_fault_delay[] = {"0", "1", NULL}; static char *add_oom_heap_fault_repeat[] = {"1", NULL}; static MunitParameterEnum add_oom_params[] = { {TEST_HEAP_FAULT_DELAY, add_oom_heap_fault_delay}, {TEST_HEAP_FAULT_REPEAT, add_oom_heap_fault_repeat}, {NULL, NULL}, }; /* Out of memory. 
*/ TEST(configurationAdd, oom, setUp, tearDown, 0, add_oom_params) { struct fixture *f = data; HeapFaultEnable(&f->heap); ADD_ERROR(RAFT_NOMEM, 1, "127.0.0.1:666", RAFT_VOTER); return MUNIT_OK; } /****************************************************************************** * * configurationRemove * *****************************************************************************/ SUITE(configurationRemove) /* Remove the last and only server. */ TEST(configurationRemove, last, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); REMOVE(1); ASSERT_N(0); return MUNIT_OK; } /* Remove the first server. */ TEST(configurationRemove, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD(2, "192.168.1.1:666", RAFT_STANDBY); REMOVE(1); ASSERT_N(1); ASSERT_SERVER(0, 2, "192.168.1.1:666", RAFT_STANDBY); return MUNIT_OK; } /* Remove a server in the middle. */ TEST(configurationRemove, middle, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD(2, "192.168.1.1:666", RAFT_STANDBY); ADD(3, "10.0.1.1:666", RAFT_VOTER); REMOVE(2); ASSERT_N(2); ASSERT_SERVER(0, 1, "127.0.0.1:666", RAFT_VOTER); ASSERT_SERVER(1, 3, "10.0.1.1:666", RAFT_VOTER); return MUNIT_OK; } /* Attempts to remove a server with an unknown ID result in an error. */ TEST(configurationRemove, unknownId, setUp, tearDown, 0, NULL) { struct fixture *f = data; REMOVE_ERROR(RAFT_BADID, 1); return MUNIT_OK; } /* Out of memory. 
*/ TEST(configurationRemove, oom, setUp, tearDown, 0, NULL) { struct fixture *f = data; ADD(1, "127.0.0.1:666", RAFT_VOTER); ADD(2, "192.168.1.1:666", RAFT_STANDBY); HeapFaultConfig(&f->heap, 0, 1); HeapFaultEnable(&f->heap); REMOVE_ERROR(RAFT_NOMEM, 1); return MUNIT_OK; } /****************************************************************************** * * configurationEncode * *****************************************************************************/ SUITE(configurationEncode) /* Encode a configuration with one server. */ TEST(configurationEncode, one_server, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_buffer buf; size_t len; const void *cursor; const char *address = "127.0.0.1:666"; ADD(1, address, RAFT_VOTER); ENCODE(&buf); len = 1 + 8 + /* Version and n of servers */ 8 + strlen(address) + 1; /* Server */ len = bytePad64(len); munit_assert_int(buf.len, ==, len); cursor = buf.base; munit_assert_int(byteGet8(&cursor), ==, 1); munit_assert_int(byteGet64Unaligned(&cursor), ==, 1); munit_assert_int(byteGet64Unaligned(&cursor), ==, 1); munit_assert_string_equal(byteGetString(&cursor, strlen(address) + 1), address); munit_assert_int(byteGet8(&cursor), ==, RAFT_VOTER); raft_free(buf.base); return MUNIT_OK; } /* Encode a configuration with two servers. 
*/ TEST(configurationEncode, two_servers, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_buffer buf; size_t len; const void *cursor; const char *address1 = "127.0.0.1:666"; const char *address2 = "192.168.1.1:666"; ADD(1, address1, RAFT_STANDBY); ADD(2, address2, RAFT_VOTER); ENCODE(&buf); len = 1 + 8 + /* Version and n of servers */ 8 + strlen(address1) + 1 + 1 + /* Server 1 */ 8 + strlen(address2) + 1 + 1; /* Server 2 */ len = bytePad64(len); munit_assert_int(buf.len, ==, len); cursor = buf.base; munit_assert_int(byteGet8(&cursor), ==, 1); munit_assert_int(byteGet64Unaligned(&cursor), ==, 2); munit_assert_int(byteGet64Unaligned(&cursor), ==, 1); munit_assert_string_equal(byteGetString(&cursor, strlen(address1) + 1), address1); munit_assert_int(byteGet8(&cursor), ==, RAFT_STANDBY); munit_assert_int(byteGet64Unaligned(&cursor), ==, 2); munit_assert_string_equal(byteGetString(&cursor, strlen(address2) + 1), address2); munit_assert_int(byteGet8(&cursor), ==, RAFT_VOTER); raft_free(buf.base); return MUNIT_OK; } /* Out of memory. 
*/ TEST(configurationEncode, oom, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_buffer buf; HeapFaultConfig(&f->heap, 2, 1); HeapFaultEnable(&f->heap); ADD(1, "127.0.0.1:666", RAFT_VOTER); ENCODE_ERROR(RAFT_NOMEM, &buf); return MUNIT_OK; } /****************************************************************************** * * configurationDecode * *****************************************************************************/ SUITE(configurationDecode) /* The decode a payload encoding a configuration with one server */ TEST(configurationDecode, one_server, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t bytes[] = {1, /* Version */ 1, 0, 0, 0, 0, 0, 0, 0, /* Number of servers */ 5, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ 'x', '.', 'y', 0, /* Server address */ 1}; /* Role code */ struct raft_buffer buf; int rv; buf.base = bytes; buf.len = sizeof bytes; rv = configurationDecode(&buf, &f->configuration); munit_assert_int(rv, ==, 0); ASSERT_N(1); ASSERT_SERVER(0, 5, "x.y", RAFT_VOTER); return MUNIT_OK; } /* The decode size is the size of a raft_server array plus the length of the * addresses. */ TEST(configurationDecode, two_servers, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t bytes[] = {1, /* Version */ 2, 0, 0, 0, 0, 0, 0, 0, /* Number of servers */ 5, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ 'x', '.', 'y', 0, /* Server address */ 1, /* Role code */ 3, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ '1', '9', '2', '.', '2', 0, /* Server address */ 0}; /* Role code */ struct raft_buffer buf; buf.base = bytes; buf.len = sizeof bytes; DECODE(&buf); ASSERT_N(2); ASSERT_SERVER(0, 5, "x.y", RAFT_VOTER); ASSERT_SERVER(1, 3, "192.2", RAFT_STANDBY); return MUNIT_OK; } /* Not enough memory of the servers array. 
*/ TEST(configurationDecode, oom, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t bytes[] = {1, /* Version */ 1, 0, 0, 0, 0, 0, 0, 0, /* Number of servers */ 5, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ 'x', '.', 'y', 0, /* Server address */ 1}; /* Voting flag */ struct raft_buffer buf; HeapFaultConfig(&f->heap, 0, 1); HeapFaultEnable(&f->heap); buf.base = bytes; buf.len = sizeof bytes; DECODE_ERROR(RAFT_NOMEM, &buf); return MUNIT_OK; } /* If the encoding version is wrong, an error is returned. */ TEST(configurationDecode, badVersion, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t bytes = 127; struct raft_buffer buf; buf.base = &bytes; buf.len = 1; DECODE_ERROR(RAFT_MALFORMED, &buf); return MUNIT_OK; } /* The address of a server is not a nul-terminated string. */ TEST(configurationDecode, badAddress, setUp, tearDown, 0, NULL) { struct fixture *f = data; uint8_t bytes[] = {1, /* Version */ 1, 0, 0, 0, 0, 0, 0, 0, /* Number of servers */ 5, 0, 0, 0, 0, 0, 0, 0, /* Server ID */ 'x', '.', 'y', /* Server address */ 1}; /* Voting flag */ struct raft_buffer buf; buf.base = bytes; buf.len = sizeof bytes; DECODE_ERROR(RAFT_MALFORMED, &buf); return MUNIT_OK; } raft-0.11.3/test/unit/test_err.c000066400000000000000000000051311415614527300164710ustar00rootroot00000000000000#include #include #include "../../src/err.h" #include "../lib/heap.h" #include "../lib/runner.h" /* An error messages which is 249 characters. */ #define LONG_ERRMSG \ "boom boom boom boom boom boom boom boom boom boom boom boom boom boom " \ "boom boom boom boom boom boom boom boom boom boom boom boom boom boom " \ "boom boom boom boom boom boom boom boom boom boom boom boom boom boom " \ "boom boom boom boom boom boom boom boom" /****************************************************************************** * * ErrMsgPrintf * *****************************************************************************/ SUITE(ErrMsgPrintf) /* The format string has no parameters. 
*/ TEST(ErrMsgPrintf, noParams, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "boom"); munit_assert_string_equal(errmsg, "boom"); return MUNIT_OK; } /* The format string has parameters. */ TEST(ErrMsgPrintf, params, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "boom %d", 123); munit_assert_string_equal(errmsg, "boom 123"); return MUNIT_OK; } /****************************************************************************** * * ErrMsgWrapf * *****************************************************************************/ SUITE(ErrMsgWrapf) /* The wrapping format string has no parameters. */ TEST(ErrMsgWrapf, noParams, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "boom"); ErrMsgWrapf(errmsg, "no luck"); munit_assert_string_equal(errmsg, "no luck: boom"); return MUNIT_OK; } /* The wrapping format string has parameters. */ TEST(ErrMsgWrapf, params, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "boom"); ErrMsgWrapf(errmsg, "no luck, %s", "joe"); munit_assert_string_equal(errmsg, "no luck, joe: boom"); return MUNIT_OK; } /* The wrapped error message gets partially truncated. */ TEST(ErrMsgWrapf, partialTruncate, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "no luck"); ErrMsgWrapf(errmsg, LONG_ERRMSG); munit_assert_string_equal(errmsg, LONG_ERRMSG ": no l"); return MUNIT_OK; } /* The wrapped error message gets entirely truncated. 
*/ TEST(ErrMsgWrapf, fullTruncate, NULL, NULL, 0, NULL) { char errmsg[RAFT_ERRMSG_BUF_SIZE]; ErrMsgPrintf(errmsg, "no luck"); ErrMsgWrapf(errmsg, LONG_ERRMSG " boom"); munit_assert_string_equal(errmsg, LONG_ERRMSG " boom"); return MUNIT_OK; } raft-0.11.3/test/unit/test_log.c000066400000000000000000001171711415614527300164720ustar00rootroot00000000000000#include "../../src/configuration.h" #include "../../src/log.h" #include "../lib/heap.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture * *****************************************************************************/ struct fixture { FIXTURE_HEAP; struct raft_log log; }; /****************************************************************************** * * Helper macros * *****************************************************************************/ /* Accessors */ #define NUM_ENTRIES logNumEntries(&f->log) #define LAST_INDEX logLastIndex(&f->log) #define TERM_OF(INDEX) logTermOf(&f->log, INDEX) #define LAST_TERM logLastTerm(&f->log) #define GET(INDEX) logGet(&f->log, INDEX) /* Append one command entry with the given term and a hard-coded payload. */ #define APPEND(TERM) \ { \ struct raft_buffer buf_; \ int rv_; \ buf_.base = raft_malloc(8); \ buf_.len = 8; \ strcpy(buf_.base, "hello"); \ rv_ = logAppend(&f->log, TERM, RAFT_COMMAND, &buf_, NULL); \ munit_assert_int(rv_, ==, 0); \ } /* Same as APPEND, but repeated N times. */ #define APPEND_MANY(TERM, N) \ { \ int i_; \ for (i_ = 0; i_ < N; i_++) { \ APPEND(TERM); \ } \ } /* Invoke append and assert that it returns the given error. */ #define APPEND_ERROR(TERM, RV) \ { \ struct raft_buffer buf_; \ int rv_; \ buf_.base = raft_malloc(8); \ buf_.len = 8; \ rv_ = logAppend(&f->log, TERM, RAFT_COMMAND, &buf_, NULL); \ munit_assert_int(rv_, ==, RV); \ raft_free(buf_.base); \ } /* Append N entries all belonging to the same batch. 
Each entry will have 64-bit * payload set to i * 1000, where i is the index of the entry in the batch. */ #define APPEND_BATCH(N) \ { \ void *batch; \ size_t offset; \ int i; \ batch = raft_malloc(8 * N); \ munit_assert_ptr_not_null(batch); \ offset = 0; \ for (i = 0; i < N; i++) { \ struct raft_buffer buf; \ int rv; \ buf.base = (uint8_t *)batch + offset; \ buf.len = 8; \ *(uint64_t *)buf.base = i * 1000; \ rv = logAppend(&f->log, 1, RAFT_COMMAND, &buf, batch); \ munit_assert_int(rv, ==, 0); \ offset += 8; \ } \ } #define ACQUIRE(INDEX) \ { \ int rv2; \ rv2 = logAcquire(&f->log, INDEX, &entries, &n); \ munit_assert_int(rv2, ==, 0); \ } #define RELEASE(INDEX) logRelease(&f->log, INDEX, entries, n); #define TRUNCATE(N) logTruncate(&f->log, N) #define SNAPSHOT(INDEX, TRAILING) logSnapshot(&f->log, INDEX, TRAILING) #define RESTORE(INDEX, TERM) logRestore(&f->log, INDEX, TERM) /****************************************************************************** * * Set up an empty configuration. * *****************************************************************************/ static void *setUp(const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); SET_UP_HEAP; logInit(&f->log); return f; } static void tearDown(void *data) { struct fixture *f = data; logClose(&f->log); TEAR_DOWN_HEAP; free(f); } /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert the state of the fixture's log in terms of size, front/back indexes, * offset and number of entries. */ #define ASSERT(SIZE, FRONT, BACK, OFFSET, N) \ munit_assert_int(f->log.size, ==, SIZE); \ munit_assert_int(f->log.front, ==, FRONT); \ munit_assert_int(f->log.back, ==, BACK); \ munit_assert_int(f->log.offset, ==, OFFSET); \ munit_assert_int(logNumEntries(&f->log), ==, N) /* Assert the last index and term of the most recent snapshot. 
*/ #define ASSERT_SNAPSHOT(INDEX, TERM) \ munit_assert_int(f->log.snapshot.last_index, ==, INDEX); \ munit_assert_int(f->log.snapshot.last_term, ==, TERM) /* Assert that the term of entry at INDEX equals TERM. */ #define ASSERT_TERM_OF(INDEX, TERM) \ { \ const struct raft_entry *entry; \ entry = logGet(&f->log, INDEX); \ munit_assert_ptr_not_null(entry); \ munit_assert_int(entry->term, ==, TERM); \ } /* Assert that the number of outstanding references for the entry at INDEX * equals COUNT. */ #define ASSERT_REFCOUNT(INDEX, COUNT) \ { \ size_t i; \ munit_assert_ptr_not_null(f->log.refs); \ for (i = 0; i < f->log.refs_size; i++) { \ if (f->log.refs[i].index == INDEX) { \ munit_assert_int(f->log.refs[i].count, ==, COUNT); \ break; \ } \ } \ if (i == f->log.refs_size) { \ munit_errorf("no refcount found for entry with index %d", \ (int)INDEX); \ } \ } /****************************************************************************** * * logNumEntries * *****************************************************************************/ SUITE(logNumEntries) /* If the log is empty, the return value is zero. */ TEST(logNumEntries, empty, setUp, tearDown, 0, NULL) { struct fixture *f = data; munit_assert_int(NUM_ENTRIES, ==, 0); return MUNIT_OK; } /* The log is not wrapped. */ TEST(logNumEntries, not_wrapped, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); munit_assert_int(NUM_ENTRIES, ==, 1); return MUNIT_OK; } /* The log is wrapped. */ TEST(logNumEntries, wrapped, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(4 /* last_index */, 1 /* trailing */); APPEND_MANY(1 /* term */, 2 /* n entries */); munit_assert_int(NUM_ENTRIES, ==, 4); return MUNIT_OK; } /* The log has an offset and is empty. 
*/ TEST(logNumEntries, offset, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(5 /* last index */, 0 /* trailing */); munit_assert_int(NUM_ENTRIES, ==, 0); return MUNIT_OK; } /* The log has an offset and is not empty. */ TEST(logNumEntries, offsetNotEmpty, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(4 /* last index */, 2 /* trailing */); munit_assert_int(NUM_ENTRIES, ==, 3); return MUNIT_OK; } /****************************************************************************** * * logLastIndex * *****************************************************************************/ SUITE(logLastIndex) /* If the log is empty, last index is 0. */ TEST(logLastIndex, empty, setUp, tearDown, 0, NULL) { struct fixture *f = data; munit_assert_int(LAST_INDEX, ==, 0); return MUNIT_OK; } /* If the log is empty and has an offset, last index is calculated accordingly. */ TEST(logLastIndex, emptyWithOffset, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1); SNAPSHOT(1, 0); munit_assert_int(LAST_INDEX, ==, 1); return MUNIT_OK; } /* The log has one entry. */ TEST(logLastIndex, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); munit_assert_int(LAST_INDEX, ==, 1); return MUNIT_OK; } /* The log has two entries. */ TEST(logLastIndex, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 2 /* n */); munit_assert_int(LAST_INDEX, ==, 2); return MUNIT_OK; } /* If the log starts at a certain offset, the last index is bumped * accordingly. 
*/ TEST(logLastIndex, twoWithOffset, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n */); SNAPSHOT(5 /* last index */, 2 /* trailing */); munit_assert_int(LAST_INDEX, ==, 5); return MUNIT_OK; } /****************************************************************************** * * logLastTerm * *****************************************************************************/ SUITE(logLastTerm) /* If the log is empty, return zero. */ TEST(logLastTerm, empty, setUp, tearDown, 0, NULL) { struct fixture *f = data; munit_assert_int(LAST_TERM, ==, 0); return MUNIT_OK; } /* If the log has a snapshot and no outstanding entries, return the last term of * the snapshot. */ TEST(logLastTerm, snapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); SNAPSHOT(1 /* last index */, 0 /* trailing */); munit_assert_int(LAST_TERM, ==, 1); return MUNIT_OK; } /****************************************************************************** * * logTermOf * *****************************************************************************/ SUITE(logTermOf) /* If the given index is beyond the last index, return 0. */ TEST(logTermOf, beyondLast, setUp, tearDown, 0, NULL) { struct fixture *f = data; munit_assert_int(TERM_OF(2), ==, 0); munit_assert_int(TERM_OF(10), ==, 0); return MUNIT_OK; } /* If the log is empty but has a snapshot, and the given index matches the last * index of the snapshot, return the snapshot last term. */ TEST(logTermOf, snapshotLastIndex, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(5 /* last entry */, 0 /* trailing */); munit_assert_int(TERM_OF(5), ==, 1); return MUNIT_OK; } /* The log has one entry. */ TEST(logTermOf, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(3 /* term */); munit_assert_int(TERM_OF(1), ==, 3); return MUNIT_OK; } /* The log has two entries. 
*/ TEST(logTermOf, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(4 /* term */, 2 /* n */); munit_assert_int(TERM_OF(1), ==, 4); munit_assert_int(TERM_OF(2), ==, 4); return MUNIT_OK; } /* The log has a snapshot and hence has an offset. */ TEST(logTermOf, withSnapshot, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(3 /* last index */, 0 /* trailing */); munit_assert_int(TERM_OF(1), ==, 0); munit_assert_int(TERM_OF(2), ==, 0); munit_assert_int(TERM_OF(3), ==, 1); munit_assert_int(TERM_OF(4), ==, 1); munit_assert_int(TERM_OF(5), ==, 1); return MUNIT_OK; } /* The log has a snapshot with trailing entries. */ TEST(logTermOf, snapshotTrailing, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(3 /* last index */, 2 /* trailing */); munit_assert_int(TERM_OF(1), ==, 0); munit_assert_int(TERM_OF(2), ==, 1); munit_assert_int(TERM_OF(3), ==, 1); munit_assert_int(TERM_OF(4), ==, 1); munit_assert_int(TERM_OF(5), ==, 1); return MUNIT_OK; } /****************************************************************************** * * logGet * *****************************************************************************/ SUITE(logGet) /* The log is empty. */ TEST(logGet, empty_log, setUp, tearDown, 0, NULL) { struct fixture *f = data; munit_assert_ptr_null(GET(1)); return MUNIT_OK; } /* The log is empty but has an offset. */ TEST(logGet, emptyWithOffset, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(4 /* term */, 10 /* n */); SNAPSHOT(10 /* last index */, 0 /* trailing */); munit_assert_ptr_null(GET(1)); munit_assert_ptr_null(GET(10)); munit_assert_ptr_null(GET(11)); return MUNIT_OK; } /* The log has one entry. */ TEST(logGet, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(3 /* term */); munit_assert_int(GET(1)->term, ==, 3); munit_assert_ptr_null(GET(2)); return MUNIT_OK; } /* The log has two entries. 
*/ TEST(logGet, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(4 /* term */, 2 /* n */); munit_assert_int(GET(1)->term, ==, 4); munit_assert_int(GET(2)->term, ==, 4); munit_assert_ptr_null(GET(3)); return MUNIT_OK; } /* The log starts at a certain offset. */ TEST(logGet, twoWithOffset, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 3 /* n */); APPEND(2 /* term */); APPEND(3 /* term */); SNAPSHOT(4 /* las index */, 1 /* trailing */); munit_assert_ptr_null(GET(1)); munit_assert_ptr_null(GET(2)); munit_assert_ptr_null(GET(3)); munit_assert_int(GET(4)->term, ==, 2); munit_assert_int(GET(5)->term, ==, 3); return MUNIT_OK; } /****************************************************************************** * * logAppend * *****************************************************************************/ SUITE(logAppend) /* Append one entry to an empty log. */ TEST(logAppend, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); ASSERT(2 /* size */, 0 /* front */, 1 /* back */, 0 /* offset */, 1 /* n */); ASSERT_TERM_OF(1 /* entry index */, 1 /* term */); ASSERT_REFCOUNT(1 /* entry index */, 1 /* count */); return MUNIT_OK; } /* Append two entries to to an empty log. */ TEST(logAppend, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); APPEND(1 /* term */); ASSERT(6 /* size */, 0 /* front */, 2 /* back */, 0 /* offset */, 2 /* n */); ASSERT_TERM_OF(1 /* entry index */, 1 /* term */); ASSERT_TERM_OF(2 /* entry index */, 1 /* term */); ASSERT_REFCOUNT(1 /* entry index */, 1 /* count */); ASSERT_REFCOUNT(2 /* entry index */, 1 /* count */); return MUNIT_OK; } /* Append three entries in sequence. 
*/ TEST(logAppend, three, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* One -> [e1, NULL] */ APPEND(1 /* term */); /* Two -> [e1, e2, NULL, NULL, NULL, NULL] */ APPEND(1 /* term */); /* Three -> [e1, e2, e3, NULL, NULL, NULL] */ APPEND(1 /* term */); ASSERT(6 /* size */, 0 /* front */, 3 /* back */, 0 /* offset */, 3 /* n */); ASSERT_TERM_OF(1 /* entry index */, 1 /* term */); ASSERT_TERM_OF(2 /* entry index */, 1 /* term */); ASSERT_TERM_OF(3 /* entry index */, 1 /* term */); ASSERT_REFCOUNT(1 /* entry index */, 1 /* count */); ASSERT_REFCOUNT(2 /* entry index */, 1 /* count */); ASSERT_REFCOUNT(3 /* entry index */, 1 /* count */); return MUNIT_OK; } /* Append enough entries to force the reference count hash table to be * resized. */ TEST(logAppend, many, setUp, tearDown, 0, NULL) { struct fixture *f = data; int i; for (i = 0; i < 3000; i++) { APPEND(1 /* term */); } munit_assert_int(f->log.refs_size, ==, 4096); return MUNIT_OK; } /* Append to wrapped log that needs to be grown. */ TEST(logAppend, wrap, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n */); /* Now the log is [e1, e2, e3, e4, e5, NULL] */ ASSERT(6 /* size */, 0 /* front */, 5 /* back */, 0 /* offset */, 5 /* n */); /* Delete the first 4 entries. */ SNAPSHOT(4 /* last entry */, 0 /* trailing */); /* Now the log is [NULL, NULL, NULL, NULL, e5, NULL] */ ASSERT(6 /* size */, 4 /* front */, 5 /* back */, 4 /* offset */, 1 /* n */); /* Append another 3 entries. */ APPEND_MANY(1 /* term */, 3 /* n */); /* Now the log is [e7, e8, NULL, NULL, e5, e6] */ ASSERT(6 /* size */, 4 /* front */, 2 /* back */, 4 /* offset */, 4 /* n */); /* Append another 3 entries. */ APPEND_MANY(1 /* term */, 3 /* n */); /* Now the log is [e5, ..., e11, NULL, ..., NULL] */ ASSERT(14 /* size */, 0 /* front */, 7 /* back */, 4 /* offset */, 7 /* n */); return MUNIT_OK; } /* Append a batch of entries to an empty log. 
*/ TEST(logAppend, batch, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_BATCH(3); ASSERT(6 /* size */, 0 /* front */, 3 /* back */, 0 /* offset */, 3 /* n */); return MUNIT_OK; } static char *logAppendOomHeapFaultDelay[] = {"0", "1", NULL}; static char *logAppendOomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum logAppendOom[] = { {TEST_HEAP_FAULT_DELAY, logAppendOomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, logAppendOomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory. */ TEST(logAppend, oom, setUp, tearDown, 0, logAppendOom) { struct fixture *f = data; struct raft_buffer buf; int rv; buf.base = NULL; buf.len = 0; HeapFaultEnable(&f->heap); rv = logAppend(&f->log, 1, RAFT_COMMAND, &buf, NULL); munit_assert_int(rv, ==, RAFT_NOMEM); return MUNIT_OK; } /* Out of memory when trying to grow the refs count table. */ TEST(logAppend, oomRefs, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1, LOG__REFS_INITIAL_SIZE); HeapFaultConfig(&f->heap, 1, 1); HeapFaultEnable(&f->heap); APPEND_ERROR(1, RAFT_NOMEM); return MUNIT_OK; } /****************************************************************************** * * logAppendConfiguration * *****************************************************************************/ SUITE(logAppendConfiguration) static char *logAppendConfigurationOomHeapFaultDelay[] = {"0", "1", NULL}; static char *logAppendConfigurationOomHeapFaultRepeat[] = {"1", NULL}; static MunitParameterEnum logAppendConfigurationOom[] = { {TEST_HEAP_FAULT_DELAY, logAppendConfigurationOomHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, logAppendConfigurationOomHeapFaultRepeat}, {NULL, NULL}, }; /* Out of memory. 
*/ TEST(logAppendConfiguration, oom, setUp, tearDown, 0, logAppendConfigurationOom) { struct fixture *f = data; struct raft_configuration configuration; int rv; configurationInit(&configuration); rv = configurationAdd(&configuration, 1, "1", RAFT_VOTER); munit_assert_int(rv, ==, 0); HeapFaultEnable(&f->heap); rv = logAppendConfiguration(&f->log, 1, &configuration); munit_assert_int(rv, ==, RAFT_NOMEM); configurationClose(&configuration); return MUNIT_OK; } /****************************************************************************** * * logAcquire * *****************************************************************************/ SUITE(logAcquire) /* Acquire a single log entry. */ TEST(logAcquire, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); ACQUIRE(1 /* index */); munit_assert_ptr_not_null(entries); munit_assert_int(n, ==, 1); munit_assert_int(entries[0].type, ==, RAFT_COMMAND); ASSERT_REFCOUNT(1 /* index */, 2 /* count */); RELEASE(1 /* index */); ASSERT_REFCOUNT(1 /* index */, 1 /* count */); return MUNIT_OK; } /* Acquire two log entries. */ TEST(logAcquire, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); APPEND(1 /* term */); ACQUIRE(1 /* index */); munit_assert_ptr_not_null(entries); munit_assert_int(n, ==, 2); munit_assert_int(entries[0].type, ==, RAFT_COMMAND); munit_assert_int(entries[1].type, ==, RAFT_COMMAND); ASSERT_REFCOUNT(1 /* index */, 2 /* count */); ASSERT_REFCOUNT(2 /* index */, 2 /* count */); RELEASE(1 /* index */); ASSERT_REFCOUNT(1 /* index */, 1 /* count */); ASSERT_REFCOUNT(2 /* index */, 1 /* count */); return MUNIT_OK; } /* Acquire two log entries in a wrapped log. 
*/ TEST(logAcquire, wrap, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND_MANY(1 /* term */, 5 /* n */); /* Now the log is [e1, e2, e3, e4, e5, NULL] */ ASSERT(6 /* size */, 0 /* front */, 5 /* back */, 0 /* offset */, 5 /* n */); /* Delete the first 4 entries. */ SNAPSHOT(4 /* last index */, 0 /* trailing */); /* Now the log is [NULL, NULL, NULL, NULL, e5, NULL] */ ASSERT(6 /* size */, 4 /* front */, 5 /* back */, 4 /* offset */, 1 /* n */); /* Append another 3 entries. */ APPEND_MANY(1 /* term */, 3 /* n */); /* Now the log is [e7, e8, NULL, NULL, e5, e6] */ ASSERT(6 /* size */, 4 /* front */, 2 /* back */, 4 /* offset */, 4 /* n */); ACQUIRE(6 /* index */); munit_assert_int(n, ==, 3); RELEASE(6 /* index */); return MUNIT_OK; } /* Acquire several entries some of which belong to batches. */ TEST(logAcquire, batch, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); APPEND_BATCH(2 /* n entries */); APPEND(1 /* term */); APPEND_BATCH(3 /* n entries */); ACQUIRE(2 /* index */); munit_assert_ptr_not_null(entries); munit_assert_int(n, ==, 6); ASSERT_REFCOUNT(2 /* index */, 2 /* count */); /* Truncate the last 5 entries, so the only references left for the second * batch are the ones in the acquired entries. */ TRUNCATE(3 /* index */); RELEASE(2 /* index */); ASSERT_REFCOUNT(2 /* index */, 1 /* count */); return MUNIT_OK; } /* Trying to acquire entries out of range results in a NULL pointer. */ TEST(logAcquire, outOfRange, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); APPEND(1 /* term */); SNAPSHOT(1 /* index */, 0 /* trailing */); ACQUIRE(1 /* index */); munit_assert_ptr_null(entries); ACQUIRE(3 /* index */); munit_assert_ptr_null(entries); return MUNIT_OK; } /* Out of memory. 
*/ TEST(logAcquire, oom, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; int rv; APPEND(1 /* term */); HeapFaultConfig(&f->heap, 0, 1); HeapFaultEnable(&f->heap); rv = logAcquire(&f->log, 1, &entries, &n); munit_assert_int(rv, ==, RAFT_NOMEM); return MUNIT_OK; } /****************************************************************************** * * logTruncate * *****************************************************************************/ SUITE(logTruncate) /* Truncate the last entry of a log with a single entry. */ TEST(logTruncate, lastOfOne, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); TRUNCATE(1 /* index */); ASSERT(0 /* size */, 0 /* front */, 0 /* back */, 0 /* offset */, 0 /* n */); return MUNIT_OK; } /* Truncate the last entry of a log with a two entries. */ TEST(logTruncate, lastOfTwo, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); APPEND(1 /* term */); TRUNCATE(2 /* index */); ASSERT(6 /* size */, 0 /* front */, 1 /* back */, 0 /* offset */, 1 /* n */); ASSERT_TERM_OF(1 /* entry index */, 1 /* term */); return MUNIT_OK; } /* Truncate from an entry which makes the log wrap. */ TEST(logTruncate, wrap, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); /* Now the log is [e1, e2, e3, e4, e5, NULL] */ ASSERT(6 /* size */, 0 /* front */, 5 /* back */, 0 /* offset */, 5 /* n */); /* Delete the first 4 entries. */ SNAPSHOT(4 /* last index */, 0 /* trailing */); /* Now the log is [NULL, NULL, NULL, NULL, e5, NULL] */ ASSERT(6 /* size */, 4 /* front */, 5 /* back */, 4 /* offset */, 1 /* n */); /* Append another 3 entries. 
*/ APPEND_MANY(1 /* term */, 3 /* n entries */); /* Now the log is [e7, e8, NULL, NULL, e5, e6] */ ASSERT(6 /* size */, 4 /* front */, 2 /* back */, 4 /* offset */, 4 /* n */); /* Truncate from e6 onward (wrapping) */ TRUNCATE(6 /* index */); /* Now the log is [NULL, NULL, NULL, NULL, e5, NULL] */ ASSERT(6 /* size */, 4 /* front */, 5 /* back */, 4 /* offset */, 1 /* n */); return MUNIT_OK; } /* Truncate the last entry of a log with a single entry, which still has an * outstanding reference created by a call to logAcquire(). */ TEST(logTruncate, referenced, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); ACQUIRE(1 /* index */); TRUNCATE(1 /* index */); ASSERT(0 /* size */, 0 /* front */, 0 /* back */, 0 /* offset */, 0 /* n */); /* The entry has still an outstanding reference. */ ASSERT_REFCOUNT(1 /* index */, 1 /* count */); munit_assert_string_equal((const char *)entries[0].buf.base, "hello"); RELEASE(1 /* index */); ASSERT_REFCOUNT(1 /* index */, 0 /* count */); return MUNIT_OK; } /* Truncate all entries belonging to a batch. */ TEST(logTruncate, batch, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_BATCH(3 /* n entries */); TRUNCATE(1 /* index */); munit_assert_int(f->log.size, ==, 0); return MUNIT_OK; } /* Acquire entries at a certain index. Truncate the log at that index. The * truncated entries are still referenced. Then append a new entry, which will * have the same index but different term. */ TEST(logTruncate, acquired, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; APPEND(1 /* term */); APPEND(1 /* term */); ACQUIRE(2 /* index */); munit_assert_int(n, ==, 1); TRUNCATE(2 /* index */); APPEND(2 /* term */); RELEASE(2 /*index */); return MUNIT_OK; } /* Acquire some entries, truncate the log and then append new ones forcing the log to be grown and the reference count hash table to be re-built. 
*/ TEST(logTruncate, acquireAppend, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct raft_entry *entries; unsigned n; size_t i; APPEND(1 /* term */); APPEND(1 /* term */); ACQUIRE(2); munit_assert_int(n, ==, 1); TRUNCATE(2); for (i = 0; i < LOG__REFS_INITIAL_SIZE; i++) { APPEND(2 /* term */); } RELEASE(2); return MUNIT_OK; } static char *logTruncateAcquiredHeapFaultDelay[] = {"0", NULL}; static char *logTruncateAcquiredFaultRepeat[] = {"1", NULL}; static MunitParameterEnum logTruncateAcquiredOom[] = { {TEST_HEAP_FAULT_DELAY, logTruncateAcquiredHeapFaultDelay}, {TEST_HEAP_FAULT_REPEAT, logTruncateAcquiredFaultRepeat}, {NULL, NULL}, }; /* Acquire entries at a certain index. Truncate the log at that index. The * truncated entries are still referenced. Then append a new entry, which fails * to be appended due to OOM. */ TEST(logTruncate, acquiredOom, setUp, tearDown, 0, logTruncateAcquiredOom) { struct fixture *f = data; struct raft_entry *entries; unsigned n; struct raft_buffer buf; int rv; APPEND(1 /* term */); APPEND(1 /* term */); ACQUIRE(2); munit_assert_int(n, ==, 1); TRUNCATE(2); buf.base = NULL; buf.len = 0; HeapFaultEnable(&f->heap); rv = logAppend(&f->log, 2, RAFT_COMMAND, &buf, NULL); munit_assert_int(rv, ==, RAFT_NOMEM); RELEASE(2); return MUNIT_OK; } /****************************************************************************** * * logSnapshot * *****************************************************************************/ SUITE(logSnapshot) /* Take a snapshot at entry 3, keeping 2 trailing entries. 
*/ TEST(logSnapshot, trailing, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND(1 /* term */); APPEND(2 /* term */); APPEND(2 /* term */); SNAPSHOT(3 /* last index */, 2 /* trailing */); ASSERT(6 /* size */, 1 /* front */, 3 /* back */, 1 /* offset */, 2 /* n */); ASSERT_SNAPSHOT(3 /* index */, 2 /* term */); munit_assert_int(NUM_ENTRIES, ==, 2); munit_assert_int(LAST_INDEX, ==, 3); return MUNIT_OK; } /* Take a snapshot when the number of outstanding entries is lower than the * desired trail (so no entry will be deleted). */ TEST(logSnapshot, trailingHigherThanNumEntries, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* Take a snapshot leaving just one entry in the log. */ APPEND_MANY(1 /* term */, 3 /* n entries */); SNAPSHOT(3 /* last index */, 1 /* trailing */); /* Take another snapshot, trying to leave 3 entries, but only 2 are * available at all. */ APPEND(2 /* term */); SNAPSHOT(4 /* last index */, 3 /* trailing */); ASSERT(6 /* size */, 2 /* front */, 4 /* back */, 2 /* offset */, 2 /* n */); ASSERT_SNAPSHOT(4 /* index */, 2 /* term */); munit_assert_int(NUM_ENTRIES, ==, 2); munit_assert_int(LAST_INDEX, ==, 4); return MUNIT_OK; } /* Take a snapshot when the number of outstanding entries is exactly equal to * the desired trail (so no entry will be deleted). */ TEST(logSnapshot, trailingMatchesOutstanding, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* Take a snapshot leaving just one entry in the log. */ APPEND_MANY(1 /* term */, 3 /* n entries */); SNAPSHOT(3 /* last index */, 1 /* trailing */); /* Take another snapshot, leaving 2 entries, which are the ones we have. */ APPEND(2 /* term */); SNAPSHOT(4 /* last index */, 2 /* trailing */); ASSERT(6 /* size */, 2 /* front */, 4 /* back */, 2 /* offset */, 2 /* n */); ASSERT_SNAPSHOT(4 /* index */, 2 /* term */); munit_assert_int(NUM_ENTRIES, ==, 2); munit_assert_int(LAST_INDEX, ==, 4); return MUNIT_OK; } /* Take a snapshot at an index which is not the last one. 
*/ TEST(logSnapshot, lessThanHighestIndex, setUp, tearDown, 0, NULL) { struct fixture *f = data; /* Take a snapshot leaving three entries in the log. */ APPEND_MANY(1 /* term */, 5 /* n entries */); SNAPSHOT(4 /* last index */, 2 /* trailing */); ASSERT(6 /* size */, 2 /* front */, 5 /* back */, 2 /* offset */, 3 /* n */); ASSERT_SNAPSHOT(4 /* index */, 1 /* term */); munit_assert_int(NUM_ENTRIES, ==, 3); munit_assert_int(LAST_INDEX, ==, 5); return MUNIT_OK; } /* Take a snapshot at a point where the log needs to wrap. */ TEST(logSnapshot, wrap, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); /* Now the log is [e1, e2, e3, e4, e5, NULL] */ ASSERT(6 /* size */, 0 /* front */, 5 /* back */, 0 /* offset */, 5 /* n */); /* Take a snapshot at e5, keeping just e5 itself. */ SNAPSHOT(5 /* last index */, 1 /* trailing */); /* Now the log is [NULL, NULL, NULL, NULL, e5, NULL] */ ASSERT(6 /* size */, 4 /* front */, 5 /* back */, 4 /* offset */, 1 /* n */); ASSERT_SNAPSHOT(5 /* index */, 1 /* term */); /* Append another 4 entries. */ APPEND_MANY(1 /* term */, 4 /* n */); /* Now the log is [e7, e8, e9, NULL, e5, e6] */ ASSERT(6 /* size */, 4 /* front */, 3 /* back */, 4 /* offset */, 5 /* n */); /* Take a snapshot at e8 keeping only e8 itself (wrapping) */ SNAPSHOT(8 /* last index */, 1 /* trailing */); /* Now the log is [NULL, e8, e9, NULL, NULL, NULL] */ ASSERT(6 /* size */, 1 /* front */, 3 /* back */, 7 /* offset */, 2 /* n */); ASSERT_SNAPSHOT(8 /* index */, 1 /* term */); return MUNIT_OK; } /****************************************************************************** * * logRestore * *****************************************************************************/ SUITE(logRestore) /* Mimic the initial restore of a snapshot after loading state from disk, when * there are no outstanding entries. 
*/ TEST(logRestore, initial, setUp, tearDown, 0, NULL) { struct fixture *f = data; RESTORE(2 /* last index */, 3 /* last term */); ASSERT_SNAPSHOT(2 /* index */, 3 /* term */); munit_assert_int(LAST_INDEX, ==, 2); return MUNIT_OK; } /* If there are existing entries they are wiped out. */ TEST(logRestore, wipe, setUp, tearDown, 0, NULL) { struct fixture *f = data; APPEND_MANY(1 /* term */, 5 /* n entries */); RESTORE(2 /* last index */, 3 /* last term */); ASSERT_SNAPSHOT(2 /* index */, 3 /* term */); munit_assert_int(LAST_INDEX, ==, 2); return MUNIT_OK; } raft-0.11.3/test/unit/test_queue.c000066400000000000000000000151601415614527300170300ustar00rootroot00000000000000#include "../../src/queue.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture with a single queue. * *****************************************************************************/ struct fixture { void *queue[2]; }; static void *setUp(MUNIT_UNUSED const MunitParameter params[], MUNIT_UNUSED void *user_data) { struct fixture *f = munit_malloc(sizeof *f); QUEUE_INIT(&f->queue); return f; } static void tearDown(void *data) { struct fixture *f = data; free(f); } /****************************************************************************** * * Helper macros * *****************************************************************************/ struct item { int value; void *queue[2]; }; /* Initialize and push the given items to the fixture's queue. Each item will * have a value equal to its index plus one. */ #define PUSH(ITEMS) \ { \ int n_ = sizeof ITEMS / sizeof ITEMS[0]; \ int i_; \ for (i_ = 0; i_ < n_; i_++) { \ struct item *item = &items[i_]; \ item->value = i_ + 1; \ QUEUE_PUSH(&f->queue, &item->queue); \ } \ } /* Remove the i'th item among the given ones. 
*/ #define REMOVE(ITEMS, I) QUEUE_REMOVE(&ITEMS[I].queue) /****************************************************************************** * * Assertions * *****************************************************************************/ /* Assert that the item at the head of the fixture's queue has the given * value. */ #define ASSERT_HEAD(VALUE) \ { \ queue *head_ = QUEUE_HEAD(&f->queue); \ struct item *item_; \ item_ = QUEUE_DATA(head_, struct item, queue); \ munit_assert_int(item_->value, ==, VALUE); \ } /* Assert that the item at the tail of the queue has the given value. */ #define ASSERT_TAIL(VALUE) \ { \ queue *tail_ = QUEUE_TAIL(&f->queue); \ struct item *item_; \ item_ = QUEUE_DATA(tail_, struct item, queue); \ munit_assert_int(item_->value, ==, VALUE); \ } /* Assert that the fixture's queue is empty. */ #define ASSERT_EMPTY munit_assert_true(QUEUE_IS_EMPTY(&f->queue)) /* Assert that the fixture's queue is not empty. */ #define ASSERT_NOT_EMPTY munit_assert_false(QUEUE_IS_EMPTY(&f->queue)) /****************************************************************************** * * QUEUE_IS_EMPTY * *****************************************************************************/ SUITE(QUEUE_IS_EMPTY) TEST(QUEUE_IS_EMPTY, yes, setUp, tearDown, 0, NULL) { struct fixture *f = data; ASSERT_EMPTY; return MUNIT_OK; } TEST(QUEUE_IS_EMPTY, no, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[1]; PUSH(items); ASSERT_NOT_EMPTY; return MUNIT_OK; } /****************************************************************************** * * QUEUE_PUSH * *****************************************************************************/ SUITE(QUEUE_PUSH) TEST(QUEUE_PUSH, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[1]; PUSH(items); ASSERT_HEAD(1); return MUNIT_OK; } TEST(QUEUE_PUSH, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[2]; int i; PUSH(items); for (i = 0; i < 2; i++) { ASSERT_HEAD(i + 1); REMOVE(items, i); 
} ASSERT_EMPTY; return MUNIT_OK; } /****************************************************************************** * * QUEUE_REMOVE * *****************************************************************************/ SUITE(QUEUE_REMOVE) TEST(QUEUE_REMOVE, first, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[3]; PUSH(items); REMOVE(items, 0); ASSERT_HEAD(2); return MUNIT_OK; } TEST(QUEUE_REMOVE, second, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[3]; PUSH(items); REMOVE(items, 1); ASSERT_HEAD(1); return MUNIT_OK; } TEST(QUEUE_REMOVE, success, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[3]; PUSH(items); REMOVE(items, 2); ASSERT_HEAD(1); return MUNIT_OK; } /****************************************************************************** * * QUEUE_TAIL * *****************************************************************************/ SUITE(QUEUE_TAIL) TEST(QUEUE_TAIL, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[1]; PUSH(items); ASSERT_TAIL(1); return MUNIT_OK; } TEST(QUEUE_TAIL, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[2]; PUSH(items); ASSERT_TAIL(2); return MUNIT_OK; } TEST(QUEUE_TAIL, three, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[3]; PUSH(items); ASSERT_TAIL(3); return MUNIT_OK; } /****************************************************************************** * * QUEUE_FOREACH * *****************************************************************************/ SUITE(QUEUE_FOREACH) /* Loop through a queue of zero items. */ TEST(QUEUE_FOREACH, zero, setUp, tearDown, 0, NULL) { struct fixture *f = data; queue *head; int count = 0; QUEUE_FOREACH(head, &f->queue) { count++; } munit_assert_int(count, ==, 0); return MUNIT_OK; } /* Loop through a queue of one item. 
*/ TEST(QUEUE_FOREACH, one, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[1]; queue *head; int count = 0; PUSH(items); QUEUE_FOREACH(head, &f->queue) { count++; } munit_assert_int(count, ==, 1); return MUNIT_OK; } /* Loop through a queue of two items. The order of the loop is from the head to * the tail. */ TEST(QUEUE_FOREACH, two, setUp, tearDown, 0, NULL) { struct fixture *f = data; struct item items[2]; queue *head; int values[2] = {0, 0}; int i = 0; PUSH(items); QUEUE_FOREACH(head, &f->queue) { struct item *item; item = QUEUE_DATA(head, struct item, queue); values[i] = item->value; i++; } munit_assert_int(values[0], ==, 1); munit_assert_int(values[1], ==, 2); return MUNIT_OK; } raft-0.11.3/test/unit/test_uv_fs.c000066400000000000000000000336171415614527300170350ustar00rootroot00000000000000#include "../../src/uv_fs.h" #include "../../src/uv_os.h" #include "../lib/aio.h" #include "../lib/dir.h" #include "../lib/runner.h" /****************************************************************************** * * UvFsCheckDir * *****************************************************************************/ /* Invoke UvFsCheckDir passing it the given dir. */ #define CHECK_DIR(DIR) \ { \ int _rv; \ char _errmsg[RAFT_ERRMSG_BUF_SIZE]; \ _rv = UvFsCheckDir(DIR, _errmsg); \ munit_assert_int(_rv, ==, 0); \ } /* Invoke UvFsCheckDir passing it the given dir and check that the given error * occurs. */ #define CHECK_DIR_ERROR(DIR, RV, ERRMSG) \ { \ int _rv; \ char _errmsg[RAFT_ERRMSG_BUF_SIZE]; \ _rv = UvFsCheckDir(DIR, _errmsg); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(_errmsg, ERRMSG); \ } SUITE(UvFsCheckDir) /* If the directory exists, the function succeeds. */ TEST(UvFsCheckDir, exists, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; CHECK_DIR(dir); return MUNIT_OK; } /* If the directory doesn't exist, it an error is returned. 
*/ TEST(UvFsCheckDir, doesNotExist, DirSetUp, DirTearDown, 0, NULL) { const char *parent = data; char errmsg[RAFT_ERRMSG_BUF_SIZE]; char dir[128]; sprintf(errmsg, "%s/sub", parent); sprintf(errmsg, "directory '%s' does not exist", dir); CHECK_DIR_ERROR(dir, RAFT_NOTFOUND, errmsg); return MUNIT_OK; } /* If the process can't access the directory, an error is returned. */ TEST(UvFsCheckDir, permissionDenied, NULL, NULL, 0, NULL) { bool has_access = DirHasFile("/proc/1", "root"); /* Skip the test is the process actually has access to /proc/1/root. */ if (has_access) { return MUNIT_SKIP; } CHECK_DIR_ERROR("/proc/1/root", RAFT_UNAUTHORIZED, "can't access directory '/proc/1/root'"); return MUNIT_OK; } /* If the given path contains a non-directory prefix, an error is returned. */ TEST(UvFsCheckDir, notDirPrefix, NULL, NULL, 0, NULL) { CHECK_DIR_ERROR("/dev/null/foo", RAFT_INVALID, "path '/dev/null/foo' is not a directory"); return MUNIT_OK; } /* If the given path is not a directory, an error is returned. */ TEST(UvFsCheckDir, notDir, NULL, NULL, 0, NULL) { CHECK_DIR_ERROR("/dev/null", RAFT_INVALID, "path '/dev/null' is not a directory"); return MUNIT_OK; } /* If the given directory is not writable, an error is returned. */ TEST(UvFsCheckDir, notWritable, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; char errmsg[RAFT_ERRMSG_BUF_SIZE]; sprintf(errmsg, "directory '%s' is not writable", dir); DirMakeUnwritable(dir); CHECK_DIR_ERROR(dir, RAFT_INVALID, errmsg); return MUNIT_OK; } /****************************************************************************** * * UvFsSyncDir * *****************************************************************************/ /* Invoke UvFsSyncDir passing it the given dir. */ #define SYNC_DIR_ERROR(DIR, RV, ERRMSG) \ { \ char _errmsg[RAFT_ERRMSG_BUF_SIZE]; \ munit_assert_int(UvFsSyncDir(DIR, _errmsg), ==, RV); \ munit_assert_string_equal(_errmsg, ERRMSG); \ } SUITE(UvFsSyncDir) /* If the directory doesn't exist, an error is returned. 
*/ TEST(UvFsSyncDir, noExists, NULL, NULL, 0, NULL) { SYNC_DIR_ERROR("/abcdef", RAFT_IOERR, "open directory: no such file or directory"); return MUNIT_OK; } /****************************************************************************** * * UvFsOpenFileForReading * *****************************************************************************/ /* Open a file in the given dir. */ #define OPEN_FILE_FOR_READING_ERROR(DIR, FILENAME, RV, ERRMSG) \ { \ uv_file fd_; \ char errmsg_[RAFT_ERRMSG_BUF_SIZE]; \ int rv_ = UvFsOpenFileForReading(DIR, FILENAME, &fd_, errmsg_); \ munit_assert_int(rv_, ==, RV); \ munit_assert_string_equal(errmsg_, ERRMSG); \ } SUITE(UvFsOpenFileForReading) /* If the directory doesn't exist, an error is returned. */ TEST(UvFsOpenFileForReading, noExists, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; OPEN_FILE_FOR_READING_ERROR(dir, "foo", RAFT_IOERR, "open: no such file or directory"); return MUNIT_OK; } /****************************************************************************** * * UvFsAllocateFile * *****************************************************************************/ /* Allocate a file with the given parameters and assert that no error occurred. */ #define ALLOCATE_FILE(DIR, FILENAME, SIZE) \ { \ uv_file fd_; \ char errmsg_; \ int rv_; \ rv_ = UvFsAllocateFile(DIR, FILENAME, SIZE, &fd_, &errmsg_); \ munit_assert_int(rv_, ==, 0); \ munit_assert_int(UvOsClose(fd_), ==, 0); \ } /* Assert that creating a file with the given parameters fails with the given * code and error message. */ #define ALLOCATE_FILE_ERROR(DIR, FILENAME, SIZE, RV, ERRMSG) \ { \ uv_file fd_; \ char errmsg_[RAFT_ERRMSG_BUF_SIZE]; \ int rv_; \ rv_ = UvFsAllocateFile(DIR, FILENAME, SIZE, &fd_, errmsg_); \ munit_assert_int(rv_, ==, RV); \ munit_assert_string_equal(errmsg_, ERRMSG); \ } SUITE(UvFsAllocateFile) /* If the given path is valid, the file gets created. 
*/ TEST(UvFsAllocateFile, success, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; ALLOCATE_FILE(dir, /* dir */ "foo", /* filename */ 4096 /* size */); munit_assert_true(DirHasFile(dir, "foo")); return MUNIT_OK; } /* The directory of given path does not exist, an error is returned. */ TEST(UvFsAllocateFile, dirNoExists, NULL, NULL, 0, NULL) { ALLOCATE_FILE_ERROR("/non/existing/dir", /* dir */ "foo", /* filename */ 64, /* size */ RAFT_IOERR, /* status */ "open: no such file or directory"); return MUNIT_OK; } /* If the given path already exists, an error is returned. */ TEST(UvFsAllocateFile, fileAlreadyExists, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; char buf[8] = {0}; DirWriteFile(dir, "foo", buf, sizeof buf); ALLOCATE_FILE_ERROR(dir, /* dir */ "foo", /* filename */ 64, /* size */ RAFT_IOERR, /* status */ "open: file already exists"); return MUNIT_OK; } /* The file system has run out of space. */ TEST(UvFsAllocateFile, noSpace, DirSetUp, DirTearDown, 0, DirTmpfsParams) { const char *dir = data; if (dir == NULL) { return MUNIT_SKIP; } ALLOCATE_FILE_ERROR(dir, /* dir */ "foo", /* filename */ 4096 * 32768, /* size */ RAFT_NOSPACE, /* status */ "not enough space to allocate 134217728 bytes"); munit_assert_false(DirHasFile(dir, "foo")); return MUNIT_OK; } /****************************************************************************** * * UvFsProbeCapabilities * *****************************************************************************/ /* Invoke UvFsProbeCapabilities against the given dir and assert that it returns * the given values for direct I/O and async I/O. 
*/ #define PROBE_CAPABILITIES(DIR, DIRECT_IO, ASYNC_IO) \ { \ size_t direct_io_; \ bool async_io_; \ char errmsg_; \ int rv_; \ rv_ = UvFsProbeCapabilities(DIR, &direct_io_, &async_io_, &errmsg_); \ munit_assert_int(rv_, ==, 0); \ munit_assert_int(direct_io_, ==, DIRECT_IO); \ if (ASYNC_IO) { \ munit_assert_true(async_io_); \ } else { \ munit_assert_false(async_io_); \ } \ } /* Invoke UvFsProbeCapabilities and check that the given error occurs. */ #define PROBE_CAPABILITIES_ERROR(DIR, RV, ERRMSG) \ { \ size_t direct_io_; \ bool async_io_; \ char errmsg_[RAFT_ERRMSG_BUF_SIZE]; \ int rv_; \ rv_ = UvFsProbeCapabilities(DIR, &direct_io_, &async_io_, errmsg_); \ munit_assert_int(rv_, ==, RV); \ munit_assert_string_equal(errmsg_, ERRMSG); \ } SUITE(UvFsProbeCapabilities) TEST(UvFsProbeCapabilities, tmpfs, DirTmpfsSetUp, DirTearDown, 0, NULL) { const char *dir = data; if (dir == NULL) { return MUNIT_SKIP; } PROBE_CAPABILITIES(dir, 0, false); return MUNIT_OK; } /* ZFS 0.8 reports that it supports direct I/O, but does not support fully * support asynchronous kernel AIO. */ TEST(UvFsProbeCapabilities, zfsDirectIO, DirZfsSetUp, DirTearDown, 0, NULL) { const char *dir = data; size_t direct_io = 0; #if defined(RAFT_HAVE_ZFS_WITH_DIRECT_IO) direct_io = 4096; #endif if (dir == NULL) { return MUNIT_SKIP; } PROBE_CAPABILITIES(dir, direct_io, false); return MUNIT_OK; } #if defined(RWF_NOWAIT) /* File systems that fully support DIO. */ TEST(UvFsProbeCapabilities, aio, DirSetUp, DirTearDown, 0, DirAioParams) { const char *dir = data; if (dir == NULL) { return MUNIT_SKIP; } /* FIXME: btrfs doesn't like that we perform a first write to the probe file * to detect the direct I/O buffer size. 
*/ if (strcmp(munit_parameters_get(params, DIR_FS_PARAM), "btrfs") == 0) { return MUNIT_SKIP; } PROBE_CAPABILITIES(dir, 4096, true); return MUNIT_OK; } #endif /* RWF_NOWAIT */ /* If the given path is not executable, the block size of the underlying file * system can't be determined and an error is returned. */ TEST(UvFsProbeCapabilities, noAccess, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; DirMakeUnexecutable(dir); PROBE_CAPABILITIES_ERROR( dir, RAFT_IOERR, "create I/O capabilities probe file: open: permission denied"); return MUNIT_OK; } /* No space is left on the target device. */ TEST(UvFsProbeCapabilities, noSpace, DirTmpfsSetUp, DirTearDown, 0, NULL) { const char *dir = data; if (dir == NULL) { return MUNIT_SKIP; } DirFill(dir, 0); PROBE_CAPABILITIES_ERROR(dir, RAFT_NOSPACE, "create I/O capabilities probe file: not enough " "space to allocate 4096 bytes"); return MUNIT_OK; } #if defined(RWF_NOWAIT) /* The uvIoSetup() call fails with EAGAIN. */ TEST(UvFsProbeCapabilities, noResources, DirBtrfsSetUp, DirTearDown, 0, NULL) { const char *dir = data; aio_context_t ctx = 0; int rv; if (dir == NULL) { return MUNIT_SKIP; } rv = AioFill(&ctx, 0); if (rv != 0) { return MUNIT_SKIP; } PROBE_CAPABILITIES_ERROR(dir, RAFT_IOERR, "io_setup: resource temporarily unavailable"); AioDestroy(ctx); return MUNIT_OK; } #endif /* RWF_NOWAIT */ /****************************************************************************** * * UvFsMakeFile * *****************************************************************************/ SUITE(UvFsMakeFile) /* If the file does not exist, the function succeeds. */ TEST(UvFsMakeFile, notExists, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; int rv; char errmsg[RAFT_ERRMSG_BUF_SIZE]; struct raft_buffer bufs[2] = {{0},{0}}; rv = UvFsMakeFile(dir, "foo", bufs, 2, errmsg); munit_assert_int(rv, ==, 0); return MUNIT_OK; } /* If the file exists, the function does not succeed. 
*/ TEST(UvFsMakeFile, exists, DirSetUp, DirTearDown, 0, NULL) { const char *dir = data; int rv; char errmsg[RAFT_ERRMSG_BUF_SIZE]; struct raft_buffer bufs[2] = {{0},{0}}; rv = UvFsMakeFile(dir, "foo", bufs, 2, errmsg); munit_assert_int(rv, ==, 0); rv = UvFsMakeFile(dir, "foo", bufs, 2, errmsg); munit_assert_int(rv, !=, 0); return MUNIT_OK; } raft-0.11.3/test/unit/test_uv_writer.c000066400000000000000000000325731415614527300177410ustar00rootroot00000000000000#include "../../src/uv_fs.h" #include "../../src/uv_writer.h" #include "../lib/dir.h" #include "../lib/aio.h" #include "../lib/loop.h" #include "../lib/runner.h" /****************************************************************************** * * Fixture with a UvWriter and an open file ready for writing. * *****************************************************************************/ struct fixture { FIXTURE_DIR; FIXTURE_LOOP; int fd; size_t block_size; size_t direct_io; bool async_io; char errmsg[256]; struct UvWriter writer; bool closed; }; /****************************************************************************** * * Helper macros. * *****************************************************************************/ struct result { int status; bool done; }; static void closeCb(struct UvWriter *writer) { struct fixture *f = writer->data; f->closed = true; } static void submitCbAssertResult(struct UvWriterReq *req, int status) { struct result *result = req->data; munit_assert_int(status, ==, result->status); result->done = true; } /* Initialize the fixture's writer. */ #define INIT(MAX_WRITES) \ do { \ int _rv; \ _rv = UvWriterInit(&f->writer, &f->loop, f->fd, f->direct_io != 0, \ f->async_io, MAX_WRITES, f->errmsg); \ munit_assert_int(_rv, ==, 0); \ f->writer.data = f; \ f->closed = false; \ } while (0) /* Try to initialize the fixture's writer and check that the given error is * returned. 
*/ #define INIT_ERROR(RV, ERRMSG) \ do { \ int _rv; \ _rv = UvWriterInit(&f->writer, &f->loop, f->fd, f->direct_io != 0, \ f->async_io, 1, f->errmsg); \ munit_assert_int(_rv, ==, RV); \ munit_assert_string_equal(f->errmsg, ERRMSG); \ } while (0) /* Close helper. */ #define CLOSE_SUBMIT \ munit_assert_false(f->closed); \ UvWriterClose(&f->writer, closeCb); \ munit_assert_false(f->closed) #define CLOSE_WAIT LOOP_RUN_UNTIL(&f->closed) #define CLOSE \ CLOSE_SUBMIT; \ CLOSE_WAIT #define MAKE_BUFS(BUFS, N_BUFS, CONTENT) \ { \ int __i; \ BUFS = munit_malloc(sizeof *BUFS * N_BUFS); \ for (__i = 0; __i < N_BUFS; __i++) { \ uv_buf_t *__buf = &BUFS[__i]; \ __buf->len = f->block_size; \ __buf->base = aligned_alloc(f->block_size, f->block_size); \ munit_assert_ptr_not_null(__buf->base); \ memset(__buf->base, CONTENT + __i, __buf->len); \ } \ } #define DESTROY_BUFS(BUFS, N_BUFS) \ { \ int __i; \ for (__i = 0; __i < N_BUFS; __i++) { \ free(BUFS[__i].base); \ } \ free(BUFS); \ } #define WRITE_REQ(N_BUFS, CONTENT, OFFSET, RV, STATUS) \ struct uv_buf_t *_bufs; \ struct UvWriterReq _req; \ struct result _result = {STATUS, false}; \ int _rv; \ MAKE_BUFS(_bufs, N_BUFS, CONTENT); \ _req.data = &_result; \ _rv = UvWriterSubmit(&f->writer, &_req, _bufs, N_BUFS, OFFSET, \ submitCbAssertResult); \ munit_assert_int(_rv, ==, RV); /* Submit a write request with the given parameters and wait for the operation * to successfully complete. Deallocate BUFS when done. * * N_BUFS is the number of buffers to allocate and write, each of them will have * f->block_size bytes. * * CONTENT must be an unsigned byte value: all bytes of the first buffer will be * filled with that value, all bytes of the second buffer will be filled will * that value plus one, etc. * * OFFSET is the offset at which to write the buffers. 
*/ #define WRITE(N_BUFS, CONTENT, OFFSET) \ do { \ WRITE_REQ(N_BUFS, CONTENT, OFFSET, 0 /* rv */, 0 /* status */); \ LOOP_RUN_UNTIL(&_result.done); \ DESTROY_BUFS(_bufs, N_BUFS); \ } while (0) /* Submit a write request with the given parameters and wait for the operation * to fail with the given code and message. */ #define WRITE_FAILURE(N_BUFS, CONTENT, OFFSET, STATUS, ERRMSG) \ do { \ WRITE_REQ(N_BUFS, CONTENT, OFFSET, 0 /* rv */, STATUS); \ LOOP_RUN_UNTIL(&_result.done); \ munit_assert_string_equal(f->writer.errmsg, ERRMSG); \ DESTROY_BUFS(_bufs, N_BUFS); \ } while (0) /* Submit a write request with the given parameters, close the writer right * after and assert that the request got canceled. */ #define WRITE_CLOSE(N_BUFS, CONTENT, OFFSET, STATUS) \ do { \ WRITE_REQ(N_BUFS, CONTENT, OFFSET, 0 /* rv */, STATUS); \ CLOSE_SUBMIT; \ munit_assert_false(_result.done); \ LOOP_RUN_UNTIL(&_result.done); \ DESTROY_BUFS(_bufs, N_BUFS); \ CLOSE_WAIT; \ } while (0) /* Assert that the content of the test file has the given number of blocks, each * filled with progressive numbers. */ #define ASSERT_CONTENT(N) \ do { \ size_t _size = N * f->block_size; \ void *_buf = munit_malloc(_size); \ unsigned _i; \ unsigned _j; \ \ DirReadFile(f->dir, "foo", _buf, _size); \ \ for (_i = 0; _i < N; _i++) { \ char *cursor = (char *)_buf + _i * f->block_size; \ for (_j = 0; _j < f->block_size; _j++) { \ munit_assert_int(cursor[_j], ==, _i + 1); \ } \ } \ \ free(_buf); \ } while (0) #define N_BLOCKS 5 /****************************************************************************** * * Set up and tear down. 
* *****************************************************************************/ static void *setUpDeps(const MunitParameter params[], void *user_data) { struct fixture *f = munit_malloc(sizeof *f); char path[UV__PATH_SZ]; char errmsg[256]; int rv; SET_UP_DIR; SETUP_LOOP; rv = UvFsProbeCapabilities(f->dir, &f->direct_io, &f->async_io, errmsg); munit_assert_int(rv, ==, 0); f->block_size = f->direct_io != 0 ? f->direct_io : 4096; UvOsJoin(f->dir, "foo", path); rv = UvOsOpen(path, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR, &f->fd); munit_assert_int(rv, ==, 0); rv = UvOsFallocate(f->fd, 0, f->block_size * N_BLOCKS); munit_assert_int(rv, ==, 0); return f; } static void tearDownDeps(void *data) { struct fixture *f = data; if (f == NULL) { return; /* Was skipped. */ } UvOsClose(f->fd); TEAR_DOWN_LOOP; TEAR_DOWN_DIR; free(f); } static void *setUp(const MunitParameter params[], void *user_data) { struct fixture *f = setUpDeps(params, user_data); if (f == NULL) { return NULL; } INIT(1); return f; } static void tearDown(void *data) { struct fixture *f = data; if (f == NULL) { return; /* Was skipped. */ } CLOSE; tearDownDeps(f); } /****************************************************************************** * * UvWriterInit * *****************************************************************************/ SUITE(UvWriterInit) /* The kernel has ran out of available AIO events. 
*/ TEST(UvWriterInit, noResources, setUpDeps, tearDownDeps, 0, NULL) { struct fixture *f = data; aio_context_t ctx = 0; int rv; rv = AioFill(&ctx, 0); if (rv != 0) { return MUNIT_SKIP; } INIT_ERROR(RAFT_TOOMANY, "AIO events user limit exceeded"); AioDestroy(ctx); return MUNIT_OK; } /****************************************************************************** * * UvWriterSubmit * *****************************************************************************/ SUITE(UvWriterSubmit) TEST(UvWriterSubmit, one, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE(1 /* n bufs */, 1 /* content */, 0 /* offset */); ASSERT_CONTENT(1); return MUNIT_OK; } /* Write two buffers, one after the other. */ TEST(UvWriterSubmit, two, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE(1 /* n bufs */, 1 /* content */, 0 /* offset */); WRITE(1 /* n bufs */, 2 /* content */, f->block_size /* offset */); ASSERT_CONTENT(2); return MUNIT_OK; } /* Write the same block twice. */ TEST(UvWriterSubmit, twice, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE(1 /* n bufs */, 0 /* content */, 0 /* offset */); WRITE(1 /* n bufs */, 1 /* content */, 0 /* offset */); ASSERT_CONTENT(1); return MUNIT_OK; } /* Write a vector of buffers. */ TEST(UvWriterSubmit, vec, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE(2 /* n bufs */, 1 /* content */, 0 /* offset */); ASSERT_CONTENT(1); return MUNIT_OK; } /* Write a vector of buffers twice. */ TEST(UvWriterSubmit, vecTwice, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE(2 /* n bufs */, 1 /* content */, 0 /* offset */); WRITE(2 /* n bufs */, 1 /* content */, 0 /* offset */); ASSERT_CONTENT(2); return MUNIT_OK; } /* Write past the allocated space. 
*/ TEST(UvWriterSubmit, beyondEOF, setUp, tearDown, 0, DirAllParams) { struct fixture *f = data; int i; SKIP_IF_NO_FIXTURE; for (i = 0; i < N_BLOCKS + 1; i++) { WRITE(1 /* n bufs */, i + 1 /* content */, i * f->block_size /* offset */); } ASSERT_CONTENT((N_BLOCKS + 1)); return MUNIT_OK; } /* Write two different blocks concurrently. */ TEST(UvWriterSubmit, concurrent, NULL, NULL, 0, DirAllParams) { return MUNIT_SKIP; /* TODO: tests stop responding */ } /* Write the same block concurrently. */ TEST(UvWriterSubmit, concurrentSame, NULL, NULL, 0, DirAllParams) { return MUNIT_SKIP; /* TODO: tests stop responding */ } /* There are not enough resources to create an AIO context to perform the * write. */ TEST(UvWriterSubmit, noResources, setUpDeps, tearDown, 0, DirNoAioParams) { struct fixture *f = data; aio_context_t ctx = 0; int rv; SKIP_IF_NO_FIXTURE; INIT(2); rv = AioFill(&ctx, 0); if (rv != 0) { return MUNIT_SKIP; } WRITE_FAILURE(1, 0, 0, RAFT_TOOMANY, "AIO events user limit exceeded"); AioDestroy(ctx); return MUNIT_OK; } /****************************************************************************** * * UvWriterSubmit * *****************************************************************************/ SUITE(UvWriterClose) /* Close with an inflight write running in the threadpool. */ TEST(UvWriterClose, threadpool, setUp, tearDownDeps, 0, DirNoAioParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; WRITE_CLOSE(1, 0, 0, 0); return MUNIT_OK; } #if defined(RWF_NOWAIT) /* Close with an inflight AIO write . */ TEST(UvWriterClose, aio, setUp, tearDownDeps, 0, DirAioParams) { struct fixture *f = data; SKIP_IF_NO_FIXTURE; /* FIXME: btrfs doesn't like that we perform a first write to the probe file * to detect the direct I/O buffer size. */ if (strcmp(munit_parameters_get(params, DIR_FS_PARAM), "btrfs") == 0) { WRITE_CLOSE(1, 0, 0, 0); return MUNIT_OK; } WRITE_CLOSE(1, 0, 0, RAFT_CANCELED); return MUNIT_OK; } #endif